diff --git a/SimplifyEdits21082015Jacco b/SimplifyEdits21082015Jacco new file mode 100644 index 0000000..d993fb9 --- /dev/null +++ b/SimplifyEdits21082015Jacco @@ -0,0 +1,699 @@ +#------ short explanation ------- + +# this code simplifies unnecessarily complicated edits and removes redundant edits +# Step 0: Feasibility check: Are there any contradictory constraints? + +# For feasible edit sets: +# Step 1a: Determine Fixed variables: find variables that can attain only value. +# Step 1b: Simplify compound edits. Find out whether one or more conditional / compound edits can be replaced by single / nonconditional edit statements. +# Step 2: Identify and remove redundant edits. For example: the edit x<5 is redundant if there is also an edit x < 6. + +# for Infeasible edit sets: +# step 3: find a subset of edits whose removal restores feasibility +# step 4: find an IIS (irreducible inconsistent subset)- a small set of contradicting edits + +# The mainfunction is "CleanEdits". + +# Two parameters are : MCondConstr and MSoftconstr. These are so-called big M values, used in the MIP-formulation +# (McondConstr is used for conditional constraints and Msoftconstr for soft constraints in step 3 above) +# defaultvalue is 10,000. +# if numerical problems occur a smaller value may be desirable. +# however for constraints with large coefficients, it may be necessary to use a larger value than 10,000. + +# important: this code assumes that there are no variables with a point in its name. (other than categorical variables defined by editrules) + +#----Inititalisation + +setwd("G:/onderhanden_werk/gaafmaakonderzoek2013/editregelssander/openissue") +setwd("G:/onderhanden_werk/gaafmaakonderzoek2013") +path <- paste(getwd(),"/", sep="") +.libPaths("//dmkv1f/dmk1/kennR/R/R2.15") +library(editrules) +library(deducorrect) +library(lpSolveAPI) + +#----General helpfunctions ----------------------------------------------------------------------------------------# + +# isCatVar is an indicator for categorical variables in the editmatrix E. +isCatVar <- function(E){ + CatVar <- grepl(".",getVars(E), fixed=TRUE) #Categorical variables are variables with a point in its name. + return(CatVar)} + +# isNumEdits is an indicator for numerical edits in editmatrix E +isNumEdit <- function (E) { + numericals <- grepl("num",rownames(E), fixed=TRUE) #Numerical edits are edits with "num" in its name + return(numericals)} + +# isCatEdit is an indicator for categorical edits in editmatrix E +isCatEdit <- function(E){ + catEdits <- !isNumEdit(E) + return(catEdits)} + +# ContainsCatVar is an indicator for the presence of categorical variables in the edits of an editmatrix E +ContainsCatVar <- function(E){ + EditCatVar <- FALSE + if (sum(isCatVar(E)) > 0 ) { + EditCatVar <-rowSums(contains(E,var=getVars(E)[isCatVar(E),drop=F]))>0 + } + return(EditCatVar)} + +#DeleteEdit removes edit i from an editmatrix E. +DeleteEdit <- function (E, i){ + return(E[c(1:nrow(E))!=i]) +} + +# isVarinEdit is an indicator for the presence of variables "var" in the edits of editmatrix E. +# if a variable appears in one of the edits belonging to a conditional constraint, the indicator value will also be one for all other constraints that belong to the same conditional edit. +isVarinEdit <- function(E, var){ + isVarinEdit <- as.matrix(rowSums(contains(E,var=var, drop=FALSE)) > 0) # indicator that shows whether an edit contains at least one of the variables in var + isVarinCatEdit <- isVarinEdit & isCatEdit(E) + if (sum(isVarinCatEdit)>0){ #if variables in var appear in a categorical edit belonging to conditional constraint, the numerical edits belonging to the same conditional constraints are also selected. + NamesofCatEditswithVars <- rownames(E) [ isVarinCatEdit , drop=F] + isCondNumEditwithVar <- isNumEdit(E) & ContainsCatVar(E) & as.matrix (rowSums(contains(E, var= NamesofCatEditswithVars) > 0 )) + isVarinEdit <- isVarinEdit | isCondNumEditwithVar + } + isCondNumEditwithVar <- isNumEdit(E) & ContainsCatVar(E) & isVarinEdit + CatVarsinCondEdit <- names(which (colSums(contains(E[isCondNumEditwithVar,,drop=FALSE], var= getVars(E)[isCatVar(E)])) > 0 )) # all categorical constraints are selected that belong to the same conditional edits + if (length(CatVarsinCondEdit)>0){isVarinEdit <- isVarinEdit | (rownames(E) %in% CatVarsinCondEdit) } + return(isVarinEdit) +} + +#--------------Functions for the LP/MIP solver +# AdaptToMip changes the operators of the edits in an editmatrix E +# the operator '==' is replaced by '=' +# the operator '<' is replaced by '<= rhs - epsilon' +# the function is needed because the LP-solver can only deal with "<= and "="edits; not with "<" type of edits. +AdaptToMip <- function(E, epsilon=as.mip(E)$epsilon) { + E <- normalize(E) + A <- getA(E) + ops <- getOps(E) # possible operators: "==" "'<", "<=" , as edits are in normalform) + rhs <- getb(E) # right hand side of the edits + rhs[ops== "<"] <- rhs[ops== "<"] - epsilon + ops[ops== "<"] <- "<=" + ops[ops== "=="] <- "=" + E <- as.editmatrix(A=A,ops=ops,b=rhs) + return(E) +} + +# FillMip creates a new lpSolve linear programming object p from an editmatrix E. objfunc is a vector containing coefficients for the objective function. +FillMip <- function(E, objfunc=rep(0,ncol(E)-1)) { + A <- getA(E) + b <- getb(E) + ops <-getOps(E) + iscat <- isCatVar(E) + nvar <- ncol(A) # number of variables + ncon <- nrow(A) # number of constraints + p <- make.lp(ncon, nvar) # a new lp object is created with nvar variables and ncon constraints + for (j in 1: nvar) {set.column(p, j, A[,j])} # fill the constraintsmatrix column-wise + set.constr.type(p,ops) + set.rhs(p,b) + set.objfn(p,objfunc) # define objective function - the coefficients of the variables in the objective function are given in objfunc + set.bounds(p,lower= rep(-Inf,nvar)) # lower bound of each variable is -infinity + set.bounds(p,upper= rep(Inf,nvar)) # upper bound of each variabele is +infinity + set.type(p,which(iscat),"binary") # define binary variables + return(p) +} + +# IsFeasible is an indicator for the feasiblity of an optimization problem p. +isFeasible <- function (p) { + lp.control(p, break.at.first = TRUE, epsint= 1.0e-15, epspivot=1.0e-15) #we only need to know whether or not one feasible solution exists. Therefore, we can stop if a solution is found. Break.at.first=TRUE + result <-solve(p) # solve optimization problem + feas <- (result !=2) # result = 2 means infeasibile. + return(feas) +} + +#-------------------General functions for finding the negate of an edit + +#NegateSingelEdit replaces the i-th edit of an editmatrix E by the negate of that edit. +NegateSingleEdit <- function (E, i, epsilon =as.mip(E)$epsilon) { + A <- getA(E) + ops_in <-getOps(E) + ops_out <-ops_in + b <- getb(E) + ops_out[i][ops_in[i]=="<="] <- ">=" # the negate of an "<=" edit is an ">" edit. But, because the solver cannot deal with ">" constraints, this type of constraints is converted into ">=". + b[i][ops_in[i]=="<="] <- b[i] + epsilon + ops_out[i][ops_in[i]==">="] <- "<=" + b[i][ops_in[i]==">="] <- b[i] - epsilon + E <- as.editmatrix(A=A, ops=ops_out, b=b) + return(E) +} + +#NegateSingelEdit replaces the i-th edit of an editmatrix E by the negate of that edit. +# edit i is assumed to be a composed edit, i.e. an ifthen edit, with categorical variables +# Explanation: A composed edit is modelled as: C1 OR C2 or C3 or.... +# the negate of such an edit is given by: NOT C1 AND NOT C2 AND NOT C3 AND.. +# +NegateConditionalEdit <- function (E, i, epsilon =as.mip(E)$epsilon) { + A <- getA(E) + ops <-getOps(E) + b <- getb(E) + iscat<-isCatVar(E) + CatVarsinEditi <- names(which(A[i,]*iscat!=0)) # the names of the categorical variables in edit i + CatEdits<- CatVarsinEditi # For each categorical variable an categorical edit exists. + A[, CatVarsinEditi]<-0 # each of the categorical variabeles in edit i will get the value 0. By doing is a conditional edit is replaced by a number of unconditional edits. + ops[CatEdits] <- ">=" # replace each categorical edit, in Catedits by the negate of that edit. + b[CatEdits] <- b[CatEdits] + epsilon # the negate of a <= edit is a > edit. We use >= type of edits. There we need to add epsilon to the right hand side + E <- as.editmatrix(A=A, ops=ops, b=b) + return(E) +} + + +#------------------------------------------------------------------------------------------------------------------ +# functions for the transformation of the in- and output. + +# PrepareEdits creates an editmatrix E from an editset E and expresses the editmatrix in normalform. +PrepareEdits<-function (E, epsilon=as.mip(E)$epsilon,MCondConstr=10000) { + E <- as.mip(E, epsilon )$E + E <- normalize(E) + A <- getA(E) + A[A==-as.mip(E)$M]<- -MCondConstr # the big M values of the initial MIP problem are changed in order to prevent numerical problems + E <- as.editmatrix(A=A,ops=getOps(E), b=getb(E)) + return(E) +} + +# EditMatrixToEditSet coerces an editmatrix E to an editlist. +# function will be used to write logfiles +EditMatrixToEditSet <- function (E){ + Es <- "" + if (nrow(E)>0) { + isNumEditNoCatVar <- isNumEdit(E)& !ContainsCatVar(E) + isNumEditWithCatVar <- isNumEdit(E)& ContainsCatVar(E) + NNumEditWithCatVar <- sum(isNumEditWithCatVar) + NNumEditNoCatVar <- sum(isNumEditNoCatVar) + EsNumEditsNoCatVar <- as.matrix(as.data.frame(as.editset(E[isNumEditNoCatVar,,drop=F]))) # Numerical edits without categorical variables in matrixformat + EsNumEditsWithCatVar <- as.matrix(as.data.frame(as.editset(E[isNumEditWithCatVar,,drop=F]))) # numerical edits with categorical variables in matrixformat + if (NNumEditWithCatVar >0){ # additional operations are needed for numerical edits with categorical varibiables. These are transformed into conditional `If THEN´ edits + for (i in 1 : NNumEditWithCatVar) { + IndexEdit <- which(isNumEditWithCatVar)[i] + CatVarsinEdit <- getVars(E)[isCatVar(E)][contains(E[IndexEdit,isCatVar(E)])] + for (j in 1: (length(CatVarsinEdit))) { + CatVar <- CatVarsinEdit[j] + CatEdit <- substValue(E[CatVar,,drop=F],CatVar,0, reduce=F, removeredundant=F) # the categorical edit that has been defined for a categorical variable. In this edit the categorical variable is eliminated + if (j==1) { + CatEdit <- NegateSingleEdit(CatEdit,1) # the first part will become the `if´ part. for this part the edit needs to be negated, i.e. C1 or C2 is expressed as IF NOT C1 then C2 + dfEdit <- as.data.frame(CatEdit)$edit + dfEdit <- paste(" if (", dfEdit, ")", sep=" ") + } + if (j>1){ + dfEditNewPart <- as.data.frame(CatEdit)$edit + dfEdit <- paste (dfEdit, dfEditNewPart , sep =" " ) + if (j < length(CatVarsinEdit)) { dfEdit <- paste(dfEdit, "|", sep=" ") } + } + } + EsNumEditsWithCatVar[i,2]<-dfEdit + } + } + if ((NNumEditWithCatVar>0) & (NNumEditNoCatVar >0)) {Es <- rbind(EsNumEditsNoCatVar,EsNumEditsWithCatVar)} + if ((NNumEditWithCatVar>0) & (NNumEditNoCatVar ==0)) {Es <-EsNumEditsWithCatVar} + if ((NNumEditWithCatVar==0) & (NNumEditNoCatVar >0)) {Es <-EsNumEditsNoCatVar} + } + return(Es) +} + +#------Specific functions for step 0----feasibilitity of constraints + +# isEditsFeasible in an indicator for the feasibility of the edits in editmatrix E +isEditsFeasible<-function(E) { + E <- AdaptToMip(E) + p <- FillMip(E) # lpsolve object is made + feasible <- isFeasible(p) # test for feasilbility + return(feasible) +} + +#------Specific functions for Step 1a---fixed variables, i.e. variables that can only attain one value. + +#MinimumValue gives the minimum value for variable i in editmatrix E +MinimumValue <-function(E ,i){ + minval <- -9999 + objective <- rep(0, (ncol(E)-1)) # coefficients of objective function + objective[i] <- 1 + p <- FillMip(E, objfunc=objective) + result <-solve(p) + if (result ==0) {minval <- get.objective(p)} # result=0 means that a minimum value was found. + if (result > 0) {minval <- -9999} # if no minimum value is found, the result of the function is -9999 + return(minval) +} + +#MaximumValue gives the maximum value for variable i in Editmatrix E +MaximumValue <-function(E ,i){ + maxval <- 9999 + objective <- rep(0, (ncol(E)-1)) + objective[i] <- 1 #coefficients of objective function + p <- FillMip(E, objfunc=objective) + lp.control(p, sense="max") + result <-solve(p) + if (result ==0) {maxval <- get.objective(p)} # result=0 means that a maximum value was found. + if (result > 0) {maxval <- 9999} # if no maximum value is found, the result of the function is +9999 + return(maxval) +} + +# MinimizeEachVariable gives the minimum value for each numerical variable in Editmatrix E +MinimizeEachVariable <- function (E){ + smallest <- rep(-9999, (ncol(E)-1)) # initialisation + for (i in 1: (ncol(E)-1)) { + if (!isCatVar(E)[i]) { smallest[i] <- MinimumValue(E, i)} + } + return(smallest)} + +# MaximizeEachVariable gives the maximum value for each numerical variable in Editmatrix E +MaximizeEachVariable <- function (E){ + largest <- rep(9999, (ncol(E)-1)) # initialisation + for (i in 1: (ncol(E)-1)) { + if (!isCatVar(E)[i]) {largest[i] <- MaximumValue(E, i)} + } + return(largest)} + +# FixedValues gives the names of the fixed variables in editmatrix E, together with their values +FixedValues <- function (E){ + E <- AdaptToMip(E) # + minima <- MinimizeEachVariable (E) # a vector with minimum values + maxima <- MaximizeEachVariable (E) # a vector with maximum values + return(list("variables"=getVars(E)[minima==maxima],"values"=minima[minima==maxima])) # as output are given: the names of the variables and their values +} + +# Adds a constraint in editmatrix E for each fixed value. For example if the value of x has to be 10, the constraint x =10 is added. +AddFixedValuesAsConstraints<-function(E, fixvars, fixvalues){ + nfixed <- length(fixvars) + An <- matrix(0,nrow=nfixed, ncol=(ncol(E)-1)) + rownames(An)<- rep("num",nrow(An)) + colnames(An)<-colnames(getA(E)) + for (i in 1:(nrow(An))) {An[i,colnames(An)==fixvars[i]]<-1} + opsn <- rep("==", nfixed) + bn<-fixvalues + An <- rbind(getA(E), An) + opsn <- c(getOps(E),opsn) + bn <- c(getb(E),bn) + En <- as.editmatrix(A=An,ops=opsn,b=bn) + return("E"=En) +} + + +# Substitutes fixed values in editmatrix E. Fixed values are represented by a single constraint (e.g x=10) In all other constraints fixed variable are substituted (e.g. the value 10 is filled in for x) +SubstituteFixedValues<-function(E){ + fixedvars <- "" # initialise + fixedvals <- "" # initialise + LogOriginalEditswithFixedVars <- "" #initialise + LogAdjustedEditswithFixedVars <- "" + fixed <- FixedValues(E) + nedits <- nrow(E) + if (length (fixed$variables) > 0 ) { + fixedvars <-fixed$variables + fixedvals <-fixed$values + isEditsWithFixedvars <- isVarinEdit(E,fixedvars) + LogOriginalEditswithFixedVars<-EditMatrixToEditSet(E[isEditsWithFixedvars,,drop=F]) + E<-substValue(E,fixedvars,fixedvals,reduce=FALSE, removeredundant=FALSE) + E <- AddFixedValuesAsConstraints(E,fixedvars,fixedvals) + LogAdjustedEditswithFixedVars<-EditMatrixToEditSet(E[isEditsWithFixedvars,,drop=F]) + } + return(list("E"=E,"variables"=fixedvars,"values"=fixedvals, "OldEditsWithFixedVars"=LogOriginalEditswithFixedVars, "NewEditsWithFixedVars"= LogAdjustedEditswithFixedVars )) +} + +#------functions for Step 1b---simplify conditional edits / compound constraints +# Conditional edits are written in the form C1 or C2 or ....which is also called a compound statement +# isPartofConditionalEditAlwaysTrue is an indicator for non constraining components; components that are always satisfied (given the Edits in E) + +isPartofConditionalEditAlwaysTrue<-function (E, i){ # i is a categorical edit that refers to a component of a compound edit + A <-getA(E) + AlwaysSatisfied <- FALSE + iscat <- isCatVar(E) + if (isCatEdit(E)[i]==T) { # THE COMPonents of a composed edits are expressed as categorical variables. + nedits <- nrow(E) + CatHelpVariableInEdit <- names(which(A[i,]*iscat!=0)) # name of the categorical variable in edit i + E <- as.editmatrix(A=rbind(getA(E),getA(E)[i,,drop=F]), ops=c(getOps(E),getOps(E[i])), b=c(getb(E),getb(E)[i])) # replicates edit i + E[(nedits+1),]<-substValue(E[(nedits+1),,drop=F], CatHelpVariableInEdit , 0, reduce = FALSE, removeredundant = FALSE) # by substitution of the categorical variable the conditional edit is replaced by an unconditional edit + E <- AdaptToMip(E) + E <- NegateSingleEdit (E, (nedits+1)) #the statement is always satisfied means that it is redundant. IN order to check for redundancy we replace the edit by the negate of that edit. + p <- FillMip(E) + AlwaysSatisfied <- !isFeasible(p) # if there is no solution than the statement of edit i is redundant (or in other words: always satisfied) + } + return(AlwaysSatisfied)} + + # isPartofConditionalEditAlwaysViolated is an indicator for nonrelaxing components of a compound statement. These are components for which the edits in E imply that these are always violated ('false') . +isPartofConditionalEditAlwaysViolated<-function (E, i){ + A <-getA(E) + AlwaysViolated <- FALSE + iscat <- isCatVar(E) + if (isCatEdit(E)[i]==T) {# COMPonents of a composed edits are expressed as categorical variables. + nedits <- nrow(E) + CatVarInEdit <- names(which(A[i,]*iscat!=0)) # name of the categorical variable in edit i + E <- as.editmatrix(A=rbind(getA(E),getA(E)[i,,drop=F]), ops=c(getOps(E),getOps(E[i])), b=c(getb(E),getb(E)[i])) # replicates edit i + E[(nedits+1),]<-substValue(E[(nedits+1),,drop=F], CatVarInEdit , 0, reduce = FALSE, removeredundant = FALSE) # by substitution of the categorical variable the conditional edit is replaced by an unconditional edit + E <- AdaptToMip(E) + p <- FillMip(E) + AlwaysViolated <- !isFeasible(p) # if there is no solution, then the statement of edit i is always violated. Implicitly, it is assumed that the initial edits are noncontradictory. + } + return(AlwaysViolated)} + +# LogRedundantParts creates an editset from an editmatrix E, containing the edits that are called Catvar. +LogCatVarEdit<-function(E, CatVar){ + CatVarEdits <- E[rownames(E) %in% CatVar,,drop=F] #selection of edits with catvar in the name + CatVarEdits <- substValue( CatVarEdits ,CatVar ,rep(0,length( CatVar)),reduce=TRUE, removeredundant=TRUE) # by this substitution a conditional edit is transfered into an unconditional edit. + rownames(CatVarEdits)<-gsub(".l", "num", rownames(CatVarEdits) ) # after substitution of the categorical variable, the edit becomes numeric + CatVarEdits <- EditMatrixToEditSet(CatVarEdits) # creating the editset + return(CatVarEdits) +} + +#Transform_CategoricalEdit_into_NumericEdit replaces a conditional, categorical edits by a numeric, unconditional edit, if possible. +# After simplification of a conditional edit an unconditional edit may be obtained. +# for example: the original edit may be C1 or C2 or C3. However, if it turns out that c2 and C3 cannot occur; the edit can be simplified. It will be expressed as a single edit C1. This function transforms conditional edits into numerical, unconditional edits (if possible) +Transform_CategoricalEdit_into_NumericEdit<- function(E){ + LogNewUnconditional <-NA + LogCatVarUnconditionalEdit <-NA + A <-getA(E) + b <-getb(E) + iscat <- isCatVar(E) + NumEdits <- isNumEdit(E) + isNewUnconditionalEdit <- NumEdits & ((rowSums(A[, iscat, drop=FALSE])>0) & (b[, drop=FALSE]==0)) + if (sum(isNewUnconditionalEdit )>0) { + CatVarUnconditionalEdit <- getVars(E)[colSums(contains(E[ isNewUnconditionalEdit,,drop=F]))>0, drop=F] + LogNewUnconditional <- LogCatVarEdit(E, CatVarUnconditionalEdit) + RedundantNumEdit <- NumEdits & (rowSums(A[, CatVarUnconditionalEdit, drop=FALSE])>0) + LogCatVarUnconditionalEdit <-CatVarUnconditionalEdit + E <- E[!RedundantNumEdit,] + if (length( CatVarUnconditionalEdit)>0){ + E <- substValue(E,CatVarUnconditionalEdit,rep(0, length(CatVarUnconditionalEdit)),reduce=TRUE, removeredundant=TRUE) # by substitution of the categorical variable a conditional edit becomes an unconditional edit + rownames(E)[rownames(E)%in% CatVarUnconditionalEdit]<-paste ("num", gsub(".", "", rownames(E)[rownames(E)%in% CatVarUnconditionalEdit], fixed=TRUE ), sep= "") # the name of the edit is changed. The new name shows that the edit is numerical now. + } + } + return(list(E=E,LogNewUnconditional=LogNewUnconditional,LogCatVarUnconditionalEdit =LogCatVarUnconditionalEdit )) +} + +#SimplifyCOnditionalEdits removes non-relaxing components of compound edits. (i.e. parts of a conditional edit that are always violated) +SimplifyConditionalEdits <- function (E){ + LogNewUnconditional <- NA + LogRedundantNumEdit <-NA + iscat <- isCatVar(E) + isEditAlwaysViolated <- rep(FALSE,nrow(E)) #initialise + AlwaysViolatedEdits <- "" #initialise + RedundantPartsinConditionalEdits <- "" #initialise + ESimplified <- E #initialise + LogSimplifiedEdits <- "" + LogRedundantPartsinConditionalEdits<-"" + if (nrow(E)>0) { + for (i in 1:nrow(E)){ + if (isCatEdit(E)[i]== TRUE) {isEditAlwaysViolated[i]<-isPartofConditionalEditAlwaysViolated(E, i)} #for every categorical variable it is evaluated whether or it not it belongs to a statement (a part of a conditional edit) that is always violated. + } + AlwaysViolatedEdits <- rownames(E)[isEditAlwaysViolated,drop=F] #names of categorical edits that are always violated + CatVarAlwaysViolated <- AlwaysViolatedEdits #names of categorical variables. The names of the categorical variables are the same as the names of the edits. + ESimplified <- E[isEditAlwaysViolated==FALSE,,drop=F] + if (length(AlwaysViolatedEdits ) >0) { + ESimplified <- substValue(ESimplified , CatVarAlwaysViolated ,rep(1,length( CatVarAlwaysViolated )),reduce=TRUE, removeredundant=TRUE) # by this substitution, redundant parts of a conditional edit are removed from that edit + Transform <- Transform_CategoricalEdit_into_NumericEdit(ESimplified ) # conditional edits are replaced by unconditional edits, if possible (i.e. if there is only one component) + LogNewUnconditional<-Transform$LogNewUnconditional + ESimplified <-Transform$E + if (!is.na(Transform$LogCatVarUnconditionalEdit[1])) { LogRedundantNumEdit <-EditMatrixToEditSet(E[isVarinEdit(E,Transform$LogCatVarUnconditionalEdit),,drop=F])} + LogSimplifiedEdits <- EditMatrixToEditSet (E[isVarinEdit(E,CatVarAlwaysViolated),,drop=F]) # make a log file of the simplified edits + LogRedundantPartsinConditionalEdits <- LogCatVarEdit(E, CatVarAlwaysViolated) + } + } + return(list("E"= ESimplified,"Simplifiededits"= LogSimplifiedEdits,"SimplifiededitsRedundant" = LogRedundantPartsinConditionalEdits, "NewUnconditional"=LogNewUnconditional,"RedundantConditional"=LogRedundantNumEdit) ) +} + +# ReplaceConditionalbyUnconditional replaces a redundant conditional edit by a numeric, nonconditional edit. +# Conditional edits are stated as: C1 or C2 or.... +# if C1 is always true, then the conditional edit C1 or C2 or....is redundant and can be replaced by the unconditional edit C1. +ReplaceConditionalbyUnconditional <- function (E){ + iscat <- isCatVar(E) + NumEdit <- isNumEdit(E) + LogSimplifiedEdits <- "" #initialise + LogNewUnconditional <- "" #initialise + isEditSimplified <- rep(FALSE,nrow(E)) #initialise + ESimplified <-E #initialisatie + if (nrow(E)>0 ){ + for (i in 1:nrow(E)){ + if (NumEdit[i]== FALSE) {isEditSimplified[i]<-isPartofConditionalEditAlwaysTrue(E, i)} #for every statement in a categorical edit it is checked whether it is always true + } + CatVarNewUnconditional <- rownames(E[isEditSimplified,,drop=F]) #names of the categorical edits/variables belonging to conditions that are always true. + if (length(CatVarNewUnconditional ) > 0) { + isNumEditRedundant<- NumEdit & rowSums( contains(E[,,drop=F], var=CatVarNewUnconditional))>0 # indicator for redundant conditional edits; conditional edits that are always true. + ESimplified <- E[!isNumEditRedundant,,drop=FALSE] + ESimplified <- substValue(ESimplified,CatVarNewUnconditional ,rep(0,length(CatVarNewUnconditional )),reduce=TRUE, removeredundant=TRUE) # the imputation transforms a conditional edit into an unconditional edit + rownames(ESimplified)[rownames(ESimplified)%in% CatVarNewUnconditional]<-paste ("num", gsub(".", "", rownames(ESimplified)[rownames(ESimplified)%in% CatVarNewUnconditional], fixed=TRUE ), sep= "") # it is shown in the editname that the edit is numerical. + RedundantCatVar <- names(which(colSums(ESimplified [isNumEdit(ESimplified),isCatVar(ESimplified)])==0)) #categorical variables that do not appear (anymore) in numerical edits can be removed. + ESimplified <- ESimplified[!(rownames(ESimplified) %in% RedundantCatVar),, drop=F] # removing redundant categorical edits + LogSimplifiedEdits <- EditMatrixToEditSet (E[isVarinEdit(E,CatVarNewUnconditional),,drop=F]) # Create logfile containing all edits that include a categorical variable belonging to a statement that is always true + LogNewUnconditional <- LogCatVarEdit(E,CatVarNewUnconditional ) # create log of the parts of conditional edits that are always satisfied. + } + } + return(list("E"= ESimplified ,"ReplacedConditional"=LogSimplifiedEdits, "NewUnconditional"= LogNewUnconditional )) +} + +# Main function for step 1b +SimplifyCondEdits<- function(E){ + Simplify<-SimplifyConditionalEdits(E) + Replace<-ReplaceConditionalbyUnconditional(E=Simplify$E) + return(list("E"=Replace$E,"Simplified"=Simplify$Simplifiededits, "RedundantParts_alwaysFALSE"=Simplify$SimplifiededitsRedundant,"NewUnconditional_step1"=Simplify$NewUnconditional,"RedundantConditional_Step1"=Simplify$RedundantConditional, "NewUnconditional_step2" = Replace$NewUnconditional,"RedundantConditional_step2"=Replace$ReplacedConditional )) +} + +#------functions for step 2----- remove redundant edits. + +# isUnconditionalEditRedundant is an indicator for redundancy of an edit i in editmatrix E, where edit i is a numeric, unconditional edit +# An edit is redundant if the problem that is obtained by replacing an edit by its negate edit leads to a contradictory set of edits. +# Equality constraints are repaced by two edits: a "<=" edit and a ">=" edit. An equality edit is infeasible if the corresponding "<=" and ">=" edits are infeasible. +isUnconditionalEditRedundant <- function (E, i){ + OpsIn <- getOps(E) + OpsOut1 <- OpsIn + if (OpsOut1[i] %in% c("=", "==")) OpsOut1[i]<-"<=" + E1 <- as.editmatrix(A=getA(E),b=getb(E), ops=OpsOut1) + En <- NegateSingleEdit(E1,i) + p <- FillMip (En) + redundant <- !isFeasible(p) + if (OpsIn[i] == "=" & redundant==TRUE) { + OpsOut2 <- OpsIn + if (OpsOut1[i] %in% c("=", "==")) OpsOut1[i]<-">=" + E2 <- as.editmatrix(A=getA(E),b=getb(E), ops=OpsOut2) + En <- NegateSingleEdit(E2,i) + p <- FillMip(En) + redundant <- !isFeasible(p) + } + return(redundant) +} + +# isConditionalEditRedundant is an indicator for redundancy of an edit i in editmatrix E, where edit i is a numeric, conditional edit +isConditionalEditRedundant <- function (E, i ){ + E2 <- NegateConditionalEdit(E,i) + p <- FillMip(E2) + redundant <- !isFeasible(p) + return(redundant) +} + +# isEditRedundant is an indicator for redundancy of a numerical edit i in editmatrix E, where i is a numerical edit +isEditRedundant<-function(E, i) { + redundant<-FALSE #initialise + E <- AdaptToMip(E) + NumEditwithCatVars <- isNumEdit(E) & ContainsCatVar(E) + if (isNumEdit(E)[i]==TRUE) { + if (NumEditwithCatVars[i]==FALSE) { + redundant<-isUnconditionalEditRedundant(E, i ) + } else { + redundant<- isConditionalEditRedundant(E, i ) + } + } + return(redundant) +} + +# isRedundant is an indicator for redundancy for all edits in editmatrix E +isRedundant<-function(E) { + redundant<-rep(FALSE,nrow(E)) #initialiseer op FALSE + for (i in 1:nrow(E)){ redundant[i] <- isEditRedundant(E,i)} + return(redundant) +} + +# RemoveRedundantEdits deletes all redundant edits from an editmatrix E +RemoveRedundantEdits<-function(E){ + Estart <- E + isRedundant<-rep(FALSE,nrow(E)) #initialise + LogRedundant <- "" #initialise + NumberRedundant<-0 #initialise + if (nrow(E)>0) { + nedits <- nrow(E) + for (i in nedits:1){ + if( isEditRedundant(E,i)){ + NumberRedundant <-NumberRedundant +1 + E <- DeleteEdit(E,i) + isRedundant[i]<-TRUE + RedundantCatVar <- names(which(colSums(E [isNumEdit(E),isCatVar(E)])==0)) #categorical variables that do not appear in numerical edits (anymore) can be left out. + E <- E[!(rownames(E) %in% RedundantCatVar),, drop=F] + } + } + } + if (NumberRedundant >0) { + CatVarinRedundantEdits <- names(colSums(Estart[isRedundant,isCatVar(Estart)])) + isRedundantext <- isRedundant | (rownames(Estart) %in% CatVarinRedundantEdits) # isRedundant is extended with all categorical edits that belong to the same conditional edits as the edits in inRedundant + LogRedundant <- EditMatrixToEditSet(Estart[isRedundantext,,drop=FALSE]) + } + return(list("E"=E, "redundant"=LogRedundant)) +} + + +#------------------------------------------------------------------------------------------------------------------- + +# function for step 3: find a subset of edits whose removal restores feasibility + +#--------------------------------------------------------------------------------- + +#ReplaceEqualitiesbyTwoInequlities replaces all equality constraints by two "<=" inequality constraints. For example x = 6 is replaced by x <=6 and -x<=-6. The last edit is equivalent to x>= 6. Multiplication by -1 transforms the edit in normal form THe columns in the constraintmatrix with includevariable=False are NOT multiplied bij -1. +# the latter feature is necessary for soft constraints. The binary variable that is included to make a constraint a soft constraints does not has to be multiplied by -1 (e.g. the soft version of x=6 is given by x <= 6 + Mp1 and x>= -6 + Mp1. In both inequalities p1 has the same coefficient ) +ReplaceEqualitiesbyTwoInequalities<-function(E, IncludeVariable =rep(TRUE,ncol(E)-1)){ + E <- AdaptToMip(E) + A <- getA(E) + ops <- getOps(E) + b <- getb(E) + neq <- sum(ops=="=") + if (neq >0){ + A <- rbind(A, A[ops=="=", ,drop=F]) + A[((length(b)+1):nrow(A)),IncludeVariable]<--1* A[((length(b)+1):nrow(A)),IncludeVariable] + b <- c(b,-1*b[ops=="="]) + ops <-c(ops,ops[ops=="="]) + ops[ops=="="] <- "<=" + } + E <- as.editmatrix(A=A,b=b,ops=ops) + return(E) +} + +# TransformHardConstraintsintoSoftConstraints creates an editmatrix E in which all hard constraints are transformed into soft constraints. New binary variables are introduced that show whether or not an edit is violated +# however constraints with forcehard=1 will not be transformed into a soft constraint. +TransformHardConstraintsintoSoftConstraints <- function(E, ForceHard=rep(FALSE, sum(isNumEdit(E))),MSoftConstr=as.mip(E)$M,MCondConstr=as.mip(E)$M) { + iscat <- isCatVar(E) + nvar <- length(iscat) + isnumEdit<-isNumEdit(E) + NnumEdit <- sum(isnumEdit) + A <- getA(E) + colnamesA <-c(colnames(A),paste(".l",seq(from=(nvar+1), to=(nvar+NnumEdit)), sep="")) # new colnames for new categorical variables. + A <- cbind(A, matrix(0,nrow=nrow(A),ncol=NnumEdit)) #extending the coefficientsmatrix..nw columns for new binary variables with value =1 for violated constraints + colnames(A)<- colnamesA + coef <- -MSoftConstr + A[isnumEdit,(nvar+1):(ncol(A))]<-diag(coef,NnumEdit) + E <- as.editmatrix(A=A,ops=getOps(E), b=getb(E)) + if (sum(ForceHard)>0) { #enforcing that certain constraints cannot be violated + subvar <- getVars(E)[(nvar+1):(ncol(E)-1)][ForceHard==TRUE] + E<- substValue(E,var=subvar, value= rep(0,length(subvar)),reduce=FALSE,removeredundant=FALSE)} + return(E) +} + +#FindInconsistentEdits creates an editmatrix with a minimum number of edits that need to be deleted from editset E in order to obtain a consistent editset. +# Edits for which forcenotinconsistent=1 cannot be included in this editmatrix. +FindInconsistentEdits<-function(E, ForceNotInconsistent=rep(FALSE, sum(isNumEdit(E))),MSoftConstr=10000,MCondConstr=10000) { + EInitial <-E + nedit <- nrow(E) + nvar <-ncol(E)-1 + IsInconsistentEdit <- rep(NA, nrow(E)) + E<- AdaptToMip(E) + E<- TransformHardConstraintsintoSoftConstraints (E, ForceHard=ForceNotInconsistent, MSoftConstr=MSoftConstr, MCondConstr=MCondConstr) + nvarnew<- ncol(E)-1 + isNewCatVar <- c(rep(FALSE,nvar),rep(TRUE,(nvarnew-nvar))) + E<- ReplaceEqualitiesbyTwoInequalities(E, IncludeVariable=!isNewCatVar) + ForceNotInconsistent <- c(ForceNotInconsistent,ForceNotInconsistent[getOps(EInitial)%in% c("=","==")]) # each equality constraint will be duplicated + ObjFuncCoef <- rep(1,(nvarnew)) + ObjFuncCoef[1:nvar] <-0 # the last variables in E appear in the obtjective function. These variables correspond to the binary variables that indicate whether an edit is violated + p <- FillMip(E,ObjFuncCoef) + q<-solve(p) + if (q==0) { + q <- get.variables(p)[(nvar+1):(ncol(E)-1)] # THE RANge (nvar+1):(ncol(E)-1) is for the added categorical variables - índicators for violence of edits + if (sum(q)>0) { + InconsistentNumEdit <- rownames(EInitial[isNumEdit(EInitial),,drop=F][q==1]) + IsInconsistentNumEdit <- rownames(EInitial) %in% InconsistentNumEdit + CatVarinInconsistentEdits <-names(which(colSums(contains(EInitial[InconsistentNumEdit,,drop=F]))>0 & isCatVar(EInitial)==TRUE) ) # if a numerical edit contains categorical variables, the edits corresponding to these categorical variables are also selected + IsInconsistentEdit <- IsInconsistentNumEdit | (rownames(EInitial) %in% CatVarinInconsistentEdits) + } + } + return(IsInconsistentEdit) +} + +#-------------------------------------------------------------------------------------------- +# functions for step 4; determining an IIS - irreducible infeasible set +# the main algorithms consists of two substeps, implemented in FindIISstep1 and FindIISstep2. SubStep 1 may be omitted. +# SubStep 1 is a preselection step. A number of edits are selected that contain at least one IIS. +# SubStep 2 determines all edits that belong to one IIS . + +# Substep 1. selection of edits will be determined by applying the 'elastic filter' . +# it may happen that step 3 above was not succesfull, because of numerical problems. +# in that case all edits will be selected in this step. Actually, this means that only step 2 is conducted. +# +FindIISStep1<-function(E,MSoftConstr=10000,MCondConstr=10000){ + isEditSelected <- rep(TRUE, nrow(E)) + isInconsistentEdit <- FindInconsistentEdits(E,MSoftConstr=MSoftConstr,MCondConstr=MCondConstr) #find a minimum of edits that need to be removed in order to repair the contradiction. + if (is.na( isInconsistentEdit[1])) {print("Numerical problems! An IIS will be found. But it may contain more edits than necessary")} + if (!is.na( isInconsistentEdit[1])){ + if (sum(isInconsistentEdit)>0 ) { + isEditSelected <- rep(FALSE, nrow(E)) + ForceNotInconsistent <- isInconsistentEdit[isNumEdit(E)] + isInconsistentEdit_i <-isInconsistentEdit + while (is.na(isInconsistentEdit_i[1])!=TRUE) { + isInconsistentEdit_i <- FindInconsistentEdits(E, ForceNotInconsistent=ForceNotInconsistent,MSoftConstr=MSoftConstr,MCondConstr=MCondConstr )[isNumEdit(E)==TRUE] + if (is.na(isInconsistentEdit_i[1] )!=TRUE) { ForceNotInconsistent <- ForceNotInconsistent | isInconsistentEdit_i==TRUE } + } + isEditSelected[isNumEdit(E)] <-ForceNotInconsistent + CatVarinIISEdits <-names(which(colSums(contains(E[isEditSelected,,drop=F]))>0 & isCatVar(E)==TRUE) ) # if a numerical edit contains categorical variables, the edits corresponding to these categorical variables are also selected + isEditSelected <- isEditSelected | (rownames(E) %in% CatVarinIISEdits) + } + } + return(isEditSelected) +} + +# this second step is an implementation of the ' deletion filter' All edits not belonging to an IIS are deleted. +FindIISStep2<-function(E,MSoftConstr=10000,MCondConstr=10000){ + if (isEditsFeasible(E)==TRUE) {isEditInIIS<- rep(FALSE, nrow(E))} + if (isEditsFeasible(E)==FALSE) { + isEditInIIS<- rep(TRUE, nrow(E)) + for (i in 1: nrow(E)){ #for each edit in insetInconsistent it is checked whether it can be removed. + if (isNumEdit(E)[i]==TRUE) { + rowindex <- isEditInIIS + rowindex[i] <- F + if (isEditsFeasible(E[rowindex,,drop=F])==F) {isEditInIIS[i]<-FALSE} # if the removal of an edit still leaves behind an infeasible set, then that edit will be definitively removed + } + } + isEditInIIS[!isNumEdit(E)] <-FALSE + CatVarinIISEdits <-names(which(colSums(contains(E[isEditInIIS,,drop=F]))>0 & isCatVar(E)==TRUE) ) # if a numerical edit contains categorical variables, the edits corresponding to these categorical variables are also selected + isEditInIIS <- isEditInIIS | (rownames(E) %in% CatVarinIISEdits) + } + return (isEditInIIS) +} + +# if Step1=False, only step 2 will be perfomed. +FindIIS <-function(E,MSoftConstr=10000,MCondConstr=10000, Step1=TRUE){ + if (isEditsFeasible(E) == TRUE ) { isinIIS <- rep(FALSE,nrow(E))} # function is meant to be applied to infeasible edit sets. + if (isEditsFeasible(E) == FALSE ) { + isinIISSelection <- rep(TRUE,nrow(E)) + if (Step1==T) {isinIISSelection<-FindIISStep1(E,MSoftConstr=MSoftConstr,MCondConstr=MCondConstr)} + isinIIS <- isinIISSelection + isinIIS[isinIIS==TRUE]<-FindIISStep2(E[isinIISSelection,,drop=F],MSoftConstr=MSoftConstr,MCondConstr=MCondConstr) + } + return(isinIIS) +} + + + + + + +#Main FUNCTION--------------------------------------------------------------------------------------------------- +CleanEdits <- function(E, MSoftConstr=10000 ,MCondConstr=10000){ + E<-PrepareEdits(E, MCondConstr=MCondConstr) + if (isEditsFeasible(E)) { + SubstFixed <- SubstituteFixedValues(E) + SimpleConditional <- SimplifyCondEdits(SubstFixed$E) + RemoveRedundant <- RemoveRedundantEdits(SimpleConditional$E) + E <- EditMatrixToEditSet (RemoveRedundant$E) + ListResult <- list("CleanedEdits" =E,"Fixedvariables"=SubstFixed$variables, "Fixedvalues"=SubstFixed$values, "Edits_Simplified_BecauseOfFixedVars"=SubstFixed$OldEditsWithFixedVars,"NewEdits_after_imputation_fixedvars"=SubstFixed$NewEditsWithFixed, "RedundantParts_alwaysFALSE_partofCompoundstatement"=SimpleConditional$RedundantParts_alwaysFALSE, "Edits_Simplified_BecauseOf_Components_always_False"=SimpleConditional$Simplified, "NewUnconditionalEdits_afterremovalcomponentsalwaysFalse"=SimpleConditional$NewUnconditional_step1,"RedundantConditionalEdits_becauseOfAlwaysFalseComponents"=SimpleConditional$RedundantConditional_Step1, "NewUnconditionalEdits_ObtainedfromCompoundstatementsAlwaysTRUE" =SimpleConditional$NewUnconditional_step2,"RedundantConditionalEdits_becauseofcomponentsalwaysTRUE"=SimpleConditional$RedundantConditional_step2 ,"RemovedEdits_BecauseofRedundancy"=RemoveRedundant$redundant) + } else { + ContradictingEdits <- EditMatrixToEditSet (E[FindInconsistentEdits(E,MSoftConstr=MSoftConstr,MCondConstr=MCondConstr),,drop=F]) + IIS <- EditMatrixToEditSet (E[FindIIS(E,MSoftConstr=MSoftConstr,MCondConstr=MCondConstr),,drop=F]) + ListResult <- list("ContradictingEdit"=ContradictingEdits, "IIS"=IIS ) + } + return(ListResult )} + + + + +#----------------------------------------------------------VOORBEELDEN van een aanroep van CleanEdits----------------------------- + +E<-editfile("hardtegkort.txt") +E<-editfile("testjeIn3.txt") +CleanEdits(E,MSoftConstr=10000,MCondConstr=10000) + + +#filename<-paste(path,"Set2Cleaned.txt") +#write.csv2(HCE,file=filename, row.names=FALSE, sep=" ", quote=FALSE) + + diff --git a/pkg/R/removeRedundantJDAS.R b/pkg/R/removeRedundantJDAS.R new file mode 100644 index 0000000..53fd726 --- /dev/null +++ b/pkg/R/removeRedundantJDAS.R @@ -0,0 +1,764 @@ +#----------------- Korte Uitleg ------------------------------------------------# +# Onderstaande code vereenvoudigt onnodig ingewikkelde edits en verwijdert overbodige edits. +# +# Het proces bestaat uit vier stappen +# +# Stap 1: Controleer of de edits feasible zijn, d.w.z. controleer of er minimaal 1 oplossing is die aan alle edits voldoet. +# De stappen 2, 3 en 4 worden alleen uitgevoerd als de edits feasible zijn. +# Stap 2: Het opsporen van de 'fixed values', variabelen die slechts één mogelijke waarde mogen aannemen. +# bijv: x3 die alleen de waarde 0 mag aannemen. +# die variabelen worden in eerste instantie geimputeerd en zodoende verwijderd. +# aan het eind wordt er een edit toegevoegd die de fixed value weergeeft (bijv. x3=0) +# Stap 3: Aanpassen van conditionele edits +# 3.A. Vereenvoudigen van samengestelde conditionele edits +# Samengestelde conditionele edits zijn van de vorm: +# "IF A1 en A2 en ... THEN B1 of B2 of ...." +# Componenten ( A1, A2, B1, B2, etc) die op grond van de andere edits niet voor kunnen komen worden verwijderd +# bijv: edit1 x>0; edit 2 "if y > 0 then x <0 of z<0"; impliceren dat edit 2 vereenvoudigd kan worden tot if y >0 then z <0 +# 3.B Vervangen van conditionele edits door onconditionele edit (waar mogelijk) +# bijv.de edits ( if x< 0 then y>=0) en (if x>=0 then y>=0) impliceren de onconditionele edit y>=0. +# de onconditionele edit wordt toegevoegd. Vervolgens worden de overbodige conditionele edits verwijderd. +# Stap 4. Verwijderen van alle overige overbodige edits, bijv: de edit x < 5 is overbodig als er ook een edit x<6 bestaat. +# +# De belangrijkste functie is CleanEdits. Die functie roept alle andere functies aan. +# Hieronder worden de functies per type weergegeven. De functies CLeanedits staat achteraan. + +#----Inititalisatie: werkdirectory en libraries benoemen...aanpassen indien nodig + +setwd("G:/onderhanden_werk/gaafmaakonderzoek2013") +path <- paste(getwd(),"/", sep="") +.libPaths("//dmkv1f/dmk1/kennR/R/R2.15") +library(editrules) +library(deducorrect) +library(lpSolveAPI) + + +#----Algemene functies, eenvoudige hulpfuncties----------------------------------------------------------------------------------------# + +# isCategoricalVariable geeft aan of een variabele geheeltallig is. +isCategoricalVariable <- function(E){ + categoricalvar <- grepl(".",getVars(E), fixed=TRUE) #Variabelen met een punt in de naam zijn geheeltallig. + return(categoricalvar)} + +# isNumericalEdits geeft aan welke edits numeriek zijn +isNumericalEdit <- function (E) { + numericals <- grepl("num",rownames(E), fixed=TRUE) + return(numericals)} + +# isMixedEdit geeft aan of een edit een conditionele (dwz IF-THEN) edit is. +isMixedEdit <- function (E){ + numericals <- grepl("num",rownames(E), fixed=TRUE) # numerieke edit + mixed <- rep(FALSE,nrow(E)) + if (sum(isCategoricalVariable(E)) > 0 ) { + containscategorical <-rowSums(contains(E,var=getVars(E)[isCategoricalVariable(E),drop=F]))>0 + mixed <- numericals & containscategorical + } +return(mixed)} + +# isCatEdit geeft weer of een edit categoriaal is +isCatEdit <- function(E){ + catEdits <- grepl("num",rownames(E), fixed=TRUE) ==FALSE + return(catEdits) +} + +#DeleteEdit verwijdert een edit uit de editset E +DeleteEdit <- function (E, i){ + return(E[c(1:nrow(E))!=i]) +} + +#--------------Algemene functies, aanroep van de solver + +# AdaptToMip verandert de operatoren van edits, met als doel de edits leesbaar te maken voor de solver: +# == wordt = +# '< rhs' wordt '<= rhs - epsilon' +# De lp solver kan alleen '<=', '>=' en '=' operatoren aan, dus geen < en == +AdaptToMip <- function(E, epsilon=as.mip(E)$epsilon) { + ops <- getOps(E) # operator van de edits in de invoer "==" "'<" OF "<=" (normaalvorm) + rhs <- getb(E) # right hand side van de editmatrix (de b in het stelsel Ax <= b) + rhs[ops== "<"] <- rhs[ops== "<"] - epsilon + ops[ops== "<"] <- "<=" # "kleiner dan wordt vervangen door kleiner of gelijk dan" + ops[ops== "=="] <- "=" # dubbel gelijkteken wordt vervangen door enkel gelijkteken + return(list("A"=getA(E),"ops"=ops,"rhs"=rhs)) +} + + +# FillMip creates a new lpSolve linear program model object en vult deze. NB: De doelfunctie wordt leeggelaten. +FillMip <- function(A, ops, b, iscat, epsilon=as.mip(E)$epsilon) { + nvar <- ncol(A) # aantal variabelen + ncon <- nrow(A) # aantal constraints + p <- make.lp(ncon, nvar) # lp-probleem wordt aangemaakt met nvar variabelen en ncon constraints + for (j in 1: nvar) {set.column(p, j, A[,j])} # kolomsgewijs inlezen van de coefficienten matrix van de editmatrix + set.constr.type(p,ops) + set.rhs(p,b) + set.bounds(p,lower= rep(-Inf,nvar)) # lower bound van iedere variabele is min oneinding + set.bounds(p,upper= rep(Inf,nvar)) # upper bound van iedere variabele is plus oneinding + set.type(p,which(iscat),"binary") # benoemen van de geheeltallige variabelen; iscat wordt gebruikt als indicator. + return(p) +} + + + +isFeasible <- function (p) { + lp.control(p, break.at.first = TRUE, epsint= 1.0e-15, epspivot=1.0e-15) #Zodra er een toegelaten oplossing is gevonden kan de zoektocht naar een optimale oplossing worden afgebroken; we hoeven namelijk alleen te weten of het probleem 'feasible' is + result <-solve(p) # start optimalisatie + feas <- (result !=2) # result = 2 geeft aan dat probleem infeasible is; iedere andere code betekent 'feasible'. + return(feas) +} + + + +#-------------------Algemene functie voor het veranderen van het teken van de edits (bepalen van de negatie)-------- + +# ChangeOperatorToLargerThan verandert het teken van edit i: de edit wordt van het type "groter dan". +# De functie is bedoeld om toe te passen op "<=" edits. De functie bepaalt dus de negatie van de edit. +# Aangezien de solver geen ">" edits aankan, wordt de operator ">=" en wordt er epsilon opgeteld bij de right hand side van de edit. +ChangeOperatorToLargerThan <- function (A, ops, b, i, iscat, epsilon =as.mip(E)$epsilon) { + ops[i] <- ">=" + b[i] <- b[i] + epsilon + return(list("A"=A,"ops"=ops,"b"=b)) +} + +# ChangeOperatorToSmallerThan verandert het teken van edit i: de edit wordt van het type "kleiner dan" +# De functie is bedoeld om toe te passen op ">=" edits. De functie bepaalt dus de negatie van de edit. +# Aangezien de solver geen "<" edits aankan, wordt de operator "<=" en wordt er epsilon afgetrokken van de right hand side van de edit. +ChangeOperatorToSmallerThan <- function (A, ops, b, i, epsilon=as.mip(E)$epsilon) { + ops[i] <- "<=" + b[i] <- b[i] - epsilon + return(list("A"=A,"ops"=ops,"b"=b)) +} + +# In de MIp-formulering worden IF-THEN edits omgezet in een aantal categoriale edits. +# De geheeltallige variabelen worden gebruikt om aan te geven dat aan één van een aantal lineaire restricties moet worden voldaan. +# In ChangeOperatorsMixedEdits wordt de negatie van de conditionele edits bepaald. +# Die negatie luidt dat aan alle lineaire restricties behorende bij een IF_THEN restrictie niet wordt voldaan. +# Van alle lineaire restricties die bij één IF-THEN statement horen wordt het teken veranderd: een <= restrictie wordt veranderd in een > restrictie. +# Aangezien de solver geen ">" edits aankan, wordt de operator ">=" en wordt er epsilon opgeteld bij de right hand side van de edit. +# Bovendien worden de geheeltallige variabelen verwijderd uit de betreffende lineaire restricties. +# De geheeltallige variabelen zijn hier niet nodig omdat aan ieder van de omgekeerde lineaire edits moet worden voldaan. +ChangeOperatorsMixedEdits <- function (A, ops, b, i, iscat, epsilon =as.mip(E)$epsilon) { + catvarsInEdit <- which(A[i,]*iscat!=0) # geeft aan welke categoriale variabelen voorkomen in edit i + catnames <- names(catvarsInEdit) # de namen van deze variabelen + catedits <- rownames(A) %in% catnames #In de MIP-formulering wordt er voor iedere categoriale variabele een linaire edit gedefinieerd. De naam van die lineaire edit is de naam van de categoriale variabele. Catedits verwijst naar de hulpedits die horen bij edit i . + A[, A[i,]*iscat!=0]<-0 # de categoriale variabelen die in edit i voorkomen worden verwijderd in alle edits. HUn coefficienten worden nul in alle edits + ops[catedits] <- ">=" # teken wordt 'groter dan' + b[catedits] <- b[catedits] + epsilon + return(list("A"=A,"ops"=ops,"b"=b)) +} + +#---------------Algemene functies, opschonen van edits +# Na aanpassing van één of meerdere edits, kunnen er mogelijk nog meer edits worden aangepast. +# Hieronder staan enkele functies die aangepaste edits verder vereenvoudigen. +# De hoofdfunctie is SimplyEdits. + +# Imputeer nul voor alle categoriale variabelen die in mixed edits voorkomen met een rhs van nul. +# bijv. l1 + l2 <= 0. dan l1 =0 en l2=0. +ImputeFixedCatVarsZero <- function(E){ + A <-getA(E) + b <-getb(E) + iscat <- isCategoricalVariable(E) + numericalEdits <- isNumericalEdit(E) + ANumEditsCatvars <- A[numericalEdits, iscat, drop=FALSE] + bNumEditsCatvars <- b[numericalEdits, drop=FALSE] + Redundant <- (rowSums(ANumEditsCatvars)>0) & (bNumEditsCatvars==0) + if (sum(Redundant)>0) { + RedundantEdits <- which(Redundant) + varsinRedundantEdits <- contains(E[RedundantEdits,,drop=F]) + RedundantVar <- getVars(E)[colSums(varsinRedundantEdits)>0, drop=F] + if (length( RedundantVar)>0){E <- substValue(E,RedundantVar,rep(0, length(RedundantVar)),reduce=TRUE, removeredundant=TRUE) } + } + return(E) +} + +# Categorical variables that necesarily have to be equal to one are imputed +# if-then edits resulteren in mixed edits met de eigenschap dat de rhs kleiner is dan het aantal categoriale variabelen +# Categoriale variabelen die niet (meer) in zulke edits voorkomen kunnen worden geelimineerd +# stel bijv dat .l1 en .l2 uitsluitend in de volgende mix edits voorkomen: .l1 + .l2 <= 2 +# in dat geval mag voor .l1 en .l2 de waarde 1 worden gesubstitueerd. +# als daarnaast de edit .l1 + .l3 <= 1 zou voorkomen dan mag alleen .l2 worden gesubstitueerd. .l1 komt namelijk voor in een conditionele edit +ImputeFixedCatVarsOne <- function(E){ + iscat <- isCategoricalVariable(E) + A <-getA(E) + b <-getb(E) + numericalEdits <- isNumericalEdit(E) + ANumEditsCatvars <- A[numericalEdits, iscat, drop=FALSE] + bNumEditsCatvars <- b[numericalEdits, drop=FALSE] + Enum <- E[numericalEdits,,drop=F] + RedundantMixEdits <- (rowSums(ANumEditsCatvars)== b[numericalEdits]) & (rowSums(ANumEditsCatvars)>0) + if (sum(RedundantMixEdits)>0) { + NonRedundantVars <- colSums(contains(Enum[!RedundantMixEdits ,,drop=F], var= getVars(E)[iscat]))>0 + RedundantVar <- !NonRedundantVars + if (length(RedundantVar)>0){E <- substValue(E,names(which(RedundantVar)),rep(1, sum(RedundantVar)),reduce=TRUE, removeredundant=TRUE) } + } + return(E) +} + +# Categoriale edits met categoriale variabelen die niet (meer) in een numerieke edit voorkomen kunnen worden weggelaten. +DeleteRedundantCatEdits <- function(E){ + iscat <- isCategoricalVariable(E) + catvarnames <- getVars(E)[iscat,drop=F] + isnumericalEdits <- isNumericalEdit(E) + NumericalEdit <-E[isnumericalEdits,,drop=F] + if (sum(iscat)>0) { + isRedundantCatvar <-colSums(contains(NumericalEdit,var=catvarnames ))==0 + RedundantCatvar <- catvarnames[isRedundantCatvar, drop=F] + RedundantCatEdit <- RedundantCatvar + if (length(RedundantCatEdit) >0) { + E<-E[rownames(E) %in% RedundantCatEdit==F,,drop=F] + } + } + return(E) +} + +#categoriale edits, die na een imputatie, geen categoriale variabelen meer omvatten, zijn in feite numerical edits geworden. +# de naamgeving wordt hierop aangepast. +ChangeEditNameintoNumerical <- function(E){ + iscat <- isCategoricalVariable(E) + iscatEdits <- isCatEdit(E) + if (sum(iscatEdits)>0) { + isHiddenNumerical <- iscatEdits[iscatEdits,drop=F] + if (sum(iscat)>0 ) { + isHiddenNumerical <-rowSums(contains(E[iscatEdits,,drop=F], var=getVars(E)[iscat, drop=F]))==0 + } + #nieuwe naam wordt Num plus de naam van de categoriale variabele, maar dan zonder punt. bijv .l4 wordt numl4 + rownames(E)[iscatEdits][isHiddenNumerical]<-paste ("num", gsub(".", "", rownames(E[iscatEdits,,drop=F])[isHiddenNumerical,drop=F], fixed=TRUE ), sep= "") + } + return(E) +} + +#(nieuwe) numerieke edits met een rhs groter dan 0.5*M of kleiner dan -0,5*M worden overbodig verondersteld. +DeleteRedundantNumEdits <- function(E){ + iscat <- isCategoricalVariable(E) + isnumericalEdits <- isNumericalEdit(E) + isRedundant <- rep(FALSE, nrow(E)) + b <-as.matrix(getb(E)) + if (sum(isnumericalEdits)>0 ) { + isRedundant[isnumericalEdits] <- abs(b[isnumericalEdits])>= 0.5*as.mip(E)$M + E <-E [!isRedundant, drop=FALSE] + } + return(E) +} + +# edits die overbodig zijn geworden worden verwijderd. +SimplifyNewEdits <- function(E){ + E <- ImputeFixedCatVarsZero(E) + E <- ImputeFixedCatVarsOne(E) + E<-DeleteRedundantCatEdits(E) + E<-ChangeEditNameintoNumerical(E) + E<-DeleteRedundantNumEdits(E) + return(E) +} + +#--------------Algemene functies, transformaties van editset naar editmatrix en vice versa +# De in- en uitvoer is een editset. De bewerkingen voor het vereenvoudigen van de edits vinden plaats op een editmatrix. +# Het is dan ook nodig om de editset uit de invoerbestanden om te zetten in een editmatrix. +# Nadat de edits zijn bewerkt is het ook nodig om de editmatrix terug te vertalen naar een editset. +# Hieronder staan twee functies voor deze transformaties. + +# PrepareEdits creeert een editmatrix in de zogenaamde normaalvorm. +PrepareEdits<-function (E, epsilon=as.mip(E)$epsilon) { + E <- as.mip(E, epsilon )$E + E <- normalize(E) + return(E) +} + +# EditMatrixToEditSet zet een editmatrix om in een editset. + # Voor de mixed-edits zijn enige bewerkingen nodig. In een editset worden categoriale variabelen gebruikt. +# Een mixed statement heeft de vorm: C1 of C2 of C3.... +# Ze worden omgezet in de vorm: IF (niet C1) THEN C2 of C3 etc. +EditMatrixToEditSet <- function (E){ + Editlist <- "" + if (nrow(E)>0) { + # Basis voor de output zijn de niet-categoriale edits uit E. De categoriale edits komen niet in de editset. + # De niet-categoriale edits (Puur numeriek en mixed) worden eerst opgeslagen in een matrixformaat. + Em <- as.matrix (as.data.frame(as.editset(E))) #Em zijn de edits uit E in matrixformaat (Tekst) + Editlist <- Em[!grepl(".",Em[,1],fixed=TRUE),, drop=FALSE] #de niet categoriale edits uit Em...(basis voor de output). + # De mixed edits uit de editlist vereisen enkele bijzondere bewerkingen. Hieronder worden ze geidentificeerd + ismixed <- grepl(".l",Editlist[,2],fixed=TRUE) # de mixed edits uit editlist ...een 1 betekent mixed edit; een nul betekent puur numeriek + mixedname <- which( grepl(".l",Editlist[,2],fixed=TRUE) ) # de posities van de mixed edits in Editlist + #changes the names in Editlist...de naam begint met mix of num, gevolgd door een volgnummer. + Editlist[,1] <- paste( ifelse(ismixed,"mix", "num") , (row(Editlist)[,1]), sep="") + # De categoriale variabelen uit de editmatrix komen niet voor in de editset. Daarom worden zij geimputeerd met nul. + catVarsinE <- getVars(E)[grepl(".",getVars(E),fixed=TRUE),drop=F] #namen van de categoriale variabelen + if (length(catVarsinE > 0)) {E<-substValue(E,catVarsinE,rep(0,length(catVarsinE)),reduce=FALSE, removeredundant=FALSE)}#imputeer de waarde nul voor alle categoriale variablen in the editmatrix E...de categoriale variabelen komen namelijk niet voor in een editset + #aanmaken van een lijst met daarin op de rijen de mixed edits en in de kolommen de categoriale varhiabelen die in die mixed edit voorkomen. + VarListMixEdits <-Editlist[ismixed,2] # voorbeeld edit l1+L2 <= 1 + if (length (VarListMixEdits) >0){ + VarListMixEdits <- gsub("<", "+", VarListMixEdits) #vervang < door + bijvoorbeeld: l1 + l2 <=1 wordt l1 + l2 +=1 + CatVarsinMixEdits <- (strsplit((VarListMixEdits),"+" , fixed=TRUE )) # splitsen op +.. je krijgt dan l1,l2, =1 + # Omzetten van de mixed edits naar ' leesbare' IF-then edits. + for (i in 1 : length(CatVarsinMixEdits)) { + for (j in 1 :length(CatVarsinMixEdits[[i]])-1 ) { # in alle kolommen, muv de laatste, staan categoriale variabelen. + editname <- gsub(" ","",CatVarsinMixEdits[[i]][j]) # de namen van de categoriale variabelen corresponderen met categoriale edits in E + edit <- E[which(rownames(E)==editname),] + editdf <- as.data.frame(edit)$edit # de betreffende edit wordt gekopieerd uit E en omgezet in een dataframe + if (j==1) { + editdf <- gsub("<=", ">", editdf) #een mixed edit wordt omgezet van het formaat C1 of C2...of Cn naar IF not C1 THen C2 of...of Cn. + newedit <- paste(" if (", editdf, ")", sep=" ") # de eerste component (j==1) komt in het if-deel. Het <= teken vervangen door > + } + if (j>1){ # tweede, derder, vierde term komen in het "THEN" gedeelte + newedit <- paste (newedit, editdf , sep =" " ) # in het then deel wordt de editdf gewoon overgenomen + if (j < length(CatVarsinMixEdits[[i]])-1) { newedit <- paste(newedit, "|", sep=" ") } + } + } + Editlist[mixedname[i],2] <- newedit #plaats de aangepaste mixed edit terug in de editlist + } + } + } + return(Editlist) +} + + +#------functies voor Stap 1----detecteren of het stelsel edits stijdig is. + +isEditsFeasible<-function(E, epsilon=as.mip(E)$epsilon) { + iscat <- isCategoricalVariable(E) # iscat is 1 voor alle geheeltallige variabelen + A <- AdaptToMip(E)$A # De coefficientenmatrix, operators en rhs worden aangepast zodanig dat deze leesbaar worden voor de solver. + ops <- AdaptToMip(E)$ops + b <- AdaptToMip(E)$rhs + p <- FillMip(A, ops, b, iscat, epsilon) # lineair programmeringsprobleem wordt aangemaakt. + feasible <- isFeasible(p) # test op feasibility + return(feasible) +} + +#------functies voor Stap 2----aanpassen van de edits voor fixed values: variabelen die slechts één mogelijke waarde mogen aannemen + +# geeft de kleinst mogelijke waarde van variabele i, gegeven de edits die worden weergegen in een lp-solve object p +MinimumValue <-function(A, p ,i){ + minval <- -9999 + objective <- rep(0, ncol(A)) # objective is een vector met de coefficienten van de doelfunctie + objective[i] <- 1 # het i-de element is 1; variabele i wordt immers geoptimaliseerd + set.objfn(p,objective) + result <-solve(p) + if (result ==0) {minval <- get.objective(p)} # result=0 betekent dat een eindig minimum is gevonden + if (result > 0) {minval <- -9999} # indien geen eindig minimum is gevonden wordt de waarde -9999 gesubstitueerd. + return(minval) +} + +# geeft de grootst mogelijke waarde van variabele i, gegeven de edits die worden weergegen in een lp-solve object p +MaximumValue <-function(A, p ,i){ + maxval <- 9999 + objective <- rep(0, ncol(A)) # objective is een vector met de coefficienten van de doelfunctie + objective[i] <- 1 # het i-de element is 1; variabele i wordt immers geoptimaliseerd + lp.control(p, sense="max") # maximalisatie van de doelfunctie + set.objfn(p,objective) + result <-solve(p) + if (result ==0) {maxval <- get.objective(p)} # result is 0 betekent dat een eindig maximum is gevonden + if (result > 0) {maxval <- 9999} # indien geen eindig minimum is gevonden wordt de waarde +9999 gesubstitueerd. + return(maxval) +} + +# +# MinimizeEachVariable resulteert in een vector met daarin de minimum waarde per variabele, gegeven een editset +# de categoriale variabelen worden niet meegenomen. +MinimizeEachVariable <- function (A, ops, b, iscat, epsilon){ + smallest <- rep(-9999, ncol(A)) # initialisatie op -9999 + p <- FillMip(A, ops, b, iscat, epsilon) + for (i in 1: ncol(A)) { + if (!iscat[i]) { smallest[i] <- MinimumValue(A, p, i)} #bepalen van minimum per variable + } + return(smallest)} + +# MinimizeEachVariable resulteert in een vector met daarin de maximum waarde per variabele, gegeven een editset +# de categoriale variabelen worden niet meegenomen. +MaximizeEachVariable <- function (A, ops, b, iscat, epsilon){ + largest <- rep(9999, ncol(A)) # initialisatie op +9999 + p <- FillMip(A, ops, b, iscat, epsilon=0.001) + for (i in 1: ncol(A)) { + if (!iscat[i]) {largest[i] <- MaximumValue(A, p, i)} # bepalen van maximum per variabele + } + return(largest)} + +# De functie geeft de variablen weer die slechts één mogelijke waarde kunnen aannemen. +# de categoriale variabelen worden niet meegenomen. +FixedValues <- function (E, epsilon=as.mip(E)$epsilon){ + iscat <- isCategoricalVariable(E) # iscat is 1 voor alle geheeltallige variabelen + Adapt <- AdaptToMip(E) + A <- Adapt$A # De coefficientenmatrix, operators en rhs worden aangepast zodanig dat deze leesbaar worden voor de solver. + ops <- Adapt$ops + b <- Adapt$rhs + minima <- MinimizeEachVariable (A, ops, b, iscat, epsilon) # een vector met daarin de minimale waarde per variabele + maxima <- MaximizeEachVariable (A, ops, b, iscat, epsilon) # een vector met daarin de maximale waarde per variabele + return(list("variables"=getVars(E)[minima==maxima],"values"=minima[minima==maxima])) # output zijn de variablenamen met een fixed value. +} + +# de edits met fixed vars worden opgeslagen in een editset. +# dit zijn edits die veranderen na de imputatie van die variabelen. +LogEditswithFixedValues <-function(E, fixedvars, fixedvals){ + DoesEditContainFixedVars <- as.matrix(rowSums(contains(E,var=fixedvars, drop=FALSE)) > 0) # indicator die aangeeft of een numerieke of categoriale edit een fixed variable omvat + #DoesEditContainFixedVars omvat nooit de mixed edits. + # mixed edits waar fixedvars in voor komen worden hieronder toegevoegd + DoesCatEditContainFixedVars <- DoesEditContainFixedVars * isCatEdit(E) + CatEditswithFixedVars <- rownames(E) [ DoesCatEditContainFixedVars==1 , drop=F] + if (length( CatEditswithFixedVars )>0) { + isMixedEditwithFixedVars <- as.matrix( (rowSums(contains(E, var= CatEditswithFixedVars) > 0 ) * (isMixedEdit(E)==TRUE)))# indicator die aangeeft of een numerieke of categoriale edit een fixed variable omvat + DoesEditContainFixedVars <-DoesEditContainFixedVars+isMixedEditwithFixedVars + } + DoesEditContainFixedVars [isCatEdit(E)] <- TRUE + EditsWithFixedVars <- EditMatrixToEditSet(E[ DoesEditContainFixedVars==1,, drop=F]) # de edits met fixed variables + ChangedEditsWithFixedVars <- EditMatrixToEditSet(substValue(E[DoesEditContainFixedVars==1,, drop=F],fixedvars,fixedvals,reduce=TRUE, removeredundant=TRUE)) # de edits met fixed variables na aanpassing + return (list("EditsWithFixedVars"=EditsWithFixedVars,"ChangedEditsWithFixedVars"=ChangedEditsWithFixedVars)) +} + +# values that can attain only one value are substituted +DetermineFixedValues<-function(E,epsilon=as.mip(E)$epsilon){ + fixedvars <- "" # initialisatie + fixedvals <- "" # initialisatie + LogFixed <- "" #initialisatie + fixed <- FixedValues(E) + if (length (fixed$variables) > 0 ) { + fixedvars <-fixed$variables #numerieke variabelen die slechts één mogelijke waarde mogen aannemen + fixedvals <-fixed$values + LogFixed <- LogEditswithFixedValues(E,fixedvars) + } + return(list("E"=E,"variables"=fixedvars,"values"=fixedvals, "EditsWithFixedVars"=LogFixed)) +} + + + +# values that can attain only one value are substituted +SubstituteFixedValues<-function(E,epsilon=as.mip(E)$epsilon){ + fixedvars <- "" # initialisatie + fixedvals <- "" # initialisatie + LogFixedEditswithFixedVars <- "" #initialisatie + LogFixedChangedEditsWithFixedVars <- "" + fixed <- FixedValues(E) + if (length (fixed$variables) > 0 ) { + fixedvars <-fixed$variables #numerieke variabelen die slechts één mogelijke waarde mogen aannemen + fixedvals <-fixed$values + LogFixed <- LogEditswithFixedValues(E,fixedvars, fixedvals) + LogFixedEditswithFixedVars<-LogFixed$EditsWithFixedVars + LogFixedChangedEditsWithFixedVars<-LogFixed$ChangedEditsWithFixedVars + + E<-substValue(E,fixedvars,fixedvals,reduce=TRUE, removeredundant=TRUE) # de fixed values worden gesubstitueerd + E<-SimplifyNewEdits (E) # Zo mogelijk worden de resulterende edits verder vereenvoudigd. + } + return(list("E"=E,"variables"=fixedvars,"values"=fixedvals, "OldEditsWithFixedVars"=LogFixedEditswithFixedVars, "NewEditsWithFixedVars"= LogFixedChangedEditsWithFixedVars )) +} + + +# Uitgangssituatie: een edit set waarin de fixed values, variabelen die slechts één waarde kunnen aannemen, zijn gesubstitueerd. +# ze komen dus niet meer voor. +# Aan het einde van het gehele opschoonproces worden de fixed values weer toegevoegd in de vorm van een edit. +# Bijv. de edit Y=100. +# DE in- en uitvoer is een editset. + +AddFixedValuestoanEditSet <- function (Es, fixvars, fixvalues){ + Fs<-"" + #if (Es[1]!=Fs) { + if (Es[1] != "") {NumberofEdits <- nrow(Es)} + if (Es[1] == "") {NumberofEdits <- 0} + if (fixvars[1] != "") {NumberofFixed <- length(fixvars)} else { NumberofFixed <- 0} + Fs <- matrix (nrow= NumberofEdits +NumberofFixed, ncol= 2) + if (NumberofEdits > 0) {Fs[1: NumberofEdits ,]<- Es} + if (NumberofFixed > 0) { + for (i in 1 : NumberofFixed ) { + Fs[ NumberofEdits +i,1]<- paste ("num", i+ NumberofEdits , sep="") + Fs[ NumberofEdits +i,2]<- paste (fixvars[i], " ==", fixvalues[i] , sep="" ) + } + } + # } + return(Fs) +} + +#------functies voor Stap 3----vereenvoudigen van conditionele edits. + + +# Een edit "IF (A1 en A2 en A3 en ...) THEN (B1 of B2 of B3 of ...)" wordt intern geschreven als "niet A1" of "niet A2" of "niet A3" of...of B1 of B2 of B3. +# oftewel: C1 of C2 of ...of Cn +# De functie onderzoekt of een van deze componenten (zeg C1) altijd waar is. +# Daartoe wordt gecontroleerd of er een oplossing is voor het probleem, dat wordt verkregen door "niet C1" toe te voegen aan de edits. +# De combinatie van edits: +# C1 of C2 of....of Cn +# niet C1 +# wordt herschreven tot +# C2 of....of Cn +# niet C1 +# Indien dit tot een infeasible stelsel leidt dan is C1 altijd waar +isPartofComposedEditAlwaysTrue<-function (E, i, iscat, epsilon=as.mip(E)$epsilon){ + A <-getA(E) + CatHelpVariableInEdit <- names(which(A[i,]*iscat!=0)) # de naam van de geheeltallige variabele in edit i + F<- substValue(E, CatHelpVariableInEdit , 1, reduce = FALSE, removeredundant = FALSE) # subsitutie van de waarde 1 voor de geheeltallige variabele. Betekenis is dat er niet wordt voldaan aan edit i. In het geval i=1 wordt de bewering C1 of ...of Cn veranderd in C2 of...of Cn. + F[i,] <- E[i,] #originele edit i wordt overgenomen + F[, CatHelpVariableInEdit] <- rep(0,length(getb(F))) #de categoriale hulpvariabele in edit i worden verwijderd...edit i maakt geen deel meer uit van de samengestelde IfThen edit, maar staat op zichzelf + A <- AdaptToMip(F, epsilon)$A + ops <- AdaptToMip(F, epsilon)$ops + b <- AdaptToMip(F, epsilon)$rhs + Ai <- ChangeOperatorToLargerThan (A, ops, b, i,iscat, epsilon )$A #bepalen van de negatie van edit i + opsi<- ChangeOperatorToLargerThan (A, ops, b, i,iscat, epsilon )$ops + bi <- ChangeOperatorToLargerThan (A, ops, b, i,iscat, epsilon )$b + p <- FillMip(Ai, opsi, bi, iscat, epsilon=0.001) + redundant <- !isFeasible(p) + return(redundant)} + + +# De functie isPartofComposedEditAlwaysViolated onderzoekt of een edit i, een onderdeel van een conditionele edit, altijd wordt geschonden. +# Een edit "IF (A1 en A2 en A3 en ...) THEN (B1 of B2 of B3 of ...)" wordt intern geschreven als "niet A1" of "niet A2" of "niet A3" of...of B1 of B2 of B3. +# oftewel: C1 of C2 of ...of Cn +# De functie onderzoekt of een van deze componenten (zeg C1) altijd wordt geschonden +isPartofComposedEditAlwaysViolated<-function (E, i, iscat, epsilon=as.mip(E)$epsilon){ + A <-getA(E) + CatHelpVariableInEdit <- names(which(A[i,]*iscat!=0)) # de naam van de geheeltallige variabele in edit i + F<- substValue(E, CatHelpVariableInEdit , 0, reduce = FALSE, removeredundant = FALSE) #subsititutie van de waarde nul. Dit betekent dat aan edit i (een component van if-then functie) wordt voldaan + A <- AdaptToMip(F, epsilon)$A + ops <- AdaptToMip(F, epsilon)$ops + b <- AdaptToMip(F, epsilon)$rhs + p <- FillMip(A, ops, b, iscat, epsilon=0.001) + redundant <- !isFeasible(p) #als het mip probleem strijdig is dan betekent dat dat er niet aan edit i kan worden voldaan. Edit i is een onderdeel van een samengestelde if-then edit. Omdat edit i niet waar kan zijn kan het uit de samengestelde if-then edit worden verwijderd. + return(redundant)} + + +# In LogSimplifiedEdits wordt bijgehouden welke conditionele edits vereenvoudigd worden. Resultaat wordt weggeschreven in een editset +# RedundantCatVar omvat de categoriale variabelen behorende bij een overbodige onderdeel van een if-then edit +LogSimplifiedEdits <- function(E, RedundantCatVar){ + SimplifiedComposedEdits <- "" # initialisatie + # SimplifiedComposedEdits is true voor alle numerieke edits die vereenvoudigd worden. + # Daarnaast is de waarde true voor alle categoriale variabelen (dat komt namelijk verderop goed uit, bij het omzetten van de editmatrix naar een editset zijn ze (mogelijk) nodig, dat gebeurt in de functie Editmatrixtoeditset) + isEditSimplified <- rowSums(contains(E,RedundantCatVar,drop=FALSE))>0 + isEditSimplified[isCatEdit(E)]<-TRUE + if (sum(isEditSimplified[!isCatEdit(E)])>0) { + SimplifiedEdits <- E[isEditSimplified,, drop=F] + SimplifiedComposedEdits <- EditMatrixToEditSet(SimplifiedEdits) # omzetten naar editset ...bij het omzetten van conditionele edits zijn de definities van de categoriale edits nodig. Vandaar dat alle categoriale edits hier ook worden meegenomen + + } + return( SimplifiedComposedEdits) +} + +# LogRedundantPartsinMixedEdits houdt bij welke onderdelen van if-then edits overbodig zijn. +# RedundantCatVar omvat de categoriale variabelen behorende bij een overbodige onderdeel van een if-then edit +LogRedundantPartsinMixedEdits<-function(E, RedundantCatVar){ + RedundantParts <- E[rownames(E) %in% RedundantCatVar,,drop=F] #hier worden categoriale edits geselecteerd die een component beschrijven vaan een mixed edit waar altijd aan wordt voldaan. + RedundantParts <- substValue( RedundantParts ,RedundantCatVar ,rep(0,length( RedundantCatVar)),reduce=TRUE, removeredundant=TRUE) # door deze subsitutie worden de categoriale variablen uit de edit verwijderd. Die zijn hier niet nodig. + rownames(RedundantParts)<-gsub(".l", "num", rownames(RedundantParts) ) # door de bovenstaande imputatie zijn de categoriale edits veranderd in een numerieke edit. Naamgeving wordt hierop aangepast Is nodig voor de functie EditMatrixToEditSet, waarin een editmatrix wordt omgezet in een editset + RedundantParts <- EditMatrixToEditSet(RedundantParts) # omzetten naar editset + + return(RedundantParts) +} + + +# Samengestelde conditionele edits worden opgeschoond; de overbodige delen worden verwijderd. +# d.w.z. onderdelen die altijd geschonden zijn. +# resulteert in de nieuwe editmatrix "ENonRedundant" +# "SimplifiedComposedEdit" is een editset met daarin de vereenvoudigde conditionele edits +# "RedundantParts" is een editset met de overbodige onderdelen van de vereenvoudigde conditionel edit.. +SimplifyComposedMixedEdits <- function (E, epsilon=as.mip(E)$epsilon){ + isredundant<-rep(FALSE,nrow(E)) #initialiseer op FALSE + iscat <- isCategoricalVariable(E) # iscat is 1 voor alle geheeltallige variabelen + isEditSimplified <- rep(FALSE,nrow(E)) #initialisatie + SimplifiedMixEdits <- "" #initialisatie + RedundantPartsinSimplifiedMixedEdits <- "" #initialisatie + ENonRedundant <- E #initialisatie + if (nrow(E)>0) { + for (i in 1:nrow(E)){ + if (isNumericalEdit(E)[i]== FALSE) {isredundant[i]<-isPartofComposedEditAlwaysViolated(E, i, iscat, epsilon)} #Voor iedere component van een samengestelde IF-THEN edit(die kan worden weergegeven met C1 of...of Cn) wordt onderzocht of daaraan voldaan kan worden. Als nooit aan een bepaalde component kan worden voldaan kan die worden weggelaten. + } + RedundantCatEdit <- rownames(E)[isredundant,drop=F] #naam van een categoriale edit behorend bij en overbodig onderdeel van een if-then edit. + RedundantCatVar <- RedundantCatEdit #categoriale variabele behorende bij een overbodig onderdeel "if-then edit" + ENonRedundant <- E[isredundant==FALSE,,drop=F] # de niet overbodige edits + if (length(RedundantCatEdit) >0) { + ENonRedundant <- substValue(ENonRedundant, RedundantCatVar ,rep(1,length( RedundantCatVar )),reduce=TRUE, removeredundant=TRUE) # subsitutie van de waarde 1 voor een categoriale var. (1 betekent dat niet aan de component Ci voldaan wordt) + ENonRedundant<- SimplifyNewEdits(ENonRedundant) # verder opschonen van de editset + SimplifiedMixEdits <- LogSimplifiedEdits (E, RedundantCatVar) + + RedundantPartsinSimplifiedMixedEdits <- LogRedundantPartsinMixedEdits(E, RedundantCatVar) + } + } + return(list("E"=ENonRedundant,"Simplifiededits"= SimplifiedMixEdits,"SimplifiededitsRedundant" = RedundantPartsinSimplifiedMixedEdits) ) +} + +# Conditionele edits worden, indien mogelijk, vervangen door een onconditionele edit. +ReplaceConditionalbyUnconditional <- function (E, epsilon=as.mip(E)$epsilon){ + unconditional <-rep(FALSE,nrow(E)) #initialiseer op FALSE + iscat <- isCategoricalVariable(E) # iscat is 1 voor alle geheeltallige variabelen + LogSimplifiedMixEdits <- "" #initialisatie + LogNewUnconditional <- "" #initialisatie + isEditSimplified <- rep(FALSE,nrow(E)) #indicator die aangeeft of een edit is aangepast. Initialisatie + ESimplified <-E #initialisatie + if (nrow(E)>0 ){ + for (i in 1:nrow(E)){ + if (isNumericalEdit(E)[i]== FALSE) {unconditional[i]<-isPartofComposedEditAlwaysTrue(E, i, iscat, epsilon)} #Voor ieder onderdeel van een samengestelde IF-THEN edit wordt onderzocht of altijd aan dat onderdeel wordt voldaan. + } + CatVarUnconditional <- rownames(E[unconditional,,drop=F]) #categoriale variabelen behorende bij onderdelen van if-then edits waar altijd aan wordt voldaaan + if (length(CatVarUnconditional ) > 0) { + isMixedEditSimplified <- rowSums( contains(E[isMixedEdit(E),,drop=F], var=CatVarUnconditional))>0 # een indicator die aangeeft welke mixed edits overbodig zijn omdat zij een component omvatten waar altijd aan wordt voldaan + SimplefiedMixedEdits <- E[isMixedEdit(E),,drop=F][isMixedEditSimplified==TRUE,drop=F] + ESimplified <- substValue(E,CatVarUnconditional ,rep(0,length(CatVarUnconditional )),reduce=TRUE, removeredundant=TRUE) # imputatie van de categoriale variabelen die staan voor onderdelen van if-then edits waaar altijd aan wordt voldaan. + ESimplified <- ESimplified [(rownames( ESimplified ) %in% rownames(SimplefiedMixedEdits)==FALSE),,drop=FALSE] # de overbodige edits worden weggehaald + ESimplified <-SimplifyNewEdits ( ESimplified )# verder opschonen van de editset + LogSimplifiedMixEdits <- LogSimplifiedEdits (E, CatVarUnconditional ) # maken logfile van aangepaste mixed edits + LogNewUnconditional <- LogRedundantPartsinMixedEdits(E,CatVarUnconditional ) # maken logfile over de componenten van mixed edits waar altijd aan wordt voldaan + } + } + + return(list("E"= ESimplified ,"ReplacedConditional"=LogSimplifiedMixEdits, "NewUnconditional"= LogNewUnconditional )) +} + +# de hoofdfunctie voor het aanpassen van de conditionele edits. +SimplifyConditionalEdits <- function(E, epsilon=as.mip(E)$epsilon){ + Simplify<-SimplifyComposedMixedEdits(E, epsilon=as.mip(E)$epsilon) + Replace<-ReplaceConditionalbyUnconditional(E=Simplify$E, epsilon=as.mip(E)$epsilon) + return(list("E"=Replace$E,"Simplified"=Simplify$Simplifiededits, "RedundantParts"=Simplify$SimplifiededitsRedundant, "ReplacedConditional"=Replace$ReplacedConditional, "NewUnconditional" = Replace$NewUnconditional)) +} + + +#------functies voor Stap 4----verwijderen van overbodige edits + +# isUnconditionalEditRedundant geeft aan of een onconditionele edit i redundant is. +# Uitgangssituatie: een normalised editset, met "<=" en "=" edits. +# Strikte ongelijkheden (<) komen niet voor; voorafgaande aan deze procedure worden zij omgezet in niet-strikte ongelijkheden +# Om uit te zoeken of een "<= edit" redundant is, wordt het het teken van de edit omgedraaid: het teken wordt ">". +# Bovenstaande gebeurt door aanroep van ChangeOperatorToLargerThan +# als dit er toe leidt dat er geen feasible solution is,dan is de betreffende edit redundant. +# Gelijkheidsrestricties worden geschreven als combinatie van een "<=" en een ">=" restrictie; beide worden op redundantie getest. +# ALs beide restricties redundant zijn, is de oorspronkelijke gelijkheidsrestrictie redundant. +isUnconditionalEditRedundant <- function (A, ops, b, i,iscat, epsilon=as.mip(E)$epsilon ){ + Ai <- ChangeOperatorToLargerThan (A, ops, b, i,iscat, epsilon )$A + opsi<- ChangeOperatorToLargerThan (A, ops, b, i,iscat, epsilon )$ops + bi <- ChangeOperatorToLargerThan (A, ops, b, i,iscat, epsilon )$b + p <- FillMip (Ai, opsi, bi, iscat, epsilon=0.001) + redundant <- !isFeasible(p) + if (ops[i] == "=" & redundant==TRUE) { + Aj <- ChangeOperatorToSmallerThan (A, ops, b, i, epsilon )$A #toetsen van redundantie van het ">=" deel van een gelijkheidsrestrictie" + opsj<- ChangeOperatorToSmallerThan (A, ops, b, i, epsilon )$ops + bj <- ChangeOperatorToSmallerThan (A, ops, b, i, epsilon )$b + p <- FillMip(Aj, opsj, bj, iscat, epsilon=0.001) + redundant <- !isFeasible(p) + } + return(redundant) +} + +# isConditionalEditRedundant geeft aan of een conditionele edit i redundant is. +# Uitgangssituatie: een conditionele "<=" edit. (<= teken in iF en THEN deel) +# Strikte ongelijkheden (<) komen niet voor; voorafgaande aan deze procedure worden zij omgezet in niet-strikte ongelijkheden +# Gelijkheidsrestricties komen niet voor. Editrules accepteert geen gelijkheden in conditionele restricties. +# Om uit te zoeken of een "<= edit" redundant is, wordt het het teken van alle bijbehorende categoriale edits omgedraaid: "<=" wordt ">" en de categoriale variabelen worden verwijderd. +# als dit er toe leidt dat er geen feasible solution is,dan is de betreffende edit redundant. +isConditionalEditRedundant <- function (A, ops, b, i,iscat, epsilon=as.mip(E)$epsilon ){ + Aj <- ChangeOperatorsMixedEdits(A, ops, b, i, epsilon )$A + opsj<- ChangeOperatorsMixedEdits(A, ops, b, i, epsilon )$ops + bj <- ChangeOperatorsMixedEdits(A, ops, b, i, epsilon )$b + p <- FillMip(Aj, opsj, bj, iscat, epsilon=0.001) + redundant <- !isFeasible(p) + return(redundant) +} + +# isEditRedundant is 1 als een numerieke edit i overbodig is. +isEditRedundant<-function(E, i, epsilon=as.mip(E)$epsilon) { + redundant<-FALSE #initialiseer op FALSE + iscat <- isCategoricalVariable(E) # iscat is 1 voor alle geheeltallige variabelen + A <- AdaptToMip(E)$A # De coefficientenmatrix, operators en rhs worden aangepast zodanig dat deze leesbaar worden voor de solver. + ops <- AdaptToMip(E)$ops + b <- AdaptToMip(E)$rhs + isMixed <- isMixedEdit(E) + if (isNumericalEdit(E)[i]==TRUE) { + if (isMixed[i]==FALSE) { + redundant<-isUnconditionalEditRedundant(A, ops, b, i,iscat, epsilon ) + } else { + redundant<- isConditionalEditRedundant(A, ops, b, i,iscat, epsilon ) + } + } + return(redundant) +} + +# isRedundant geeft weer welke numerieke edits overbodig zijn +isRedundant<-function(E, epsilon=as.mip(E)$epsilon) { + redundant<-rep(FALSE,nrow(E)) #initialiseer op FALSE + for (i in 1:nrow(E)){ redundant[i] <- isEditRedundant(E,i, epsilon)} + return(redundant[isNumericalEdit(E)]) +} + +# In LogredundantEdits maakt een editset aan met de redundante edits +LogRedundantEdits <- function(E, isRedundant){ + # EditSelectiontoEditSet omvat de overbodige numerieke edits en alle categoriale edits. + # de categoriale edits worden allemaal geselecteerd, omdat sommige categoriale edits nodig zijn bij het omzetten van een editmatrix naar een editset . + isEditSelected <- ifelse(isCatEdit(E), TRUE, isRedundant ) + RedundantEdits <- EditMatrixToEditSet(E[isEditSelected,,drop=F]) # omzetten naar editset + return(RedundantEdits) +} + +# RemoveRedundantEdits verwijdert alle overbodige numerieke (puur numerieke en mixed) edits +RemoveRedundantEdits<-function(E,epsilon=as.mip(E)$epsilon){ + Estart <- E + isredundant<-rep(FALSE,nrow(E)) #initialiseer op FALSE + LogRedundant <- "" # initialisatie + NumberRedundant<-0 #initialisatie + if (nrow(E)>0) { + NumberofEdits <- nrow(E) + NumberRedundant <-0 + for (i in NumberofEdits:1){ # achteraan beginnen....lijkt makkelijker. + if( isEditRedundant(E,i, epsilon)){ # hier worden overbodige numerieke edits gedetecteerd. + NumberRedundant <-NumberRedundant +1 + E <- DeleteEdit(E,i) # weghalen overbodige numerieke edit + isredundant[i]<-TRUE + E<-SimplifyNewEdits (E) #na het weghalen van een overbodige numerieke edit kan het zo zijn dat er ook mixed edits overbodig worden. In deze functie wordt bekeken of de edits verder vereenvoudigd kunnen worden. + } + } + } + if (NumberRedundant >0) { LogRedundant <- LogRedundantEdits(Estart, isredundant)} # omzetten naar editset + return(list("E"=E, "redundant"=LogRedundant)) +} + + + + +#------Aanroep van het geheel + + +CleanEdits <- function(E, epsilon= as.mip(E)$epsilon){ + E<-PrepareEdits(E) + if (isEditsFeasible(E,epsilon)) { + SubstFixed <- SubstituteFixedValues(E, epsilon) + # SubstFixed <- DetermineFixedValues(E, epsilon)# + SimpleConditional <- SimplifyConditionalEdits(SubstFixed$E,epsilon) + + RemoveRedundant <- RemoveRedundantEdits(SimpleConditional$E, epsilon) + E <- EditMatrixToEditSet (RemoveRedundant$E) + E <- AddFixedValuestoanEditSet (E, SubstFixed$variables, SubstFixed$values)# + } else { + E <- "NOT FEASIBLE" + fixvars <- "NOT FEASIBLE" + fixvalues<- "NOT FEASIBLE" + redundant <- "NOT FEASIBLE" + Edits_Simplified_BecauseOfFixedVars<- "NOT FEASIBLE" + NewEdits_after_imputation_fixedvars <- "NOT FEASIBLE" + SimplifiedConditional<- "NOT FEASIBLE" + ReplacedConditional <- "NOT FEASIBLE" + NewUnconditional<- "NOT FEASIBLE" + redundant<- "NOT FEASIBLE" + RedundantParts <- "NOT FEASIBLE" + } + return(list("CleanedEdits" =E,"Fixedvariables"=SubstFixed$variables, "Fixedvalues"=SubstFixed$values, "Edits_Simplified_BecauseOfFixedVars"=SubstFixed$OldEditsWithFixedVars,"NewEdits_after_imputation_fixedvars"=SubstFixed$NewEditsWithFixedVars, "SimplifiedConditionalEdits"= SimpleConditional$Simplified,"SimplifiedConditionalEditsRedundantParts"=SimpleConditional$RedundantParts, "NewUnconditional"=SimpleConditional$NewUnconditional, "ConditionalReplacedbyUnconditional"=SimpleConditional$ReplacedConditional ,"RemovedEdits_BecauseofRedundancy"=RemoveRedundant$redundant ) )} + + + + + + + + + + + + + + + + + + + + + + + + +#----------------------------------------------------------VOORBEELDEN van een aanroep van CleanEdits----------------------------- + + + +E <- editfile("set2.txt") +H<- CleanEdits(E) +HC<- H$CleanedEdits +HCE <- editset(HC[,2]) + +filename<-paste(path,"Set2Cleaned.txt") +write.csv2(HCE,file=filename, row.names=FALSE, sep=" ", quote=FALSE) diff --git a/removeredundant b/removeredundant new file mode 100644 index 0000000..37d20bf --- /dev/null +++ b/removeredundant @@ -0,0 +1,651 @@ + + +#------ short explanation ------- +# this code simplifies unnecessarily complicated edits and removes redundant edits +# The process consists of four steps: +# Step 1: Feasibility of the constraints. Are there any contradictionary constraints? +# Step 2: Fixed variables: find variables that can attain only value. These variables are eliminated from the edits +# Step 3: Simplify conditional (IF-THEN) edits. Find out whether one or more unconditionale edits can be replaced by unconditional edits. +# Step 4: Identify and remove redundant edits. For example: the edit x<5 is redundant if there is also an edit x < 6. +# The function "CleanEdits" is the main function. + +#----Inititalisation + +setwd("G:/onderhanden_werk/gaafmaakonderzoek2013/") +path <- paste(getwd(),"/", sep="") +.libPaths("//dmkv1f/dmk1/kennR/R/R2.15") +library(editrules) +library(deducorrect) +library(lpSolveAPI) + +#----General helpfunctions ----------------------------------------------------------------------------------------# + +# isCatVar is an indicator for categorical variables in editmatrix E. +isCatVar <- function(E){ + CatVar <- grepl(".",getVars(E), fixed=TRUE) #Categorical variables are variables with a point in its name. + return(CatVar)} + +# isNumEdits is an indicator for numerical edits in editmatrix E +isNumEdit <- function (E) { + numericals <- grepl("num",rownames(E), fixed=TRUE) #Numerical edits are edits with "num" in its name + return(numericals)} + +# isCatEdit is an indicator for categorical edits in editmatrix E +isCatEdit <- function(E){ + catEdits <- !isNumEdit(E) + return(catEdits)} + +# ContainsCatVar is an indicator for the presence of categorical variables in the edits of an editmatrix E +ContainsCatVar <- function(E){ + EditCatVar <- FALSE + if (sum(isCatVar(E)) > 0 ) { + EditCatVar <-rowSums(contains(E,var=getVars(E)[isCatVar(E),drop=F]))>0 + } + return(EditCatVar)} + +#DeleteEdit removes edit i from an editmatrix E. +DeleteEdit <- function (E, i){ + return(E[c(1:nrow(E))!=i]) +} + +# isVarinEdit is an indicator for the presence of variables "var" in the edits of editmatrix E. +# A conditional edit is represented by a number of edits, of which only one is numerical. +# if a variable appears in one of the edits belonging to a conditional constraint, the indicator value will also be one for all other edits that belong to the same conditional edit. +isVarinEdit <- function(E, var){ + isVarinEdit <- as.matrix(rowSums(contains(E,var=var, drop=FALSE)) > 0) # indicator that shows whether an edit contains at least one of the variables in var + isVarinCatEdit <- isVarinEdit & isCatEdit(E) + if (sum(isVarinCatEdit)>0){ #if variables in var appear in a categorical edit belonging to conditional constraint, the numerical edits belonging to the same unconditional edits are also selected. + NamesofCatEditswithVars <- rownames(E) [ isVarinCatEdit , drop=F] + isCondNumEditwithVar <- isNumEdit(E) & ContainsCatVar(E) & as.matrix (rowSums(contains(E, var= NamesofCatEditswithVars) > 0 )) + isVarinEdit <- isVarinEdit | isCondNumEditwithVar + } + isCondNumEditwithVar <- isNumEdit(E) & ContainsCatVar(E) & isVarinEdit + CatVarsinCondEdit <- names(which (colSums(contains(E[isCondNumEditwithVar,,drop=FALSE], var= getVars(E)[isCatVar(E)])) > 0 )) # all categorical edits are selected that belong to the same conditional edit + if (length(CatVarsinCondEdit)>0){isVarinEdit <- isVarinEdit | (rownames(E) %in% CatVarsinCondEdit) } + return(isVarinEdit) +} + +#--------------Functions for the LP solver +# AdaptToMip changes the operators of the edits in an editmatrix E +# the operator '==' is replaced by '=' +# the operator '<' is replaced by '<= rhs - epsilon' +# the function is needed because the LP-solver can only deal with "<= and "="edits; not with "<" type of edits. +AdaptToMip <- function(E, epsilon=as.mip(E)$epsilon) { + E <- normalize(E) + A <- getA(E) + ops <- getOps(E) # possible operators: "==" "'<", "<=" , as edits are in normalform) + rhs <- getb(E) # right hand side of the edits + rhs[ops== "<"] <- rhs[ops== "<"] - epsilon + ops[ops== "<"] <- "<=" + ops[ops== "=="] <- "=" + E <- as.editmatrix(A=A,ops=ops,b=rhs) + return(E) +} + +# FillMip creates a new lpSolve linear programming object p from an editmatrix E +FillMip <- function(E, objfunc=rep(0,ncol(E)-1)) { + A <- getA(E) + b <- getb(E) + ops <-getOps(E) + iscat <- isCatVar(E) + nvar <- ncol(A) # number of variables + ncon <- nrow(A) # number of constraints + p <- make.lp(ncon, nvar) # a new lp object is created with nvar variables and ncon constraints + for (j in 1: nvar) {set.column(p, j, A[,j])} # fill the constraintsmatrix column-wise + set.constr.type(p,ops) + set.rhs(p,b) + set.objfn(p,objfunc) # define objective function - the coefficients of the variables in the objective function are given in objfunc + set.bounds(p,lower= rep(-Inf,nvar)) # lower bound of each variable is -infinity + set.bounds(p,upper= rep(Inf,nvar)) # upper bound of each variabele is +infinity + set.type(p,which(iscat),"binary") # define binary variables + return(p) +} + +# IsFeasible is an indicator for the feasiblity of an optimization problem p. +isFeasible <- function (p) { + lp.control(p, break.at.first = TRUE, epsint= 1.0e-15, epspivot=1.0e-15) #we only need to know whether or not one feasible solution exists. Therefore, we can stop if a solution is found. Break.at.first=TRUE + result <-solve(p) # solve optimization problem + feas <- (result !=2) # result = 2 means infeasibile. + return(feas) +} + +#-------------------General functions for finding the negate of an edit + +#NegateSingelEdit replaces the i-th edit of an editmatrix E by the negate of that edit. +NegateSingleEdit <- function (E, i, epsilon =as.mip(E)$epsilon) { + A <- getA(E) + ops_in <-getOps(E) + ops_out <-ops_in + b <- getb(E) + ops_out[i][ops_in[i]=="<="] <- ">=" # the negate of an "<=" edit is an ">" edit. But, because the solver cannot deal with ">" constraints, this type of constraints is converted into ">=". + b[i][ops_in[i]=="<="] <- b[i] + epsilon + ops_out[i][ops_in[i]==">="] <- "<=" + b[i][ops_in[i]==">="] <- b[i] - epsilon + E <- as.editmatrix(A=A, ops=ops_out, b=b) + return(E) +} + +#NegateSingelEdit replaces the i-th edit of an editmatrix E by the negate of that edit. +# edit i is assumed to be a composed edit, i.e. an ifthen edit, with categorical variables +# Explanation: A composed edit is modelled as: C1 OR C2 or C3 or.... +# the negate of such an edit is given by: NOT C1 AND NOT C2 AND NOT C3 AND.. +# +NegateConditionalEdit <- function (E, i, epsilon =as.mip(E)$epsilon) { + A <- getA(E) + ops <-getOps(E) + b <- getb(E) + iscat<-isCatVar(E) + CatVarsinEditi <- names(which(A[i,]*iscat!=0)) # the names of the categorical variables in edit i + CatEdits<- CatVarsinEditi # For each categorical variable an categorical edit exists. + A[, CatVarsinEditi]<-0 # each of the categorical variabeles in edit i will get the value 0. By doing is a conditional edit is replaced by a number of unconditional edits. + ops[CatEdits] <- ">=" # replace each categorical edit, in Catedits by the negate of that edit. + b[CatEdits] <- b[CatEdits] + epsilon # the negate of a <= edit is a > edit. We use >= type of edits. There we need to add epsilon to the right hand side + E <- as.editmatrix(A=A, ops=ops, b=b) + return(E) +} + + +#------------------------------------------------------------------------------------------------------------------ +# functions for the transformation of the in- and output. + +# PrepareEdits creates an editmatrix E from an editset E and expresses the editmatrix in normalform. +PrepareEdits<-function (E, epsilon=as.mip(E)$epsilon) { + E <- as.mip(E, epsilon )$E + E <- normalize(E) + return(E) +} + +# EditMatrixToEditSet coerces an editmatrix E to an editlist. +# function will be used to write logfiles +EditMatrixToEditSet <- function (E){ + Es <- "" + if (nrow(E)>0) { + isNumEditNoCatVar <- isNumEdit(E)& !ContainsCatVar(E) + isNumEditWithCatVar <- isNumEdit(E)& ContainsCatVar(E) + NNumEditWithCatVar <- sum(isNumEditWithCatVar) + NNumEditNoCatVar <- sum(isNumEditNoCatVar) + EsNumEditsNoCatVar <- as.matrix(as.data.frame(as.editset(E[isNumEditNoCatVar,,drop=F]))) # Numerical edits without categorical variables in matrixformat + EsNumEditsWithCatVar <- as.matrix(as.data.frame(as.editset(E[isNumEditWithCatVar,,drop=F]))) # numerical edits with categorical variables in matrixformat + if (NNumEditWithCatVar >0){ # additional operations are needed for numerical edits with categorical varibiables. These are transformed into conditional `If THEN´ edits + for (i in 1 : NNumEditWithCatVar) { + IndexEdit <- which(isNumEditWithCatVar)[i] + CatVarsinEdit <- getVars(E)[isCatVar(E)][contains(E[IndexEdit,isCatVar(E)])] + for (j in 1: (length(CatVarsinEdit))) { + CatVar <- CatVarsinEdit[j] + CatEdit <- substValue(E[CatVar,,drop=F],CatVar,0, reduce=F, removeredundant=F) # the categorical edit that has been defined for a categorical variable. In this edit the categorical variable is eliminated + if (j==1) { + CatEdit <- NegateSingleEdit(CatEdit,1) # the first part will become the `if´ part. for this part the edit needs to be negated, i.e. C1 or C2 is expressed as IF NOT C1 then C2 + dfEdit <- as.data.frame(CatEdit)$edit + dfEdit <- paste(" if (", dfEdit, ")", sep=" ") + } + if (j>1){ + dfEditNewPart <- as.data.frame(CatEdit)$edit + dfEdit <- paste (dfEdit, dfEditNewPart , sep =" " ) + if (j < length(CatVarsinEdit)) { dfEdit <- paste(dfEdit, "|", sep=" ") } + } + } + EsNumEditsWithCatVar[i,2]<-dfEdit + } + } + if ((NNumEditWithCatVar>0) & (NNumEditNoCatVar >0)) {Es <- rbind(EsNumEditsNoCatVar,EsNumEditsWithCatVar)} + if ((NNumEditWithCatVar>0) & (NNumEditNoCatVar ==0)) {Es <-EsNumEditsWithCatVar} + if ((NNumEditWithCatVar==0) & (NNumEditNoCatVar >0)) {Es <-EsNumEditsNoCatVar} + } + return(Es) +} + +#------Specific functions for step 1----feasibilitity of the constraints + +# isEditsFeasible in an indicator for the feasibility of the edits in editmatrix E +isEditsFeasible<-function(E, epsilon=as.mip(E)$epsilon) { + E <- AdaptToMip(E) + p <- FillMip(E) # lpsolve object is made + feasible <- isFeasible(p) # test for feasilbility + return(feasible) +} + +#------Specific functions for Step 2---determine fixed variables, i.e. variables that can only attain one value. + +#MinimumValue gives the minimum value for variable i in editmatrix E +MinimumValue <-function(E ,i){ + minval <- -9999 + objective <- rep(0, (ncol(E)-1)) # coefficients of objective function + objective[i] <- 1 + p <- FillMip(E, objfunc=objective) + result <-solve(p) + if (result ==0) {minval <- get.objective(p)} # result=0 means that a minimum value was found. + if (result > 0) {minval <- -9999} # if no minimum value is found, the result of the function is -9999 + return(minval) +} + +#MaximumValue gives the maximum value for variable i in Editmatrix E +MaximumValue <-function(E ,i){ + maxval <- 9999 + objective <- rep(0, (ncol(E)-1)) + objective[i] <- 1 #coefficients of objective function + p <- FillMip(E, objfunc=objective) + lp.control(p, sense="max") + result <-solve(p) + if (result ==0) {maxval <- get.objective(p)} # result=0 means that a maximum value was found. + if (result > 0) {maxval <- 9999} # if no maximum value is found, the result of the function is +9999 + return(maxval) +} + +# MinimizeEachVariable gives the minimum value for each numerical variable in Editmatrix E +MinimizeEachVariable <- function (E){ + smallest <- rep(-9999, (ncol(E)-1)) # initialisation + for (i in 1: (ncol(E)-1)) { + if (!isCatVar(E)[i]) { smallest[i] <- MinimumValue(E, i)} + } + return(smallest)} + +# MaximizeEachVariable gives the maximum value for each numerical variable in Editmatrix E +MaximizeEachVariable <- function (E){ + largest <- rep(9999, (ncol(E)-1)) # initialisation + for (i in 1: (ncol(E)-1)) { + if (!isCatVar(E)[i]) {largest[i] <- MaximumValue(E, i)} + } + return(largest)} + +# FixedValues gives the names of the fixed variables in editmatrix E, together with their values +FixedValues <- function (E){ + E <- AdaptToMip(E) + minima <- MinimizeEachVariable (E) # a vector with minimum values + maxima <- MaximizeEachVariable (E) # a vector with maximum values + return(list("variables"=getVars(E)[minima==maxima],"values"=minima[minima==maxima])) # as output are given: the names of the variables and their values +} + +# Adds a constraint in editmatrix E for each fixed value. For example if the value of x has to be 10, the constraint x =10 is added. +AddFixedValuesAsConstraints<-function(E, fixvars, fixvalues){ + nfixed <- length(fixvars) + An <- matrix(0,nrow=nfixed, ncol=(ncol(E)-1)) + rownames(An)<- rep("num",nrow(An)) + colnames(An)<-colnames(getA(E)) + for (i in 1:(nrow(An))) {An[i,colnames(An)==fixvars[i]]<-1} + opsn <- rep("==", nfixed) + bn<-fixvalues + An <- rbind(getA(E), An) + opsn <- c(getOps(E),opsn) + bn <- c(getb(E),bn) + En <- as.editmatrix(A=An,ops=opsn,b=bn) + return("E"=En) +} + +# Mainfunction Step 2 +# Creates a new editmatrix. The fixed values are represented by a single constraint (e.g x=10) In all other constraints the fixed variable are substituted (e.g. the value 10 is filled in for x) +SubstituteFixedValues<-function(E){ + fixedvars <- "" # initialise + fixedvals <- "" # initialise + LogOriginalEditswithFixedVars <- "" #initialise + LogAdjustedEditswithFixedVars <- "" + fixed <- FixedValues(E) + nedits <- nrow(E) + if (length (fixed$variables) > 0 ) { + fixedvars <-fixed$variables + fixedvals <-fixed$values + isEditsWithFixedvars <- isVarinEdit(E,fixedvars) + LogOriginalEditswithFixedVars<-EditMatrixToEditSet(E[isEditsWithFixedvars,,drop=F]) + E<-substValue(E,fixedvars,fixedvals,reduce=FALSE, removeredundant=FALSE) + E <- AddFixedValuesAsConstraints(E,fixedvars,fixedvals) + LogAdjustedEditswithFixedVars<-EditMatrixToEditSet(E[isEditsWithFixedvars,,drop=F]) + } + return(list("E"=E,"variables"=fixedvars,"values"=fixedvals, "OldEditsWithFixedVars"=LogOriginalEditswithFixedVars, "NewEditsWithFixedVars"= LogAdjustedEditswithFixedVars )) +} + +#------functions for Step 3---simplify conditional edits +# Conditional edits are written in the form C1 or C2 or ....where Ci is a statement that corresponds to a categorical edit i + +# isPartofConditionalEditAlwaysTrue is an indicator for categorical edits that belong to a statement Ci that is always satisfied. +isPartofConditionalEditAlwaysTrue<-function (E, i, epsilon=as.mip(E)$epsilon){ + A <-getA(E) + AlwaysSatisfied <- FALSE + iscat <- isCatVar(E) + if (isCatEdit(E)[i]==T) { # THE COMPonents of a composed edits are expressed as categorical variables. + nedits <- nrow(E) + CatHelpVariableInEdit <- names(which(A[i,]*iscat!=0)) # name of the categorical variable in edit i + E <- as.editmatrix(A=rbind(getA(E),getA(E)[i,,drop=F]), ops=c(getOps(E),getOps(E[i])), b=c(getb(E),getb(E)[i])) # replicates edit i + E[(nedits+1),]<-substValue(E[(nedits+1),,drop=F], CatHelpVariableInEdit , 0, reduce = FALSE, removeredundant = FALSE) # by substitution of the categorical variable the conditional edit is replaced by an unconditional edit + E <- AdaptToMip(E) + E <- NegateSingleEdit (E, (nedits+1)) #the statement is always satisfied means that it is redundant. IN order to check for redundancy we replace the edit by the negate of that edit. + p <- FillMip(E) + AlwaysSatisfied <- !isFeasible(p) # if there is no solution than the statement of edit i is redundant (or in other words: always satisfied) + } + return(AlwaysSatisfied)} + + # isPartofConditionalEditAlwaysViolated is an indicator for categorical edits that belong to a statement Ci that is always violated. +isPartofConditionalEditAlwaysViolated<-function (E, i, epsilon=as.mip(E)$epsilon){ + A <-getA(E) + AlwaysViolated <- FALSE + iscat <- isCatVar(E) + if (isCatEdit(E)[i]==T) {# THE COMPonents of a composed edits are expressed as categorical variables. + nedits <- nrow(E) + CatVarInEdit <- names(which(A[i,]*iscat!=0)) # name of the categorical variable in edit i + E <- as.editmatrix(A=rbind(getA(E),getA(E)[i,,drop=F]), ops=c(getOps(E),getOps(E[i])), b=c(getb(E),getb(E)[i])) # replicates edit i + E[(nedits+1),]<-substValue(E[(nedits+1),,drop=F], CatVarInEdit , 0, reduce = FALSE, removeredundant = FALSE) # by substitution of the categorical variable the conditional edit is replaced by an unconditional edit + E <- AdaptToMip(E, epsilon) + p <- FillMip(E) + AlwaysViolated <- !isFeasible(p) # if there is no solution, then the statement of edit i is always violated. Implicitly, it is assumed that the initial edits are noncontradictory. + } + return(AlwaysViolated)} + +# LogRedundantParts creates an editset from an editmatrix E, containing the edits that belong to Catvar. (remember that each cat. variable belongs to a statement / an edit) +LogCatVarEdit<-function(E, CatVar){ + CatVarEdits <- E[rownames(E) %in% CatVar,,drop=F] #selection of edits with catvar in the name + CatVarEdits <- substValue( CatVarEdits ,CatVar ,rep(0,length( CatVar)),reduce=TRUE, removeredundant=TRUE) # by this substitution a conditional edit is transfered into an unconditional edit. + rownames(CatVarEdits)<-gsub(".l", "num", rownames(CatVarEdits) ) # after substitution of the categorical variable, the edit becomes numeric + CatVarEdits <- EditMatrixToEditSet(CatVarEdits) # creating the editset + return(CatVarEdits) +} + +#Transform_CategoricalEdit_into_NumericEdit replaces a conditional, categorical edits by a numeric, unconditional edit, if possible. +# After simplification of a conditional edit an unconditional edit may be obtained. +# for example: the original edit may be C1 or C2 or C3. However, if it turns out that c2 and C3 cannot occur; the edit will be simplified. It will be expressed as C1. This function transforms such an edit into a numerical, unconditional one. +Transform_CategoricalEdit_into_NumericEdit<- function(E){ + A <-getA(E) + b <-getb(E) + iscat <- isCatVar(E) + NumEdits <- isNumEdit(E) + NewUnconditionalEdits <- NumEdits & ((rowSums(A[, iscat, drop=FALSE])>0) & (b[, drop=FALSE]==0)) + if (sum(NewUnconditionalEdits )>0) { + CatVarUnconditionalEdit <- getVars(E)[colSums(contains(E[ NewUnconditionalEdits,,drop=F]))>0, drop=F] + if (length( CatVarUnconditionalEdit)>0){ + E <- substValue(E,CatVarUnconditionalEdit,rep(0, length(CatVarUnconditionalEdit)),reduce=TRUE, removeredundant=TRUE) # by substitution of the categorical variable a conditional edit becomes an unconditional edit + rownames(E)[rownames(E)%in% CatVarUnconditionalEdit]<-paste ("num", gsub(".", "", rownames(E)[rownames(E)%in% CatVarUnconditionalEdit], fixed=TRUE ), sep= "") # the name of the edit is changed. The new name shows that the edit is numerical now. + } + } + return(E) +} + +#SimplifyCOnditionalEdits removes parts of conditional edits in editmatrix E that are always violated" +SimplifyConditionalEdits <- function (E, epsilon=as.mip(E)$epsilon){ + iscat <- isCatVar(E) + isEditAlwaysViolated <- rep(FALSE,nrow(E)) #initialise + AlwaysViolatedEdits <- "" #initialise + RedundantPartsinConditionalEdits <- "" #initialise + ESimplified <- E #initialise + LogSimplifiedEdits <- "" + LogRedundantPartsinConditionalEdits<-"" + if (nrow(E)>0) { + for (i in 1:nrow(E)){ + if (isCatEdit(E)[i]== TRUE) {isEditAlwaysViolated[i]<-isPartofConditionalEditAlwaysViolated(E, i, epsilon)} #for every categorical variable it is evaluated whether or it not it belongs to a statement (a part of a conditional edit) that is always violated. + } + AlwaysViolatedEdits <- rownames(E)[isEditAlwaysViolated,drop=F] #names of categorical edits that are always violated + CatVarAlwaysViolated <- AlwaysViolatedEdits #names of categorical variables. The names of the categorical variables are the same as the names of the edits. + ESimplified <- E[isEditAlwaysViolated==FALSE,,drop=F] + if (length(AlwaysViolatedEdits ) >0) { + ESimplified <- substValue(ESimplified , CatVarAlwaysViolated ,rep(1,length( CatVarAlwaysViolated )),reduce=TRUE, removeredundant=TRUE) # by this substitution, redundant parts of a conditional edit are removed from that edit + ESimplified <-Transform_CategoricalEdit_into_NumericEdit(ESimplified ) # conditional edits are replaced by unconditional edits, if possible (i.e. if there is only one component) + LogSimplifiedEdits <- EditMatrixToEditSet (E[isVarinEdit(E,CatVarAlwaysViolated),,drop=F]) # make a log file of the simplified edits + LogRedundantPartsinConditionalEdits <- LogCatVarEdit(E, CatVarAlwaysViolated) + } + } + return(list("E"= ESimplified,"Simplifiededits"= LogSimplifiedEdits,"SimplifiededitsRedundant" = LogRedundantPartsinConditionalEdits) ) +} + +# ReplaceConditionalbyUnconditional replaces a redundant conditional edit by a numeric, nonconditional edit. +# Conditional edits are stated as: C1 or C2 or.... +# if C1 is always true, then the conditional edit C1 or C2 or....is redundant and can be replaced by the unconditional edit C1. +ReplaceConditionalbyUnconditional <- function (E, epsilon=as.mip(E)$epsilon){ + iscat <- isCatVar(E) + NumEdit <- isNumEdit(E) + LogSimplifiedEdits <- "" #initialise + LogNewUnconditional <- "" #initialise + isEditSimplified <- rep(FALSE,nrow(E)) #initialise + ESimplified <-E #initialisatie + if (nrow(E)>0 ){ + for (i in 1:nrow(E)){ + if (NumEdit[i]== FALSE) {isEditSimplified[i]<-isPartofConditionalEditAlwaysTrue(E, i, epsilon)} #for every statement in a categorical edit it is checked whether it is always true + } + CatVarNewUnconditional <- rownames(E[isEditSimplified,,drop=F]) #names of the categorical edits/variables belonging to conditions that are always true. + if (length(CatVarNewUnconditional ) > 0) { + isNumEditRedundant<- NumEdit & rowSums( contains(E[,,drop=F], var=CatVarNewUnconditional))>0 # indicator for redundant conditional edits; conditional edits that are always true. + ESimplified <- E[!isNumEditRedundant,,drop=FALSE] + ESimplified <- substValue(ESimplified,CatVarNewUnconditional ,rep(0,length(CatVarNewUnconditional )),reduce=TRUE, removeredundant=TRUE) # the imputation transforms a conditional edit into an unconditional edit + rownames(ESimplified)[rownames(ESimplified)%in% CatVarNewUnconditional]<-paste ("num", gsub(".", "", rownames(ESimplified)[rownames(ESimplified)%in% CatVarNewUnconditional], fixed=TRUE ), sep= "") # it is shown in the editname that the edit is numerical. + RedundantCatVar <- names(which(colSums(ESimplified [isNumEdit(ESimplified),isCatVar(ESimplified)])==0)) #categorical variables that do not appear (anymore) in numerical edits can be removed. + ESimplified <- ESimplified[!(rownames(ESimplified) %in% RedundantCatVar),, drop=F] # removing redundant categorical edits + LogSimplifiedEdits <- EditMatrixToEditSet (E[isVarinEdit(E,CatVarNewUnconditional),,drop=F]) # Create logfile containing all edits that include a categorical variable belonging to a statement that is always true + LogNewUnconditional <- LogCatVarEdit(E,CatVarNewUnconditional ) # create log of the parts of conditional edits that are always satisfied. + } + } + return(list("E"= ESimplified ,"ReplacedConditional"=LogSimplifiedEdits, "NewUnconditional"= LogNewUnconditional )) +} + +# Main function for step 3 +SimplifyCondEdits<- function(E, epsilon=as.mip(E)$epsilon){ + Simplify<-SimplifyConditionalEdits(E, epsilon=as.mip(E)$epsilon) + Replace<-ReplaceConditionalbyUnconditional(E=Simplify$E, epsilon=as.mip(E)$epsilon) + return(list("E"=Replace$E,"Simplified"=Simplify$Simplifiededits, "RedundantParts"=Simplify$SimplifiededitsRedundant, "ReplacedConditional"=Replace$ReplacedConditional, "NewUnconditional" = Replace$NewUnconditional)) +} + + +#------functions for step 4----- remove redundant edits. + +# isUnconditionalEditRedundant is an indicator for redundancy of an edit i in editmatrix E, where edit i is a numeric, unconditional edit +# An edit is redundant if the problem that is obtained by replacing an edit by its negate edit leads to a contradictory set of edits. +# Equality constraints are repaced by two edits: a "<=" edit and a ">=" edit. An equality edit is infeasible if the corresponding "<=" and ">=" edits are infeasible. +isUnconditionalEditRedundant <- function (E, i, epsilon=as.mip(E)$epsilon ){ + OpsIn <- getOps(E) + OpsOut1 <- OpsIn + OpsOut1[OpsIn=="="]<-"<=" + E1 <- as.editmatrix(A=getA(E),b=getb(E), ops=OpsOut1) + En <- NegateSingleEdit(E1,i) + p <- FillMip (En) + redundant <- !isFeasible(p) + if (OpsIn[i] == "=" & redundant==TRUE) { + OpsOut2 <- OpsIn + OpsOut2[OpsIn=="="]<-">=" + E2 <- as.editmatrix(A=getA(E),b=getb(E), ops=OpsOut2) + En <- NegateSingleEdit(E2,i) + p <- FillMip(En, epsilon=as.mip(E)$epsilon) + redundant <- !isFeasible(p) + } + return(redundant) +} + +# isConditionalEditRedundant is an indicator for redundancy of an edit i in editmatrix E, where edit i is a numeric, conditional edit +isConditionalEditRedundant <- function (E, i,epsilon=as.mip(E)$epsilon ){ + E2 <- NegateConditionalEdit(E,i) + p <- FillMip(E2) + redundant <- !isFeasible(p) + return(redundant) +} + +# isEditRedundant is an indicator for redundancy of a numerical edit i in editmatrix E, where i is a numerical edit +isEditRedundant<-function(E, i, epsilon=as.mip(E)$epsilon) { + redundant<-FALSE #initialise + E <- AdaptToMip(E) + NumEditwithCatVars <- isNumEdit(E) & ContainsCatVar(E) + if (isNumEdit(E)[i]==TRUE) { + if (NumEditwithCatVars[i]==FALSE) { + redundant<-isUnconditionalEditRedundant(E, i,epsilon ) + } else { + redundant<- isConditionalEditRedundant(E, i, epsilon ) + } + } + return(redundant) +} + +# isRedundant is an indicator for redundancy for all edits in editmatrix E +isRedundant<-function(E, epsilon=as.mip(E)$epsilon) { + redundant<-rep(FALSE,nrow(E)) #initialiseer op FALSE + for (i in 1:nrow(E)){ redundant[i] <- isEditRedundant(E,i, epsilon)} + return(redundant) +} + + +# RemoveRedundantEdits deletes all redundant edits from an editmatrix E +RemoveRedundantEdits<-function(E,epsilon=as.mip(E)$epsilon){ + Estart <- E + isRedundant<-rep(FALSE,nrow(E)) #initialise + LogRedundant <- "" #initialise + NumberRedundant<-0 #initialise + if (nrow(E)>0) { + nedits <- nrow(E) + for (i in nedits:1){ + if( isEditRedundant(E,i, epsilon)){ + NumberRedundant <-NumberRedundant +1 + E <- DeleteEdit(E,i) + isRedundant[i]<-TRUE + RedundantCatVar <- names(which(colSums(E [isNumEdit(E),isCatVar(E)])==0)) #categorical variables that do not appear in numerical edits (anymore) can be left out. + E <- E[!(rownames(E) %in% RedundantCatVar),, drop=F] + } + } + } + if (NumberRedundant >0) { + CatVarinRedundantEdits <- names(colSums(Estart[isRedundant,isCatVar(Estart)])) + isRedundantext <- isRedundant | (rownames(Estart) %in% CatVarinRedundantEdits) # isRedundant is extended with all categorical edits that belong to the same conditional edits as the edits in inRedundant + LogRedundant <- EditMatrixToEditSet(Estart[isRedundantext,,drop=FALSE]) + } + return(list("E"=E, "redundant"=LogRedundant)) +} + + + + +#------Main FUNCTION--------------------------------------------------------------------------------------------------- + + +CleanEdits <- function(E, epsilon= as.mip(E)$epsilon){ + E<-PrepareEdits(E) + if (isEditsFeasible(E,epsilon)) { + SubstFixed <- SubstituteFixedValues(E) + SimpleConditional <- SimplifyCondEdits(SubstFixed$E,epsilon) + RemoveRedundant <- RemoveRedundantEdits(SimpleConditional$E, epsilon) + E <- EditMatrixToEditSet (RemoveRedundant$E) + } else { + E <- "NOT FEASIBLE" + fixvars <- "NOT FEASIBLE" + fixvalues<- "NOT FEASIBLE" + redundant <- "NOT FEASIBLE" + Edits_Simplified_BecauseOfFixedVars<- "NOT FEASIBLE" + NewEdits_after_imputation_fixedvars <- "NOT FEASIBLE" + SimplifiedConditional<- "NOT FEASIBLE" + ReplacedConditional <- "NOT FEASIBLE" + NewUnconditional<- "NOT FEASIBLE" + redundant<- "NOT FEASIBLE" + RedundantParts <- "NOT FEASIBLE" + } + return(list("CleanedEdits" =E,"Fixedvariables"=SubstFixed$variables, "Fixedvalues"=SubstFixed$values, "Edits_Simplified_BecauseOfFixedVars"=SubstFixed$OldEditsWithFixedVars,"NewEdits_after_imputation_fixedvars"=SubstFixed$NewEditsWithFixedVars, "SimplifiedConditionalEdits"= SimpleConditional$Simplified,"SimplifiedConditionalEditsRedundantParts"=SimpleConditional$RedundantParts, "NewUnconditional"=SimpleConditional$NewUnconditional, "ConditionalReplacedbyUnconditional"=SimpleConditional$ReplacedConditional ,"RemovedEdits_BecauseofRedundancy"=RemoveRedundant$redundant ) )} + + + + +#----------------------------------------------------------VOORBEELDEN van een aanroep van CleanEdits----------------------------- + + + +E <- editfile("set2.txt") +H<- CleanEdits(E) +HC<- H$CleanedEdits +HCE <- editset(HC[,2]) + +filename<-paste(path,"Set2Cleaned.txt") +write.csv2(HCE,file=filename, row.names=FALSE, sep=" ", quote=FALSE) + + +---------# under construction---------------------------------------------------------------------------------------------------------- + +# new functions for step 1 for the identification of contradictory edits. + + #ReplaceEqualitiesbyTwoInequlities replaces all equality constraints by two "<=" inequality constraints. For example x = 6 is replaced by x <=6 and -x<=-6 + ReplaceEqualitiesbyTwoInequalities<-function(E){ + E <- AdaptToMip(E) + A <- getA(E) + ops <- getOps(E) + b <- getb(E) + neq <- sum(ops=="=") + if (neq >0){ + A <- rbind(A, -1*A[ops=="=", ,drop=F]) + b <- c(b,-1*b[ops=="="]) + ops <-c(ops,ops[ops=="="]) + ops[ops=="="] <- "<=" + } + E <- as.editmatrix(A=A,b=b,ops=ops) + return(E) + } + +#FindInconsistentEdits creates an editmatrix with edits that need to be deleted from editset E in order to obtain a consistent editset. + +FindInconsistentEdits<-function(E, epsilon=as.mip(E)$epsilon, ForceInconsistent=rep(FALSE, sum(isNumEdit(E))), ForceNotInconsistent=rep(FALSE, sum(isNumEdit(E)))) { + EInitial <-E + nedit <- nrow(E) + IsInconsistentEdit <- rep(NA, sum(isNumEdit(E))) + E<- AdaptToMip(E) + ForceInconsistent <- c(ForceInconsistent,ForceInconsistent[getOps(E)%in% c("=","==")]) + ForceNotInconsistent <- c(ForceNotInconsistent,ForceNotInconsistent[getOps(E)%in% c("=","==")]) + E<- ReplaceEqualitiesbyTwoInequalities(E) + isnumEdit<-isNumEdit(E) + NnumEdit <- sum(isnumEdit) + iscat <- isCatVar(E) + nvar <- length(iscat) + A <- getA(E) + A[abs(A)==as.mip(E)$M]<- A[abs(A)==as.mip(E)$M]/1000 # the big M values of the initial MIP problem need to be reduced to prevent numerical problems. Later big M's will be added that need to be larger than the big M's in the current matrix. + colnamesA <-c(colnames(A),paste(".l",seq(from=(nvar+1), to=(nvar+NnumEdit)), sep="")) # new colnames for new categorical variables. + A <- cbind(A, matrix(0,nrow=nrow(A),ncol=NnumEdit)) #extending the coefficientsmatrix + colnames(A)<- colnamesA + coef <- -as.mip(E)$M/1 #!!!!# new big M values are introduced that need to be larger than the existing values. + A[isnumEdit,(nvar+1):(ncol(A))]<-diag(coef,NnumEdit) + E <- as.editmatrix(A=A,ops=getOps(E), b=getb(E)) + if (sum(ForceInconsistent)>0) { + subvar <- getVars(E)[(nvar+1):(ncol(E)-1)][ForceInconsistent==TRUE] + E<- substValue(E,var=subvar, value= rep(1,length(subvar)),reduce=FALSE,removeredundant=FALSE) + rhs <- getb(E) + rhs[isNumEdit(E)][ForceInconsistent]<-rhs[isNumEdit(E)][ForceInconsistent]/100 #needed to prevent computational problems + E <- as.editmatrix(A=getA(E),b=rhs,ops=getOps(E)) + } + if (sum(ForceNotInconsistent)>0) { + subvar <- getVars(E)[(nvar+1):(ncol(E)-1)][ForceNotInconsistent==TRUE] + E<- substValue(E,var=subvar, value= rep(0,length(subvar)),reduce=FALSE,removeredundant=FALSE)} + IndexOriginalNumEdits<- c(seq(1,sum(isNumEdit(EInitial))),which(getOps(EInitial) %in% c("=","==") )) # each equality edit is represented by two inequality edits. IndexOriginalNumEdits shows which of the numericaledits corrspond to the initial numerical edits + ObjFuncCoef <- rep(1,(ncol(E)-1)) + ObjFuncCoef[1:(ncol(EInitial)-1)] <-0 + p <- FillMip(E,ObjFuncCoef) + q<-solve(p) + if (q==0) { + q <- get.variables(p)[(ncol(EInitial)+1-1):(ncol(E)-1)] # THE RANge (ncol(EInitial)+1-1):(ncol(E)-1) is for the added categorical variables + if (sum(q)>0) { + IndexInconsistentNumEdit <- unique(IndexOriginalNumEdits[q==1]) + InconsistentNumEdit <- rownames(EInitial[isNumEdit(EInitial),,drop=F][IndexInconsistentNumEdit]) + IsInconsistentNumEdit <- rownames(EInitial) %in% InconsistentNumEdit + CatVarinInconsistentEdits <-names(which(colSums(contains(EInitial[InconsistentNumEdit,,drop=F]))>0 & isCatVar(EInitial)==TRUE) ) # if a numerical edit contains categorical variables, the edits corresponding to these categorical variables are also selected + IsInconsistentEdit <- IsInconsistentNumEdit | (rownames(EInitial) %in% CatVarinInconsistentEdits) + EInconsistent <- EInitial[IsInconsistentEdit,,drop=F ] + } + } + return(IsInconsistentEdit) +} + + +FindCycleofInconsistentEdits<-function(E){ + indexEdits <- NA + isInconsistentEdit <- FindInconsistentEdits(E) + isInconsistentNumEdit <-isInconsistentEdit & isNumEdit(E) + if (sum(isInconsistentNumEdit)>0 ) { + isInconsistentEdit_i <-isInconsistentEdit + FirstInconsistent <- which(isInconsistentEdit)[1] + ForceInconsistent <- isInconsistentEdit[isNumEdit(E)] + ForceNotInconsistent <- rep(FALSE,sum(isNumEdit(E))) + ForceInconsistent[FirstInconsistent]<- FALSE + ForceNotInconsistent[FirstInconsistent]<- TRUE + while (is.na(isInconsistentEdit_i[1])!=TRUE) { + isInconsistentEdit_i <- FindInconsistentEdits(E, ForceInconsistent=ForceInconsistent,ForceNotInconsistent=ForceNotInconsistent )[isNumEdit(E)==TRUE] + if (is.na(isInconsistentEdit_i[1] )!=TRUE) { ForceNotInconsistent <- ForceNotInconsistent | (isInconsistentEdit_i==TRUE & ForceInconsistent==FALSE)} + } + insetInconsistent <- ForceInconsistent | ForceNotInconsistent + indexInconsistent <- which(insetInconsistent) + ForceNotInconsistent <- rep(FALSE,sum(isNumEdit(E))) + ForceInconsistent <- insetInconsistent + for (i in 1:sum(insetInconsistent)){ + ForceInconsistent[indexInconsistent[i]]<-FALSE + isInconsistentEdit_i <- FindInconsistentEdits(E, ForceInconsistent=ForceInconsistent,ForceNotInconsistent=ForceNotInconsistent )[isNumEdit(E)==TRUE] + if (is.na(isInconsistentEdit_i[1])==TRUE) {ForceInconsistent[indexInconsistent[i]]<-TRUE} + } + indexEdits<- rep(FALSE, nrow(E)) + indexEdits[isNumEdit(E)]<-ForceInconsistent + CatVarinInconsistentEdits <-names(which(colSums(contains(E[indexEdits,,drop=F]))>0 & isCatVar(E)==TRUE) ) # if a numerical edit contains categorical variables, the edits corresponding to these categorical variables are also selected + indexEdits <- indexEdits | (rownames(E) %in% CatVarinInconsistentEdits) + } + return (indexEdits) +} +