From 20fc928ad1e510c509c5c64d75ad97eda59cf2f5 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 09:02:16 -0400
Subject: [PATCH 01/27] Initial cleanup

---
 .gitignore                                    |    2 +
 PHESANT.Rproj                                 |   13 +
 WAS/fixOddFieldsToCatMul.r                    |   37 +-
 WAS/loadConfounders.r                         |  193 ++--
 WAS/loadData.r                                |  109 +-
 WAS/loadIndicatorFields.r                     |  252 ++---
 WAS/loadPhenotypes.r                          |  168 ++-
 WAS/loadTraitOfInterest.r                     |   37 +-
 WAS/phenomeScan.r                             |    1 +
 testWAS/results/forest-binary.pdf             |  Bin 5117 -> 0 bytes
 testWAS/results/forest-continuous.pdf         |  Bin 4309 -> 0 bytes
 testWAS/results/forest-ordered-logistic.pdf   |  Bin 4748 -> 0 bytes
 testWAS/results/modelfit-log-1-3.txt          |   12 -
 testWAS/results/modelfit-log-2-3.txt          |   43 -
 testWAS/results/modelfit-log-3-3.txt          |   24 -
 testWAS/results/modelfit-log-all.txt          |    6 -
 testWAS/results/qqplot.pdf                    |  Bin 4916 -> 0 bytes
 testWAS/results/results-combined.txt          |   26 -
 testWAS/results/results-linear-1-2.txt        |    1 -
 testWAS/results/results-linear-1-3.txt        |    3 -
 testWAS/results/results-linear-2-3.txt        |    1 -
 testWAS/results/results-linear-3-3.txt        |    1 -
 testWAS/results/results-log-1-3.txt           |   13 -
 testWAS/results/results-log-2-3.txt           |   11 -
 testWAS/results/results-log-3-3.txt           |    5 -
 testWAS/results/results-log-all.txt           |    2 +-
 .../results/results-logistic-binary-1-2.txt   |    1 -
 .../results/results-logistic-binary-1-3.txt   |    4 -
 .../results/results-logistic-binary-2-3.txt   |    3 -
 .../results/results-logistic-binary-3-3.txt   |   13 -
 .../results/results-logistic-binary-all.txt   |   31 +-
 .../results-multinomial-logistic-1-2.txt      |    1 -
 .../results-multinomial-logistic-1-3.txt      |    1 -
 .../results-multinomial-logistic-2-3.txt      |   10 -
 .../results-multinomial-logistic-3-3.txt      |    1 -
 .../results-multinomial-logistic-all.txt      |   16 +-
 .../results/results-ordered-logistic-1-2.txt  |    1 -
 .../results/results-ordered-logistic-1-3.txt  |    2 -
 .../results/results-ordered-logistic-2-3.txt  |    4 -
 .../results/results-ordered-logistic-3-3.txt  |    1 -
 .../results/results-ordered-logistic-all.txt  |    8 +-
 testWAS/results/variable-flow-counts-1-3.txt  |   22 -
 testWAS/results/variable-flow-counts-2-3.txt  |   16 -
 testWAS/results/variable-flow-counts-3-3.txt  |    9 -
 testWAS/results/variable-flow-counts-all.txt  |   10 +-
 .../results/variable-flow-counts-combined.txt |   38 -
 testWAS/testdata.csv                          | 1001 -----------------
 variable-info/update-outcome-info/toTabs.sh   |    0
 48 files changed, 414 insertions(+), 1739 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 PHESANT.Rproj
 delete mode 100644 testWAS/results/forest-binary.pdf
 delete mode 100644 testWAS/results/forest-continuous.pdf
 delete mode 100644 testWAS/results/forest-ordered-logistic.pdf
 delete mode 100644 testWAS/results/modelfit-log-1-3.txt
 delete mode 100644 testWAS/results/modelfit-log-2-3.txt
 delete mode 100644 testWAS/results/modelfit-log-3-3.txt
 delete mode 100644 testWAS/results/qqplot.pdf
 delete mode 100644 testWAS/results/results-combined.txt
 delete mode 100644 testWAS/results/results-linear-1-2.txt
 delete mode 100644 testWAS/results/results-linear-1-3.txt
 delete mode 100644 testWAS/results/results-linear-2-3.txt
 delete mode 100644 testWAS/results/results-linear-3-3.txt
 delete mode 100644 testWAS/results/results-log-1-3.txt
 delete mode 100644 testWAS/results/results-log-2-3.txt
 delete mode 100644 testWAS/results/results-log-3-3.txt
 delete mode 100644 testWAS/results/results-logistic-binary-1-2.txt
 delete mode 100644 testWAS/results/results-logistic-binary-1-3.txt
 delete mode 100644 testWAS/results/results-logistic-binary-2-3.txt
 delete mode 100644 testWAS/results/results-logistic-binary-3-3.txt
 delete mode 100644 testWAS/results/results-multinomial-logistic-1-2.txt
 delete mode 100644 testWAS/results/results-multinomial-logistic-1-3.txt
 delete mode 100644 testWAS/results/results-multinomial-logistic-2-3.txt
 delete mode 100644 testWAS/results/results-multinomial-logistic-3-3.txt
 delete mode 100644 testWAS/results/results-ordered-logistic-1-2.txt
 delete mode 100644 testWAS/results/results-ordered-logistic-1-3.txt
 delete mode 100644 testWAS/results/results-ordered-logistic-2-3.txt
 delete mode 100644 testWAS/results/results-ordered-logistic-3-3.txt
 delete mode 100644 testWAS/results/variable-flow-counts-1-3.txt
 delete mode 100644 testWAS/results/variable-flow-counts-2-3.txt
 delete mode 100644 testWAS/results/variable-flow-counts-3-3.txt
 delete mode 100644 testWAS/results/variable-flow-counts-combined.txt
 delete mode 100644 testWAS/testdata.csv
 mode change 100644 => 100755 variable-info/update-outcome-info/toTabs.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9bea433
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+
+.DS_Store
diff --git a/PHESANT.Rproj b/PHESANT.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/PHESANT.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/WAS/fixOddFieldsToCatMul.r b/WAS/fixOddFieldsToCatMul.r
index 84eff53..2fb25c4 100644
--- a/WAS/fixOddFieldsToCatMul.r
+++ b/WAS/fixOddFieldsToCatMul.r
@@ -28,27 +28,22 @@
 # This function changes the format of these variable names from VARID_0_0, VARID_1_0, VARID_2_0 etc (which
 # is instance format), to VARID_0_0, VARID_0_1, VARID_0_2 etc. (array format) so they can be treated as categorical (multiple) fields. 
 fixOddFieldsToCatMul <- function(data) {
-
-	# examples are variables: 40006, 40011, 40012, 40013
-
-	# get all variables that need their instances changing to arrays
-	dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
-
-	for (i in 1:nrow(dataPheno)) {
-		varID = dataPheno[i,]$FieldID;		
-		varidString = paste("x",varID,"_", sep="");			
-			
-		# get all columns in data dataframe for this variable	
-		colIdxs = which(grepl(varidString,names(data)));
-		
-		# change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
-		count = 0;
-		for (j in colIdxs) {	
-			colnames(data)[j] <- paste(varidString, "0_", count, sep="")
-			count = count + 1;
-		}				
+  	# examples are variables: 40006, 40011, 40012, 40013
+  	# get all variables that need their instances changing to arrays
+  	dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
+  	for (i in 1:nrow(dataPheno)) {
+    		varID = dataPheno[i,]$FieldID;		
+    		varidString = paste("x",varID,"_", sep="");			
+    			
+    		# get all columns in data dataframe for this variable	
+    		colIdxs = which(grepl(varidString,names(data)));
+    		
+    		# change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
+    		count = 0;
+    		for (j in colIdxs) {	
+    			  colnames(data)[j] <- paste(varidString, "0_", count, sep="")
+    			  count = count + 1;
+    		}				
 	}
-
 	return(data)
-
 }
diff --git a/WAS/loadConfounders.r b/WAS/loadConfounders.r
index 357dbab..72c5f28 100644
--- a/WAS/loadConfounders.r
+++ b/WAS/loadConfounders.r
@@ -20,117 +20,102 @@
 ## loads confounder variables from phenotype file
 
 loadConfounders <- function(phenotypes) {
-
-if (opt$save==TRUE) { 
-
-	# saving not running tests so we add a fake confounder
-	numRows = nrow(phenotypes)
+    if (opt$save==TRUE) { 
+	      # saving not running tests so we add a fake confounder
+	      numRows = nrow(phenotypes)
         data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
         colnames(data)[1] <- "userID"
         colnames(data)[2] <- "conf1"
         return(data)
-
-} else {
-
-	if (!is.null(opt$confounderfile)) {
-		print("Loading confounders from confounder file ...")
-	
-		confs = fread(opt$confounderfile, sep=',', header=TRUE, data.table=FALSE)
-	        confs = lapply(confs,function(x) type.convert(as.character(x)))
-	        confs = as.data.frame(confs)
-
-		## find userID column and change name to userID
-		idx = which(colnames(confs) == opt$userId)
-		confs = confs[,c(opt$userID,setdiff(colnames(confs),opt$userID))]
-		colnames(confs)[1] <- "userID"
-
-	} else {
-	print("Loading confounders from phenotypes file ...")
-        confNames = getConfounderNames()
-
-        #####
-	##### extract confounders from data file
-        confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
-	confs = lapply(confs,function(x) type.convert(as.character(x)))
-	confs = as.data.frame(confs)
-
-        #####
-	##### process genetic batch to create genetic chip variable
-        if (opt$genetic == TRUE) {
-                genoBatch = confs[,"x22000_0_0"]
-
-                # chip comes from batch field 22000
-                genoChip = rep.int(NA,nrow(confs));
-                idxForVar = which(genoBatch<0);
-                genoChip[idxForVar] = 0;
-                idxForVar = which(genoBatch>=0 & genoBatch<2000);
-                genoChip[idxForVar] = 1;
-
-		# remove geno batch from and add geno chip to confounders
-                confs = confs[,-which(names(confs) == "x22000_0_0")]
-                confs = cbind.data.frame(confs, genoChip)
-        }
-
-
-	#####
-	##### Convert assessment centre to an indicator variable
-	if (opt$sensitivity==TRUE) {
-		confs$x54_0_0 = as.factor(confs$x54_0_0)
-		assCentre = model.matrix(~confs$x54_0_0)
-		assCentre = assCentre[,2:ncol(assCentre)]
-		confs = cbind(confs, assCentre)
-		confs$x54_0_0 = NULL
-	}
-
-	colnames(confs)[1] <- "userID"
-
-	}
-
-	# remove any rows with no values
-	print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))
-	confsComp = complete.cases(confs)
-	print(paste("Number of INCOMPLETE rows removed from confounder data: ", length(which(confsComp==FALSE)),sep=""))
-	confs = confs[confsComp==TRUE,]
-	print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))	
-
-	print("Confounder columns:")
-	print(names(confs))
-
-	return(confs)
-}
+    } else {
+      	if (!is.null(opt$confounderfile)) {
+      		    print("Loading confounders from confounder file ...")
+      		    confs = fread(opt$confounderfile, sep=',', header=TRUE, data.table=FALSE)
+      	      confs = lapply(confs,function(x) type.convert(as.character(x)))
+      	      confs = as.data.frame(confs)
+      
+      		    ## find userID column and change name to userID
+      		    idx = which(colnames(confs) == opt$userId)
+      		    confs = confs[,c(opt$userID,setdiff(colnames(confs),opt$userID))]
+      		    colnames(confs)[1] <- "userID"
+    	  } else {
+    	        print("Loading confounders from phenotypes file ...")
+              confNames = getConfounderNames()
+              #####
+    	        ##### extract confounders from data file
+              confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
+    	        confs = lapply(confs,function(x) type.convert(as.character(x)))
+    	        confs = as.data.frame(confs)
+    
+              #####
+    	        ##### process genetic batch to create genetic chip variable
+              if (opt$genetic == TRUE) {
+                    genoBatch = confs[,"x22000_0_0"]
+                    # chip comes from batch field 22000
+                    genoChip = rep.int(NA,nrow(confs));
+                    idxForVar = which(genoBatch<0);
+                    genoChip[idxForVar] = 0;
+                    idxForVar = which(genoBatch>=0 & genoBatch<2000);
+                    genoChip[idxForVar] = 1;
+    
+    		            # remove geno batch from and add geno chip to confounders
+                    confs = confs[,-which(names(confs) == "x22000_0_0")]
+                    confs = cbind.data.frame(confs, genoChip)
+              }
+
+	            #####
+            	##### Convert assessment centre to an indicator variable
+	            if (opt$sensitivity==TRUE) {
+		                confs$x54_0_0 = as.factor(confs$x54_0_0)
+		                assCentre = model.matrix(~confs$x54_0_0)
+		                assCentre = assCentre[,2:ncol(assCentre)]
+		                confs = cbind(confs, assCentre)
+		                confs$x54_0_0 = NULL
+	            }
+	            colnames(confs)[1] <- "userID"
+
+	      }
+
+	      # remove any rows with no values
+	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))
+	      confsComp = complete.cases(confs)
+	      print(paste("Number of INCOMPLETE rows removed from confounder data: ", length(which(confsComp==FALSE)),sep=""))
+	      confs = confs[confsComp==TRUE,]
+	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))	
+
+	      print("Confounder columns:")
+	      print(names(confs))
+
+	      return(confs)
+    }
 }
 
 getConfounderNames <- function() {
-
-        #####
-	##### first get vector of confounder names
-
-        # age and sex
-        confNames = c(opt$userId, "x21022_0_0", "x31_0_0")
-
-        # if genetic trait of interest then adjust for genotype chip
-        # and also let user choose sensitivity analysis that also adjusts for top 10 genetic principal components and assessment centre
-        if (opt$genetic == TRUE) {
-
-                confNames = append(confNames, "x22000_0_0")
-
-                if (opt$sensitivity==TRUE) {
-                        confNames = append(confNames, c("x22009_0_1", "x22009_0_2", "x22009_0_3", "x22009_0_4", "x22009_0_5", "x22009_0_6", "x22009_0_7", "x22009_0_8", "x22009_0_9", "x22009_0_10", "x54_0_0"))
-                        print("Adjusting for age, sex, genotype chip, top 10 genetic principal components and assessment centre")
-                } else {
-                        print("Adjusting for age, sex and genotype chip")
-                }
-        } else {
-                # non genetic trait of interest, then sensitivity adjusts for assessment center
-                if (opt$sensitivity==TRUE) {
-                        confNames = append(confNames, "x54_0_0")
-                        print("Adjusting for age, sex and assessment centre")
-                } else {
-                        print("Adjusting for age and sex")
-                }
-        }
-
-	return(confNames)
+    #####
+	  ##### first get vector of confounder names
+    # age and sex
+    confNames = c(opt$userId, "x21022_0_0", "x31_0_0")
+    
+    # if genetic trait of interest then adjust for genotype chip
+    # and also let user choose sensitivity analysis that also adjusts for top 10 genetic principal components and assessment centre
+    if (opt$genetic == TRUE) {
+          confNames = append(confNames, "x22000_0_0")
+          if (opt$sensitivity==TRUE) {
+                confNames = append(confNames, c("x22009_0_1", "x22009_0_2", "x22009_0_3", "x22009_0_4", "x22009_0_5", "x22009_0_6", "x22009_0_7", "x22009_0_8", "x22009_0_9", "x22009_0_10", "x54_0_0"))
+                print("Adjusting for age, sex, genotype chip, top 10 genetic principal components and assessment centre")
+          } else {
+                print("Adjusting for age, sex and genotype chip")
+          }
+    } else {
+          # non genetic trait of interest, then sensitivity adjusts for assessment center
+          if (opt$sensitivity==TRUE) {
+                confNames = append(confNames, "x54_0_0")
+                print("Adjusting for age, sex and assessment centre")
+          } else {
+                print("Adjusting for age and sex")
+          }
+    }
+	  return(confNames)
 }
 
 
diff --git a/WAS/loadData.r b/WAS/loadData.r
index 127ea39..68d4eed 100644
--- a/WAS/loadData.r
+++ b/WAS/loadData.r
@@ -22,64 +22,57 @@
 # creates confounder data frame
 # returns an object holding these two data frames
 loadData <- function() {
-
-	library(data.table)
-
-	#####
-	##### validating data
-
-	## check phenotype file headers
-	validatePhenotypeInput()
-
-	## check trait of interest file headers
-	validateTraitInput()
-
-	#####
-	##### load data
-
-	## load phenotype
-	print("Loading phenotypes ...")
-	phenotype = loadPhenotypes()
-
-	## load trait of interest
-        toi <- loadTraitOfInterest(phenotype)
-
-        ## load confounders
-        conf <- loadConfounders(phenotype)
-
-	## add trait of interest to phenotype data frame and remove rows with no trait of interest
-	## merge in toi with phenotype - keep id list from phenotypes file
-	phenotype = merge(toi, phenotype, by="userID", all.y=TRUE, all.x=FALSE)
-	
-        ## remove any rows with no trait of interest
-        idxNotEmpty = which(!is.na(phenotype[,"geno"]))
-
-	if (opt$save == TRUE) {
-	        print(paste("Phenotype file has ", nrow(phenotype), " rows.", sep=""))
-	} else {
-		print(paste("Phenotype file has ", nrow(phenotype), " rows with ", length(idxNotEmpty), " not NA for trait of interest (",opt$traitofinterest,").", sep=""))
-	}
-
-        phenotype = phenotype[idxNotEmpty,]
-
-	# match ids from not empty phenotypes list
-	confsIdx = which(conf$userID %in% phenotype$userID)
-        conf = conf[confsIdx,]
-
-	if (nrow(phenotype)==0) {
-	        stop("No examples with row in both trait of interest and phenotype files", call.=FALSE)
-	} else {
-	        print(paste("Phenotype and trait of interest data files merged, with", nrow(phenotype),"examples"))
-	}
-
-	# some fields are fixed that have a field type as cat single but we want to treat them like cat mult
-	phenotype = fixOddFieldsToCatMul(phenotype)
-
-	indFields = loadIndicatorFields(colnames(phenotype))
-	
-	d = list(datax=phenotype, confounders=conf, inds=indFields)
-	return(d)
-
+  	library(data.table)
+
+  
+  	##### validating data
+  	## check phenotype file headers
+  	validatePhenotypeInput()
+  
+  	## check trait of interest file headers
+  	validateTraitInput()
+  
+  	##### load data
+  	## load phenotype
+  	print("Loading phenotypes ...")
+  	phenotype = loadPhenotypes()
+  
+  	## load trait of interest
+    toi <- loadTraitOfInterest(phenotype)
+  
+    ## load confounders
+    conf <- loadConfounders(phenotype)
+  
+  	## add trait of interest to phenotype data frame and remove rows with no trait of interest
+  	## merge in toi with phenotype - keep id list from phenotypes file
+  	phenotype = merge(toi, phenotype, by="userID", all.y=TRUE, all.x=FALSE)
+  	
+    ## remove any rows with no trait of interest
+    idxNotEmpty = which(!is.na(phenotype[,"geno"]))
+  
+  	if (opt$save == TRUE) {
+  	      print(paste("Phenotype file has ", nrow(phenotype), " rows.", sep=""))
+  	} else {
+  		    print(paste("Phenotype file has ", nrow(phenotype), " rows with ", length(idxNotEmpty), " not NA for trait of interest (",opt$traitofinterest,").", sep=""))
+  	}
+  
+    phenotype = phenotype[idxNotEmpty,]
+  
+  	# match ids from not empty phenotypes list
+  	confsIdx = which(conf$userID %in% phenotype$userID)
+    conf = conf[confsIdx,]
+  
+  	if (nrow(phenotype)==0) {
+  	      stop("No examples with row in both trait of interest and phenotype files", call.=FALSE)
+  	} else {
+  	     print(paste("Phenotype and trait of interest data files merged, with", nrow(phenotype),"examples"))
+  	}
+  
+  	# some fields are fixed that have a field type as cat single but we want to treat them like cat mult
+  	phenotype = fixOddFieldsToCatMul(phenotype)
+  	indFields = loadIndicatorFields(colnames(phenotype))
+  	d = list(datax=phenotype, confounders=conf, inds=indFields)
+  	return(d)
 }
 
 
diff --git a/WAS/loadIndicatorFields.r b/WAS/loadIndicatorFields.r
index 31c94e0..e069c8e 100644
--- a/WAS/loadIndicatorFields.r
+++ b/WAS/loadIndicatorFields.r
@@ -19,144 +19,134 @@
 ##
 ## load data used for data code default value related field, and categorical multiple indicator field
 loadIndicatorFields <- function(phenosToTest) {
-
-	print("Loading indicator fields from phenotypes file ...")
-
-	# read pheno file column names
-        phenoVarsAll = colnames(read.table(opt$phenofile, header=1, nrows=1, sep=','))
-        phenoVarsAll = phenoVarsAll[which(phenoVarsAll!=opt$userId)]
-
-	indVars = c(opt$userId)
-
-	## add indicator variables to pheno data
-	indVars = addIndicatorVariables(indVars, phenosToTest, phenoVarsAll)
-
-	if (length(indVars)>1) {
-		# not just user id column
-		print("Loading required related variable(s):")
-		print(indVars[2:length(indVars)])
-	}
-	else {
-		print("No required related variables.")
-	}
-
-        ## read in the right table columns
-        data = fread(opt$phenofile, select=indVars, sep=',', header=TRUE, data.table=FALSE)
-	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
-	colnames(data)[1] <- "userID"
-	return(data)
+  	print("Loading indicator fields from phenotypes file ...")
+  
+  	# read pheno file column names
+    phenoVarsAll = colnames(read.table(opt$phenofile, header=1, nrows=1, sep=','))
+    phenoVarsAll = phenoVarsAll[which(phenoVarsAll!=opt$userId)]
+  	indVars = c(opt$userId)
+  
+  	## add indicator variables to pheno data
+  	indVars = addIndicatorVariables(indVars, phenosToTest, phenoVarsAll)
+  	if (length(indVars)>1) {
+    		# not just user id column
+    		print("Loading required related variable(s):")
+    		print(indVars[2:length(indVars)])
+  	} else {
+  		  print("No required related variables.")
+  	}
+  
+    ## read in the right table columns
+    data = fread(opt$phenofile, select=indVars, sep=',', header=TRUE, data.table=FALSE)
+  	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+  	colnames(data)[1] <- "userID"
+  	return(data)
 }
 
 
 addIndicatorVariables <- function(indVars, phenosToTest, phenoVarsAll) {
 
-	#####
-	##### default value related fields for data codes
-	# get list of all indicator variables from outcome info file
-
-	# get datacodes with an indicator variable
-	dataCodeIdx = which(!is.na(vl$dataCodeInfo$default_related_field) & vl$dataCodeInfo$default_related_field!="")
-
-	# whether there are any related fields that are not in the phenotype data file when they should be
-        hasIssue=FALSE
-
-	if (length(dataCodeIdx)>0) {
-	dataCodeWithRF = vl$dataCodeInfo[dataCodeIdx,]
-
-	defaultFields = c()
-
-	# check there is a field in the phenotypes data for each data code and if so then add this data codes related field to the phenosToTest list
-	if (nrow(dataCodeWithRF)>0) {
-	for (i in 1:nrow(dataCodeWithRF)) {
-		dc = dataCodeWithRF$dataCode[i]
-
-		# get all fields with this datacode
-		fieldsIdx = which(vl$phenoInfo$DATA_CODING == dc)
-		fieldIDs = vl$phenoInfo$FieldID[fieldsIdx]
-		fieldIDs = paste("x",fieldIDs,"_0_0", sep="")
-
-		# datacode related field
-		rf = dataCodeWithRF$default_related_field[i]
-		rf = paste("x",rf,"_0_0", sep="")
-
-		# if one of these field IDs are in phenotypeColumns then data code related field is needed
-		if (length(intersect(fieldIDs, phenosToTest))>0) {
-			defaultFields = append(defaultFields, rf)
-		}
-	}
-	}
-
-	defaultFields = unique(defaultFields)
-	indVars = append(indVars, defaultFields)
-
-	#####
-	##### check these required variables exist in phenotype file
+  	##### default value related fields for data codes
+  	# get list of all indicator variables from outcome info file
+  
+  	# get datacodes with an indicator variable
+  	dataCodeIdx = which(!is.na(vl$dataCodeInfo$default_related_field) & vl$dataCodeInfo$default_related_field!="")
+  
+  	# whether there are any related fields that are not in the phenotype data file when they should be
+    hasIssue=FALSE
+  
+  	if (length(dataCodeIdx)>0) {
+      	dataCodeWithRF = vl$dataCodeInfo[dataCodeIdx,]
+      	defaultFields = c()
+      
+      	# check there is a field in the phenotypes data for each data code and if so then add this data codes related field to the phenosToTest list
+      	if (nrow(dataCodeWithRF)>0) {
+          	for (i in 1:nrow(dataCodeWithRF)) {
+            		dc = dataCodeWithRF$dataCode[i]
+            
+            		# get all fields with this datacode
+            		fieldsIdx = which(vl$phenoInfo$DATA_CODING == dc)
+            		fieldIDs = vl$phenoInfo$FieldID[fieldsIdx]
+            		fieldIDs = paste("x",fieldIDs,"_0_0", sep="")
+            
+            		# datacode related field
+            		rf = dataCodeWithRF$default_related_field[i]
+            		rf = paste("x",rf,"_0_0", sep="")
+            
+            		# if one of these field IDs are in phenotypeColumns then data code related field is needed
+            		if (length(intersect(fieldIDs, phenosToTest))>0) {
+            			  defaultFields = append(defaultFields, rf)
+            		}
+      	    }
+  	    }
+  
+  	    defaultFields = unique(defaultFields)
+  	    indVars = append(indVars, defaultFields)
+  
+  	    ##### check these required variables exist in phenotype file
         if(length(defaultFields)>0) {
-                for (i in 1:length(defaultFields)) {
-                        if (!(defaultFields[i] %in% phenoVarsAll)) {
-                                print(paste("Required variable: Field ",defaultFields[i],"is a data code related field (default_related_field column in data code information file) but was not found in phenotype data"))
-                        	hasIssue=TRUE
-			}
-                }
+            for (i in 1:length(defaultFields)) {
+                if (!(defaultFields[i] %in% phenoVarsAll)) {
+                    print(paste("Required variable: Field ",defaultFields[i],"is a data code related field (default_related_field column in data code information file) but was not found in phenotype data"))
+                    hasIssue=TRUE
+  			        }
+            }
         }
-	}
-
-	#####
-	##### categorical multiple indicator fields
-
-	# get field info, for fields with cat mult indicator fields
-        fieldsIdx = which(!is.na(vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS))
-
-	if (length(fieldsIdx)>0) {
-	        fieldsWithCMIF = vl$phenoInfo[fieldsIdx,]
-	        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "NO_NAN"),]
-	        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "ALL"),]
-	        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == ""),]
-
-		if (nrow(fieldsWithCMIF)>0) {
-
-		# turn into variable format not field ID
-		fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS = paste("x",fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS,"_0_0", sep="")
-		fieldsWithCMIF$FieldID = paste("x",fieldsWithCMIF$FieldID,"_", sep="")
-		phenosToTestIds = sub("_.*", "_", phenosToTest)
-	
-		# remove rows where the field isn't in the phenotypes list
-		idxIn = which(fieldsWithCMIF$FieldID %in% phenosToTestIds)
-		fieldsWithCMIF = fieldsWithCMIF[idxIn,]
-
-		defaultFields = fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS
-		defaultFields = unique(defaultFields)
-
-	        #####
-		##### remove those that already exist in phenotypes 'part'
-		idxExists = which(defaultFields %in% indVars)
-		if (length(idxExists>0)) {
-		        defaultFields = defaultFields[-idxExists]
-		}
-	
-		indVars = unique(append(indVars, defaultFields))
-
-		#####
-		##### check these required variables exist in phenotype file
-		if(length(defaultFields)>0) {
-			for (i in 1:length(defaultFields)) {
-				if (!(defaultFields[i] %in% phenoVarsAll)) {
-					print(paste("Required variable: Field ",defaultFields[i],"is a categorical multiple indicator field (CAT_MULT_INDICATOR_FIELDS column in variable information file) but was not found in phenotype data")) 
-					hasIssue=TRUE
-				}
-			}
-		}
-
-		}
-	}
-
-	# stop script if there are missing variables
-	if (hasIssue==TRUE) {
-		print("!!! PHESANT has stopped - add required variables to phenotype file or remove relevant phenotypes (so that required variables are not needed).")
-		quit()
-	}
-	
-	return(indVars)
+  	}
+  
+  	##### categorical multiple indicator fields
+  	# get field info, for fields with cat mult indicator fields
+          fieldsIdx = which(!is.na(vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS))
+  
+  	if (length(fieldsIdx)>0) {
+        fieldsWithCMIF = vl$phenoInfo[fieldsIdx,]
+        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "NO_NAN"),]
+        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "ALL"),]
+        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == ""),]
+  
+  		  if (nrow(fieldsWithCMIF)>0) {
+        		# turn into variable format not field ID
+        		fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS = paste("x",fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS,"_0_0", sep="")
+        		fieldsWithCMIF$FieldID = paste("x",fieldsWithCMIF$FieldID,"_", sep="")
+        		phenosToTestIds = sub("_.*", "_", phenosToTest)
+        	
+        		# remove rows where the field isn't in the phenotypes list
+        		idxIn = which(fieldsWithCMIF$FieldID %in% phenosToTestIds)
+        		fieldsWithCMIF = fieldsWithCMIF[idxIn,]
+        
+        		defaultFields = fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS
+        		defaultFields = unique(defaultFields)
+        
+        	        #####
+        		##### remove those that already exist in phenotypes 'part'
+        		idxExists = which(defaultFields %in% indVars)
+        		if (length(idxExists>0)) {
+        		        defaultFields = defaultFields[-idxExists]
+        		}
+        	
+        		indVars = unique(append(indVars, defaultFields))
+        
+        		#####
+        		##### check these required variables exist in phenotype file
+        		if(length(defaultFields)>0) {
+        			for (i in 1:length(defaultFields)) {
+        				if (!(defaultFields[i] %in% phenoVarsAll)) {
+        					print(paste("Required variable: Field ",defaultFields[i],"is a categorical multiple indicator field (CAT_MULT_INDICATOR_FIELDS column in variable information file) but was not found in phenotype data")) 
+        					hasIssue=TRUE
+        				}
+        			}
+        		}
+  
+  		}
+  	}
+  
+  	# stop script if there are missing variables
+  	if (hasIssue==TRUE) {
+  		  print("!!! PHESANT has stopped - add required variables to phenotype file or remove relevant phenotypes (so that required variables are not needed).")
+  		  quit()
+  	}
+  	
+  	return(indVars)
 
 }
 
diff --git a/WAS/loadPhenotypes.r b/WAS/loadPhenotypes.r
index c689ebe..4276242 100644
--- a/WAS/loadPhenotypes.r
+++ b/WAS/loadPhenotypes.r
@@ -20,106 +20,90 @@
 ## load phenotypes from phenotype file
 
 loadPhenotypes <- function() {
-
-
-        ## is not running 'all' then we determine the start and end idxs of phenotypes that we test, so that we can parallelise into multiple jobs
-        if (opt$varTypeArg!="all") {
-
-		# read pheno file column names
+    ## is not running 'all' then we determine the start and end idxs of phenotypes that we test, so that we can parallelise into multiple jobs
+    if (opt$varTypeArg!="all") {
+		      # read pheno file column names
 	        phenoVars = read.table(opt$phenofile, header=0, nrows=1, sep=',')
 	        phenoVars = phenoVars[which(phenoVars!=opt$userId)]
 
-
-		#####
-		##### calculate part start and end
-
-                partSize = ceiling(length(phenoVars)/opt$numParts);
-                partStart = (opt$partIdx-1)*partSize + 1;
-
-                if (opt$partIdx == opt$numParts) {
-                        partEnd = length(phenoVars);
-                } else {
-                        partEnd = partStart + partSize - 1;
-                }
-
-                print(paste(partStart, '-', partEnd));
-
-
-                #####
-                ##### find range of columns to read in
-
-		## This is more complicated than just reading in a column range, because we need to determine cut points
-		## such that all columns of a particular field are loaded.
-		## A field is included in a 'part' if its last column is within the part range.
-		## e.g. for part 2 of 5 parts and for 100 columns, then fields having their last column at position 21 - 40 (i.e. its column index) are included in this part.
-
-                ## user ID always included
-                phenosToTest = c(opt$userId)
-
-		currentVar=""
-		currentVarLong=""
-		currentVarShort=""
-		first=TRUE
-		phenoIdx=0
-
-		# all columns for a particular field
-		thisPhenoToTest = c()
-
-                for (var in phenoVars) {
-
-                        varx = gsub("^x", "", var);
-                        varx = gsub("_[0-9]+$", "", varx);
-                        varxShort = gsub("^x", "", var);
-                        varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
-			currentVarLong = var
-
-			if (currentVar == varx) { # same variable same timepoint
-				# add current var to pheno list
-				thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
-			}
-                        else if (currentVarShort == varxShort) { # save var, diff timepoint
-                                ## different time point of this var so skip in testing but add here because some are fixed to cat mult
-                                thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
-                        } else {
-                              	## new variable so run test for previous (we have collected all the columns now)
-
-                                if (first==FALSE) {
-                                        if (phenoIdx>=partStart && phenoIdx<=partEnd) { # only start new variable processing if last column of it is within the idx range for this part
-                                       		phenosToTest = append(phenosToTest, thisPhenoToTest)
-					}
-                                }
-
-                                first=FALSE;
-
-                                ## new variable so set values
-                                currentVar = varx
-                                currentVarShort = varxShort
-				thisPhenoToTest = c(as.character(currentVarLong))
+		      #####
+		      ##### calculate part start and end
+          partSize = ceiling(length(phenoVars)/opt$numParts);
+          partStart = (opt$partIdx-1)*partSize + 1;
+          if (opt$partIdx == opt$numParts) {
+                  partEnd = length(phenoVars);
+          } else {
+                  partEnd = partStart + partSize - 1;
+          }
+          print(paste(partStart, '-', partEnd));
+
+
+          #####
+          ##### find range of columns to read in
+
+      		## This is more complicated than just reading in a column range, because we need to determine cut points
+      		## such that all columns of a particular field are loaded.
+      		## A field is included in a 'part' if its last column is within the part range.
+      		## e.g. for part 2 of 5 parts and for 100 columns, then fields having their last column at position 21 - 40 (i.e. its column index) are included in this part.
+
+          ## user ID always included
+          phenosToTest = c(opt$userId)
+
+      		currentVar=""
+      		currentVarLong=""
+      		currentVarShort=""
+      		first=TRUE
+      		phenoIdx=0
+
+		      # all columns for a particular field
+		      thisPhenoToTest = c()
+          for (var in phenoVars) {
+              varx = gsub("^x", "", var);
+              varx = gsub("_[0-9]+$", "", varx);
+              varxShort = gsub("^x", "", var);
+              varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
+			        currentVarLong = var
+
+        			if (currentVar == varx) { # same variable same timepoint
+        				    # add current var to pheno list
+        				    thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
+        			} else if (currentVarShort == varxShort) { # save var, diff timepoint
+                    ## different time point of this var so skip in testing but add here because some are fixed to cat mult
+                    thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
+              } else {
+                    ## new variable so run test for previous (we have collected all the columns now)
+                    if (first==FALSE) {
+                        if (phenoIdx>=partStart && phenoIdx<=partEnd) { # only start new variable processing if last column of it is within the idx range for this part
+                       		phenosToTest = append(phenosToTest, thisPhenoToTest)
                         }
-
-
-                        phenoIdx = phenoIdx + 1
-                }
-
-                # last variable so test association
-                if (phenoIdx>=partStart && phenoIdx<=partEnd) {
-                        #phenosToTest = append(phenosToTest, as.character(currentVarLong))
-			phenosToTest = append(phenosToTest, as.character(thisPhenoToTest))
-                }
-
-                ## read in the right table columns - a subset of the data file
-                data = fread(opt$phenofile, select=phenosToTest, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
-
-        } else {
-                # reading all data at once
-                data = fread(opt$phenofile, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
-        }
+                    }
+                    first=FALSE;
+
+                    ## new variable so set values
+                    currentVar = varx
+                    currentVarShort = varxShort
+                    thisPhenoToTest = c(as.character(currentVarLong))
+              }
+              phenoIdx = phenoIdx + 1
+      }
+
+      # last variable so test association
+      if (phenoIdx>=partStart && phenoIdx<=partEnd) {
+            #phenosToTest = append(phenosToTest, as.character(currentVarLong))
+			      phenosToTest = append(phenosToTest, as.character(thisPhenoToTest))
+      }
+
+      ## read in the right table columns - a subset of the data file
+      data = fread(opt$phenofile, select=phenosToTest, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
+
+  } else {
+          # reading all data at once
+          data = fread(opt$phenofile, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
+  }
 
 	## this is type conversion as used in the read.table function (that we used to use ((this was changed because read.table cannot read column subsets))
 	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
-
 	colnames(data)[1] <- "userID"
-
 	return(data)
 
 }
diff --git a/WAS/loadTraitOfInterest.r b/WAS/loadTraitOfInterest.r
index 712362c..52cf5b5 100644
--- a/WAS/loadTraitOfInterest.r
+++ b/WAS/loadTraitOfInterest.r
@@ -21,34 +21,25 @@
 ## load trait of interest, either from separate trait of interest file, or from phenotype file
 
 loadTraitOfInterest <- function(phenotypes) {
-
-
-if (opt$save==TRUE) { 
-	# saving not running tests so we don't have a trait of interest
-
+    if (opt$save==TRUE) { 
+	      # saving not running tests so we don't have a trait of interest
         # add pretend trait of interest so other code doesn't break
         numRows = nrow(phenotypes)
-	data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
-
-} else {
-
-	# load the trait of interest specified by the user
-
+	      data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
+    } else {
+	      # load the trait of interest specified by the user
         if (is.null(opt$traitofinterestfile)) {
-                print("Extracting trait of interest from pheno file ...")
-		data = fread(opt$phenofile, select=c(opt$userId, opt$traitofinterest), sep=',', header=TRUE, data.table=FALSE)
-
+              print("Extracting trait of interest from pheno file ...")
+		          data = fread(opt$phenofile, select=c(opt$userId, opt$traitofinterest), sep=',', header=TRUE, data.table=FALSE)
         } else {
-                print("Loading trait of interest file ...")
-                data = fread(opt$traitofinterestfile, select=c(opt$userId, opt$traitofinterest), sep=',', header=TRUE, data.table=FALSE)
+              print("Loading trait of interest file ...")
+              data = fread(opt$traitofinterestfile, select=c(opt$userId, opt$traitofinterest), sep=',', header=TRUE, data.table=FALSE)
         }
+	    data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+    }
 
-	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
-}
-
-colnames(data)[1] <- "userID"
-colnames(data)[2] <- "geno"
-
-return(data)
+    colnames(data)[1] <- "userID"
+    colnames(data)[2] <- "geno"
 
+    return(data)
 }
diff --git a/WAS/phenomeScan.r b/WAS/phenomeScan.r
index 7d31fdd..2be21ca 100644
--- a/WAS/phenomeScan.r
+++ b/WAS/phenomeScan.r
@@ -42,6 +42,7 @@ option_list = list(
 opt_parser = OptionParser(option_list=option_list);
 opt = parse_args(opt_parser);
 
+save.image(file = "opt.RData")
 source("processArgs.r")
 processArgs();
 
diff --git a/testWAS/results/forest-binary.pdf b/testWAS/results/forest-binary.pdf
deleted file mode 100644
index 7baa6c453ed2e427c6a85b101f5743dd4fa97524..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 5117
zcmZ`-c|4SB`xl|6B6}hp4k7WFF~(k&u|#&+$utI&F~cmf?-|)nXtz^HN3vy$5|S)q
zi%9k;LPgnmXF8|e^PcnjJ)h^F`+Hx@eP4fk@9X+Xnj`h)pz?5tWW;2|R0J)e&xr_u
z0tx`(!bOON1_ab4Vo+o(0gptHF@Q8u4W<B9R)8u(p$du!C1n{1XhLxNfA_4hWE|bg
z5-=d5d_1ve5@1Fl`%uVqH*+Gvje^Dy8M}(|it<o8_9GX80m(p4oq}NSZj4jFew_vY
ztpj{80H}*1qi_Td05nH=U`T)>-M|v~uY>Zx92j;CYa#|u{>vQHCE&^QB?(aZ;SO42
zNCXNIO^*lvM?8kX&vKl_;62Em02HdM0)YBh9GQLtP#;Gx4ibYVxM3LQCXwki^M?4R
z*WWTG2rD0KU0RkQSAYq~xqz2Brm4C5qMr3t5>#>1h$ET&iJNO1l1ja?Q*5ywb!knf
zcH%{;xya;RHIjFFjbhl@(_iN26f}d@*3)E{7g1%d#Y!976YmIp<+d-kCTypsl@&+%
z7c{Efq*b+G)S)L0{@)hbmm0o%jyeTbhje9yoj-bTX7c9d)6vtraidNy4NBDckMAe$
z?u4-UJ>L;nou>aoS*jx$!^ha3!ADR|BPfmE)#JnUYP@<T0&<(I!_$cdoyRg-ony7&
zBbQqoReo0#&;A^ZY+05#QI>soyQ=Ly4V)Nzzt_5A_lEJ-x+J0~UgSwo<SwaY>6m}u
z^R3fKVZJ+)F&Q4C8ynj*Eu33kkeRHH-yG4M3)3A+PZ#rBmRD1a_ddZMlR6J5UNTXA
zrC*nYKgE@zan4<~1|_g=od2+8kqcF;&slRTsA;cysSB40C$rovQW!ZH_v|24HW#b6
zSETZ&!C>4tzxLhOd_IfA7tE1;QGkJYze=smURK}%9c+AmJXidV4btcbh?KG1W$#%5
zG#vI}4m-<6DwY=dl8a-j+m7~CikDHdZ63~R?-iP!HzF7BI~#uSJw1%_ARYD6Yi71j
z%=6OYWj8H$8jOnN5G%Ys@4%Jz$s_lp_9q$c9gF0`VZEVVC#boNu|vv&iDBlp;FNV$
zIcx?rQ#LRjPeqKOKM^~7Q0ac_--DmOX<qQ#o0RSsph&E0Qucj*Igv0jk(R<Lpgf=+
z9AsMd#2z+0@s`RII+gZ)O<pkl^*|_fY22&5hrUhisCoS5nBQ7bb;V&!y+=KoS?DZB
z%0eaAu7A*`>nWG7EH#@u>l3RcQz8AqdBgpsm2EWm*h?zq3@_*y^ia)m(3r;?$vk`1
zLdocYU+%DU?%Z{A3cK!jqwmGG^>Amyck4?f#$i&i$)dWh@e`8!+uydX&dlwyCpEV5
z@`Sn3qK=CK#SU!s+`)*HWxc@$ExXv)`CQJNlZDTcImN`5(ZwCJZ7c>35aS#2&8RQ1
zHeatErO*0TQzxOE@zUGn+@0={8;UlQ)z50$m&L3uiqPohRUG290{HsLsN>fXhslQ?
z@YoM8X~5e8t#jX!bLX$C$otqXJ#te`elEydBE+MpQwsC!qS@8DFcXmPhL^i1FNoEe
zNY%PrnPN7$cJGA~r-l?~`hjFx@Q6Ioe_DunR)t3BQ~E={d3a(Ll|21M7FqSNc_ZPv
z$_by39qB4Zh;sQ-98M=5-P{b0p7hd7B7VWv4t_G?9jadL7P%mGr_%^t&uw5s;XJYq
z)t*^ii7+@LaqWTGA5o@6_KUUDb62FBZc+Vuas0VMe<1s0eci^DCTm}B*A&&t+PqL(
zW{oTHdsA67_wci(%<n~u#QI`L=`cKZUR$`T)yieZNd}fRYv((u?%0uwsVv(X8YZ9j
z74vUR_Lg;#i=1B8If;n!q}g4`vrV7k4fEfaocT(WoY5<*BoE!OGZ}oOmh2`to5RIj
zSv!V1S~|z5R@adlKmU~VE#!FG;0kD8*rrG)*1tIFFN$R_=${PtlVbm5R?ryhMgklG
z2BTU6&UEG_Q0NQ_1wa#w8y2NQ@CO_f=z9pb3IInaIRA}=86^6XH0fNAw!)ABN09#7
z&@*8C$<7c^pH9Glh6bn)1E7o`KPVj3^Cufvkr|JTh6W=XJtdAnwDLire;^>q(m)4L
z`f2`O(q}-7e2(B5U0od%3F8JJ7*@_dpAp9D=N+{fyp3lh0g)I#EE>ZI3YyZN2JkO}
zhbgHd{!fBuoaq<AXWY1CZ8gMCrPV9fo2ML(yVmXHX~}GOsNOJH{seUYVWVePnZyxD
zYagUVa{fM;^murlZgQ!3+~qwMrSp3Qk^R0tyh1n5xD^YIJqcLqOdAtj4fV>Zr@ix7
z3K^(ny1Es``uwcj{_ul2cDxg_EwPF59<J<r4t{20!7+{dl$Y!9ujw=2W)6OnfV<7J
zCQx6?0jzJH=sTUz$h4bvt;Sv)%4B{kVfO3VkdvT5n~J7wx@47E+pWiNlP(3LE^+%v
zm`y6_>q*Dp@3%3Ws>`rw4Z(I&*Cgii^oZ0{qAGVoLb9u%!0f)Y8d$?|bQJkajMfXn
zbg5vlDS}BlWki04WaWmFL#If?BITw6n8pp;&VTL&?K)^Y?VK%L9S-u`5(Tk>$DOQV
z5>t8NCulU2+4z<Y1rFuJ-zK4NZVTEAPY{{hX<PQZ0p5aT;_7cyR8<tb<-8lD`@@{K
z(o$CaZn#8;Eeaw<*P6cDhTcBJ^e7)wIkZ9b#_};a&+jp>+opE9f=qE`4HS+LUy<5i
z7t7|!lOD1UMn;^B_hfm=fFG7NvN*zY&j-^i8{RFwNBJ+Os-%#ix-QCi_rQE)^UJPD
zpY?+?S=2RA_8p1{Ki8H-Lo9er580l77C3vO((xrvEbDEZ7_GIeJ8PUxJ9md_Zd@5&
z<LbU)$9zxhs8*;5lk^ELV1JOCZ1+;gffoiW>z(mEw{K-^(js?4Y^%@iyyTY%OF|nU
zVb$CgU7zl3IWWTkCOs{dIW3_NEIiNlK7Phk&BQs*!mKSFyVuthIv&Y;bq^8{;0>?L
zK5{Lx*j1{Kjgrj;k3#qyAxEIc;VSI%5$&#fNA_4MaAbzde1>#r9ge>I@-Uwzl?>><
z6fV1LcU5Sbg;kf!T=?2PNnIU7W<IU?Y|BF7qL{fH_YaOAq{&BXnGWezWm|Z1Z}0`O
z|E91%ztQ0l7ml^u)l6kyQnWy0+h>@HsC;DAv+F)A!G{;4&6;kGbJ;;oM-*^{ALdfa
zjaT5}f3B~S>@TsOOYFf_?z_xbt>LR8cSURB43i69I`XpV=|RoUUOVtgl<)Fn?u{Iq
z97JA`1-p4h;i>*}{n8g9-?&Wly>dHyx;`+GPpS!K#LMg3W~X%@`%wOYYei*)k1Z+(
zod8Wb5_VSjIA1w)xnQ|aUy|x&!50ar{8GC=)CPp-;|{eDA6>vfA47r;2XP0nswag+
z{WeMVd8~5YNy9W&w)xz+gO8Mttq)JN;1llA!%GRAEz6(VzhKt%gE>R@oleRUljcd-
z|1jE6=LAYFC#73o#kktoUt5CvTdZlbPi}a1jNKUxq=uQWWP_xIp}Pb^CPG5YTWmop
zL;Rh1J&@X_@5V{gus=JhgFl`nHYWAdBKw5hiM`UFd2M9-rO79pEd29hZQ<4{)=q!i
zDzVG)NBSCmxzFv=W_YEz*}~zTbspLJV@z)a?A(yPs<o<aKshn~E9-j$-jgk{le!ZY
zanhU89@1#*=pr+WRZTyoUw_ViPUVoUVm@oPCH7tg!858thA%EI!>Gb2(<t$kFQ3e@
z9HAbT4<A=x8>kJ+hWa_Vb7ton=8nHcyp}C0c|B7YQ26DwQUCC3{r=JWiv1z|ef?GU
zFWn#O)56}uMte<P{M4iJ;0u|<JE>iGF+D|<Iy*8Dt<(ws4Q}hX*eBebKF~YxweMAL
zQLkFwH|w;A3pp7@^}3tU-|upqYc(;*F4vvC+l7vSG^HkY6}qiJ^T|q-ZHi>=x7w&$
z#lRPV7J-)o7XnvT_HUfrme@YFQNLlurp6}5#>!UAR(0r0%*iI+Cg+&#n3BYV#0v?C
zBNRbRn2jQbLYFdLsliJJ;j3)#-s?*2<VC-NnJB*V4jej&C|5RyK7b0rsGY|;SZC@w
zsdC-?ru;hbZ1F;8x%*OYH{mcz4!v?watGw<PG;E)+O;@(m(NvgczP2u@b5fBT4OHo
zcv^Y(p_d;N-<BJE{Nnqt(^sdlng_Ffv=F=pzP0Kt`chx!MbQgS8yg09qN4JmUgmV7
zdV8KhdrAksu*JWCpuOEQF6pQTs22OF&8!RzHQIj)ezPthkZ*j)IKDkE;KlOV^3mDE
znz<UH)4cON{J|<@RXyJFlJljcpr}n2&JfN_gVfgOg8Lptv)VKdmjM^?5{{BVoX2;g
z?=pnux3s$Epl<3U&wHx{(wHqxd8q33rB}33!sRiUu!^v&yNF$(Jq~+D!e_$oYqdVR
z9ucB_H=9-awKivDXQW=ErkIbR>&x#~>y{7A9+<s!IkG#vdm(4_c|y~T%ORpk`#BGk
z><>GF;;}f~^EABHwJq`nYl(Qhc)R!#@;35JZeSi?9#3vs-uYJl)@!YX=;8~uFI?jK
zVtJY=t>~$|skF%gfsK<RH9i`Rr*c##HpHj&0^;RQ4%nqPtgUgkNTy2~aQ9q+UXi=9
zq?(h?*0s_bey`HFpc=O*zsQrxPGBdbEWH9}%_M4$UzgG}@-rFD)VMvFX_@@h`We6V
zfy{!9f`NMtrUy*kW__2@JYKKKax?jsueBjp{{Br^rQ$M-THa9}SAMrW{pNxbLK4EQ
zgwY*}FRN>T57GL=n~`NMusc&XtWuYcE32&<W)d?M2(C3lUrpMjzJ@G6b%}6jbnbD?
zc6{{Mm3(jZ)yBE*qI(YlZRbnm9W)zW5IQ3&qbspqVbT@yC&Azvd3R^CWSgElxuxGi
z=N;qu;`CaS5!JZDx3R<0Z&a$Wi}^(+qQ?G`0>;J0vPNTs8GOUq^-kOC=R<~5Dlm^a
za&R=84x5RA`L`og$E0oLX`tv^k+OOcJKCTv>8AS4PfyHIwiZ?KkBrNV9R!_*oH6E=
zm2)8!qi=?ZwkAQv?hE<DgLMIQC7-^$>9Z*~XX~(vFL7P!41BuL#g-Qn7ji2^byEbP
zulzl%bECHAOy2#9k4>Qyp&uh+SwcFKIy>d=$YF|o9^9pTb$#C0KIR|R_wEJm0imGk
zrU)}tf+`Z0wzlxCNt+t<`=g|a5RWxyZ}!T(yF7DFqpGj3_ew4#&5N8m>R(aSdaqzA
zsC{Wwv`^E9<n-QuX#evoxjb2>Gg;}Uu2bedew^+@*Bb{erC-q`6@yO*pK;wo%d^3>
zjE}3IP6v#B+bTWKC6MIe=KQI4+UGv$S!nhY`=+QG+c7qEsxY+{RCit!%v27ZjN824
zXIo#AS;z0Q(ED;Ld1BsTkT7akEmq%FxBkIqqj<K8gr|+zlLSJlrZao%<2LWC3WdVA
zOP%Kc%~NxlYM)&jqtwwOrBbIcyDOnjK8UTOTB5s<%fG!fXgwYx1)g?U^WQo%kb1kI
z<+SXs(`Lu?(50vTTQCJab)_Arm94&#wDW~e-|Y4-VwT8nR<>;gOWa*<KCuWRjmyn@
z@7CSA#G_cVRJtU4`c~+N^&=}^77G&0vO-x_qn9`T)oQE$YC|D^sTq2Bx^RHS)8!0C
z+xp*?Ef%khCt-gA5D<xVcgN5r42DMG2*8y<Z!Dfd0#p&8FNHwH;4toF2ow$n-3WBm
zg+#&vpa&7<NAGb_XbPj%MPrF*inlus;}3vjEY6J%d85!o0v-S_5HSoxx+e;armG?l
z&<#sZMZ%H*P##YpyJ6e`koe<}XnN<5!=e6$ds6ToC?drhhoX=Hkl;bUW4!3uP(LD*
z7@fQG-{hPhz5dTe{7*gS$0Pt+Q7({w$U6*>K4O@nyy-`xPX|Bh@~4d<2Iq$%W6>xG
z__HnmqoRM8fBcCUcZdRAqJb#<`T&HIlA;pe4*bGkbdihk0r+1S6hWVZ{uhHP!RhkT
zKQIK0K5YC0Q&yzUNdLr?>0{0RVhS+mzv3ytRR4v+;fR0bfGflPW=AHXus95nuF(OY
w6*dq<zb^ou!MhXa(;1^4pe2Do2Ivld)`mnz5y?L)16P5nLL?>i%=97u0TT1?5&!@I

diff --git a/testWAS/results/forest-continuous.pdf b/testWAS/results/forest-continuous.pdf
deleted file mode 100644
index 301fd687901b79ef8ea88b4e83208c43fa93ca26..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4309
zcmZ`-c|4SB8@7Z@WtW^RFChxA84QZDOvsWoJDJ8iOwFQMgp56ee6)zNuPw)skS$8K
z%D$vXDJ?{HAxnMlIMwOb`M&qR=X&n#x!>#g{qF0Lx5gN&AXPOG@?j%kZ^Jsndfli9
zB%lURj-Np2>LNfRDuksIC}a$l4grdoeJC}gmKstWiBwZZYicPWKue0(|J}1E(21~@
zEnq^$`uY%XG{A~M_hrywH)|@zi-CivtX*|gbyXycZRDb`ASJ|s0|<!h#i|1Jvl;-j
z5AcNm&<IP%5-HvQXpQxTXn;CwU<>@?p!F{YmL1EQ3X$pG%|RmynGP>$fcAzvXbaIO
z3@Q$er}0}nmcfs9SU_ZNx(}ePg#<uj0+9|M05m4Ty}>{@iWkHxn?{FQMnd?fUADEf
zdZ;f@J6)WzOG3gAU-M$Je#>U#9EHJGp+-hFGS<5U)>o&t-&OI6k;0=XbFbPuc<Og-
zc9@aLNc*hb{X%be5nCrLlz*vfu_iz%aF^fOC$2{=S*Lg`1Z}rs;>`^1hINdQ{X-fy
z^>*B;#9#HRNi1qqyY)o!#5P;UR_az#CZCkMM*obwf_kF$=J1lz%i1D=0*ylM$894M
z?%yc6bRFjuw{V3osGpYJkK7}9`&l{?@Wg8%`V$fP%FTDNn}*bml#+qhQL~xOCf=og
zsU;aS9c`GJZAive9$11(MmFUi5X}8r<970F5~20u^zkDi3fzAhCVTAvn74XQd5*;1
zl(WUmaFmuhePvp{ZFp{u(VcF9<};!BX%}c})7)m?Hy}lD5>>MVi|3M|Hi#m`b7}o{
z&$2;vj=L#xJjPv{j2~#`mb>;3GF4?4D@av?lbzSURbu0M90)0I_E04y*ZtVntJen`
zwebIGiiHXDz4_m%!(ztYf9?l${$>nlPVk}ut^kWEwtzd#C<+5+2K-MgAuj^fkm3)x
zs=<3`4Q&9ag>wIeTr6_@AOp-{oE=05TtWC2!5JWbx;p|ihKU8}>Vn270LconK~3<G
zKi$NR&bmapx~y<;N+N}7=ZnQ{ARx`w#1PQ@Vg3)9Sr99qD|pz*$Pi0|yZ|)I%KgXn
zV6A?f(SSurGAjv)f&2(Kh!qq(0#hFRPF$3x4*GvGmsQhG=B6i~wznJD9^F~1Rcn1g
zAofz%NgrDd(`~h;NveC0yaHwq64_+Y7<*rgO;W*Tl;Z2qT%)8C+1Rt}HYF3A#4&xR
zd_{I9ANDE|AHEkb-F|IYdMWs1W^Lz7?`dX#HCy6Z1lL0gC*IJl*-j#FMw?>J$9a45
zuy37V<0P`Z_AM<n+`eqgk;)PDJf4^;yew8*%?Es0-P3z8zMkz{=A|lUStOfvO8n^D
z@_?JT*kf%y$29p0tH&vKH7q;S%sOP9!%+?wX>-c1L5rynzs@`=Qdhi{)-eLjyc^p6
z_PmZzU3`+KxY+3C<tkL20xp7nI7+{n@~%WY=m?rk@xqYm2b!H1Q3ZEF5f!fTHh}H5
z>EmNFJ)l!tr*JEOQ%9SNs*qe^jJSfEUDWxD!f|gpJ1s}!n%dO(w9fAuK|W6vcb0fV
zWy5!_Ig134#P7)Nf3B^gtwvHI)hYIcxUXHiu;iEQ5g9Tij*(t&Sab|d-Ntsi04g6?
zi6#+5+1w}Ct!vh!+dV<H*gJI$KISQ@>L3r>#t~)jy|;#j+|1cCPbz_9(<_;LAx6i7
zI&JG-PQS<c&tKHOKt~#RXp!-O1(?Q19V5P9wtmQrUY6$h%8=SFxF%N@1713WX)Uk-
zERxGz9|^~Br5Z-*FK1p|=5P3#H&B&)Zg5$!E7^%7U*-?}U@11mJ%Rx58Lz!v)66Z+
zCY)c|<GNE*(pNjfzcL*wExtb5E*Fx3Gr^!Lg={)LU0rkG&;ZyD>2r?j?;PV4ez588
z1Hno*{@0uw28uD8PI)3<hl?b#V*oLc(DJMum%@uYcVFjbWC?0SpnZ4H!*H)Pw0Ts+
zT0J)nvD>QgWrQltAlmc=BF{b&5Veh_14fS|?woZ>-1&}^%Sg~#;?ib$BSTXTQT>T5
z+v^fHqsFuGW3FS0^gpWEwi#7q+4u;phz9cPQsXVCceyP{<SO;F(%zg9DOUe@2Bj^n
z8eZ|>vM*<lz*MAF!<E;9PKbkHg@U01g8Opf)C9LbG&W4~m*W+bxs@oC$3f5^Oq9x#
zu8K8HDtzQB!hPrv(%RzEmM7ApXGe09vmLV0xi@WitkbU_=sVh{cmnZR(9-y1PJ4I9
z7#m%ApLlwls<C6%wJyoA(lNmW?G;h(h-_RuGGRxEg@l4=DMzVz>CWB+owMT2@z{bA
zr$6`gOH9OWYogvhPDI{CoDn!9bcSnx0yARQ2;KLt_AxizBQbj$kG^*C-R<k>D_kjl
zPw17vbUc64{7maCv}_#2AH4ZsLZ*zOa6Io{k*0=wuqxRXx{S5WE6x24<b*!Q9BK5;
z39XEBI;@M)wUUspleaO&%b}IR<YY)Ple^PpU&_`37atpY@l$o3EnXRt6*6Upci*?k
z+T*lmlj4ks!`?nc`W`nM|AH7t4f_Rqw?9*gowEHgr%Y#W2zfj<Jy+Cd<C1TmOSk_R
z)l-H#I$*40uVWNYN{yT2dT%14+!Qln^u{JuaaGY<5oaHH(+aYy>SOd7k2{ZRZ!=Oa
z;Oep^<d;!=BFdCRV`J0J%FHs%&ObRNswA1cvzv45;{s|0yTVx6e^lkD)zP|fg{SDJ
zdv6v${ct_t`s`D)zQL!)eXnk)_c8l=`zmgnzA@aZPe>s|o_u%WQ@8f5Svnsnp;clk
z?SfA9=um&8X1m5N4M(4;UWu->{+|B1-X}dbd-mmiw!ik*WOn+^TBFs-#XP>FEtV!(
zrADK99k?h&!^NbI>s|}U0=gz+ogrWSxjLd+J+L{@Ch%<FWZ=>Q?~3xe+`8mS?TQ`u
zK5iLqF76`kifyw|$_*k7?osPe#pmPCACE`xV2JCX9Mt*LI<&}|btetcr?i~$J)YEd
z5!@4$rTR-!;J{XNsg^nN7IG&lx?Qr3>qAX@v`W|ZBijw*xZ`$O2=!h}Z6HDkE<Gv{
zDqB=)lrx>hotj)prQ;PVJ|s#y`K1rDCF;1akDX61ZvIwLs>-vw&5MI>b8f>`w?_Rs
znPhKrOT`P^>E4VJ(#P-D*FF0h5s@45D7zio)BOP1UD7|x9oLM&k?`rK4fhA=6#4D@
zu+TqH@BAs~`4=&<0`sfpajm%l&GXCie~g~58n2?dO}OWiw^m>(YRR^z-A^Z+iCE?2
zXYyy5Tx^Lfyy1Ow)S%Pbqu)cen6LO5(R<NsQHj#{qO+#)Ojq=X@O!&S+OT7%)<DJ6
z(@#2IQO*u4g_MOPenWrT$?n2F6#60bhJMR~%VA7|yeuw*rw07t?cs;&^<;caJs&M5
z*354k-7<RmY<O2_*JSq6!}x~evrOp(Uj8k`ydgWV!Zrfk_d}~aABQJ%70cGjw#rUp
zQZa{f0&_)kg>$au9&7P$xzu8cD?0Aj>=D}=BizVn!M)9Wdu^mpY(;sf%2&7kK(_Xq
z71_6k0^(Ga`<>G2mY0Q^<kRF$gu2fm&#9c7*2zxe?pWvv%`Z1EtRzmUP6=o5P<SX8
zrk{YLR&rGemv`%#`B}co&`ll5uuYn?f3V$tOGe>JVSj$zku8=lG8dKf6l(Q2uOy|M
zvNshh;9W(PtIwmNOWR6gOY>UOu1vb2<q<-fkkLTgoth?%fzH0rM$8=#)YZ4ib{FRr
zwDv8TW>7QKD4taVbC#{U=a}>NJ;Ge--Md}0TyNj?r00)5SvlHuGyhhg<3zEli(Xwb
zr9G@XvYc=-M6pa&84RjY#k*T2IdosmDft{c;Tk6zd#FhZT}izDB4$wWxppOCs^F%j
zw7I{$n0b-;UbA7!2Xfu=<#xx*$C!f`%AnhA*~CtVHitL;6EB7;Bo!T1J3;9eQg>=;
zJUElb30L-C`E<`3>u6IEcia4qxr?~lfIDPeUOvubn0+=q?`V0Z2tQdc_^c+NruftB
z^InI-qmC|1<YLe1_Q3lq9o)H5vCI^v&Z-pJSZgt)eWkkUaPEz=j}5_Zf<K1Ea5CEy
z+S^sGsz61)xAGWso)7C=hy6o(Up5nOQ3@-rNO45VMN5TTTb}&fU=V%gKerRgnBL3o
zB%bozJmGP-S2|BG_sCBsOh_I0!@sPeCBN|Pnbzqg>0Uhtn%jH-0p5q_a)dLFe8@~Y
zaG5dw@#DK*T&;QFblN#RS`m1UGDGYdm>&)5O#is_>0rRC&ub-HI>Zt@yxc!kzw^C8
zdk~!UmS<IZAGaj;{%DElYH+{%lz4_#&`9j+<zC0y;*6T@zLPzV){@>#*gT`WGOd)U
zeO&Wp%weTyw1P(N9CD_KF)Q9>^wh?#UR~N5tg&9=J`U&|7}wi3<5?fEA2(F8`yljf
zA^6^y%ol7^WCvz`*9(&t1?Fz>pvSWR+Ts3-sfA4k_kMF*ZF@Ix`o8}fN=<aX=2y3c
zwcg@u$FARh{;g*Unx;QrSa%dJ#(P}3XA?qutujISR+DmCSiNexWP0zxl;E*1I~HcA
z3gfLZgE^NX=U4wVYU}(QLlNIq+95KmhzMj@rm@D>e^!nJvH_V!_yHh53;~aaU}?tE
zUao+KCP*TX88ko#4W439=nxUY(-BAw4bY1Mt4SJ-0D#_9tRFn%VsQ-CsEZ>|aSRfk
z2>Amboj~-0ArclxrH}#eI2B?U!k$<h4pw^z(2D@4q7i5Ss7j{Ly&yaQQa6gk!IM9c
zi2VikVUWGCR0fHNWzYeT;!Pn#C*f?^jmR|C<nI29Ox>9Ee+=S(>(mV;0NOE*(>G)*
z3xp-c5iAKV304OiefitQ6e9XTbOH{G0DtraV0H9YS?f=Q@CY>kiA1RV`~bA3rn)A8
z2YzBGSiG}7fcz6fqG28M8>XcW3z^?AZ6vIRe#3N7a2da2Xjqc}hM|!E1Jn5z#(FdO
z-B(=)`AaQyDwaTmsPNkZ0NN1(A^5xic$kc*!0L?E573rEp#!WpfsNkK=vXR!qca+6
NaA)M@4_O%_{tLj<Kkxtm

diff --git a/testWAS/results/forest-ordered-logistic.pdf b/testWAS/results/forest-ordered-logistic.pdf
deleted file mode 100644
index 1ad0d41a5402b72b1405e6ca6d2133abfa8f5b70..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4748
zcmZ`-c|4T+_ZOi?*_T}6Nr;Naj4}4YO!l4Z+cXA~F*D5)$-eJ5lv38n*7a2(Ta+Zp
zGPV?1if%-qEQ!?jnYMes_x^tK&%B=Z=X}mNpYwX1_v^fmoHfQ!39gKU$wf{@PDe5$
z2i>SJIG_TMPn?A5=)gcU6_2G8$RrGwjt3MlS_l=mx(Zws4p&h{si`Z%Knt?>|9fUn
zpcA26wtx{8OYtM%Xn+-iPGQiYYt~e<Hv@;KvQAZ%Rh8io_AM5f1u4Rg9D(6U-mEMT
zzp?>9`!f_g0HU#UERpOBfYw-FJPlBV3~YhFE~x+O0?UqNO~sSwKg>ZinM8*^X@JJJ
z>!2;3MrKfPkU!)<{ILvvmctxR@}>I$8gLZ=G$at|&;vk2B2*g;9!K`Zv(l!~p^^o_
zf^urJEUYBqyW$p?4^hXO>rHZ)XwbSWX`?W!d0Tu^fE@qx3gWddTcg?DQzJ5kg_LKJ
zfh!u86DE=pjFObb<9FkjE83&Gs@6Owq{`>F3iSOeU(v$1a6ycdiIX9Yag*g=_SBy8
zx{ByW%bzTDC|$IixZ7Hw?5O_y=^o{${<RHzEm6$N%1(OE2S+qYs^8-W8n*W*(pyTF
z!AxG=Jz)1A9=(~V*=bBgyQ`~RGoRiY1xqw<sJgi+R^E=b@m*=!`w%J3g;R4vXYcZ^
zI-O~`jpz{A_K9P8#)fa2T}GC8d75{}lbU0CMGL|LX}*dH6`TUQG=wovqKrjrgfLHX
zZ;Tr3$n|sKU#UusDs$mabGW$OrWT0exX20p35J?0B~)lfj-rLr)RcBe1T_U-J*i*M
z=BzPX0~l-vCH-Ca72n^Uhx|}3_eL(&5cHqzQqT!-;a5hB=Ao1l%367>u83k#Ua_SX
zf%hdiOoB#hfW9ap=xoRD1rW$1BeVS_VKC|u?;lGwZ!EWKEt{}>`}n3nac>qc)JHek
z)xf9Y2I54-qy|lQ|Gs6c(i6}0;Jc00M>hO+CuPlyYV<+%(QLhJ;4pqM5kJq_sT3eS
zaaty%b4=--sJiO7a%bAgf$jj$jJ?hANE)vgo++@>t>u<6j-Bc`bbmbKt+LOO29IaP
z_;afEW8FI0rqX$Z4(IiZxBB}p9d2urnb|9{qtdn^Nnzu}Dxqoo`lecHnS*Ee<Yza}
z@*Zb@AzqhQAPHS|abtg_!9Cks&!|0>?4h3$EG{0pD`>KuHoCf_@#vWG(4|B_oOw){
z=kVFfIjUJr;wN|2hiz@%rtgQJUp8y2FE-`2mijn3)c+NfOeYzS78Taq##Obgh7aky
zNlX~ic-wgTMF+_*sr*6lfhiN7zz~r?5$2DXEtD@|A<n@kZ>7rzq&MClqHj*c7L4Z3
zi-9i7tzIX-n-e^94x$31CMchR)?)44jYA4AoX&5W8ub+(Z*-L}!>cu}^~lMvmzbu&
zbIWk%4+cd^)N)Ovc@3XZ$eP3#cJ9T;R#2b+=$RiK&uZxJ4fnI-zc)H)M)0Nqt^ljS
zZ2@<P2*?bmq2T~%f%hh04ah-&s|s|6LTUgAHBI-waRRHSfA%m$Jvci&9dHF9@`EDa
zgXr!s&=BGSKt~5OL;z58|6mmGSP<RFj?Usf9UYcCC?t_gwWDBh-w=>yYh(bZ{WSlJ
zgIEwNo-1gAMjK#hcy9p3vU2~)YOJrHZ`5b;2#FO0#Nba8aCnwe&=O)Q@CV;OuR;Bv
zgu}||7vbb)1=`z<h{iLU)tjv|_><1}`TN<j8}Dv5PFFq%-^p)!?;M*93S&>f*rb>2
zKq$P3C_<;-lu3%+W^;3XyEta(6h&Af%f!1<eDcnj#hxpZ(jUY8^P8FDzKfy5jcn&W
zM|0jcciI`TtI$dK<xG1*YO=2v_qJV2Y#c<k7nJI11JQLu_H6c$XDP&Np>?t5Mqc34
z=E1?EDXnZ@^UpUp%fQ*JFQv?USRZi{7wgo}b<B~gv+BHb8)?z2V%jU?9EEVmq<uK-
z8nTv+=hIw9#OR23(R!!wORvTbOs8rJwxpzciHptbSZ_eI$m62vCUJTX$**pThghQ6
z6f(w?U(@WoiAuN(1w@q6^cl7n#+{x^1E5nkQ>crty|>#%Sx~k#L0sO=E-p1wDETFm
zX)%-B-mSu`p1OYu{w!PES@I>7&4>BfS@=wV_$?XjXBwIsDgjCXEeb>7?w_w@d_0}y
z5fi>3j*(t(TXPJ{-pzKS1YbL{5g$MhVRN6~X5F+E-{S?cCEaRa@P?j}Y7FtPeK2*{
z_t5pRF*mbq`TmOF?BYf~Z#ddBglXF{zW66LXgO0OgAPY~sFQqxOE3=}^-fVf?RuRb
zzb?)Fg&`#>@L9Gc0X%;U(^X;)m}k|xJ`zgc%r=PAThG6|&e!&(c%&gKadcgvFUyJj
z+P>fP!lc*~4hjG}&w3x~TMXrSXvFcUC%HfSQtl=*>Px6&z4@0%qO##>I3o<AUeKoZ
z?d8ud>_~v^m>$QRp2RGN(7o-q?+Mhi@x9<+*H=i`e##5}B1-t&HVhyp98p`a=X_M9
z*MTxFMu7k_8b#SdkHoz|YH%w@c6n_d+h(i6n-`(D1nbu0kBNQ6FJc={2hfisZ^b&D
zlX%6!i59SyJikK@ZD7nUqBmb)TP9f%H&^I0>pH7I|E-a2H@dFC#!qlVB$#`@%FdEj
zmm2~^&T20!jU8z*Vy&G^2n}iFsJeR>C>$aD3o%x07hecC!H!0j3PkV=XkATK5fHs^
zXpkNxyHjA_^>c#7>;%2hb5h094N1o7rH@>NxsDx!TbrNfc`PjwJ9Ray(4i1jRAIwy
zom+Nf==hMrN!Y4@g`xk|p8nohHu_;L@!Vu(L&t(EeS2rCX9ZR?Hbl6h3vns%v_0YG
zlJX+e?A7Ac5`$@)vEmO?uq8L0{?HnhoKM=_PQ7u02)_+G%YRnzET?u_X!QOmI_0*8
zr<;yt!l4JpU${^XP#h^j_2PE~C-@gr_}Z73y58g04MX_Cb{tL1-=`pyvhzPN#s&wm
zN`)DHh8kw|W<mP0f~yIZ4=7h7>f@YDbTB$rl5#C_HpV`(D8)$GeF6L49mtg#muUtv
zI}N@0s5;K(69y#t{C$%L?%EU_bUL_QVM*BG(2xTCpqoulNrEHNe#PGHk4sffg+Z88
z#_!7oJvxmOD<9anT(d8t+rNn$s6iYbG1RozM4zdqCV$}k(@6Mmd%_g@rA?B;rh=~m
z&OWBX3UAji#27N1bDq=KjaDt;?6W0Yt0DVE*C>i4CFPpdnC6+LK0YO)xVKQEpJVpT
z3StAh!Pw9~u5{e$c*~sp6V#JK6;)4Omz^nl|HO1?^oilnM7iov=+NL$U3p;n<e(nm
z5+TO_)ycR08rR>`c>~hABo}frG~;K+hGW!vko%F2ehY(=eL2Ge!yg784^#|j6|LG|
z`Omw;+=^!OX3SbK@9_=`qk?MmOmQzR4%U{L-dpCq0xzMfF}4_TjjN5(jjF*9gKdIi
zgWm;zT-mvCcuRI`??&^69hVl@J}yqKO0K%y@8b@)3Aee&ZN*ikrlg)oLG59P>mnRf
zc~yGVNop<r2B=f&&OQTP)E;5nV}yn3ctG&TE>yL;8T>k20ukS{x0~~IQ%}57pQxp%
zK{8jegt_2gW_BA9pXM^46s^Rg)O0xCS=_1JHK2N~Zo@BtoJ$(_3+;$IA>?Q0H;7xl
zUYV`*^!CHGQMV6nlMUBrPBTMEzNC)2=eWSZypz%=?zXl({SqBr6#b~M2RqPz58i)s
z_&rzhLl`c=CpXYQ`;2DgX|2~Q!y~QEZ$qAa5)&&iyKI)+RdnXz^7`^`GpP-84OF*z
z_iLnGb(p$ll5L=SVA|Q}O%A?LzC5GMj+oMN--;Q1rmx4ahfEc3)l;JHn(3M%`N4B$
z(}S~p@l!&7+P$MqIx^Kq>YfBXW=@b}Cl$kM!q0t0eU;eevTZElbws&d$Gr=Yq58!I
zocd4n`J#HFj<xFUqZoTVT07UYynBXcCNMUtFQV^V;m7+aZCSCQ(rG*Sc&c`W@4*V$
z@b}-1X!Pog%Hph&X_o1dS;S;xOs)nOi4+N4y;9`a5!7+M!x&e2!ttR;(qMwn14ajK
zx@h{!RH@j;;jspaPV13Ejh7oT)5p#vD<2+q%4u0&7i^cykuwtPPlP8bB`#_f=5Y0{
z3`AV3H7l(rE+{Vu<#Cg_$r+1}!5J&r2KfsIbWKlNOyucgPvzOBf3UwNYR{8bx=}iO
zt;LeZ;(7j>qON?iF2}|6OQ-CO1xj{qB5GBa5%JaC)k)RGT{#!uxuN7>f@*m5Nb;?w
zcH{_iDB=O;mIvbUbe3J_vb?(1N8>zdo(kEkVdR5F*MSeA%Xd8@U0U7yT?<@q-1eeh
zn|Zu(yszTg^<c;ODrFblmWSk?$l91%f`7O|jq+hIq(Rxo-74Lo|MJzFt6}r5$s$R|
z+SO6@#IolJqYBS7>In-a6&BKFL2_bdm1c)bC&{l#E$bJ094~l=j%L*0Z*&(DnGW3!
zFNf!!kJar}a8zc3($A%CHPg6pMxALFwJ*NCV~usRsY||LcFW8~+-<}iZ(UnE7s@bQ
zHBNQ3I9uuSu4MFS)0w8Kx9^_~I+PxFboofC@>=W(zPr)ORTP&LdMQ+MQwn9Mz82oI
z(b!;8R9^F@E$n63o5%!?(4MrO9;M4l_)5z4V#Wur`>kD*LE(er4~f^wrF9pj*yCm6
zrNXbQzgun7k3akSjkKCj-*xu@?%JYap*gn+%_kQI<ld#tOC9+wsHU#tTIuxJuEmei
zgSrkhw?BhMcHU3CDwJ>eIzQ*g1;*T)H?Ib9&1S)iIf=ToO7ISOiP$%?JQKpqee?0{
z(K8dPpKtQ?ilupYyT5IGMJcD<3oDrB-jvqj+RLRKFB#tmYP&Cp=c$KGC2d|9bZoB5
zYZ9fr8+i0N{pGyPQ}Tpy{l4bTrcbjD8<jJ4G!k>nnI;xm_bP9oIcf9qM~N`x)=l?0
zK=;U;uGW%QYqU0Q?B;=^_^&HrcV_o}!nVisVwU$mH|mfNJpdl{SP%MaGMt%R+J5xV
zSGUdXS0jOUgFYivM6}huxUGC1th(Y^cK6xWfd%{`{n^Tvqj;5%$HhA~;j|Y@^8sI*
zE(HpyHZ0y;JaqI@*zBi0EAJOdQ>^mCI6lTKZ~kl4*8DYw!hVSBV<bq(5=fA`W{s_X
zeK`_H`Xn0RCjbL61Rozfq^Mbv*%d&lfdK>(g9d1#z*7t|9Z$sj&|z>S67(iRO8^>;
z0D!(!>}hDm#o`#OQ5Q#`;urxwM0^kc(g{Rw2nxXBsALiVo}l7chR{tc4hM;J80bxa
zLeU5`08}QC>E3uB0Hl6P5(iEGL?ZTYxF3V$i={FGh*$<40Li{&65bz*hW+N5#+uyS
z|F&KDHtYW!#J{f>z9|9Fj&XwiZQH;CA&Fs$4S-UD)WNs9e77;i6Hnvm1RNFy{;UhY
zs_5Uh5kXYE4@?Ds!(l4FegH~MO;ru>0e)c!Xy?NE0i<6T90lp1|Ha^N^?zdsHAqYR
z!%h_iEiL|mAyxhz0|AG&EB|Y!g3$aIhE##%$3N_ls%n4Bg-*p1h<GZrG6Fz5LNFeB
rF94d5e8`YGW7Pw+C6nm@YnAh@HZ(exO8-_FBvMTUCMS2y$`JNnH6$Be

diff --git a/testWAS/results/modelfit-log-1-3.txt b/testWAS/results/modelfit-log-1-3.txt
deleted file mode 100644
index 367b2a8..0000000
--- a/testWAS/results/modelfit-log-1-3.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-[1] "--------------"
-[1] "1"
-[1] "--------------"
-[1] "3"
-[1] "--------------"
-[1] "5"
-[1] "--------------"
-[1] "21"
-[1] "--------------"
-[1] "23"
-[1] "--------------"
-[1] "24"
diff --git a/testWAS/results/modelfit-log-2-3.txt b/testWAS/results/modelfit-log-2-3.txt
deleted file mode 100644
index 8556cbd..0000000
--- a/testWAS/results/modelfit-log-2-3.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-[1] "--------------"
-[1] "25"
-[1] "--------------"
-[1] "26"
-[1] "--------------"
-[1] "27"
-[1] "--------------"
-[1] "34"
-[1] "--------------"
-[1] "36"
-[1] "--------------"
-[1] "38"
-# weights:  24 (15 variable)
-initial  value 1039.720771 
-iter  10 value 681.237234
-iter  20 value 640.982533
-final  value 640.972945 
-converged
-# weights:  20 (12 variable)
-initial  value 1039.720771 
-iter  10 value 929.343312
-final  value 928.584224 
-converged
-[1] "--------------"
-[1] "39"
-# weights:  30 (20 variable)
-initial  value 1609.437912 
-iter  10 value 985.695843
-iter  20 value 754.544728
-iter  30 value 672.672425
-iter  40 value 648.291452
-iter  50 value 645.548335
-iter  60 value 645.331365
-iter  70 value 644.462326
-iter  80 value 640.987036
-final  value 640.974782 
-converged
-# weights:  25 (16 variable)
-initial  value 1609.437912 
-iter  10 value 1480.736936
-iter  20 value 1478.821800
-final  value 1478.820650 
-converged
diff --git a/testWAS/results/modelfit-log-3-3.txt b/testWAS/results/modelfit-log-3-3.txt
deleted file mode 100644
index 8fcf9b4..0000000
--- a/testWAS/results/modelfit-log-3-3.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-[1] "--------------"
-[1] "41#A1"
-[1] "--------------"
-[1] "41#A2"
-[1] "--------------"
-[1] "41#A3"
-[1] "--------------"
-[1] "42#A1"
-[1] "--------------"
-[1] "42#A2"
-[1] "--------------"
-[1] "42#A3"
-[1] "--------------"
-[1] "43#A1"
-[1] "--------------"
-[1] "43#A2"
-[1] "--------------"
-[1] "43#A3"
-[1] "--------------"
-[1] "44#A1"
-[1] "--------------"
-[1] "44#A2"
-[1] "--------------"
-[1] "44#A3"
diff --git a/testWAS/results/modelfit-log-all.txt b/testWAS/results/modelfit-log-all.txt
index a23f31a..43a0e86 100644
--- a/testWAS/results/modelfit-log-all.txt
+++ b/testWAS/results/modelfit-log-all.txt
@@ -54,12 +54,6 @@ iter  20 value 1478.821800
 final  value 1478.820650 
 converged
 [1] "--------------"
-[1] "41#A1"
-[1] "--------------"
-[1] "41#A2"
-[1] "--------------"
-[1] "41#A3"
-[1] "--------------"
 [1] "42#A1"
 [1] "--------------"
 [1] "42#A2"
diff --git a/testWAS/results/qqplot.pdf b/testWAS/results/qqplot.pdf
deleted file mode 100644
index 209a76f011d0886e31a67cbce6bea5d510caf67a..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4916
zcmb7Ic{r5&`xfCCsSr}+P1Y>4VC)i;HOZ2lj4_yunQ0bVc0x#@M0P@vkS#=HOJeLL
zDyghliV&*bJ34jF_nhDNkMDb3bIpA2_u1ZOuJ^v4`w>5l(vySBD?-FWCqmza(nALv
z$PhRH1Bfo?AQ~DFs4f|crs9YM6q<?!q)=)I7+e_!SAfG|3P>eoX$aJW==xvpS>mX8
z(90aqC!<LoI1B|arBO*VD(H5aOmwATuw=$v1$hN|IEZcMA~GOph?W)vOK@dWf%shw
z0JZcZVF9Qvnu^8~-2v!nv^$mpD1Zj$z+Vo^|8ij1F|5g00`*UGs4kH}1&<U!W!oKU
zj-?Q3WDFQj@gMOR2EW>I5=(HWdH^a&1OU~;;i=#QK=troZ%|ka(G|-mn?eO!<^}Oh
z&$l)q@~iG@q?az0YmT?aFVk3?jjnr(SC>|@kB1l?FS6s8D*iBqF_lR2D=wlw&KdZ$
zxo#*lTCTZsN}I%Ww($89pU!c~s~RyOSDjs!F2P^{^<lx|%T({RH4*yTwl>ssI!le8
z)5CR7atia-u?9c(-C8GiPHp&TCP+7BB}GKv<y?N)C1*h0FB-izePpbut;j5R=D>9`
zCFYIn0}&tHxO8kk6eXCYlM&{y%O_UDgxt796x?RLeKlcUON34GM*0-%gta7O$L5y|
zdqG4?Rrbzk-+O{SZSq4#og5(@x9D>tt8gl6<|7}Dx?5x@BZ|1U%r%hmrOiU;Qq$d>
zXwrj=Nof`;4)mgdu9nj2bPmHGX02D*rkm4bQub?d^)@J|v3|L<tG9D(r`REj5!1p6
zIkV2kT`Vit%BL4HO<tN_*^&2n@+(Z+njV!)z2XNfh-oY?tn4~cKbKP_boEHwk$ad1
z1H+jE-$U;nDKSjUMp2Jc*GIpme+*5R5yTvl7{DuIRyR9sVQf!2<KCDII}CYdvh-`;
z7pu;7dmrUeBAlnZQ@M9<rQ12|y?H%Or3%O5qjrHBk;VI_*wDAJx$pU$TDhFk&s6i3
zx^&pyudR9;`{I1*NTO?-!1-|f=4Zknd?mgj^WMA3jn_;I>huFmBQp0A`b6uQzL;Sh
zjW{2P)nDNkx+tsRX(J-e%b6%t?#HgKaQ~P~*T9Hu&&~E^VV^SN`Kg26L29FO5jnX2
zk;)EFB=1GvR;LClcgcqQ9*M}Km(HXkqBakmOi3FwGw&cdy*)8D{<0=)&X-TUhsVp+
zb6u3MWPW$L6g)4qIAk1bj21o{Kd{t2Q+;oAeQu=vvQCPqD^a2wec{Rc6>`bh4mEtl
zj%tD&&8By?BCSUY*@biV>B%=y(<rNdZs;yO&iJnWFn4=mb72(s!<#42Y$$&VX@)BE
zh_BwI!k_lJZHzOhJ{QjF`SF^7=_A!zwV}tHlE)ey9D^QhFwqI$zo8c9J8#q!7A}sH
zzM~|kiw}6^-7B71{{DG0c##;NY#(rcQRa4r>7;w(^Of02HOcypmuF=-x_pd!kVB&x
z=VIAA_E0UIH{;4wtK}PItzbkg|00jrhRh)5`Op`wAUFK^p8d%Z3=;VHlKx_epX2~F
z#<@}edw@X-=71we9z+^Q0&oCof_25Ab%?%zJq*031XE;u9RJ263?BIP-h*_7vA|LR
zdniblU<Rx&)e!>K133iH&;Tg_4!{{v%)y@s0L~!LZAOBg@TKZoP#N3_nt|j1rpFV>
z79=!g8-Y^H^>qNHU*>;t7z1MDwuc((>gu2=SXThauyXvxij32*=RMBgAp#=_6ovK1
zVX%y#&{H7OLI30{gpw-q|0OI&O}_~%BiYB&VuU}6-k{uYI&p8zwO&sTb7ljs27?6o
zqi~MBMom|lM3E><63Q&0;17h<cu1aZLb+(n#T{nlb2|l5gXc;64kjDAmI%Ii=(pIN
z_C{na$TO>f{>ptZaJY`?>Q*@G<C8WVA-i&H_Pw2Ii@qG|?##Yp*Af#8o@ty^QK7@X
zuE(6p9PlCzpUS%~&``G<__}#?;8<KU(~qocwYH*grqd~LQ=ivI90UbARWz;B#cNDE
zQ>qnBdSFI9qPAfOt0c;2S^I$1RO}wrWkiIAU>Bul0=x8nO!D1jRi4JU1ZP2ksXx|h
z5sea<aH?UXc02KXxnRI4B$HI)nEWKg!WA!vNt8l_$-VPq8aL=XyVMW0c}C~$+SAta
z%ub$1tT0+o!oec)auRRsTRPojDz@zzY`5~|BNOl!se-mbZ^=w<^ex+ceqMt2Mb%%Z
zsH(uc<h&ZC27?{9(h}Falbs@h7X(ov>n*F+L8)9!Wd+!(k&P%X93PY8+>X=r-=eym
zp-eIN8)>@(&mXD_a58V5kad?S9vgEo-jU@g4V_-x$l4vOdp3Y>-uP<q1KM{vNhOgA
z*L6}RxcL{LTA%bxkiPDk%!*nUVgF7$#LvAY)))=Fb^_H^a1uC~TxI`+H<~q7CsKPo
z>*o5NmhZPlYLhR$T<7jhwqedcd_X(s5R=qVZh+&0t4!}=AZNQi%h&GMzSNYAO?ue(
zK<fu5zdzv@3y#O=qYw{x%z8fF+_Ga<1ei`}v&?EAoMz!|+F9Mi{eWrDI1BS}spy^O
zo#EqQ`>yUl0RsC%s<QWA3oCJ!yv;_-=2i?xlJ-+WG2@CV?DC;q&O66;n8S8whDa|#
zo@wumxcFo*pLrA&(0w9w|Dw&+gYQ{bb-7OqUHd~^SI2;vPkS!g{I*b0<ZO=HwEeUc
z^*|jHmu^kAnFr4XpFjH%7)L>~T^To?wZhp{<&XFXf#%L7go=oKSWVM)5=+3|g$UD@
z8{^zIkYk~R+#!3p)pBED-29LAbP{~UIJgfNU*);QjMIL3_0TPm+8Bd`!YB6o*iM{)
zpFVkw^Qj2m#fjYH9IG5;UXdC5>5SW2gJ%Y%&OyF#o9KDwcK7v6Gf`#L1T$jg^{lhg
zdWEMerny&CHu%`Wb1-r6`2E2rg(UbYm@5P;4i3btUKDJPLl>0W+*KPEnv3CTBbT}0
z;nk1}doS=@U{#L~3_mhKB~`1Ob<j8!Ez^2t+>RtkvL^995PZn<dhcS~p0?$su20x?
zy?{MIe;kX?IxNK-$8j&hK<6l0E+?^9PsR9wvF~v)o-fg-T1mMf4<c<0HBcI+LgJ0$
zW(ICzNa;|q!(NByB{M`{i8cU9oqDc&$QrgMU+WMgvJStId}Nk=)aK|;sil2ZGJ{go
zqYh@i1<}@umMfMHcT-Aja(q$e4L;rBaq2X<RMKi@mv5OzwVa9UuSA>~(Nncl)%B|&
z$9`u0pubPHEqX%ttyzrJrj)xB#xkPF6l+mCNE_6fwVhSr(p4y6?KQ{cR}wwKE2a5j
zVls>>jWUfcKRwSUEu3?(k7ash1+jtNplzt1kvn60rg2tc2stEER5~<y+wb<LA)~>U
zLwbX+?<fog4h{^~-0`{dW<VR4f{XBcf9_+SO7SP^Zm;+*p@sBB)u^el;RvN}#UqN=
z9t#6Pz3IdK!=DG9_80Z5<$bYCyEmVcQPiNj8L@h6_n8h8{p<?esarjmNJvXkLeFj2
z6?g$viS~^qUiYOgyiUQt-QUdrqW`@A+6u>p>^HG*!W#`67Hn#4huK)!O4w?+K1Is5
z>}zq1{1#byIqtGc9CANRP!nOLup8E+Oi*g{)IpwCwsq@wCU@_{JVlr&yz=rN*@di7
zHij3&4<e$vg`crb)^|tA_41$M*NJ6|J$RC5ASty4kBztMmkXEUl&hD`vK6#xv-hf)
zt=aJKB4!X?c?5Puy6}2fcnn~ci%U}Fo>#Z8zI6EP@TRtS%9|cYa3^%sjADESGS7*)
zJZf%y{yjWAFZ@YPH@d&C3Eo#e{E02L9fI+4%ka@r_fswLR-0TI9%;7y81Uk&fIxxq
zP2<?EJiqqk_2mOomuqKh$qsXl`Gj3HsG0_XxsRhy{DtsMmOX)cGWC->A`0)g7fl_f
zyE_d#iI(mzeU5iuHCmM>wvN*4TQBrRP4IrOn5Vq4rYn!s4Ea2zzb0OMBOP2BeDw$N
z$H5(TJH|pLL+)sIG+hr3Jbo*i_4v^7Jz?EpCz>@6lMI}ntX{2O=9=Q1^0^q+8`3+U
zv-UWyCHZ2YNIb_L&Qgxx{b*jZy?u{D>YO{nl37bd8$`QA7g4Dw!(9J7zC7OCw7jz&
zz8%*(3@{}w*6mI)1JS&#v<}R>ymx66g#sJ0W3?oWX005Rw;Q7GPWZ*j%MRP5H?FVq
zw27yS>+|$of?txmw5Xbs&epTiACg~XT=)RLAiuzy$xdV^CN4gOPMM0;N?e!JH1ali
zovD#Jk!har*|Le>k~6b#qi{IC@f4@YXx6H<rbL4#%Z-GT^OgqO1st1*Durc4RK>H3
zn2K9n={M#bkm3*?C9Lj9?EU&S#S!{oNGs~T6XNE(WQ(L_31zi4gG_QJjObiD^4X+I
z@^j$wBd1WiX2(AJZ2PimXKMb`(~UE|Mft`4)^nxucAAat#O~0lh$@_CuvDeIEHt22
z-p$c8!K&|OZuys>Ir~_?m=kTv$Orh_qtP#=UZ_04Eff@)h#32d3mBIe%NV^OP7)f|
zuXkHtKO6Wmu@YPMEC)}wdS>-@cy4s8Mp(*Po(>fmJ#@c;!j94JjK86N<Kx5AXlt{Y
z*fQh$#&&`ZBaYb9RaLWrG@~yDm#s}Ml(@|oynJ5oS6}+^(~AMC!ZX%(YlKqg#cuyc
z8$E1!kuiZOfvTH_kb26i!QC5mwT5|jDrZ`P-UiKtMzaKV$9H$j-IT+Ykcw~7K07~d
z?t0@JJn*U=Ura2lxp9a&N-XM7aN7F(mzLvE7p|1WR|dMTJ9@EK<=x_)b$G2hbiH4E
zK7Q_y)&bwjnvVR!cNe-A*F*+1ttbv3d`CDQU&`gpIyIS<u63O@J2Ufs0MlUXznFeW
zlTreGNL<4Ajx0|F&@*P%J|6RX{bj41vqvD_$<^^=-Fwm<N>fnwJN8WxH8x>3^(djJ
zI;gtif?%d{z(maE^#SXK(#(2((tQ7ut%SF8X3vSQ4IUhB=&b)bZM9J{RYM`r$80GA
zfi>?l`x|05Z>}8-Qv6o#I16ZM&1$MGIX8!^W5&uQk70kT1U;NS{1x36(Suq(GOFJp
z5hw{g=Ctm+WjLIaTG)0>=7+=Pv-cxDk9@ZfFg|sq?+z<l1EpzaZ$EnRqkjRrNPV&L
z&04V3&FRKNvtY`&+?>~s`V=2th1$jPMVVtMLDOINuY6i4j5Ey&Vp)q=-u&0tt@?Wm
zh5RYbPY^&Mj3a>RoH4fkb+X10juR-jUjPJ(!nwI&K}F7x-1dN?64VPvpiux-B=kIu
zNX6o@Zd3?dQ4#7&1eXXD3J!p}lhNMbjElz57^5x*N5;^+-0)am07}K-T|vkTjUf{W
z0MvzyWf+2<Xbc7v`4Ffp4opSCQ2?ksfk<`5x&ct~c99rR1>o`Mzu_J<f;*Z_^TMNP
zQ~*kJClaupU^eu2WC~+)cl_ISVtefWHHiOQPi(6Ks0Gc1y1lJnfS}wsh4unV0)@hM
zUw+!yp-FBiunZS8xQF;__}|tK|3gf07s2TD&zJ^Syf>DL!=NG1U%dtx|I*(#B)(*<
z8w3Wx5fIq#3qUF<DJTJMz;6rz${5B45PoBDB&f6g6N4iVpji0_rT_z%CjY<`6~Il-
zKQZLLd|?RqzwBTL)qi1%a3rX>{xhDUA{^9t|G<=Be|rup8I8kZ$>3@WfLh@Ev0x7X
ps3F0P2r9yD`DRWeQUNg8&wf#;Xfk!XTZ&4mDiCq;6Q+8Q{{#HiYfb<F

diff --git a/testWAS/results/results-combined.txt b/testWAS/results/results-combined.txt
deleted file mode 100644
index 4ec2f03..0000000
--- a/testWAS/results/results-combined.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-varName	varType	n	beta	lower	upper	pvalue	resType	description	isTraitOfInterest	Cat1_ID	Cat1_Title	Cat2_ID	Cat2_Title	Cat3_ID	Cat3_Title
-1	CONTINUOUS	1000	0.994704869763404	0.987619714556551	1.00179002497026	0	LINEAR	FIELD1	YES	100000	UK Biobank Assessment Centre	100006	Physical measures	100008	Anthropometry
-21	INTEGER	900	1.15654660441432	1.13814239309567	1.17495081573297	0	LINEAR	FIELD21		100000	UK Biobank Assessment Centre	100006	Physical measures	100008	Anthropometry
-39-4	CAT-SIN	300/1000	-999	-999	-999	0	MULTINOMIAL-LOGISTIC	FIELD39		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-36	CAT-SIN	889	4.26406070415127	3.97401193807006	4.55410947023248	1.45251953559895e-128	ORDERED-LOGISTIC	FIELD36		100000	UK Biobank Assessment Centre	100071	Verbal interview	100074	Medical conditions
-38-4	CAT-SIN	300/750	-999	-999	-999	2.36929803366859e-124	MULTINOMIAL-LOGISTIC	FIELD38		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-27	INTEGER	891	6.00635627030678	5.57811072838858	6.43460181222497	2.40837709920153e-120	ORDERED-LOGISTIC	FIELD27		100000	UK Biobank Assessment Centre	100071	Verbal interview	100074	Medical conditions
-25	INTEGER	900	4.47054036959362	3.98675684857394	4.95432389061329	1.07328655178677e-62	ORDERED-LOGISTIC	FIELD25		100000	UK Biobank Assessment Centre	100006	Physical measures	100008	Anthropometry
-42#A3	CAT-MUL	495/505(1000)	-2.53879885591662	-2.86719437561936	-2.23511475752686	5.60824611427478e-56	LOGISTIC-BINARY	FIELD42		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-34	CAT-SIN	300/600(900)	2.67342038082319	2.31959546662729	3.05528828132202	3.8648784862921e-46	LOGISTIC-BINARY	FIELD34		100000	UK Biobank Assessment Centre	100071	Verbal interview	100074	Medical conditions
-26	INTEGER	600/291(891)	2.21647829260412	1.92186361168347	2.53418345127812	8.11560488921865e-46	LOGISTIC-BINARY	FIELD26		100000	UK Biobank Assessment Centre	100071	Verbal interview	100074	Medical conditions
-24	INTEGER	760/131(891)	1.75433797759218	1.46468192507283	2.06597680366808	2.17988718368106e-30	LOGISTIC-BINARY	FIELD24		100000	UK Biobank Assessment Centre	100006	Physical measures	100008	Anthropometry
-23	INTEGER	800/100(900)	1.93633598767705	1.60360866435611	2.29882481880602	7.31196027630102e-28	LOGISTIC-BINARY	FIELD23		100000	UK Biobank Assessment Centre	100006	Physical measures	100008	Anthropometry
-3	CONTINUOUS	1000	0.661807286044053	0.534467956678656	0.789146615409451	3.01434968530619e-23	ORDERED-LOGISTIC	FIELD3		100000	UK Biobank Assessment Centre	100006	Physical measures	100008	Anthropometry
-5	CONTINUOUS	120/380(500)	3.18943911658698	2.46928754995082	3.99516617340677	2.32981040891531e-16	LOGISTIC-BINARY	FIELD5		100000	UK Biobank Assessment Centre	100006	Physical measures	100008	Anthropometry
-42#A2	CAT-MUL	965/35(1000)	-1.9048315094404	-2.41596033097747	-1.45191305152567	6.6036582021544e-15	LOGISTIC-BINARY	FIELD42		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-43#A2	CAT-MUL	500/35(535)	-1.34326623535089	-1.76166165073619	-0.959249118076131	3.96615578379219e-11	LOGISTIC-BINARY	FIELD43		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-41#A3	CAT-MUL	25/505(530)	2.60299104913144	1.92190675044312	3.49344841921067	4.01993700982098e-11	LOGISTIC-BINARY	FIELD41	A2	100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-44#A3	CAT-MUL	25/505(530)	2.60299104913144	1.92190675044312	3.49344841921067	4.01993700982098e-11	LOGISTIC-BINARY	FIELD44		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-41#A2	CAT-MUL	495/35(530)	-1.24250185838579	-1.62960042688341	-0.887102760424099	4.02529697948092e-11	LOGISTIC-BINARY	FIELD41	A2	100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-44#A2	CAT-MUL	495/35(530)	-1.24250185838579	-1.62960042688341	-0.887102760424099	4.02529697948092e-11	LOGISTIC-BINARY	FIELD44		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-43#A3	CAT-MUL	30/505(535)	1.08228383424423	0.702061178293818	1.48159863585603	4.2383532774057e-08	LOGISTIC-BINARY	FIELD43		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-42#A1	CAT-MUL	986/14(1000)	-0.742168551681468	-1.27958997966834	-0.21074132857573	0.00625348360881744	LOGISTIC-BINARY	FIELD42		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-43#A1	CAT-MUL	521/14(535)	-0.118660670448722	-0.660567582659025	0.404639992765588	0.672574033804116	LOGISTIC-BINARY	FIELD43		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-41#A1	CAT-MUL	516/14(530)	-0.0923557834369921	-0.604318268636898	0.415860123576197	0.731283609278491	LOGISTIC-BINARY	FIELD41	A2	100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
-44#A1	CAT-MUL	516/14(530)	-0.0923557834369921	-0.604318268636898	0.415860123576197	0.731283609278491	LOGISTIC-BINARY	FIELD44		100000	UK Biobank Assessment Centre	100071	Verbal interview	100076	Operations
diff --git a/testWAS/results/results-linear-1-2.txt b/testWAS/results/results-linear-1-2.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-linear-1-2.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-linear-1-3.txt b/testWAS/results/results-linear-1-3.txt
deleted file mode 100644
index b17e57f..0000000
--- a/testWAS/results/results-linear-1-3.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
-1,CONTINUOUS,1000,0.994704869763404,0.987619714556551,1.00179002497026,0
-21,INTEGER,900,1.15654660441432,1.13814239309567,1.17495081573297,0
diff --git a/testWAS/results/results-linear-2-3.txt b/testWAS/results/results-linear-2-3.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-linear-2-3.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-linear-3-3.txt b/testWAS/results/results-linear-3-3.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-linear-3-3.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-log-1-3.txt b/testWAS/results/results-log-1-3.txt
deleted file mode 100644
index 1517d43..0000000
--- a/testWAS/results/results-log-1-3.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-21022_0|| Excluded cat-single: YES || 
-31_0|| Excluded integer: YES || 
-22000_0|| Excluded cat-multiple: YES || 
-1_0|| CONTINUOUS MAIN || CONTINUOUS || IRNT || SUCCESS results-linear
-2_0|| CONTINUOUS MAIN || CONTINUOUS || IRNT || CONTINUOUS-SKIP-500 (200) || 
-3_0|| CONTINUOUS MAIN || CONTINUOUS || >20% IN ONE CATEGORY || cat N: 208, 458, 334 || CAT-ORD || order: 0|1|2 || num categories: 3 || SUCCESS results-ordered-logistic
-4_0|| CONTINUOUS MAIN || CONTINUOUS || >20% IN ONE CATEGORY || cat N: 16, 109, 75 || CAT-ORD || order: 0|1|2 || CATORD-SKIP-500 (200) || 
-5_0|| CONTINUOUS MAIN || CONTINUOUS || >20% IN ONE CATEGORY || Inc(>=10): 0(120) || Inc(>=10): 1(380) || sample 120/380(500) || SUCCESS results-logistic-binary 
-6_0|| CONTINUOUS MAIN || CONTINUOUS || >20% IN ONE CATEGORY || Removed 0: 5<10 examples || Inc(>=10): 1(195) || SKIP (number of levels: 1)
-21_0|| INTEGER || CONTINUOUS || IRNT || SUCCESS results-linear
-22_0|| INTEGER || Inc(>=10): 0(891) || Removed 1: 9<10 examples || SKIP (number of levels: 1)
-23_0|| INTEGER || Inc(>=10): 0(800) || Inc(>=10): 1(100) || sample 800/100(900) || SUCCESS results-logistic-binary 
-24_0|| INTEGER || Inc(>=10): 0(760) || Inc(>=10): 2(131) || Removed 1: 9<10 examples || sample 760/131(891) || SUCCESS results-logistic-binary 
diff --git a/testWAS/results/results-log-2-3.txt b/testWAS/results/results-log-2-3.txt
deleted file mode 100644
index 461413b..0000000
--- a/testWAS/results/results-log-2-3.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-25_0|| INTEGER || Inc(>=10): 0(500) || Inc(>=10): 1(200) || Inc(>=10): 2(200) || 3-20 values || CAT-ORD || order: 0|1|2 || num categories: 3 || SUCCESS results-ordered-logistic
-26_0|| INTEGER || Inc(>=10): 0(600) || Inc(>=10): 1(291) || Removed 2: 9<10 examples || sample 600/291(891) || SUCCESS results-logistic-binary 
-27_0|| INTEGER || Inc(>=10): 11(106) || Inc(>=10): 12(106) || Inc(>=10): 13(72) || Inc(>=10): 14(87) || Inc(>=10): 15(84) || Inc(>=10): 16(89) || Inc(>=10): 17(74) || Inc(>=10): 18(70) || Inc(>=10): 19(56) || Inc(>=10): 20(46) || Inc(>=10): 21(23) || Inc(>=10): 22(31) || Inc(>=10): 23(27) || Inc(>=10): 24(10) || Inc(>=10): 25(10) || Removed 26: 4<10 examples || Removed 27: 2<10 examples || Removed 28: 1<10 examples || Removed 29: 2<10 examples || 3-20 values || CAT-ORD || order: 11|12|13|14|15|16|17|18|19|20|21|22|23|24|25 || num categories: 15 || SUCCESS results-ordered-logistic
-32_0|| CAT-SINGLE || Inc(>=10): 0(891) || Removed 1: 9<10 examples || SKIP (only one value) || 
-33_0|| CAT-SINGLE || Inc(>=10): 0(300) || Inc(>=10): 1(199) || CAT-SINGLE-BINARY || BINARY-LOGISTIC-SKIP-500 (499) || 
-34_0|| CAT-SINGLE || Inc(>=10): 0(300) || Inc(>=10): 1(600) || CAT-SINGLE-BINARY || sample 300/600(900) || SUCCESS results-logistic-binary 
-35_0|| CAT-SINGLE || Inc(>=10): 0(300) || Inc(>=10): 1(100) || Inc(>=10): 2(99) || ordered || CAT-ORD || order: 0|1|2 || CATORD-SKIP-500 (499) || 
-36_0|| CAT-SINGLE || Inc(>=10): 6(39) || Inc(>=10): 8(43) || Inc(>=10): 7(52) || Inc(>=10): 5(24) || Inc(>=10): 10(55) || Inc(>=10): 9(42) || Inc(>=10): 14(78) || Inc(>=10): 11(69) || Inc(>=10): 12(94) || Inc(>=10): 13(73) || Inc(>=10): 15(62) || Inc(>=10): 20(30) || Inc(>=10): 16(49) || Inc(>=10): 17(44) || Inc(>=10): 19(41) || Inc(>=10): 18(44) || Inc(>=10): 21(16) || Inc(>=10): 23(11) || Inc(>=10): 22(13) || Inc(>=10): 24(10) || Removed 25: 7<10 examples || Removed 27: 1<10 examples || Removed 29: 1<10 examples || Removed 26: 1<10 examples || Removed 28: 1<10 examples || ordered || CAT-ORD || order: 5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24 || num categories: 20 || SUCCESS results-ordered-logistic
-37_0|| CAT-SINGLE || Inc(>=10): 0(300) || Inc(>=10): 1(100) || Inc(>=10): 2(99) || CAT-SINGLE-UNORDERED || CATUNORD-SKIP-500 (499) || 
-38_0|| CAT-SINGLE || Inc(>=10): 0(50) || Inc(>=10): 9(200) || Inc(>=10): 1(200) || Inc(>=10): 4(300) || CAT-SINGLE-UNORDERED || reference: 4=300 || SUCCESS results-notordered-logistic 
-39_0|| CAT-SINGLE || Default related field: x99_0_0 || default value 100 set, N= 250 || Inc(>=10): 100(250) || Inc(>=10): 0(50) || Inc(>=10): 9(200) || Inc(>=10): 1(200) || Inc(>=10): 4(300) || CAT-SINGLE-UNORDERED || reference: 4=300 || SUCCESS results-notordered-logistic 
diff --git a/testWAS/results/results-log-3-3.txt b/testWAS/results/results-log-3-3.txt
deleted file mode 100644
index 1255194..0000000
--- a/testWAS/results/results-log-3-3.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-41_0|| CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || NO_NAN Remove NA participants 470 || Not numeric || sample 516/14(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || NO_NAN Remove NA participants 470 || Not numeric || IS_CM_EXPOSURE || sample 495/35(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || NO_NAN Remove NA participants 470 || Not numeric || sample 25/505(530) || SUCCESS results-logistic-binary 
-42_0|| CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || ALL || Not numeric || sample 986/14(1000) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || ALL || Not numeric || sample 965/35(1000) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || ALL || Not numeric || sample 495/505(1000) || SUCCESS results-logistic-binary 
-43_0|| CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || Indicator name x99_0_0 || Remove indicator var NAs: 465 || Not numeric || sample 521/14(535) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || Indicator name x99_0_0 || Remove indicator var NAs: 465 || Not numeric || sample 500/35(535) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || Indicator name x99_0_0 || Remove indicator var NAs: 465 || Not numeric || sample 30/505(535) || SUCCESS results-logistic-binary 
-44_0|| cat-single to cat-multiple || CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || NO_NAN Remove NA participants 470 || Not numeric || sample 516/14(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || NO_NAN Remove NA participants 470 || Not numeric || sample 495/35(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || NO_NAN Remove NA participants 470 || Not numeric || sample 25/505(530) || SUCCESS results-logistic-binary 
-99_0|| Excluded continuous: YES || 
diff --git a/testWAS/results/results-log-all.txt b/testWAS/results/results-log-all.txt
index a0d0940..5d26780 100644
--- a/testWAS/results/results-log-all.txt
+++ b/testWAS/results/results-log-all.txt
@@ -22,7 +22,7 @@
 37_0|| CAT-SINGLE || Inc(>=10): 0(300) || Inc(>=10): 1(100) || Inc(>=10): 2(99) || CAT-SINGLE-UNORDERED || CATUNORD-SKIP-500 (499) || 
 38_0|| CAT-SINGLE || Inc(>=10): 0(50) || Inc(>=10): 9(200) || Inc(>=10): 1(200) || Inc(>=10): 4(300) || CAT-SINGLE-UNORDERED || reference: 4=300 || SUCCESS results-notordered-logistic 
 39_0|| CAT-SINGLE || Default related field: x99_0_0 || default value 100 set, N= 250 || Inc(>=10): 100(250) || Inc(>=10): 0(50) || Inc(>=10): 9(200) || Inc(>=10): 1(200) || Inc(>=10): 4(300) || CAT-SINGLE-UNORDERED || reference: 4=300 || SUCCESS results-notordered-logistic 
-41_0|| CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || NO_NAN Remove NA participants 470 || Not numeric || sample 516/14(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || NO_NAN Remove NA participants 470 || Not numeric || IS_CM_EXPOSURE || sample 495/35(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || NO_NAN Remove NA participants 470 || Not numeric || sample 25/505(530) || SUCCESS results-logistic-binary 
+41_0|| cat-single to cat-multiple || CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || [1] "ERROR: 41_0 Error: Categorical multiples variables need a value for CAT_MULT_INDICATOR_FIELDS\n"
 42_0|| CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || ALL || Not numeric || sample 986/14(1000) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || ALL || Not numeric || sample 965/35(1000) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || ALL || Not numeric || sample 495/505(1000) || SUCCESS results-logistic-binary 
 43_0|| CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || Indicator name x99_0_0 || Remove indicator var NAs: 465 || Not numeric || sample 521/14(535) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || Indicator name x99_0_0 || Remove indicator var NAs: 465 || Not numeric || sample 500/35(535) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || Indicator name x99_0_0 || Remove indicator var NAs: 465 || Not numeric || sample 30/505(535) || SUCCESS results-logistic-binary 
 44_0|| cat-single to cat-multiple || CAT-MULTIPLE ||  CAT-MUL-BINARY-VAR A1 || NO_NAN Remove NA participants 470 || Not numeric || sample 516/14(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A2 || NO_NAN Remove NA participants 470 || Not numeric || sample 495/35(530) || SUCCESS results-logistic-binary  CAT-MUL-BINARY-VAR A3 || NO_NAN Remove NA participants 470 || Not numeric || sample 25/505(530) || SUCCESS results-logistic-binary 
diff --git a/testWAS/results/results-logistic-binary-1-2.txt b/testWAS/results/results-logistic-binary-1-2.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-logistic-binary-1-2.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-logistic-binary-1-3.txt b/testWAS/results/results-logistic-binary-1-3.txt
deleted file mode 100644
index 9fe7ab7..0000000
--- a/testWAS/results/results-logistic-binary-1-3.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
-5,CONTINUOUS,120/380(500),3.18943911658698,2.46928754995082,3.99516617340677,2.32981040891531e-16
-23,INTEGER,800/100(900),1.93633598767705,1.60360866435611,2.29882481880602,7.31196027630102e-28
-24,INTEGER,760/131(891),1.75433797759218,1.46468192507283,2.06597680366808,2.17988718368106e-30
diff --git a/testWAS/results/results-logistic-binary-2-3.txt b/testWAS/results/results-logistic-binary-2-3.txt
deleted file mode 100644
index 3b68a8d..0000000
--- a/testWAS/results/results-logistic-binary-2-3.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
-26,INTEGER,600/291(891),2.21647829260412,1.92186361168347,2.53418345127812,8.11560488921865e-46
-34,CAT-SIN,300/600(900),2.67342038082319,2.31959546662729,3.05528828132202,3.8648784862921e-46
diff --git a/testWAS/results/results-logistic-binary-3-3.txt b/testWAS/results/results-logistic-binary-3-3.txt
deleted file mode 100644
index 2613fbf..0000000
--- a/testWAS/results/results-logistic-binary-3-3.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
-41#A1,CAT-MUL,516/14(530),-0.0923557834369921,-0.604318268636898,0.415860123576197,0.731283609278491
-41#A2,CAT-MUL,495/35(530),-1.24250185838579,-1.62960042688341,-0.887102760424099,4.02529697948092e-11
-41#A3,CAT-MUL,25/505(530),2.60299104913144,1.92190675044312,3.49344841921067,4.01993700982098e-11
-42#A1,CAT-MUL,986/14(1000),-0.742168551681468,-1.27958997966834,-0.21074132857573,0.00625348360881744
-42#A2,CAT-MUL,965/35(1000),-1.9048315094404,-2.41596033097747,-1.45191305152567,6.6036582021544e-15
-42#A3,CAT-MUL,495/505(1000),-2.53879885591662,-2.86719437561936,-2.23511475752686,5.60824611427478e-56
-43#A1,CAT-MUL,521/14(535),-0.118660670448722,-0.660567582659025,0.404639992765588,0.672574033804116
-43#A2,CAT-MUL,500/35(535),-1.34326623535089,-1.76166165073619,-0.959249118076131,3.96615578379219e-11
-43#A3,CAT-MUL,30/505(535),1.08228383424423,0.702061178293818,1.48159863585603,4.2383532774057e-08
-44#A1,CAT-MUL,516/14(530),-0.0923557834369921,-0.604318268636898,0.415860123576197,0.731283609278491
-44#A2,CAT-MUL,495/35(530),-1.24250185838579,-1.62960042688341,-0.887102760424099,4.02529697948092e-11
-44#A3,CAT-MUL,25/505(530),2.60299104913144,1.92190675044312,3.49344841921067,4.01993700982098e-11
diff --git a/testWAS/results/results-logistic-binary-all.txt b/testWAS/results/results-logistic-binary-all.txt
index ec88660..31b11a5 100644
--- a/testWAS/results/results-logistic-binary-all.txt
+++ b/testWAS/results/results-logistic-binary-all.txt
@@ -1,18 +1,15 @@
 varName,varType,n,beta,lower,upper,pvalue
-5,CONTINUOUS,120/380(500),3.18943911658698,2.46928754995082,3.99516617340677,2.32981040891531e-16
-23,INTEGER,800/100(900),1.93633598767705,1.60360866435611,2.29882481880602,7.31196027630102e-28
-24,INTEGER,760/131(891),1.75433797759218,1.46468192507283,2.06597680366808,2.17988718368106e-30
-26,INTEGER,600/291(891),2.21647829260412,1.92186361168347,2.53418345127812,8.11560488921865e-46
-34,CAT-SIN,300/600(900),2.67342038082319,2.31959546662729,3.05528828132202,3.8648784862921e-46
-41#A1,CAT-MUL,516/14(530),-0.0923557834369921,-0.604318268636898,0.415860123576197,0.731283609278491
-41#A2,CAT-MUL,495/35(530),-1.24250185838579,-1.62960042688341,-0.887102760424099,4.02529697948092e-11
-41#A3,CAT-MUL,25/505(530),2.60299104913144,1.92190675044312,3.49344841921067,4.01993700982098e-11
-42#A1,CAT-MUL,986/14(1000),-0.742168551681468,-1.27958997966834,-0.21074132857573,0.00625348360881744
-42#A2,CAT-MUL,965/35(1000),-1.9048315094404,-2.41596033097747,-1.45191305152567,6.6036582021544e-15
-42#A3,CAT-MUL,495/505(1000),-2.53879885591662,-2.86719437561936,-2.23511475752686,5.60824611427478e-56
-43#A1,CAT-MUL,521/14(535),-0.118660670448722,-0.660567582659025,0.404639992765588,0.672574033804116
-43#A2,CAT-MUL,500/35(535),-1.34326623535089,-1.76166165073619,-0.959249118076131,3.96615578379219e-11
-43#A3,CAT-MUL,30/505(535),1.08228383424423,0.702061178293818,1.48159863585603,4.2383532774057e-08
-44#A1,CAT-MUL,516/14(530),-0.0923557834369921,-0.604318268636898,0.415860123576197,0.731283609278491
-44#A2,CAT-MUL,495/35(530),-1.24250185838579,-1.62960042688341,-0.887102760424099,4.02529697948092e-11
-44#A3,CAT-MUL,25/505(530),2.60299104913144,1.92190675044312,3.49344841921067,4.01993700982098e-11
+5,CONTINUOUS,120/380(500),3.18943911658699,2.46928754995082,3.99516617340677,2.3298104089149e-16
+23,INTEGER,800/100(900),1.93633598767705,1.60360866435611,2.29882481880602,7.31196027630188e-28
+24,INTEGER,760/131(891),1.75433797759218,1.46468192507283,2.06597680366808,2.17988718368084e-30
+26,INTEGER,600/291(891),2.21647829260412,1.92186361168347,2.53418345127812,8.11560488921906e-46
+34,CAT-SIN,300/600(900),2.67342038082318,2.31959546662729,3.05528828132202,3.8648784862923e-46
+42#A1,CAT-MUL,986/14(1000),-0.74216855168147,-1.27958997966834,-0.21074132857573,0.00625348360881725
+42#A2,CAT-MUL,965/35(1000),-1.9048315094404,-2.41596033097747,-1.45191305152567,6.60365820215384e-15
+42#A3,CAT-MUL,495/505(1000),-2.53879885591661,-2.86719437561935,-2.23511475752686,5.60824611426784e-56
+43#A1,CAT-MUL,521/14(535),-0.118660670448726,-0.660567582659025,0.404639992765588,0.672574033804106
+43#A2,CAT-MUL,500/35(535),-1.34326623535089,-1.76166165073619,-0.95924911807613,3.96615578379178e-11
+43#A3,CAT-MUL,30/505(535),1.08228383424425,0.702061178293763,1.48159863585598,4.23835327740328e-08
+44#A1,CAT-MUL,516/14(530),-0.0923557834369891,-0.6043182686369,0.415860123576197,0.7312836092785
+44#A2,CAT-MUL,495/35(530),-1.24250185838579,-1.62960042688341,-0.887102760424098,4.02529697948063e-11
+44#A3,CAT-MUL,25/505(530),2.60299104913143,1.92190675044312,3.49344841921067,4.01993700982103e-11
diff --git a/testWAS/results/results-multinomial-logistic-1-2.txt b/testWAS/results/results-multinomial-logistic-1-2.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-multinomial-logistic-1-2.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-multinomial-logistic-1-3.txt b/testWAS/results/results-multinomial-logistic-1-3.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-multinomial-logistic-1-3.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-multinomial-logistic-2-3.txt b/testWAS/results/results-multinomial-logistic-2-3.txt
deleted file mode 100644
index fd4d32b..0000000
--- a/testWAS/results/results-multinomial-logistic-2-3.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
-38-4,CAT-SIN,300/750,-999,-999,-999,2.36929803366859e-124
-38-4#0,CAT-SIN,300#50,-2.40849608862529,-3.24568232787998,-1.5713098493706,1.71326515197023e-08
-38-4#9,CAT-SIN,300#200,3.84847729362016,3.17580034120106,4.52115424603927,0
-38-4#1,CAT-SIN,300#200,-2.04799198103754,-2.53421642573651,-1.56176753633858,2.22044604925031e-16
-39-4,CAT-SIN,300/1000,-999,-999,-999,0
-39-4#100,CAT-SIN,300#250,-3091.67248735407,-3091.68083398189,-3091.66414072624,0
-39-4#0,CAT-SIN,300#50,-2.40847366034011,-3.24564980598633,-1.57129751469388,1.71311120844564e-08
-39-4#9,CAT-SIN,300#200,3.84809435137089,3.17549284227653,4.52069586046524,0
-39-4#1,CAT-SIN,300#200,-2.04793321181161,-2.53415006998987,-1.56171635363335,2.22044604925031e-16
diff --git a/testWAS/results/results-multinomial-logistic-3-3.txt b/testWAS/results/results-multinomial-logistic-3-3.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-multinomial-logistic-3-3.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-multinomial-logistic-all.txt b/testWAS/results/results-multinomial-logistic-all.txt
index fd4d32b..01ad9c0 100644
--- a/testWAS/results/results-multinomial-logistic-all.txt
+++ b/testWAS/results/results-multinomial-logistic-all.txt
@@ -1,10 +1,10 @@
 varName,varType,n,beta,lower,upper,pvalue
-38-4,CAT-SIN,300/750,-999,-999,-999,2.36929803366859e-124
-38-4#0,CAT-SIN,300#50,-2.40849608862529,-3.24568232787998,-1.5713098493706,1.71326515197023e-08
-38-4#9,CAT-SIN,300#200,3.84847729362016,3.17580034120106,4.52115424603927,0
-38-4#1,CAT-SIN,300#200,-2.04799198103754,-2.53421642573651,-1.56176753633858,2.22044604925031e-16
+38-4,CAT-SIN,300/750,-999,-999,-999,2.36929803366805e-124
+38-4#0,CAT-SIN,300#50,-2.40849608862529,-3.24566694438637,-1.57132523286421,1.71326515197023e-08
+38-4#9,CAT-SIN,300#200,3.84847729362016,3.17581270179792,4.52114188544241,0
+38-4#1,CAT-SIN,300#200,-2.04799198103754,-2.53420749124824,-1.56177647082684,2.22044604925031e-16
 39-4,CAT-SIN,300/1000,-999,-999,-999,0
-39-4#100,CAT-SIN,300#250,-3091.67248735407,-3091.68083398189,-3091.66414072624,0
-39-4#0,CAT-SIN,300#50,-2.40847366034011,-3.24564980598633,-1.57129751469388,1.71311120844564e-08
-39-4#9,CAT-SIN,300#200,3.84809435137089,3.17549284227653,4.52069586046524,0
-39-4#1,CAT-SIN,300#200,-2.04793321181161,-2.53415006998987,-1.56171635363335,2.22044604925031e-16
+39-4#100,CAT-SIN,300#250,-3091.67248733479,-3091.68083380925,-3091.66414086034,0
+39-4#0,CAT-SIN,300#50,-2.40847366034002,-3.24563442267806,-1.57131289800198,1.71311120844564e-08
+39-4#9,CAT-SIN,300#200,3.84809435137093,3.17550520148713,4.52068350125473,0
+39-4#1,CAT-SIN,300#200,-2.04793321181159,-2.534141135641,-1.56172528798218,2.22044604925031e-16
diff --git a/testWAS/results/results-ordered-logistic-1-2.txt b/testWAS/results/results-ordered-logistic-1-2.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-ordered-logistic-1-2.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-ordered-logistic-1-3.txt b/testWAS/results/results-ordered-logistic-1-3.txt
deleted file mode 100644
index 093e33c..0000000
--- a/testWAS/results/results-ordered-logistic-1-3.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
-3,CONTINUOUS,1000,0.661807286044053,0.534467956678656,0.789146615409451,3.01434968530619e-23
diff --git a/testWAS/results/results-ordered-logistic-2-3.txt b/testWAS/results/results-ordered-logistic-2-3.txt
deleted file mode 100644
index eeea18b..0000000
--- a/testWAS/results/results-ordered-logistic-2-3.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
-25,INTEGER,900,4.47054036959362,3.98675684857394,4.95432389061329,1.07328655178677e-62
-27,INTEGER,891,6.00635627030678,5.57811072838858,6.43460181222497,2.40837709920153e-120
-36,CAT-SIN,889,4.26406070415127,3.97401193807006,4.55410947023248,1.45251953559895e-128
diff --git a/testWAS/results/results-ordered-logistic-3-3.txt b/testWAS/results/results-ordered-logistic-3-3.txt
deleted file mode 100644
index 5682a47..0000000
--- a/testWAS/results/results-ordered-logistic-3-3.txt
+++ /dev/null
@@ -1 +0,0 @@
-varName,varType,n,beta,lower,upper,pvalue
diff --git a/testWAS/results/results-ordered-logistic-all.txt b/testWAS/results/results-ordered-logistic-all.txt
index a7b9803..0c3e84e 100644
--- a/testWAS/results/results-ordered-logistic-all.txt
+++ b/testWAS/results/results-ordered-logistic-all.txt
@@ -1,5 +1,5 @@
 varName,varType,n,beta,lower,upper,pvalue
-3,CONTINUOUS,1000,0.661807286044053,0.534467956678656,0.789146615409451,3.01434968530619e-23
-25,INTEGER,900,4.47054036959362,3.98675684857394,4.95432389061329,1.07328655178677e-62
-27,INTEGER,891,6.00635627030678,5.57811072838858,6.43460181222497,2.40837709920153e-120
-36,CAT-SIN,889,4.26406070415127,3.97401193807006,4.55410947023248,1.45251953559895e-128
+3,CONTINUOUS,1000,0.661807286044053,0.534467956678638,0.789146615409469,3.01434968534588e-23
+25,INTEGER,900,4.47054036959369,3.98675684857102,4.95432389061637,1.07328655338457e-62
+27,INTEGER,891,6.00635627030785,5.57811072839006,6.43460181222563,2.40837709810009e-120
+36,CAT-SIN,889,4.26406070415127,3.97401193807007,4.55410947023247,1.45251953558349e-128
diff --git a/testWAS/results/variable-flow-counts-1-3.txt b/testWAS/results/variable-flow-counts-1-3.txt
deleted file mode 100644
index b09ba23..0000000
--- a/testWAS/results/variable-flow-counts-1-3.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-name,countValue
-cont.binary,1
-cont.main,3
-cont.main.500,1
-cont.onevalue,1
-cont.ordcattry,2
-cont.ordcattry.ordcat,2
-excluded.catMul,1
-excluded.catSin,1
-excluded.int,1
-int.binary,2
-int.continuous,1
-int.onevalue,1
-ordCat,2
-ordCat.500,1
-start.cont,6
-start.exposure.cont,1
-start.int,4
-success.binary,3
-success.continuous,2
-success.exposure.continuous,1
-success.ordCat,1
diff --git a/testWAS/results/variable-flow-counts-2-3.txt b/testWAS/results/variable-flow-counts-2-3.txt
deleted file mode 100644
index e3620ad..0000000
--- a/testWAS/results/variable-flow-counts-2-3.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-name,countValue
-binary.500,1
-catSin.case1,2
-catSin.case2,3
-catSin.case3,2
-catSin.onevalue,1
-int.binary,1
-int.catord,2
-ordCat,4
-ordCat.500,1
-start.catSin,8
-start.int,3
-success.binary,2
-success.ordCat,3
-success.unordCat,2
-unordCat.500,1
diff --git a/testWAS/results/variable-flow-counts-3-3.txt b/testWAS/results/variable-flow-counts-3-3.txt
deleted file mode 100644
index 1c778f3..0000000
--- a/testWAS/results/variable-flow-counts-3-3.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-name,countValue
-catMul.binary,12
-catMul.over10,12
-catSinToCatMul,1
-excluded.cont,1
-start.catMul,4
-start.exposure.catMulvalues,1
-success.binary,12
-success.exposure.binary,1
diff --git a/testWAS/results/variable-flow-counts-all.txt b/testWAS/results/variable-flow-counts-all.txt
index 4dadad2..78c716d 100644
--- a/testWAS/results/variable-flow-counts-all.txt
+++ b/testWAS/results/variable-flow-counts-all.txt
@@ -1,12 +1,12 @@
 name,countValue
 binary.500,1
-catMul.binary,12
-catMul.over10,12
+catMul.binary,10
+catMul.over10,9
 catSin.case1,2
 catSin.case2,3
 catSin.case3,2
 catSin.onevalue,1
-catSinToCatMul,1
+catSinToCatMul,2
 cont.binary,1
 cont.main,3
 cont.main.500,1
@@ -26,12 +26,10 @@ ordCat.500,2
 start.catMul,4
 start.catSin,8
 start.cont,6
-start.exposure.catMulvalues,1
 start.exposure.cont,1
 start.int,7
-success.binary,17
+success.binary,14
 success.continuous,2
-success.exposure.binary,1
 success.exposure.continuous,1
 success.ordCat,4
 success.unordCat,2
diff --git a/testWAS/results/variable-flow-counts-combined.txt b/testWAS/results/variable-flow-counts-combined.txt
deleted file mode 100644
index 4dadad2..0000000
--- a/testWAS/results/variable-flow-counts-combined.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-name,countValue
-binary.500,1
-catMul.binary,12
-catMul.over10,12
-catSin.case1,2
-catSin.case2,3
-catSin.case3,2
-catSin.onevalue,1
-catSinToCatMul,1
-cont.binary,1
-cont.main,3
-cont.main.500,1
-cont.onevalue,1
-cont.ordcattry,2
-cont.ordcattry.ordcat,2
-excluded.catMul,1
-excluded.catSin,1
-excluded.cont,1
-excluded.int,1
-int.binary,3
-int.catord,2
-int.continuous,1
-int.onevalue,1
-ordCat,6
-ordCat.500,2
-start.catMul,4
-start.catSin,8
-start.cont,6
-start.exposure.catMulvalues,1
-start.exposure.cont,1
-start.int,7
-success.binary,17
-success.continuous,2
-success.exposure.binary,1
-success.exposure.continuous,1
-success.ordCat,4
-success.unordCat,2
-unordCat.500,1
diff --git a/testWAS/testdata.csv b/testWAS/testdata.csv
deleted file mode 100644
index 513de9c..0000000
--- a/testWAS/testdata.csv
+++ /dev/null
@@ -1,1001 +0,0 @@
-"geno","x21022_0_0","x31_0_0","x22000_0_0","cont1","cont2","cont3","cont4"
-"1",-1.20706574938542,47.9466658037648,0,1,1.2438155595732,NA,0,1
-"2",0.27742924211066,63.0146673976471,1,1,0.116104203964638,NA,0,0
-"3",1.08444117668306,44.6085477640437,0,0,2.22954950810941,NA,0,0
-"4",-2.34569770262935,66.3537072349362,1,0,0.361772739183953,NA,0,0
-"5",0.42912468881105,67.0295177475515,0,0,-0.63109008650694,NA,0,1
-"6",0.506055892157574,40.9411713538266,1,0,1.6467781891876,NA,0,0
-"7",-0.574739960134649,69.3892144955624,1,1,0.373786490804675,NA,0,1
-"8",-0.546631855784187,57.7550789450421,1,1,-1.54388992456,NA,0,1
-"9",-0.564451999093283,53.2618322461205,0,1,-0.510904308698956,NA,0,1
-"10",-0.890037829044104,64.4578744480567,1,0,1.14870093595012,NA,0,0
-"11",-0.477192699753547,72.8061710689861,0,1,-0.718869720667227,NA,0,1
-"12",-0.998386444859704,75.6513013425123,0,0,1.7378226082064,NA,0,1
-"13",-0.77625389463799,47.994539638105,0,1,0.267475750107545,NA,0,0
-"14",0.0644588172762693,55.6230503362601,0,1,-0.550524203305469,NA,0,0
-"15",0.959494058970771,61.4638148182447,1,0,0.391629853781272,NA,0,1
-"16",-0.110285494390774,60.6601907555389,1,1,1.32923582506416,NA,0,1
-"17",-0.511009505806642,55.5496398470349,0,1,-0.80160294770982,NA,0,0
-"18",-0.911195416629811,36.578876135585,0,0,0.667314523667813,NA,0,1
-"19",-0.83717168026894,57.2161763249215,0,1,0.591688789537777,NA,0,0
-"20",2.41583517848934,48.1819636765515,1,0,0.123586982982799,NA,0,1
-"21",0.134088220152031,60.33125272564,0,0,-0.462237562002588,NA,0,0
-"22",-0.490685896690943,51.6228298815946,1,0,-0.688925532781386,NA,0,0
-"23",-0.440547872353227,58.8238891975371,0,0,-0.0535212677736979,NA,0,1
-"24",0.459589441005854,52.584774288708,1,0,-0.572706502414098,NA,0,0
-"25",-0.693720246937475,55.9165303081683,1,0,0.21375633645251,NA,0,1
-"26",-1.44820491038647,56.2222923904128,1,0,0.359803188599339,NA,0,0
-"27",0.574755720900728,73.2836671199718,1,0,2.27695732166317,NA,0,0
-"28",-1.02365572296388,67.1643630684198,1,1,-0.440291411753061,NA,0,1
-"29",-0.0151383003641817,57.6771081174904,0,1,-0.318971349212263,NA,0,0
-"30",-0.935948601168394,47.7377309975733,0,1,-0.15261217570315,NA,0,1
-"31",1.10229754620026,70.4753809093163,1,1,-0.131889299757686,NA,0,0
-"32",-0.475593078869057,45.6359281317617,0,0,0.446197218612174,NA,0,0
-"33",-0.709440037512506,48.175310716134,1,0,2.24976919884358,NA,0,0
-"34",-0.501258060594761,52.2379516418684,0,0,0.181818520118387,NA,0,0
-"35",-1.6290934690787,50.2768649052425,1,0,-0.270863415704329,NA,0,0
-"36",-1.16761926209419,50.9634243880893,0,0,-0.037073218575801,NA,0,0
-"37",-2.18003964894867,72.8720394604443,1,1,1.40637562902228,NA,0,0
-"38",-1.34099319210003,58.9381239901269,1,0,-0.266690213501999,NA,0,0
-"39",-0.294293858763713,52.8412904634293,1,1,-1.29619552537285,NA,0,1
-"40",-0.46589754040611,66.7172219647766,1,1,-0.249005464442341,NA,0,1
-"41",1.44949626528713,61.9763362591688,1,0,-1.18874052663649,NA,0,0
-"42",-1.06864272367458,47.7990327466637,0,1,-0.309260543638855,NA,0,0
-"43",-0.855364633737507,54.3903487954904,0,0,0.589199536628837,NA,0,1
-"44",-0.280623001957454,65.3373263441171,0,1,-1.68359211475623,NA,0,1
-"45",-0.994340076295665,79.5031546468388,1,0,-0.530653818657169,NA,0,0
-"46",-0.968514317872049,75.8194322960051,1,1,-0.0166840920859275,NA,0,0
-"47",-1.10731819264177,68.3778752223611,1,0,0.189736857901506,NA,0,1
-"48",-1.25198588563894,59.8693033731552,0,1,1.34757691898234,NA,0,1
-"49",-0.523828118987356,42.7420403798901,0,1,1.19532024763734,NA,0,1
-"50",-0.4968499572667,52.6914642631802,0,0,-0.620171719988641,NA,0,1
-"51",-1.80603125680195,57.8458613465304,1,0,-0.175189000933116,NA,0,1
-"52",-0.582075924689333,64.9020666855371,1,0,-1.64714780166645,NA,0,0
-"53",-1.10888962442678,60.5939342375615,0,0,-1.3320729000756,NA,0,0
-"54",-1.01496200949201,60.5831205292457,0,1,-1.00968055555978,NA,0,0
-"55",-0.162309523556819,78.3836310657496,1,1,0.227512840423866,NA,0,1
-"56",0.563055818994517,63.8765089941351,1,0,-0.789891569760385,NA,0,1
-"57",1.64781747281506,57.1944927534534,1,1,0.943648066445016,NA,0,0
-"58",-0.773353423943695,55.7148226203643,1,0,0.346260971880386,NA,0,0
-"59",1.6059096288573,60.9532493553194,0,1,1.3637663943923,NA,0,1
-"60",-1.15780854769441,51.8666859105858,0,1,1.10482445945345,NA,0,0
-"61",0.656588464332807,73.6819007742547,0,0,0.10188234898138,NA,0,1
-"62",2.54899107071786,57.1246396990977,0,1,-0.301585633083307,NA,0,1
-"63",-0.0347603898038376,54.7144757652248,1,0,-1.51063939131438,NA,0,0
-"64",-0.669633579661957,51.2698707674302,0,1,0.529007075806006,NA,0,0
-"65",-0.00760475571097463,69.3589307240067,0,1,-0.129745906677617,NA,0,1
-"66",1.77708444814343,47.2273267697779,0,1,0.325945377059653,NA,0,0
-"67",-1.13860773675091,55.3126016106077,0,0,-0.215780727869325,NA,0,0
-"68",1.36782717942326,63.1251627333967,1,1,1.51600368457984,NA,0,1
-"69",1.32956479078236,58.9167769835649,1,1,0.691256197053224,NA,0,1
-"70",0.336472797203817,62.5079041733303,0,1,0.656187161645506,NA,0,0
-"71",0.00689283844195796,46.8380660579378,0,1,-0.855226548741805,NA,0,0
-"72",-0.455468738161428,55.6992455083261,1,0,-0.279060276153948,NA,0,0
-"73",-0.366523932873933,42.9589030241387,1,0,-0.742372375227027,NA,0,1
-"74",0.648286567503148,51.5786263478554,1,0,0.777492837430792,NA,0,1
-"75",2.07027086133094,71.4825059530873,1,1,-0.289240352525919,NA,0,1
-"76",-0.15339841246697,62.5283153590922,0,1,0.666245990233836,NA,0,1
-"77",-1.39070094670149,60.8528718612452,1,0,-1.9264631392104,NA,0,0
-"78",-0.723581777110154,50.6637386947879,1,1,0.176749020379374,NA,0,0
-"79",0.258261762167853,54.4318039384037,0,1,-1.09468471677104,NA,0,0
-"80",-0.317059114961199,49.8362968057923,1,0,0.13939769112346,NA,0,1
-"81",-0.177789957664294,60.5352524982115,1,1,0.707865641700464,NA,0,1
-"82",-0.169994076854541,67.6513360614726,1,1,-0.56074604752531,NA,0,1
-"83",-1.37230188587669,62.991948153815,1,0,-0.00518705947728593,NA,0,0
-"84",-0.173787170414033,53.823842517868,1,1,-1.25973317328134,NA,0,0
-"85",0.850232257121884,65.5278731357432,0,0,-0.699440510273066,NA,0,1
-"86",0.697608711668253,63.8790849886898,1,1,-1.87625722455871,NA,0,0
-"87",0.549997351091101,68.4321094225036,1,0,-0.152968849695331,NA,0,1
-"88",-0.402731975447623,55.5937241366341,0,0,-0.334822262418323,NA,0,1
-"89",-0.191593770478477,48.2063462674728,1,0,0.986650708606245,NA,0,1
-"90",-1.19452787960251,59.1543093464118,1,1,0.455944722358052,NA,0,0
-"91",-0.0531588187433239,63.0277769505261,0,0,-0.0192355444580243,NA,0,1
-"92",0.255196000995833,75.5909373112435,0,0,0.529381440271203,NA,0,0
-"93",1.70596400724007,59.3033747269476,0,0,-0.780230182566805,NA,0,1
-"94",1.00151325236345,64.270394144678,0,1,0.171797365839054,NA,0,1
-"95",-0.49558344300146,59.1673680172382,1,0,-1.60905679167636,NA,0,1
-"96",0.355550296999749,81.5639774076182,0,0,-1.6751510908634,NA,0,1
-"97",-1.13460804353385,73.3796805938236,1,1,0.942514927592197,NA,0,0
-"98",0.878203626952291,57.6769555260476,1,0,0.0795745542571885,NA,0,0
-"99",0.972916753376122,58.7259591005261,0,0,1.41140206718487,NA,0,1
-"100",2.12111710537568,59.7831459572825,1,0,-0.170360912244337,NA,0,0
-"101",0.414523534374985,61.1325408438459,1,1,-1.37681034516758,NA,0,0
-"102",-0.47471847352936,54.3490458950753,0,1,-0.899420361203481,NA,0,0
-"103",0.0659934937044614,60.807003872445,1,1,1.54358338788558,NA,0,1
-"104",-0.502477782393866,53.0422859341106,1,1,-1.81914236035552,NA,0,0
-"105",-0.825998587129336,49.0379786704802,1,0,-0.834698956166578,NA,0,1
-"106",0.166989279752984,68.0444055775452,0,1,1.2821658455835,NA,0,1
-"107",-0.896264626244759,66.5664411993471,0,1,-0.155315871583421,NA,0,0
-"108",0.168185387655757,54.3196932265546,1,0,-1.78837946522822,NA,0,1
-"109",0.354968261014854,72.7278584148814,1,0,-0.299097216479544,NA,0,0
-"110",-0.0521051168711336,58.0745608424303,0,0,0.446961511361309,NA,0,0
-"111",-0.195934618716253,57.1633478141118,0,0,1.067308412976,NA,0,0
-"112",-0.64906975160576,50.4632828706075,0,1,-0.986960123284403,NA,0,1
-"113",-1.10976723139873,59.7564935396584,0,0,1.85707765802524,NA,0,0
-"114",0.849274202655058,58.6301403514248,0,1,-0.0138826606861277,NA,0,0
-"115",0.022362526076024,60.6751839112109,1,1,0.565559334823796,NA,0,1
-"116",0.831140617306835,64.4516835983198,0,1,-0.602297404440719,NA,0,1
-"117",-1.24428785134423,69.0660038824601,1,0,1.28512367047536,NA,0,0
-"118",0.169026413766175,63.2741056646057,1,0,1.33601685013132,NA,0,0
-"119",0.673166306947679,69.9256162675209,0,0,-0.556389298983822,NA,0,1
-"120",-0.0262763764485261,70.6540057402709,1,1,-0.716651176935715,NA,0,0
-"121",-0.191392168556024,30.6951794461942,0,0,-0.759021685044223,NA,0,1
-"122",-0.781906646699876,56.3722625701225,0,1,1.56851089568209,NA,0,0
-"123",2.05816198776349,65.6993253809256,0,1,0.535463570505942,NA,0,0
-"124",0.750501453155831,48.8036452769496,0,1,0.209412200921675,NA,0,0
-"125",1.82420830237829,50.3934964139591,1,1,0.992473242702389,NA,0,1
-"126",0.080059641091988,62.7373647191221,0,1,-0.53725012215092,NA,0,1
-"127",-0.631409298966366,55.5857671135984,1,1,-0.268172819727498,NA,0,0
-"128",-1.51328812003373,42.6851218232149,0,0,0.929007556651931,NA,0,1
-"129",-0.636099831085051,69.0585484397637,1,1,-1.55826690472119,NA,0,0
-"130",0.226301532497161,51.5916035849323,0,1,0.64991826547118,NA,0,0
-"131",1.01369034746575,63.8634785492597,0,1,-1.51152614736535,NA,0,0
-"132",0.252750135066342,64.1243011445702,0,0,-0.865319866372719,NA,0,1
-"133",-1.17194831284112,75.2126864304235,1,1,0.347684637661273,NA,0,1
-"134",0.668714328525034,61.9656423546082,1,0,-0.186095036517136,NA,0,1
-"135",-1.65010093484436,66.6561035283678,0,1,0.595346298736087,NA,0,0
-"136",-0.365852248191901,71.4059109224811,1,1,-1.08826919443383,NA,0,0
-"137",-0.31611832883308,55.9361887195241,0,0,-0.379360773245381,NA,0,1
-"138",-1.94824604732137,43.4443864631071,0,0,-0.133805150064513,NA,0,1
-"139",0.920057522390905,61.429300364911,0,0,0.293893744779534,NA,0,0
-"140",-0.622871594878875,73.2591430830882,1,1,-0.801214309674805,NA,0,0
-"141",-0.334036649997458,45.9372233936045,0,0,2.30573365422277,NA,0,1
-"142",1.39514789252962,70.1213161499515,1,1,-1.01508205682307,NA,0,0
-"143",0.63667441103895,59.3986915704217,1,0,-0.942518263570327,NA,0,1
-"144",-0.108431696990383,60.9948142428027,0,1,-0.107134274345954,NA,0,1
-"145",0.513762778404674,56.5077820249413,1,0,-1.13134071861089,NA,0,1
-"146",0.399271807273958,70.3685327959033,1,0,-0.51857369605202,NA,0,1
-"147",1.6628564474304,74.2415647914164,1,1,0.319354057090191,NA,0,0
-"148",0.275893403585483,75.7084879138618,1,1,0.599169972037201,NA,0,0
-"149",0.506272623207687,50.3259926693272,0,0,0.56608886040822,NA,0,0
-"150",0.347551975023811,67.5928584434279,1,0,-0.814755450314007,NA,0,0
-"151",-0.377237647496313,56.2381034554186,1,1,0.393385013408062,NA,0,0
-"152",0.0976194631092374,50.1659058608481,0,0,-0.161707331153351,NA,0,1
-"153",1.63874464512173,63.8989510110322,0,1,-0.870589063755767,NA,0,1
-"154",-0.875592474189569,59.8204928625175,0,1,-1.36678640380301,NA,0,1
-"155",0.121759998641046,67.9172460509004,0,1,0.400568533158509,NA,0,0
-"156",1.36213066084498,77.9239787563701,0,1,-1.36484949210307,NA,0,0
-"157",-0.23462108692271,70.3671688493632,0,1,2.49552373193942,NA,0,0
-"158",-1.05338280794309,53.0431438443961,1,0,-0.839692875155713,NA,0,0
-"159",-0.869783605635912,61.3962266496932,0,0,0.954222182420512,NA,0,1
-"160",-0.390127029647055,74.1324549389255,1,0,1.25069343500921,NA,0,1
-"161",-0.847350073129572,71.7443759629564,0,0,1.06574895295509,NA,0,1
-"162",-0.260639392099399,44.1521841733571,1,0,1.15944236767692,NA,0,1
-"163",-0.414419706341893,69.464299206338,0,0,1.27198726757385,NA,0,1
-"164",-0.183050797719434,56.5948591488133,1,1,0.927341416264284,NA,0,0
-"165",0.407056097880578,53.7463730929629,0,1,0.101609421110908,NA,0,0
-"166",0.624633128202147,48.9879777101729,0,0,0.55776883720937,NA,0,0
-"167",1.67820574370471,56.8820453992648,1,1,0.463414412674698,NA,0,1
-"168",-0.0686936535649907,66.8865650538364,0,1,-0.660694489533208,NA,0,1
-"169",-0.320839912658177,74.3527187786736,1,0,0.398961252222583,NA,0,0
-"170",1.47100571704891,61.3890709820022,1,1,-1.30582701702516,NA,0,0
-"171",1.7043293978271,59.2694761357826,1,0,-0.443201083998582,NA,0,1
-"172",0.0432440375575291,57.976640618882,0,1,-1.21740052305349,NA,0,1
-"173",-0.332657319133519,69.5379933574217,1,1,-0.83982983727178,NA,0,1
-"174",-1.8222354175631,49.1880655255796,1,1,-0.677318403999919,NA,0,1
-"175",1.41126239874678,62.5001870455132,1,0,2.19605464828752,NA,0,1
-"176",-0.837582434076861,70.2162198057182,0,1,-1.15467197745111,NA,0,0
-"177",-1.12376279439532,70.6185151113365,1,1,-0.233399703728512,NA,0,1
-"178",3.04376588609529,64.1707021007276,0,1,-0.389744443583323,NA,0,0
-"179",0.235021308250671,48.3763384863002,0,1,0.0287354454515505,NA,0,0
-"180",-0.0332586101133057,51.7891527018534,0,0,-1.70836970797038,NA,0,0
-"181",-2.7322195229558,52.795117422846,0,0,-0.561405110125317,NA,0,1
-"182",-0.0997905884418961,45.5672803130302,1,1,-1.139342962383,NA,0,0
-"183",0.976031734922396,60.4002033444342,1,0,1.09505700857317,NA,0,1
-"184",0.413868915451097,72.2294668396681,0,1,0.166786940665951,NA,0,1
-"185",0.912322161610113,35.5330608337029,1,1,1.17248789106492,NA,0,0
-"186",1.98373220068438,67.5838789198152,1,0,0.688176022899368,NA,0,1
-"187",1.16910851401363,57.9733834587649,1,0,1.51547424437832,NA,0,1
-"188",-0.50873701541522,52.3086615280499,1,1,-0.636050707196628,NA,0,1
-"189",0.704180178465512,67.7000881141061,1,1,-1.16362571218479,NA,0,0
-"190",-0.198416273822079,66.4838515883954,0,1,-0.427164795592366,NA,0,0
-"191",-0.538070788884863,60.9621737861886,1,1,0.196091333321825,NA,0,1
-"192",-2.85575865501923,50.8035664683063,1,1,-0.997047804364754,NA,0,0
-"193",-0.789646852263761,52.9513822276208,1,1,0.517466397560559,NA,0,1
-"194",0.487814635163685,43.8276593772242,0,0,-0.671756980072126,NA,0,1
-"195",2.16803253951933,78.8089390836698,1,1,0.375318025739388,NA,0,0
-"196",0.500694614280786,58.4440563915239,1,0,-0.415601683558189,NA,0,0
-"197",0.62021020366732,52.4548625121876,1,0,1.15966859915141,NA,0,1
-"198",-0.965903210133575,38.3904755583043,0,1,-0.856041968843027,NA,0,0
-"199",0.162654708118265,55.705383196996,0,0,-0.13875389573912,NA,0,0
-"200",-2.07823754188738,54.508720953604,0,0,-0.617461910983908,NA,0,1
-"201",0.485226820569252,45.9768411711567,0,0,0.264669172152278,NA,0,0
-"202",0.696768778564913,83.8397182312468,1,0,0.812430386935776,NA,0,1
-"203",0.185513915583057,68.7294435097848,1,0,0.573408898080292,NA,0,0
-"204",0.700733515544461,44.6433411282576,0,1,-0.611756195058755,NA,0,1
-"205",0.311681028661359,71.339752513847,0,0,0.927582036106653,NA,0,1
-"206",0.760462361967838,70.4484016061832,0,1,0.459632168468754,NA,0,1
-"207",1.84246362620766,65.8983068874561,1,1,0.0777094202079709,NA,0,0
-"208",1.1123628412626,63.9910373489487,1,1,-0.794012677255858,NA,0,1
-"209",0.0326639575014441,61.4494918465514,0,1,0.755647696808639,NA,0,1
-"210",-1.11444896479736,66.1916121760774,1,0,-1.49004740974421,NA,0,1
-"211",0.418057822385083,49.6738089390188,1,1,-1.95210194506861,NA,0,0
-"212",-0.400235237343163,53.5064386172432,0,0,-0.452398750065752,NA,0,0
-"213",1.49349310261748,49.9799274180761,0,0,-1.36862613526372,NA,0,0
-"214",-1.60708093984972,58.341190400115,1,0,0.295810885487959,NA,0,1
-"215",-0.415751788401515,61.4665629511642,1,0,1.62348352547161,NA,0,0
-"216",0.42200837321742,66.0205652414274,1,0,-0.52968985535574,NA,0,0
-"217",-0.151736536534977,65.2905552549282,1,1,-0.257188260480086,NA,0,0
-"218",-0.60615111526422,74.4569584076136,0,1,0.750955500249836,NA,0,0
-"219",-0.304721068966714,65.9730928742892,0,0,-1.63589691767136,NA,0,1
-"220",0.629536099884472,72.5631692128956,1,0,-1.53165443855064,NA,0,0
-"221",0.895171980275539,70.9744204280521,0,1,1.83455634245432,NA,0,1
-"222",0.660212631820405,54.1153958690102,0,0,-0.527524602007166,NA,0,0
-"223",2.27348352044748,67.5130128150681,0,1,-0.223240229593461,NA,0,1
-"224",1.17349757263239,64.5351434101759,1,1,1.30636084938437,NA,0,1
-"225",0.287709728313787,61.1917823416888,1,1,0.772907132748949,NA,0,1
-"226",-0.659770093821306,50.9661229169478,0,0,-1.85157391464842,NA,0,1
-"227",2.91914013071762,65.879824566943,0,0,1.16872326218352,NA,0,1
-"228",0.677415500438328,60.6372874823125,0,0,-0.471723098985895,NA,0,0
-"229",-0.684320344136007,56.4516414506322,1,0,-0.788011402128258,NA,0,0
-"230",0.186492083080971,69.2249574441879,0,0,-1.67479075787562,NA,0,1
-"231",-0.324393300483657,69.9232536859556,1,1,-1.96975711534648,NA,0,0
-"232",-0.274704218225806,68.7356390179351,1,1,0.0873659274372556,NA,0,1
-"233",-0.933503340589868,33.8895653889269,0,0,0.844907183544593,NA,0,1
-"234",0.116845344986082,51.0268316728995,0,0,-1.71285342452175,NA,0,0
-"235",0.319160238648117,69.3723531860462,0,0,-1.5756685439766,NA,0,0
-"236",-1.07754212275943,52.5466366519901,1,1,0.858715130088106,NA,0,1
-"237",-3.23315213292314,64.0853962974254,0,0,0.327499502368055,NA,0,1
-"238",-0.254874652654534,61.8291267264276,0,0,0.482599276074397,NA,0,1
-"239",0.0295178303214797,71.0740229118283,1,0,-0.861239506548091,NA,0,1
-"240",0.594273774110513,52.8962832962036,1,0,-0.465507726476707,NA,0,1
-"241",0.0591351681787969,59.4219763063927,1,0,-1.38840970257199,NA,0,0
-"242",0.413398894737046,63.4492341671265,0,0,0.494310883699177,NA,0,0
-"243",-1.09777217457042,72.72654447469,1,0,-0.447774737710839,NA,0,0
-"244",0.711175257270441,58.2261916475795,0,0,-0.845243025822087,NA,0,0
-"245",0.718888729854143,62.9040599697828,1,0,0.692127259774552,NA,0,1
-"246",0.251651069028968,46.5715179399608,0,0,-0.554400351447954,NA,0,0
-"247",1.35727443615177,66.3305002993816,0,1,0.805144115271628,NA,0,0
-"248",0.404468471278607,62.0681970608308,0,1,-0.343846725138525,NA,0,0
-"249",0.264364269837939,66.3930734251053,0,1,0.169318041646852,NA,0,0
-"250",0.268043904143697,55.8932943503209,1,0,0.280421965396122,NA,0,0
-"251",0.436930577039483,76.8535662126586,1,1,0.717654589428342,NA,0,1
-"252",1.06012390548736,58.6017389575769,0,1,-0.398660716202873,NA,0,1
-"253",0.452190396528715,61.2113874885007,0,0,1.0280059648229,NA,0,0
-"254",0.663198615687065,58.9432082370055,1,0,0.53352921437778,NA,0,1
-"255",-1.13637355399084,68.4630489128897,0,0,1.98337843440331,NA,0,1
-"256",-0.370497517092117,52.3155097607692,0,0,-1.20073705622921,NA,0,0
-"257",1.47696958979247,62.014624052689,0,0,-0.414474090581805,NA,0,0
-"258",-1.22390375051376,65.7456959846799,1,1,0.451136490702432,NA,0,0
-"259",0.258068387396106,47.5476011201701,1,0,-0.262450560666971,NA,0,1
-"260",0.405002805433516,62.2771880814336,1,0,1.02042672061091,NA,0,0
-"261",0.97580332180945,54.0619793773133,1,0,-0.43403248030227,NA,0,0
-"262",-0.348876736539909,74.2881078576221,0,0,-0.335940938967553,NA,0,1
-"263",0.158625439491262,67.0489806899413,1,1,-0.218092381570754,NA,0,1
-"264",-1.76325506654115,42.7486609059989,1,1,-0.114706806432635,NA,0,1
-"265",0.338596047099905,69.19284781933,1,0,0.090373957608694,NA,0,0
-"266",-0.666565029559273,62.1774092677819,1,0,-1.29027186439312,NA,0,1
-"267",-0.238646624327644,54.386752381008,0,1,-0.445302431780947,NA,0,1
-"268",-1.18776528245719,56.4441437218518,0,0,1.05959611849342,NA,0,0
-"269",0.384935321768578,47.2785783338835,0,1,1.60176387946255,NA,0,0
-"270",0.66657951565448,63.8619685165391,0,1,0.290704963562124,NA,0,0
-"271",-0.304613889457735,43.6147686198574,0,1,-1.5398402644936,NA,0,0
-"272",1.8250110640863,58.5973722272885,0,0,1.43152193223662,NA,0,0
-"273",0.670559370567588,64.5517560783696,0,1,1.1475890314884,NA,0,0
-"274",0.948632573255153,60.8338336199505,1,0,-0.0369320076461112,NA,0,1
-"275",2.04940300167091,68.5917332231044,0,0,0.390688747616043,NA,0,1
-"276",-0.651113609237535,60.5054810455568,0,1,1.11208742842588,NA,0,1
-"277",0.808619272846293,83.0894410422706,1,0,0.852353197942567,NA,0,0
-"278",0.986580613954563,51.4053682526367,0,1,1.14932956130465,NA,0,1
-"279",-0.00617079609070003,75.3911616866609,1,0,-1.37341062252185,NA,0,0
-"280",0.319052357931912,43.8553382265105,0,1,-1.33478426311924,NA,0,0
-"281",-1.01182190274184,49.2268354169066,0,1,-0.324747858050283,NA,0,0
-"282",0.470167547726389,50.986394529512,1,0,-0.922288234919009,NA,0,1
-"283",-0.700970331629097,44.7888763883714,1,1,0.92873970287116,NA,0,1
-"284",0.813682862526491,74.8469384506777,0,1,0.705086309642083,NA,0,0
-"285",-0.811430783992772,70.3370332012997,1,0,-0.0590209227685563,NA,0,0
-"286",0.31939748709946,73.7126649842358,0,1,-0.740775868739709,NA,0,1
-"287",-0.846522653163624,53.9107855854228,1,0,-1.67173099749737,NA,0,0
-"288",-0.245763179128733,60.8559709330906,1,1,-0.612541215827006,NA,0,0
-"289",-1.55285901110546,71.1850749108373,0,1,-1.48286853528974,NA,0,0
-"290",0.128434032925753,70.6052978815381,1,1,-0.71213118391869,NA,0,1
-"291",0.985443389404868,65.6013677357227,0,1,-0.277137445806187,NA,0,1
-"292",0.183247523067191,61.5814754090066,0,1,-0.00722792989139269,NA,0,1
-"293",-1.76622921343046,48.3582677768336,1,0,0.505992335498047,NA,0,0
-"294",-0.620533696615944,63.0968619619025,0,0,1.23163275222356,NA,0,1
-"295",1.65604303699874,58.215061851263,1,0,0.276685312887525,NA,0,0
-"296",1.80980538558388,53.4547542797855,1,0,-2.09970117138004,NA,0,1
-"297",-1.17503676801214,75.7615726400038,1,0,-0.803775430626082,NA,0,1
-"298",-0.366703258827641,52.9859626261871,0,1,0.804082706755595,NA,0,1
-"299",0.353625448927287,62.4287638937281,0,1,-0.177674868331488,NA,0,0
-"300",0.319156220867898,65.1603575646107,0,0,-0.494156769663437,NA,0,0
-"301",-0.579956989019402,70.5115476204457,0,1,0.980121542524162,NA,0,1
-"302",-0.953278701946907,63.6730439621612,1,1,-0.10056813602669,NA,0,1
-"303",-0.179428586945621,79.7321531497352,1,1,0.970848486101545,NA,0,0
-"304",1.0098082149443,64.4513674789454,0,1,-0.310366879864164,NA,0,0
-"305",0.0236266149214088,55.1151983150947,0,1,0.654996309218518,NA,0,0
-"306",-0.649028219583901,61.0251153583565,1,0,-0.376913155880549,NA,0,0
-"307",-0.504374222048191,66.6912924560925,1,1,0.510511530249471,NA,0,0
-"308",1.61439149545836,38.0949119182938,0,1,0.43622840726424,NA,0,0
-"309",-0.446959811255663,59.0443594865297,1,0,-0.113637816241966,NA,0,0
-"310",0.763176761113315,60.4152570851599,1,0,1.33940885629495,NA,0,1
-"311",1.4717186861858,57.6139912610199,0,0,-0.0218513586569336,NA,0,1
-"312",0.443664903687416,60.5136853648673,1,0,0.373164528251115,NA,0,1
-"313",-0.421721870070189,40.0819363979294,1,0,0.975058565890974,NA,0,0
-"314",-0.0400016262060736,63.4623628474444,1,1,0.428690951318362,NA,0,0
-"315",-0.492279968227887,60.0302934280113,1,1,-0.271214166419369,NA,0,0
-"316",1.22771711731275,55.0757149923363,0,0,-1.46472965857938,NA,0,0
-"317",-0.149553565244392,31.3558127740949,1,1,1.12136237253886,NA,0,0
-"318",1.54998338398246,55.8878726310216,1,0,-2.09093337363249,NA,0,0
-"319",-0.561612535381314,61.1820853805227,1,1,0.594813435824133,NA,0,1
-"320",-0.647117250632361,59.2883923223199,0,1,-1.22252855803645,NA,0,1
-"321",0.143132159050427,53.913025710452,1,1,-0.0678768826920807,NA,0,0
-"322",0.0241886479910512,63.2164528804192,1,0,-0.0834526806469843,NA,0,1
-"323",-0.504451524580869,50.895104679573,1,1,-1.77454980173004,NA,0,1
-"324",-1.58139680993034,66.9928104731946,0,1,-0.651151381128995,NA,0,1
-"325",0.030066421255418,68.2573237519313,0,1,-1.27796221073948,NA,0,0
-"326",-0.716576699070701,69.5754341191487,1,0,-1.07304892113685,NA,0,0
-"327",1.08261095547612,67.9884627036529,1,1,-0.20598745128514,NA,0,0
-"328",-0.952685454939293,56.4787278994997,0,0,-0.474454059463991,NA,0,1
-"329",1.12648272900916,59.7505317176422,1,0,0.736065656861654,NA,0,1
-"330",-0.649043017379116,72.4507337169484,1,0,-1.01819225085773,NA,0,0
-"331",0.292470078406726,66.1094195854966,1,1,-0.78662902278494,NA,0,1
-"332",0.898702724402737,63.4688303266061,0,0,0.0376161805801649,NA,0,1
-"333",-0.518742356791874,72.7917971095465,1,1,-1.85714141506033,NA,0,1
-"334",0.554438552345125,53.7315415813036,0,1,-0.168876560809693,NA,0,1
-"335",-0.0879736711840128,60.2526327160146,1,1,-0.247124779081277,NA,0,1
-"336",-1.13521292940752,73.0150900263725,0,0,-0.954434193635686,NA,0,1
-"337",-0.270079610999902,57.5021030543229,0,0,-0.550610577398466,NA,0,1
-"338",1.61978987753494,61.349983447954,0,0,-1.64139744423389,NA,0,0
-"339",-0.21413117369664,58.5332676938911,1,1,-0.781575921650955,NA,0,0
-"340",-0.817782462924957,72.3393059413951,1,0,-0.394591207093351,NA,0,1
-"341",-0.0540229240816071,61.3542598236762,0,1,-0.469383314539063,NA,0,0
-"342",0.330141609857869,60.672402924147,1,0,-0.735999128417405,NA,0,0
-"343",0.955324614657399,53.1877789148867,0,0,-2.1491238970204,NA,0,0
-"344",1.14395987779244,55.815370597461,0,0,0.132892471811892,NA,0,1
-"345",0.100522398375063,44.3078579091802,0,0,-1.22840510433402,NA,0,0
-"346",1.16457524565738,64.0958654077477,1,1,-0.24767481305761,NA,0,1
-"347",-0.764259947799148,68.4780319293457,0,1,0.0465609630034831,NA,0,1
-"348",-2.3445134046903,46.6261569044753,0,1,0.402484703932059,NA,0,0
-"349",-0.471683425529536,64.0135472863779,1,0,-0.112161426828015,NA,0,0
-"350",-0.51585548950739,63.2893589749903,1,1,-0.567274117442933,NA,0,0
-"351",-2.31603615150256,61.9532100004021,0,0,-0.992337662046278,NA,0,1
-"352",0.562471756969533,60.7146261011216,1,1,1.03936805870832,NA,0,1
-"353",-0.783775139422381,76.349332608997,1,1,0.0598427201580219,NA,0,0
-"354",-0.226053986228165,63.330070775793,1,0,-1.14933906139856,NA,0,1
-"355",-1.58710298765404,74.0181262288016,0,0,-1.87055140838007,NA,0,0
-"356",0.547524201868316,64.5101702086016,1,0,0.462124610549455,NA,0,0
-"357",1.89122701735548,64.8732359573994,0,0,0.286521464155645,NA,0,1
-"358",-0.878077110224044,54.6026254488847,0,1,1.08188443726883,NA,0,0
-"359",-0.112558911044787,86.9090741575432,0,1,1.09134840256743,NA,0,1
-"360",1.94871306497599,59.4018392431068,0,1,2.25769840603041,NA,0,1
-"361",0.933816332207727,55.0252413213957,0,0,-1.28702661360351,NA,0,0
-"362",1.91305942169705,63.8241714991964,1,0,-1.22080303572,NA,0,0
-"363",-0.00523405793193957,64.0918017843792,0,1,-1.50047956507908,NA,0,1
-"364",-0.152260048921635,67.2374592278896,0,0,-0.590727059737978,NA,0,1
-"365",-0.509631657179118,50.8507542562753,1,1,0.767959651027252,NA,0,1
-"366",1.43457370342612,58.6281490848044,0,1,0.409779947427045,NA,0,1
-"367",-1.28583853233299,75.7429327787333,1,1,0.973400941237074,NA,0,0
-"368",0.307314215255018,55.1708229695724,0,0,-1.36392238722292,NA,0,1
-"369",-0.046318533277383,56.4380948688943,0,0,-0.0115796151778788,NA,0,0
-"370",2.25184180012688,66.1022227422258,0,0,0.482908374800972,NA,0,0
-"371",-0.608033728609708,53.9396485263296,0,1,0.15500012159551,NA,0,0
-"372",-1.50928816985104,71.3451200339725,0,1,1.21508975014897,NA,0,0
-"373",0.232631769508576,64.2246828892729,0,1,-0.83778584449963,NA,0,1
-"374",-0.0396486960510932,60.8954971140714,1,1,0.621594843184185,NA,0,0
-"375",-0.839125067505345,44.0828600603638,0,1,0.155524900096752,NA,0,0
-"376",0.132291088244156,56.5283298575697,1,0,0.408982790756931,NA,0,0
-"377",-0.275524746705939,80.3429598442535,0,1,-1.57755448926486,NA,0,1
-"378",-0.678758641268032,57.1409484440775,1,1,0.836836094188595,NA,0,0
-"379",0.500835931775825,66.5437878298221,0,0,0.723514559652086,NA,0,1
-"380",-0.331662312006789,50.7525238303052,1,0,0.13147873919411,NA,0,1
-"381",-1.83498025077253,44.0929696074596,1,1,-0.347255182614316,NA,0,0
-"382",-2.65174124837268,67.093984696494,0,0,1.57682166298993,NA,0,0
-"383",-0.580582172084415,74.4932679629708,1,0,-0.196836397057139,NA,0,0
-"384",1.45418687087119,44.5561969061596,0,0,0.410923281958588,NA,0,0
-"385",0.838129379964693,68.460692482214,1,1,-0.553991035221659,NA,0,0
-"386",1.21505358044614,63.3093247067837,0,0,-0.175077428542885,NA,0,0
-"387",0.982505442732795,56.3381268914086,0,1,-2.57507720718692,NA,0,1
-"388",0.315764024018431,77.4928498341616,1,0,-0.986967654557719,NA,0,1
-"389",-1.50706262771632,49.94855038544,1,0,1.18030861235152,NA,0,0
-"390",0.205569787166277,58.8256070115816,0,1,-0.311600897935701,NA,0,0
-"391",1.59722809034381,66.685055465381,1,0,-1.28928390016459,NA,0,0
-"392",-3.39606353457436,49.1752597856717,0,0,-1.55573527655244,NA,0,0
-"393",-0.781352284302082,71.0596780578619,1,0,0.351864193767146,NA,0,0
-"394",1.10246464075191,58.8584661806551,1,1,-1.02361871850118,NA,0,0
-"395",0.528745019445293,72.291483044291,1,0,2.5831692892774,NA,0,0
-"396",0.789394403514854,59.7670318390833,0,0,1.22632376791003,NA,0,1
-"397",0.457099505856081,52.6489453337983,1,1,1.08451182102266,NA,0,0
-"398",0.538833115658123,46.5948516686648,1,1,0.126743197570183,NA,0,0
-"399",0.0146431224111012,46.2693410762816,0,1,-1.12144201654816,NA,0,1
-"400",-0.916489135703093,58.5094290602607,1,0,0.289207990325689,NA,0,1
-"401",-1.22681509076766,39.9372404080284,1,1,0.638154888999721,NA,4,0
-"402",0.0361528681302234,44.4043146258191,0,1,-0.0953672363546544,NA,3,0
-"403",-0.42139311357139,79.6270156915108,1,0,-0.806979203185191,NA,3,0
-"404",-0.899364408462905,58.2168177639437,1,0,1.60819567906429,NA,2,1
-"405",0.417441317486562,73.5762067316143,0,0,0.411553303651001,NA,4,0
-"406",0.15344473686401,76.4899204713817,0,0,-0.402484324529066,NA,2,0
-"407",1.46328304519325,61.1948457059363,1,0,0.110962357974858,NA,0,0
-"408",-1.12150249732554,60.0362061633761,1,1,0.654036068679298,NA,3,0
-"409",-0.517788083055024,61.9644344592465,0,0,1.06906574617112,NA,2,0
-"410",-0.0749470883385233,66.797395067307,0,0,-1.88341014148936,NA,2,0
-"411",-1.40779008143908,68.0112484496439,0,0,-1.4429269621452,NA,1,1
-"412",-0.284705868110477,39.8479942022391,1,1,0.461029846216729,NA,2,0
-"413",-0.708171178213376,60.4739598808669,0,0,0.598279919438516,NA,4,0
-"414",-2.14763900113752,66.1309660685985,1,0,-0.8844762950372,NA,4,0
-"415",-0.283837160611358,58.299815189466,1,1,0.360707882967097,NA,5,0
-"416",-0.534072163976087,62.2575952443179,0,0,-1.69402405553405,NA,3,1
-"417",1.1330101941542,68.9240414112506,0,1,-0.462399627624698,NA,3,0
-"418",-0.604068922847656,58.5238690124354,0,1,1.02525119985162,NA,2,0
-"419",0.557511595592472,75.3718231037047,1,1,0.123430942155291,NA,4,0
-"420",0.142629293255087,64.5261935487076,1,1,0.756816132810634,NA,5,1
-"421",-1.23686021484532,44.0430574742946,0,0,-0.987323492483483,NA,2,1
-"422",0.37414397250319,58.1283470796627,1,1,0.140070935388242,NA,5,1
-"423",-0.106086123466453,65.6763446812895,1,1,-1.53748694519716,NA,4,1
-"424",0.166061986509049,61.626791167636,0,0,0.0257718325228052,NA,0,1
-"425",0.658034848342406,46.2801698707418,1,1,-1.02303046739119,NA,2,0
-"426",0.113962919971386,41.1604748830867,1,1,-1.75905799813544,NA,4,0
-"427",1.58515916403119,59.3195299183527,1,0,-0.888458790043248,NA,1,0
-"428",0.115553440443324,66.8730693385291,1,0,1.96280409188127,NA,3,1
-"429",-1.25399903928352,52.9199927122785,1,1,-0.494058066622838,NA,3,1
-"430",0.74186477930074,63.7767726881616,0,1,-0.489052716862196,NA,2,0
-"431",-1.22290161499985,65.8075773927448,1,1,-0.531672209620644,NA,2,0
-"432",-1.91889413176211,69.7462609239334,1,1,-0.70578379825765,NA,1,0
-"433",-0.809936389920794,65.8745250933105,1,0,-0.787245598526667,NA,5,0
-"434",-0.744677228785278,62.4527877882343,1,0,0.0407062842653212,NA,4,0
-"435",-0.607505922693199,44.339962958283,1,1,-0.160257426739252,NA,3,1
-"436",1.60675956606494,54.9187185190614,1,1,0.707436075130338,NA,3,1
-"437",1.63559944628363,68.5827885392439,0,0,0.67259647060077,NA,1,0
-"438",0.726091096698492,64.518667192148,0,0,0.444869491301001,NA,4,0
-"439",-1.27383492504346,51.0209576314687,1,1,-0.132896094565712,NA,2,1
-"440",0.0231465554333402,63.1901114488487,1,1,0.236582456988663,NA,1,0
-"441",0.487121233826478,59.0603548942023,1,1,1.11388056515839,NA,5,0
-"442",-0.137027503269731,72.6430832492208,1,1,-0.109997400872429,NA,3,0
-"443",0.171745482456009,59.6599063472136,1,1,0.041185051013977,NA,1,1
-"444",-0.944793914899987,56.1183096987207,0,1,-1.79925084231441,NA,1,1
-"445",-1.28762030600621,57.3954516809066,0,1,-1.05031744528165,NA,1,1
-"446",1.40776573875103,51.2256548591289,0,0,0.380665228128833,NA,2,0
-"447",0.26737268905316,72.9275143050665,1,1,-1.49439331682552,NA,3,0
-"448",-0.408397642146012,61.6518785360755,1,0,-0.0653339484432832,NA,3,0
-"449",0.325271219916941,53.967929242885,0,1,-0.364169952975103,NA,4,1
-"450",2.06248097311935,63.9860390080151,1,1,0.845529325639494,NA,1,1
-"451",0.362666955663951,68.5895212545866,1,0,0.435429302544269,NA,0,1
-"452",1.4114046447039,55.7808753143062,0,0,-0.53413789339427,NA,1,1
-"453",1.36754431457543,56.945111105703,0,0,0.943239360539817,NA,3,0
-"454",-0.407176147121539,59.4081198549152,0,1,0.301835461893759,NA,3,1
-"455",0.762541730885853,39.9812781204761,1,0,0.0574454909242895,NA,5,0
-"456",-0.651200823031723,61.5543642135745,0,0,1.17823339900018,NA,5,0
-"457",-1.47365577946832,60.4256222751568,1,0,0.294456918483786,NA,3,0
-"458",-1.20166579640511,57.3796080120584,1,1,1.14526303524807,NA,0,0
-"459",-0.148720478130092,49.788067416777,0,1,-0.154182678560202,NA,4,1
-"460",1.79706242855231,70.6535710343492,1,0,0.636476192810274,NA,2,1
-"461",0.104808717855571,55.4240916212448,0,0,0.910773865410215,NA,3,1
-"462",-0.802837973351476,61.3530391287928,0,0,-0.662806250607168,NA,3,1
-"463",0.230374964163675,81.1278759570127,0,1,-0.290630354786015,NA,1,1
-"464",0.697975193218088,54.081663046136,1,0,-1.46382575920573,NA,2,0
-"465",-1.29520173194588,53.3126210595422,1,0,-0.109033197419866,NA,1,1
-"466",-1.05468021797766,61.5387874093147,0,1,-0.718369922643492,NA,1,0
-"467",-1.94160748967507,57.1460838840075,1,0,-0.298180863282341,NA,1,1
-"468",-1.26956059300372,55.0730356079902,1,0,-0.605295550729906,NA,0,1
-"469",-0.887438810816108,64.097715393881,0,1,-0.377983953309427,NA,0,1
-"470",-0.291915762703194,62.2935645078686,0,0,-0.521744921541124,NA,5,0
-"471",1.41009405297303,49.2537436668267,0,1,-0.61820601713851,NA,2,1
-"472",0.88864143026394,46.1924914556153,0,1,-0.436407984532611,NA,1,1
-"473",0.272974319690261,65.9823324144569,0,1,1.36410917391319,NA,1,1
-"474",-0.566641466026189,51.6053135785225,1,1,-0.948246264578507,NA,0,0
-"475",-0.0402069374365745,50.4457493995265,0,1,0.901429133926489,NA,3,1
-"476",0.547612245301063,72.3621807067256,0,0,-0.800665134230229,NA,5,0
-"477",-0.504033321605536,60.759841521696,1,0,0.307890055852477,NA,1,0
-"478",1.0590701295588,54.6042709236522,1,1,-0.910998770565304,NA,3,1
-"479",1.09745356643579,67.7353811942751,0,0,-1.07592057138663,NA,1,1
-"480",-1.16650462962559,73.0430052382949,0,0,0.445539081785094,NA,5,1
-"481",-0.74691131008606,69.8510997410927,1,1,-0.198342170801574,NA,4,1
-"482",1.20673873333927,49.8827848931382,1,0,-1.50647668921738,NA,3,0
-"483",-1.68721988054335,53.9397097428161,1,1,0.725739408768338,NA,4,1
-"484",0.419709894906327,63.1672605550687,1,1,-0.583429191806288,NA,4,1
-"485",0.233498761638796,46.3530889135471,0,1,0.140482006085695,NA,5,0
-"486",3.19590119785739,56.1803372047234,0,0,0.360450314117149,NA,4,0
-"487",-2.72968034122695,59.8570725884825,0,1,-1.21243708583669,NA,3,0
-"488",-0.840795801424047,59.3458186800103,0,1,-0.675731348315961,NA,0,0
-"489",0.674028668650456,62.2343241886172,1,0,1.5696434732764,NA,0,0
-"490",1.67432650967139,61.8318216667889,0,1,0.619497282943088,NA,3,0
-"491",0.839687083840494,68.9066124222916,0,0,-1.43622916032155,NA,1,1
-"492",-1.42836843555519,59.4692815517991,1,1,-0.314778857388245,NA,3,0
-"493",2.23938816916782,51.7014188448492,0,0,-1.14509720885026,NA,4,0
-"494",-1.75726622410175,58.7897090217489,1,0,1.04451293282252,NA,3,0
-"495",-1.11301608790466,57.9034195656516,0,1,-0.00689052928968512,NA,2,0
-"496",-0.0431403906133336,60.3958956171276,1,1,-0.290987619256496,NA,4,1
-"497",2.2255869089066,74.8099019747618,1,0,1.01085610285753,NA,4,1
-"498",0.50631658733822,65.6666005839204,0,1,1.1116436393317,NA,0,0
-"499",0.730396051435768,77.8958035863141,1,0,-2.29398597219737,NA,4,0
-"500",1.72807759473682,71.1404785115473,1,1,0.65588476565193,NA,1,1
-"501",0.984779968314556,77.9407452864789,1,1,-0.399908439414822,NA,5,1
-"502",-1.22473787596427,46.3545110010355,0,0,0.706900239659698,NA,3,0
-"503",0.709726218065825,52.9256001500028,1,0,-0.841793729346687,NA,3,0
-"504",-0.109219992578064,54.4371565411251,1,0,0.627844674789504,NA,4,0
-"505",1.78260789538833,56.8991890624728,0,0,-0.865794010742884,NA,2,1
-"506",-0.243444679602975,56.2382066427167,1,0,0.315109395167471,NA,2,0
-"507",-1.52710702225461,64.5852600842686,1,1,-1.23613033525829,NA,2,1
-"508",0.491834371466622,47.3885085056887,1,0,0.0770462714583238,NA,1,1
-"509",0.354503660926992,54.7253483679385,1,1,2.08698542160568,NA,3,1
-"510",-0.0176263476303775,54.4318577820146,0,0,1.21353587898004,NA,4,0
-"511",-1.05655032211723,56.2556127378398,1,0,0.888689083732765,NA,3,1
-"512",-0.838914666078269,53.9560001901894,1,0,-1.04095179499479,NA,4,1
-"513",-0.0126257164904259,72.0312707053266,1,1,1.31100543436612,NA,5,1
-"514",1.03811014600433,67.6887323340547,1,0,0.858565664121565,NA,5,1
-"515",-0.364457713905061,62.3835102324136,1,0,-0.549804243968874,NA,4,0
-"516",-0.871449472185395,45.8497185861131,0,1,-0.331829226096368,NA,2,1
-"517",-2.7042030254467,89.3377438660602,1,1,-0.60923508470323,NA,0,0
-"518",0.494249836540647,69.3502575735832,1,1,0.828145323839061,NA,2,0
-"519",0.427340324999113,72.3388452172586,1,1,-0.86453131416218,NA,1,1
-"520",1.30919086010968,41.0861527820195,1,1,-0.342300730886153,NA,1,1
-"521",1.4667226423107,57.7734870688206,1,1,-1.1273804260992,NA,0,0
-"522",-0.143056066294616,67.6812752836092,0,0,0.725663561640484,NA,3,1
-"523",1.29851372621074,63.8795365826285,0,1,-1.50442814718175,NA,4,1
-"524",0.00131354755592194,66.0913303155833,1,1,-1.22740506524656,NA,0,1
-"525",-0.676410075685581,65.9051860827632,1,1,-1.53475342481241,NA,0,0
-"526",-0.848196596885468,55.6485924671455,1,0,-0.631744162117857,NA,1,1
-"527",-0.346941100407436,57.4923010150099,1,0,0.287560601294239,NA,1,0
-"528",0.227479443027071,50.7373061627669,0,1,-1.36246786870891,NA,2,1
-"529",-0.0274014036111113,70.8743580325344,0,1,1.48183223446739,NA,1,1
-"530",0.200877790585033,40.5604078409579,0,0,0.206621169315406,NA,3,0
-"531",0.198942671822796,57.1937956166365,1,1,-0.448302654690569,NA,4,1
-"532",0.71459935608369,68.1207755667137,0,0,0.0588951606055983,NA,1,0
-"533",-0.273181774889849,57.9229631299196,1,0,0.412204811749354,NA,5,1
-"534",1.63534745793173,74.5075731996683,0,1,0.172681707982134,NA,1,1
-"535",0.437693886479643,68.4149317815536,1,1,0.254266315050371,NA,4,0
-"536",-1.19353585074908,51.3426217787062,1,0,0.00467235118544757,NA,1,1
-"537",-0.276649804199181,59.4012219329904,0,0,0.379782069197037,NA,1,0
-"538",0.515883050535119,60.1153115324976,1,1,-0.116966273461431,NA,4,1
-"539",-0.411055332086343,62.6911145139178,0,0,-0.235452686427768,NA,2,1
-"540",0.344995049472574,54.7183949407659,0,1,-1.50127941204146,NA,4,1
-"541",0.434395860831138,53.8292615863329,1,1,1.22310588631937,NA,3,0
-"542",1.30347768300503,57.3762748497832,0,0,-0.590962958826448,NA,2,1
-"543",-0.651695910733076,69.1234717100544,0,0,0.838119396605941,NA,3,1
-"544",-0.0168808623279884,71.2361058688565,0,1,-0.593254569869571,NA,1,1
-"545",-0.567692455293591,53.9648028631747,0,1,1.26517771265799,NA,1,0
-"546",-1.12713286976328,60.5532339363837,0,1,-0.904355807064555,NA,2,1
-"547",-1.84897031117903,49.9934156711519,1,0,-0.505215327863061,NA,2,1
-"548",0.19552905518189,52.4182117086078,0,1,-0.0855138246373087,NA,3,1
-"549",0.601565225195587,53.8856418549474,0,0,0.0398872990007094,NA,4,1
-"550",0.639158159789933,46.8327773679206,1,1,0.691657670066614,NA,2,1
-"551",-1.38999281746208,39.9258498278283,0,0,1.6034764267485,NA,1,1
-"552",-1.12967785339758,67.6800212752914,0,0,0.835300881754122,NA,2,1
-"553",-1.14115419483757,64.0576131350423,0,0,-0.103643379450682,NA,4,1
-"554",-0.688671499600886,51.6828412108396,1,0,-0.0872413331772054,NA,4,1
-"555",0.170197818152129,69.5807577311228,1,1,-1.82243053694214,NA,2,0
-"556",0.59448342725741,78.1071925143842,1,1,0.520602167106094,NA,1,0
-"557",-0.815032650724957,34.499855151722,1,0,0.65845054053139,NA,3,0
-"558",-2.86434683706673,71.5025508071742,0,1,1.53474704885865,NA,3,0
-"559",0.95382348170826,72.2789365442077,0,1,-0.608853092026856,NA,4,1
-"560",0.615284770613444,61.5210316268381,1,1,-1.25002578560839,NA,4,1
-"561",-0.700870700550263,54.7691619081263,1,1,0.050433788343753,NA,3,1
-"562",-1.07366134146979,48.9840663565416,0,0,-0.817017518601282,NA,5,0
-"563",-0.299991196799737,51.2026971858702,1,1,-1.20516090614205,NA,5,0
-"564",0.108106562802571,53.6329974224591,0,1,-0.812208718091528,NA,5,0
-"565",0.723200763587674,38.940685691571,1,1,-0.126940403277922,NA,0,0
-"566",1.39009509607427,53.5374163987757,1,1,0.425982751248675,NA,1,1
-"567",-0.848883398378404,69.8679342631946,0,0,0.526296647949334,NA,3,0
-"568",0.290722844643498,52.4509985406741,0,1,0.840636091160457,NA,2,0
-"569",1.33417603370242,75.6520486289813,0,1,-0.1230836694655,NA,4,0
-"570",-0.901342155259083,59.4995073900847,1,1,-1.1952174160169,NA,5,0
-"571",-0.222763412932851,61.110287061467,1,0,0.297898909938711,NA,2,1
-"572",-1.7714680282034,64.7547104599742,0,1,0.269973033352478,NA,5,0
-"573",0.287422250114445,50.6099609529286,0,1,-0.487325909162453,NA,1,1
-"574",-0.641191762323999,52.2595438035702,1,1,-0.21760252906034,NA,1,1
-"575",-0.924937642238947,55.7644153276736,0,0,-0.175365093537259,NA,5,1
-"576",-0.779977469185323,77.7287949441054,1,0,0.328622386725262,NA,1,1
-"577",0.444503373594371,64.265506583303,1,0,-1.25986632646462,NA,4,0
-"578",-2.16246900261166,59.0552982691128,0,1,0.118448413456417,NA,3,0
-"579",-0.425510899920945,48.8340456618434,0,1,1.28970835771858,NA,4,0
-"580",-1.01371765920455,59.1256485347046,1,0,-1.62452495612896,NA,0,1
-"581",-0.962119028303747,63.9252609017327,1,1,-0.6251284659458,NA,1,1
-"582",0.474634627277364,56.3937783074627,1,0,0.898480565702257,NA,2,1
-"583",0.351754269604537,51.1201563838252,1,1,2.69864035559375,NA,2,1
-"584",-0.107020356246981,50.5358344620414,0,0,-0.476011271828003,NA,4,1
-"585",0.537517256600346,57.8008844392544,0,1,0.768070900586023,NA,0,0
-"586",-0.60210489844813,63.4222011692021,0,0,-1.60136744074432,NA,3,1
-"587",-2.02329764533368,69.6992412941553,0,0,-0.895774214016735,NA,3,0
-"588",1.15442128059814,46.6454685980288,0,1,0.0343952662533795,NA,4,0
-"589",-0.230692997694109,51.9912052459385,0,0,-1.68357887922913,NA,0,0
-"590",-0.365088768214911,39.5320365581593,0,1,0.598280628779714,NA,0,1
-"591",0.157517929375723,61.1005621819368,0,0,1.5794687911285,NA,0,0
-"592",0.765932222984396,67.161003737353,0,0,-1.71301936747456,NA,4,1
-"593",0.426086966202313,62.5412060237561,1,0,1.45877019242422,NA,4,0
-"594",0.927265399965311,72.2876218801499,0,0,-1.13148864482625,NA,1,0
-"595",0.0461345928404875,65.8340400595612,1,1,-1.21309833985091,NA,0,1
-"596",-0.504285643091111,54.1287522627104,0,1,-1.1964310966019,NA,4,1
-"597",-0.300449408355562,74.466653579162,1,0,0.141451563358798,NA,2,0
-"598",0.822915978645871,64.4339403542803,0,0,-0.393832432131072,NA,1,0
-"599",-0.849268796129039,65.7794688571545,0,1,-0.0619582961780062,NA,3,1
-"600",-0.953610095681196,50.7816503612272,0,0,-0.287800864267034,NA,0,1
-"601",0.313478715882576,57.5067329483484,0,0,-0.825144206271538,NA,2,0
-"602",0.612473307433314,47.7810135362433,0,0,0.347168217045257,NA,0,1
-"603",-1.69108322753688,59.2746035395449,1,0,-0.920092945237873,NA,4,0
-"604",0.784698627442325,73.6490124503674,0,1,-0.287336465542101,NA,4,0
-"605",0.0119251352600229,55.4246898015577,1,1,-0.551130291554083,NA,1,0
-"606",-0.181255287869961,60.6520021936009,1,0,0.848645639995586,NA,3,1
-"607",1.10695440248067,67.839803359237,1,0,1.30351300254165,NA,2,0
-"608",1.47939487098826,44.122919268352,1,0,-0.528633316317668,NA,3,0
-"609",-1.14739944099984,46.3063568676183,0,0,0.762950659152549,NA,0,1
-"610",1.011654376514,50.7248019737764,0,0,1.79031043197109,NA,5,1
-"611",-0.632069194974882,63.0084155983022,1,1,0.515653432567394,NA,0,0
-"612",0.13236131951909,60.8046894610394,0,0,0.50995632659946,NA,2,1
-"613",0.480926064725324,72.4674683051722,1,0,0.877534755180437,NA,2,1
-"614",0.133177627584607,47.6555089979498,1,0,-0.305159380450246,NA,4,1
-"615",-1.46286446232819,55.6093070567735,0,1,-0.998621027342654,NA,2,0
-"616",-0.506401780474741,38.984060469444,0,0,0.779717443181239,NA,1,1
-"617",1.53966323733755,76.1875617137326,0,1,-0.202455829498655,NA,4,1
-"618",0.172793618387291,70.8432849074413,0,1,0.0576926549857799,NA,3,1
-"619",-0.835823924079678,53.5919909308699,0,0,-0.784453934886022,NA,1,0
-"620",-0.655928703879608,57.4718017374294,1,0,-2.30477854744121,NA,0,0
-"621",1.05756608653532,58.6517072677753,0,0,0.705845185436105,NA,1,0
-"622",0.346083489865367,77.9535599171383,1,1,0.285270288864788,NA,1,0
-"623",-0.897186541993694,60.3839671736902,1,0,-0.185219169053537,NA,5,1
-"624",0.00626521125563042,64.1299828889006,1,1,-1.53285677380564,NA,1,0
-"625",0.59667050907573,66.9957212296699,0,1,0.0330032767412004,NA,3,1
-"626",1.72255180794401,66.8979094457615,1,1,-0.689865477443077,NA,4,0
-"627",-0.881749483409345,43.2894053470812,1,0,-0.660371007482841,NA,3,1
-"628",-0.379945955166868,67.4004830752263,0,0,0.88886063882028,NA,1,0
-"629",0.996110015873762,50.7587510088624,1,1,0.878608946597332,NA,1,1
-"630",-0.459120892943841,59.6831142668903,1,1,0.366984772412494,NA,2,1
-"631",1.46026574621257,40.7220701847977,1,0,-0.305016749827467,NA,5,0
-"632",1.04324617695035,62.1430302967833,0,1,0.0843869852438986,NA,2,0
-"633",-1.65904097034683,49.9496113449304,1,1,-0.317787011202578,NA,4,1
-"634",0.423957776227396,38.2104650771021,1,1,-0.882748434256429,NA,3,1
-"635",-1.41916292618524,43.2745086944509,1,1,-0.420131846914995,NA,5,0
-"636",-1.46469245667068,45.7927786444898,1,0,0.211772323155758,NA,2,0
-"637",0.0704209721478317,65.5053163877836,1,1,0.178742568384748,NA,4,0
-"638",0.0566982219802716,63.1043317691679,1,0,0.159911059881684,NA,1,1
-"639",-0.0327081772655874,66.8507391296196,0,1,2.34022278036937,NA,2,1
-"640",1.26428122406637,54.6808950918317,0,1,-2.08364389013229,NA,2,1
-"641",0.294987885730239,61.8178545986328,0,1,-1.65874663082723,NA,1,0
-"642",-0.673086820504489,64.1159617779636,0,0,0.586514202862168,NA,5,0
-"643",0.119588291490956,59.1494095104168,1,1,-0.00838215866917112,NA,1,0
-"644",-0.514396508991294,48.9329379646391,1,1,0.263408012095898,NA,5,1
-"645",-0.0681115738665369,71.2913359573999,1,1,-0.861975614928186,NA,3,0
-"646",-1.14801191183674,76.6801562437414,0,0,-1.82646160006687,NA,4,1
-"647",-0.115244480129169,80.1910292593947,0,1,0.157684692593684,NA,1,0
-"648",0.102936391849009,68.9254803557245,0,0,0.457828090493501,NA,3,1
-"649",-0.701743487103139,58.1279531679218,0,1,-1.81147770998711,NA,3,1
-"650",0.041230624270294,58.723025843893,0,1,1.16992415129467,NA,3,0
-"651",-0.648768350598479,56.9466986034193,0,0,1.36303655425287,NA,3,1
-"652",-0.644007859466443,57.3713316214369,1,0,-0.248942773977312,NA,2,0
-"653",-0.204438825066372,59.4853135557367,1,1,1.09936446212006,NA,3,1
-"654",0.490840917661593,40.2733664026596,0,0,-0.107551492563844,NA,5,0
-"655",-0.20533698727053,60.1122825038515,0,0,0.0894339240327522,NA,2,1
-"656",0.190691323925256,62.3534485729383,1,0,0.606327319614907,NA,3,1
-"657",0.970024192661124,50.4963014647889,1,1,-0.0804754643912515,NA,3,0
-"658",-0.285261077847069,42.4285202480467,0,0,0.810892128943754,NA,1,1
-"659",-0.103619268541766,56.4003934908483,0,0,-0.520685306740175,NA,1,0
-"660",0.511254541203706,28.7841044157547,0,1,-1.311726827066,NA,1,0
-"661",-1.25816245001572,76.7254711224307,1,0,0.13926896706412,NA,1,0
-"662",0.543825968956194,66.9054981644897,1,0,2.05028132773367,NA,1,0
-"663",-0.534963586582989,50.2332967992888,0,0,0.408030144810329,NA,2,0
-"664",-1.13804602678007,60.1513417560489,0,1,0.205003350637751,NA,4,1
-"665",-1.76800377087565,58.3454582474957,1,0,-2.25286871899928,NA,1,1
-"666",-0.714978202867742,43.3978663147313,1,1,-0.565403743167805,NA,3,0
-"667",-1.17020226435962,51.215656197345,0,0,0.950213555862081,NA,3,0
-"668",-1.17844814841946,69.6581583860238,0,0,-0.372804808189972,NA,2,1
-"669",1.62729133795333,48.44178693266,1,0,-0.1492126428995,NA,5,1
-"670",-1.21214666562853,52.486042883844,0,1,-0.0922176935601109,NA,4,0
-"671",0.997509917441285,52.5491564414526,1,0,-0.574496620028689,NA,2,0
-"672",0.545245641420171,72.2705248547405,1,0,-1.96122664049141,NA,3,1
-"673",-0.88478911517512,58.0914200617858,0,0,0.971325375264889,NA,3,0
-"674",-0.611230697881371,65.5431074597468,0,1,-0.713199115251178,NA,4,1
-"675",-0.410199512833357,67.7143312710373,1,1,-1.51317945394904,NA,1,0
-"676",0.190409369836815,56.1993320020924,0,1,-0.687123937818725,NA,0,0
-"677",-0.348324928959338,48.7144861164402,1,0,0.0118395408472662,NA,4,0
-"678",0.826847530284988,61.7010177703998,0,0,-0.116434978511697,NA,3,1
-"679",-0.292686902302723,56.4145766794893,1,0,-0.0422275015339314,NA,2,0
-"680",-2.16534620996324,74.429384906866,0,0,0.762295840447976,NA,2,0
-"681",-0.0962167762033804,54.8005313675673,0,0,-0.726095094064359,NA,0,1
-"682",-1.85195684624858,56.8935790560068,0,1,-0.0677972470128565,NA,2,1
-"683",0.937811135232089,66.8273746460155,0,0,-1.26107920253219,NA,3,0
-"684",-0.747686481439497,53.3642157963456,0,1,0.206892040110787,NA,1,0
-"685",-1.11524143570017,59.8204945136309,1,0,1.36441064967075,NA,4,1
-"686",-0.591506415298824,64.9659101653161,0,1,1.14095787081644,NA,3,1
-"687",0.585230683909193,55.957252994943,0,0,0.878779615734706,NA,3,0
-"688",-1.91855819930986,47.8835118978589,0,0,0.809802914368629,NA,2,0
-"689",0.253816791504093,63.1274371673616,1,1,0.902337900109669,NA,2,1
-"690",-0.972241766246169,57.2013813581033,1,0,1.38438037496282,NA,2,0
-"691",-0.00851344803801686,39.8014486545528,1,1,-1.14947882024253,NA,4,0
-"692",0.679041911007479,62.6957886317779,1,0,3.04137005820899,NA,3,1
-"693",0.569631366692422,61.3179925086918,1,0,0.308372526803296,NA,2,0
-"694",1.5029616611284,43.2122872961847,1,0,2.63980862992207,NA,4,0
-"695",0.129085401165474,54.7412049669595,1,1,2.15875203633165,NA,4,1
-"696",-0.701182951676618,58.2487537655487,1,1,1.0255478204364,NA,0,1
-"697",2.15370721597723,54.4336923660839,0,0,-0.712636079025502,NA,1,1
-"698",-0.30324613826175,65.5184840910853,1,1,0.547552248004117,NA,2,1
-"699",0.807579776908013,44.2394848671974,0,1,-0.0912041102216604,NA,4,0
-"700",0.930991466126415,50.4570722760887,1,0,-0.0847202595553852,NA,1,1
-"701",0.454902098272933,55.5013657540147,0,1,2.828918800837,NA,3,0
-"702",1.4254206900636,74.4390639515157,0,1,0.386740110051894,NA,1,1
-"703",-0.20899117251275,58.4474591118618,0,1,0.035752971150918,NA,4,1
-"704",-1.23140277909061,49.782810920101,0,1,0.0953234242182932,NA,5,1
-"705",-0.360603577153347,75.3846974718311,1,1,-1.87627347653956,NA,2,1
-"706",-0.547213299484056,57.6882402587354,1,0,-0.836680050194782,NA,4,1
-"707",0.327996007514467,65.5192386215171,1,0,-1.59098150147176,NA,3,1
-"708",-0.933299832093657,71.2179403686998,0,0,0.679609200101788,NA,4,0
-"709",0.680810824800176,64.0954539235249,1,1,0.0394033025755432,NA,4,0
-"710",-1.16388240018636,73.0026152604088,1,0,-1.13110578221052,NA,2,0
-"711",1.75544950988335,52.0907872846478,1,0,1.54246283173142,NA,3,1
-"712",1.44963736425491,75.9861945374855,1,1,0.979382929015041,NA,4,1
-"713",-0.448794941211084,41.7709972649556,0,1,0.479663328297167,NA,0,0
-"714",0.140067288930464,50.2463688602315,1,0,0.640033119859951,NA,2,0
-"715",-0.373650328177582,54.1353934114822,0,1,-1.1331543057143,NA,3,1
-"716",-1.20643411723808,62.3310955180241,1,0,-0.370884779043532,NA,1,1
-"717",-2.66134597494101,55.2655137517466,1,0,0.446182869021833,NA,1,1
-"718",-0.0250944932655905,65.8311098068857,0,1,-1.09814310255957,NA,1,1
-"719",-0.0343732572490637,64.2002944729188,1,0,2.71111481120823,NA,0,0
-"720",1.64235903904485,66.7886090857055,0,0,1.32545606767588,NA,2,1
-"721",-1.99546214753883,74.8246102516947,0,0,1.12590205791424,NA,4,0
-"722",-0.662170758602409,45.7471596223815,0,0,0.0540639496507907,NA,3,0
-"723",0.880037992310033,69.6997498065008,1,0,-1.41726775744781,NA,5,1
-"724",0.342387420001643,57.3453423213813,0,0,1.1157841655732,NA,1,1
-"725",0.121753855523786,51.9689444931483,1,1,1.42687828227381,NA,3,0
-"726",-0.620761034511813,42.2296589808404,0,0,0.317797102510567,NA,1,1
-"727",0.200945267574023,51.5096310837433,0,1,-0.496273411043631,NA,4,0
-"728",-0.235905348023341,57.0052178085398,1,1,0.599696904650334,NA,2,0
-"729",0.113661114970227,40.5975552081015,1,0,0.473425513276744,NA,2,1
-"730",-0.577929195287818,56.3996721236615,1,0,-0.789327412942633,NA,4,0
-"731",1.18856488583833,65.1010618716982,0,0,-0.609905289258947,NA,2,1
-"732",1.05993627812147,47.0202958147036,1,0,0.219664136878533,NA,5,0
-"733",1.33675821086468,59.9515259765206,1,0,0.992447441204347,NA,4,1
-"734",-0.367169171130217,46.5514776511103,0,1,2.26042217472389,NA,3,0
-"735",-0.304336704778395,59.4831581476884,1,0,-1.11485663619506,NA,2,0
-"736",-0.350279624235713,68.9484979335908,1,1,-0.572319724030329,NA,5,1
-"737",1.37414476272803,38.2011772720581,1,0,1.35270086680642,NA,4,0
-"738",-0.299720491875069,83.0641643226721,1,1,0.107868729562373,NA,2,0
-"739",0.0976351442252321,48.3224964444907,1,0,0.633432253650029,NA,1,0
-"740",0.381188028723722,69.4042971867315,0,1,-0.879849328007825,NA,4,1
-"741",0.678212556818783,80.7910159606188,0,0,-0.332886377356052,NA,3,1
-"742",-0.26767327223427,50.5617597672666,0,0,0.324992574993461,NA,3,0
-"743",2.34596566433491,53.2454585487364,1,0,-0.938415415864997,NA,5,1
-"744",-0.927494655524829,70.2165032799741,0,1,1.21315153594942,NA,5,0
-"745",-1.42655717369774,51.8249656933302,0,1,-1.26881966939225,NA,4,0
-"746",0.0423026643898127,50.4287926598901,1,1,0.840155824165012,NA,4,1
-"747",1.76744347000481,75.811192798267,0,0,0.165133587479138,NA,5,1
-"748",0.0527503271453671,77.6644280148737,1,1,-1.3149626812135,NA,2,0
-"749",0.109726039969968,57.7296484111446,1,0,1.70603877388749,NA,5,1
-"750",0.863202615558153,82.8111019497527,0,1,0.177503683488102,NA,2,0
-"751",-0.0989732047696172,79.0374721659382,0,0,-0.393377531770713,NA,4,0
-"752",0.103696546086221,59.3137846349919,1,1,0.578306036031158,NA,5,0
-"753",1.20848487568464,51.8614086073333,1,1,-0.191761909283258,NA,1,0
-"754",0.0937460210527718,53.2674468560267,0,0,-0.282224274118901,NA,3,0
-"755",-0.752196518946083,74.02030463378,0,0,-0.683561574477846,NA,3,1
-"756",0.394924219872661,65.4001660829658,0,0,0.580194679530021,NA,1,1
-"757",-1.09152043398742,66.4227416879848,0,0,-1.06993557582488,NA,3,1
-"758",-1.45829242499569,59.8728023501304,1,1,0.147119675762222,NA,3,1
-"759",-0.122712284955754,61.8287955322963,1,1,-0.17676429931807,NA,1,1
-"760",-1.1009291916684,55.1148541976927,0,0,-0.341774689629685,NA,3,1
-"761",0.580923071839757,71.3523673607646,0,0,0.677529248771208,NA,0,0
-"762",-0.146947489359788,56.7394897775078,1,1,0.42083523424221,NA,3,0
-"763",-0.766334615072354,79.9186253382779,1,1,-0.251818711919797,NA,0,0
-"764",1.62289757854462,65.8677289435209,1,1,0.525188913077835,NA,3,1
-"765",-0.109940544035991,69.8833322371712,1,0,-1.65274959549991,NA,2,1
-"766",1.42170485426206,74.7562393528371,0,0,-0.962670395989128,NA,1,1
-"767",-0.113421048996398,59.5304662014389,1,1,0.0850246742327054,NA,4,1
-"768",-0.329280941581153,42.3246890079977,0,1,1.51904565227881,NA,1,0
-"769",0.373460983597143,53.903660282301,0,0,-1.16116797368342,NA,0,1
-"770",1.02988813093981,65.9325691225055,0,1,-1.60993793483563,NA,3,1
-"771",2.70577504719934,72.0338721999315,0,1,0.0862946021244864,NA,4,0
-"772",-1.03494587124924,62.0825682693907,0,1,0.0107823868250337,NA,4,1
-"773",-0.183429858640727,68.3913935853181,0,0,-1.41869621389183,NA,0,0
-"774",1.07777130745133,69.4304143399232,0,1,1.48418828060195e-05,NA,3,0
-"775",0.341620225298793,70.280931190928,0,1,-0.279330391091714,NA,2,0
-"776",-0.187343624118674,67.4756384421526,1,0,0.173484296127729,NA,3,0
-"777",-1.30285910852947,58.0857293471377,1,1,-1.0706374396176,NA,4,0
-"778",-0.278658261975666,70.2469016466156,0,1,-0.583210546544618,NA,2,1
-"779",-0.184753908458266,54.9577802523955,0,0,-0.678069216586378,NA,4,1
-"780",-0.0817774971735639,63.4854885945376,0,0,1.14598833725543,NA,5,0
-"781",1.61224179372707,54.9062612481006,1,0,-0.51436710134668,NA,3,1
-"782",-0.461939839504668,64.7782445730895,0,1,-0.370051597234955,NA,4,1
-"783",-1.57605030081864,65.0098539560201,1,1,-1.83842242580121,NA,2,0
-"784",0.496433073531127,58.944194827226,1,1,0.0434175847865763,NA,0,0
-"785",-0.111953783818969,60.2730873890106,0,1,-0.623635815887942,NA,3,0
-"786",-0.204346400211973,64.3371580457113,0,1,0.883466273862089,NA,3,0
-"787",0.635568162856401,48.6893551555668,1,0,0.994831062739734,NA,2,1
-"788",-2.90667395407155,70.2068690052451,1,1,-0.381437329858756,NA,2,1
-"789",-0.553658252760955,62.6927428999677,0,1,-1.49760050303526,NA,2,1
-"790",-0.153451828250923,65.485486413912,1,0,-0.354756523414952,NA,3,0
-"791",0.60529051576044,58.3057883764132,0,1,-0.00319690430103726,NA,3,1
-"792",-0.261006596184361,41.2155975004951,1,0,-0.932423892622701,NA,4,1
-"793",-1.00513846241125,67.4830719512744,1,1,-0.669411139308381,NA,5,1
-"794",0.372566114312583,57.3989985999219,0,1,0.467315695223277,NA,0,0
-"795",0.0194461765201242,66.1826065906371,1,1,1.44357713657811,NA,4,0
-"796",0.110528815395276,80.4793696163662,1,0,-1.26792012422636,NA,0,1
-"797",1.15657043692358,48.4437516202214,1,0,-0.55280402285121,NA,4,0
-"798",-0.542220460211763,73.5294140674424,0,0,-2.25236504201458,NA,3,0
-"799",-1.24920128341908,56.3247801839008,1,1,0.789145208708141,NA,3,0
-"800",-1.28001259675319,59.4168953671932,0,0,0.630803743129537,NA,1,0
-"801",-1.02393278779038,54.6778190756221,0,1,0.915461403145339,0.915461403145339,3,0
-"802",-1.38765057998806,55.0149952264239,0,0,0.0217775991973722,0.0217775991973722,2,0
-"803",-0.0492211144061267,48.4986042242414,1,1,-0.538692377632498,-0.538692377632498,4,1
-"804",1.81096073048314,61.2624092272811,0,0,-2.03288160541304,-2.03288160541304,3,1
-"805",-0.0995100657082667,64.7286389917113,0,0,0.635409682093095,0.635409682093095,3,0
-"806",0.777237124297383,58.1347478479356,1,0,1.21794626140681,1.21794626140681,0,1
-"807",-1.10259632518001,43.8071355004172,0,1,0.63796618526594,0.63796618526594,2,0
-"808",-0.221644782701794,55.9724095378059,0,0,-0.634898013775829,-0.634898013775829,0,0
-"809",0.5660949625959,64.1281224180517,0,1,-0.763522359047601,-0.763522359047601,4,1
-"810",-0.354959345255063,52.7928059593948,0,1,-0.101990719944618,-0.101990719944618,5,1
-"811",0.785262601468044,54.0993476958089,1,1,-0.065395507533636,-0.065395507533636,4,0
-"812",0.694047378599404,48.3037338213833,0,1,0.421495213312655,0.421495213312655,0,1
-"813",-0.632729895584955,65.9145664607184,0,0,1.24369083450269,1.24369083450269,2,0
-"814",-0.771958641961308,53.3136105099826,1,0,1.11412556472886,1.11412556472886,2,0
-"815",2.3578173876011,91.6793764721645,1,1,0.352007640693812,0.352007640693812,2,1
-"816",-0.19254933635981,70.1303941735815,0,0,-1.09417696530104,-1.09417696530104,3,1
-"817",0.00661933421419712,58.3401268034862,1,0,-0.852122202550184,-0.852122202550184,1,0
-"818",-0.0545460246273611,60.0241647226761,1,0,-1.17664670710138,-1.17664670710138,1,1
-"819",-0.568206880540376,52.8269801682559,0,0,1.05151524836991,1.05151524836991,0,0
-"820",-0.926975943696645,62.8133802830802,1,1,-1.64170068936338,-1.64170068936338,2,1
-"821",-1.1357351813807,60.0728189711361,1,1,-0.999322055614457,-0.999322055614457,2,0
-"822",0.913962339054452,42.7029977359167,1,0,0.197181052483672,0.197181052483672,1,1
-"823",-0.986062833322873,41.565263835908,1,1,0.858572747161643,0.858572747161643,4,1
-"824",0.66748541348963,66.0120303141026,0,1,0.379268814639385,0.379268814639385,5,0
-"825",1.40659548970662,36.8206242918338,0,0,0.691272621242877,0.691272621242877,5,0
-"826",1.15777896011788,56.1968792628043,0,0,-0.332866679460152,-0.332866679460152,3,1
-"827",-0.138753197573533,76.5516490019156,1,1,-0.702466854631497,-0.702466854631497,4,1
-"828",-0.539321850960138,43.3714495098447,0,1,-1.71827188864524,-1.71827188864524,4,1
-"829",-1.07889349906152,62.9472966578159,1,0,-0.0533594167322972,-0.0533594167322972,4,0
-"830",-0.0185621399511091,75.6990143919341,0,1,1.33033001889087,1.33033001889087,5,0
-"831",-0.232615685678704,65.1194689676205,0,0,0.184433224194206,0.184433224194206,1,0
-"832",0.15629354251416,72.8590240106394,0,0,1.40437355070227,1.40437355070227,2,0
-"833",-0.604009754754033,50.3248524644029,1,0,0.439859671988793,0.439859671988793,0,0
-"834",0.629035120121665,66.3238935450673,0,1,0.215679969692286,0.215679969692286,5,0
-"835",0.714169570852022,61.8307866764632,0,1,-0.839563940423475,-0.839563940423475,2,1
-"836",1.07588610128876,73.5011611692373,1,0,0.173038200664969,0.173038200664969,2,1
-"837",2.24975551737932,84.8732767144425,1,0,-0.985303072019256,-0.985303072019256,3,1
-"838",0.197890598879292,61.1466169708147,1,0,1.10488004265206,1.10488004265206,2,0
-"839",-0.575546318478704,70.9574036400398,1,1,-1.00691086515951,-1.00691086515951,2,1
-"840",-0.0773208650045946,55.9729012880704,0,1,-0.605765264093382,-0.605765264093382,2,1
-"841",-1.62791988910102,56.4093028564553,1,1,1.23631924513615,1.23631924513615,4,0
-"842",0.129923153969037,54.4721177008355,1,1,-0.667938284296919,-0.667938284296919,2,1
-"843",-0.425673133260061,46.4532501972434,0,1,1.11904774776545,1.11904774776545,3,0
-"844",0.406011748521125,60.7333886206226,1,0,-0.172943387963443,-0.172943387963443,1,0
-"845",-0.268810185638072,59.6101147000787,1,0,-0.374972337078608,-0.374972337078608,1,0
-"846",-1.38098453629771,67.2186988398137,0,1,-0.362724158095648,-0.362724158095648,5,0
-"847",0.0365833213445593,57.8508104219382,0,1,0.879702337381518,0.879702337381518,5,1
-"848",-1.35324453117639,50.8441506622652,0,0,-1.50123695347916,-1.50123695347916,3,1
-"849",0.897006193942258,58.7156145919783,1,0,-1.49010928703793,-1.49010928703793,1,0
-"850",-1.82935971226393,78.4021048782139,1,1,0.527434864974974,0.527434864974974,4,1
-"851",-0.141876944171991,64.5053125599064,0,0,1.25170413109103,1.25170413109103,1,1
-"852",0.457981777842779,62.7479367766071,0,1,2.08247273578487,2.08247273578487,5,1
-"853",-1.51961751308999,70.5637641799707,0,1,0.825769355822583,0.825769355822583,3,1
-"854",1.39905531466027,77.9205427905425,1,1,0.831442747299394,0.831442747299394,2,0
-"855",-0.885339190903023,77.1081921609865,0,1,1.12265888209484,1.12265888209484,3,0
-"856",-0.506330291221321,55.6857106606254,0,0,-0.70525925520244,-0.70525925520244,2,0
-"857",0.163332105966331,59.2072313561489,0,1,-0.171084647027825,-0.171084647027825,0,1
-"858",-0.337143856258392,56.3677495549941,1,1,0.134300960325259,0.134300960325259,3,0
-"859",-1.03595153200983,57.990095269631,1,0,-0.0540683683497902,-0.0540683683497902,2,0
-"860",0.409337928216715,64.5128192983634,1,0,1.50517157529026,1.50517157529026,1,1
-"861",-0.524223458216886,55.2005801519779,0,1,1.22713757188919,1.22713757188919,0,1
-"862",0.506500373524185,48.344748198733,1,1,1.47929760840955,1.47929760840955,1,1
-"863",-0.504722862774708,57.6105602870942,0,1,1.20631205327699,1.20631205327699,3,1
-"864",-0.957250328953504,74.4983465190903,1,1,1.1083535507302,1.1083535507302,2,1
-"865",-0.0946343584532563,78.5287387225315,0,0,-1.49938280598482,-1.49938280598482,2,0
-"866",0.649439317610584,55.9676849553722,1,0,0.60989975618154,0.60989975618154,5,0
-"867",0.242321799327066,68.2696831595617,1,1,-1.82115152407692,-1.82115152407692,1,0
-"868",-0.0589340902617142,58.7086509758736,0,0,0.077820357983384,0.077820357983384,1,1
-"869",-1.94779407157618,58.7847311320163,0,0,0.478241320191152,0.478241320191152,1,0
-"870",1.4514436318915,53.2572671519954,1,1,-0.740192918234158,-0.740192918234158,3,0
-"871",-0.0399396776377224,44.2156620120559,1,0,1.44447797881903,1.44447797881903,2,1
-"872",-0.863997691069911,78.0207639644711,0,1,0.323853778514233,0.323853778514233,1,1
-"873",-0.201690239712518,68.0111681386451,0,0,-0.356084214835065,-0.356084214835065,1,0
-"874",0.261255562727432,42.0773527249183,1,0,0.552137107246651,0.552137107246651,3,1
-"875",-1.51835138330586,56.5258127372455,1,1,0.449175119547768,0.449175119547768,5,0
-"876",0.717308632275803,75.5431592932965,0,0,1.23074133402351,1.23074133402351,1,0
-"877",0.246832728096417,61.064927931859,1,0,0.965154616132545,0.965154616132545,4,0
-"878",1.20276704630509,61.9250892067048,0,0,1.56168460211253,1.56168460211253,0,1
-"879",0.986039270447351,68.4922796714094,1,0,-0.316111942044904,-0.316111942044904,5,1
-"880",-0.537940599062239,69.9051461175328,1,0,-0.917320660872748,-0.917320660872748,4,0
-"881",-0.490282741813956,66.275822116448,1,0,1.55722150429562,1.55722150429562,3,0
-"882",-0.995516172562447,64.1521548555604,0,0,-0.399468222498247,-0.399468222498247,2,0
-"883",-0.361702928254615,74.7493906483472,0,1,0.442155462945799,0.442155462945799,4,1
-"884",0.944354811296923,77.0947207391152,0,0,0.781209235101923,0.781209235101923,1,1
-"885",1.79220701161545,62.2835960194932,0,1,-1.47966001438128,-1.47966001438128,5,0
-"886",0.928409395864299,57.9188311793936,0,1,0.514096507221489,0.514096507221489,3,0
-"887",-1.82993766963068,60.1214575527235,1,0,-0.824996216671844,-0.824996216671844,0,1
-"888",0.821878106163116,63.4972266325496,0,0,-0.865937769635399,-0.865937769635399,4,1
-"889",1.0362687620034,53.2235535759013,0,1,2.58007784938081,2.58007784938081,0,0
-"890",0.617420144129801,66.4218267919739,1,0,-1.60328067887141,-1.60328067887141,4,1
-"891",-0.854930363118786,47.1056196473064,1,0,-0.932155695231725,-0.932155695231725,1,1
-"892",1.06451051848471,82.2606725497448,0,1,-1.63416463348034,-1.63416463348034,5,0
-"893",0.729481458103667,35.1658064102412,0,0,1.14526199164209,1.14526199164209,4,1
-"894",0.66265658435809,61.7283427870576,1,0,-0.873270479383926,-0.873270479383926,3,1
-"895",-0.381407202704243,59.4109585234552,0,0,-0.157622793650065,-0.157622793650065,0,1
-"896",0.845891777793581,65.4745229730498,0,0,0.511908207706561,0.511908207706561,2,0
-"897",0.111591884445813,55.8952639789569,0,0,1.18477431013477,1.18477431013477,5,1
-"898",-0.0677702616560359,84.024051603136,1,0,1.66262494013238,1.66262494013238,1,1
-"899",1.48709809651828,58.5577906055765,0,1,-1.44003666766935,-1.44003666766935,5,1
-"900",0.75722337664623,60.2339644283464,0,1,0.0718381042586017,0.0718381042586017,3,0
-"901",-2.87404249339871,71.2412194136023,1,0,1.60026127461792,1.60026127461792,5,1
-"902",-0.426542449077482,66.9777449047275,0,1,1.09024282511803,1.09024282511803,4,1
-"903",-1.48740629563197,61.8090747869435,1,1,0.529549933033616,0.529549933033616,0,0
-"904",0.588791645710936,58.3039716959421,1,0,0.439869052170178,0.439869052170178,2,0
-"905",-0.44259951077578,57.6547195264732,1,1,2.19439783890363,2.19439783890363,1,1
-"906",0.901304802320198,46.4506930716295,0,0,1.21259811758189,1.21259811758189,2,0
-"907",0.467828340367688,66.2995607852048,0,0,-0.583330212349268,-0.583330212349268,4,0
-"908",0.0220403186670302,64.4727015251654,0,0,-0.122696798736509,-0.122696798736509,3,0
-"909",-0.0851078317956122,49.5595814154325,1,1,-1.11572822879677,-1.11572822879677,2,0
-"910",-0.361267142653458,58.1271442537623,1,1,-0.245228769113442,-0.245228769113442,1,0
-"911",0.199506501537268,72.8729087029212,0,0,-0.33479722149108,-0.33479722149108,4,0
-"912",-1.1082542547003,72.7409281326188,0,1,-1.28266300567136,-1.28266300567136,4,0
-"913",0.672794070486354,47.7883864180764,1,1,0.796043792076802,0.796043792076802,1,1
-"914",-0.39411363337364,68.0202190654962,0,1,0.823456399610159,0.823456399610159,4,1
-"915",-0.743967585584992,61.2683070070261,0,1,1.78880270166576,1.78880270166576,3,0
-"916",-0.32611347678341,63.8851485718354,0,1,0.975574368447363,0.975574368447363,0,0
-"917",1.87466882681447,51.2780382678861,1,0,0.412025202686387,0.412025202686387,3,0
-"918",-1.75799914534674,57.9291300999102,1,1,1.97493733334156,1.97493733334156,4,0
-"919",-0.348860909964869,39.4158731102794,0,1,-0.951142928618088,-0.951142928618088,3,0
-"920",0.106589229118385,52.608766828855,1,0,0.567952353137801,0.567952353137801,2,0
-"921",-0.537658781205147,56.9337787562458,0,0,-0.441991599466074,-0.441991599466074,4,0
-"922",-0.411269131911835,73.0341763353321,1,0,-1.76203008968521,-1.76203008968521,1,1
-"923",0.382348337976121,49.4134014494489,1,1,-1.7618261210662,-1.7618261210662,1,1
-"924",0.222514088542642,63.0172018583522,0,0,-0.199306385745731,-0.199306385745731,1,1
-"925",-0.673754508319701,60.8997318683277,1,1,0.474692677271762,0.474692677271762,3,1
-"926",0.567432913870525,46.3660774942888,1,0,0.359995385588833,0.359995385588833,1,1
-"927",0.17948365476,62.825238713036,1,1,-0.132368527220886,-0.132368527220886,1,0
-"928",0.686341749488027,75.8069945043382,1,1,0.12107844105611,0.12107844105611,2,0
-"929",-1.32726366148341,69.4004024632186,1,1,-0.0381938134769495,-0.0381938134769495,2,1
-"930",-0.767916438222332,71.3143023201082,0,0,-0.872905972754614,-0.872905972754614,0,0
-"931",-0.192702508672789,57.8630409930615,0,1,-0.797107844441803,-0.797107844441803,0,0
-"932",0.95905119808221,69.5783890475,0,1,0.323471025211062,0.323471025211062,2,1
-"933",-0.529031361979442,36.2919037046433,0,1,1.65605831529332,1.65605831529332,1,1
-"934",-1.03288699095086,47.4674139064516,0,0,0.0637501511077446,0.0637501511077446,2,1
-"935",0.771745490447405,58.2515204836704,1,0,2.25298948645765,2.25298948645765,3,1
-"936",0.465356664198533,61.0851159290109,0,1,-0.9438673491284,-0.9438673491284,0,0
-"937",-1.22567106691397,65.1692045431265,1,0,0.343178024320818,0.343178024320818,4,1
-"938",2.04667724022033,67.2163438594481,0,0,0.18411953937326,0.18411953937326,2,1
-"939",1.49867444889729,55.400561672237,1,1,-0.239758700034669,-0.239758700034669,2,1
-"940",-1.38894908467684,55.1838845348883,1,0,0.851805632237612,0.851805632237612,0,1
-"941",2.15337646372281,56.5072467774652,0,0,-1.26896125131166,-1.26896125131166,3,0
-"942",-1.72911674054936,54.3823402815629,1,1,-0.148853941632772,-0.148853941632772,4,1
-"943",0.102042485094495,58.7529730160193,1,0,0.023438855961796,0.023438855961796,4,0
-"944",-1.14127912749709,68.7755232786064,0,0,0.860632635277945,0.860632635277945,3,0
-"945",-0.377009088203781,64.6334691360602,1,1,-0.799273980270713,-0.799273980270713,0,1
-"946",-0.348782098762047,55.6063466367055,0,0,-2.83653272169744,-2.83653272169744,5,0
-"947",0.322885081228563,53.075827075032,1,0,1.0722458639509,1.0722458639509,2,0
-"948",-1.12433961794016,58.9554720640944,0,0,0.974138633268488,0.974138633268488,4,1
-"949",-2.75704972232067,57.9200008743287,1,1,1.05385557723761,1.05385557723761,2,1
-"950",-0.0958110991197285,72.1930841222822,1,1,-0.209404913847133,-0.209404913847133,1,1
-"951",-0.746699804956649,37.2956430098056,1,1,-2.53391876785558,-2.53391876785558,4,0
-"952",0.210781418005385,50.2654733095165,0,0,0.324381377840888,0.324381377840888,3,1
-"953",-0.848509808508819,66.2119025904558,0,1,1.52923093464334,1.52923093464334,2,1
-"954",0.792264765704344,66.6256167229789,1,0,-0.49502553937255,-0.49502553937255,3,1
-"955",0.0291874325756546,53.4900975127976,0,1,-0.74992055147891,-0.74992055147891,1,1
-"956",0.603388012483792,57.9126293144572,1,1,-0.0381006103542124,-0.0381006103542124,5,1
-"957",-0.994940104813448,77.9691164714174,0,1,0.153699179840078,0.153699179840078,0,0
-"958",-0.211332458143761,49.0122158510559,0,0,-0.279685204486181,-0.279685204486181,1,1
-"959",0.344021087496185,77.7774209175325,0,1,0.738721035004576,0.738721035004576,1,1
-"960",-0.0893785289835366,63.1694440814458,1,0,1.24054859264869,1.24054859264869,3,0
-"961",1.6540374665165,70.4957642163157,1,1,-0.779622052059768,-0.779622052059768,4,1
-"962",-1.96290871320897,54.7371921499326,1,0,0.265049589448697,0.265049589448697,4,0
-"963",0.756125511499489,48.797874636976,1,0,0.0591445263956009,0.0591445263956009,5,1
-"964",0.0144186579430547,63.9825949406673,1,1,-1.13984338266092,-1.13984338266092,3,1
-"965",0.945713804060792,64.2130680300369,0,1,1.40524178999936,1.40524178999936,5,0
-"966",-0.098808492386284,55.7484635094942,0,1,0.411013833171653,0.411013833171653,4,1
-"967",-2.73975424338714,51.230089940189,1,0,1.13299332536587,1.13299332536587,1,1
-"968",0.420305849423153,72.9999660430953,1,1,-0.543361565575453,-0.543361565575453,4,0
-"969",-0.218379262260984,46.63213243505,1,1,-0.212711776294238,-0.212711776294238,3,1
-"970",-0.989355329598254,74.2924242973265,1,1,-0.238925993307699,-0.238925993307699,2,0
-"971",-0.656468896184539,54.0530687991355,0,0,0.588847108405563,0.588847108405563,4,1
-"972",0.055663548201307,41.1731354217979,0,0,0.238272526252067,0.238272526252067,1,1
-"973",2.45206344477497,81.80311231724,1,0,1.36858573287737,1.36858573287737,2,0
-"974",0.18817357373771,63.7877727864363,1,1,-0.32345239834268,-0.32345239834268,4,1
-"975",-0.979696368377701,49.3068717129501,0,1,-0.205802607928562,-0.205802607928562,1,1
-"976",1.90361046750456,56.2485501837805,0,1,-0.790108072379854,-0.790108072379854,3,1
-"977",-0.438519782195613,52.0520228153762,1,1,-0.300860202232585,-0.300860202232585,2,0
-"978",-0.829726095585463,63.1512286871127,1,0,-0.398807652887792,-0.398807652887792,3,0
-"979",-1.247231318762,61.3393938568366,0,0,1.8097043061288,1.8097043061288,1,0
-"980",-0.673809163087791,47.5944842442115,1,0,-1.11568094665946,-1.11568094665946,4,0
-"981",1.93516726396696,62.56480536921,1,0,1.03671143043759,1.03671143043759,0,1
-"982",0.906561434864075,53.2592979325425,0,0,1.17357458312996,1.17357458312996,3,1
-"983",0.736526771427079,66.3322735378512,1,0,-0.867450686176965,-0.867450686176965,1,1
-"984",0.146625304045126,66.0236487324394,1,0,-0.736363718750934,-0.736363718750934,3,1
-"985",0.283120796739,59.6596631347373,1,0,-0.368193908693371,-0.368193908693371,5,0
-"986",0.589276720770128,52.1029363945098,1,1,0.628323960404366,0.628323960404366,2,1
-"987",0.215849476577042,55.2956444578579,0,1,-0.417201443455236,-0.417201443455236,4,1
-"988",0.155053234874317,49.871818623723,1,0,-1.0960078796469,-1.0960078796469,1,1
-"989",0.953826636303852,68.3355327007342,0,0,1.1488297620837,1.1488297620837,3,0
-"990",-0.165852702479289,73.5402356223174,0,1,0.110196214486804,0.110196214486804,0,0
-"991",0.25500374557096,60.5007271198645,0,0,-1.17523493852895,-1.17523493852895,2,1
-"992",-0.897472359354659,63.3688075564445,0,1,0.838088347089023,0.838088347089023,2,1
-"993",1.58397010470279,57.9100112668061,1,0,-0.171711241613191,-0.171711241613191,2,0
-"994",1.39275862816597,62.1845998789684,1,1,0.145241397802456,0.145241397802456,2,0
-"995",-1.81225823172892,78.7347520792372,1,1,-0.480092901716805,-0.480092901716805,2,0
-"996",-0.216319678589458,62.617218049717,0,0,0.870525638997579,0.870525638997579,5,0
-"997",0.549856056710878,73.3469451935642,1,0,-0.730408660241045,-0.730408660241045,3,1
-"998",0.482736237517578,62.477466461451,0,0,-0.209510059658523,-0.209510059658523,3,0
-"999",0.760848728950074,46.2421513881639,1,1,1.06559159302466,1.06559159302466,5,0
-"1000",-0.445658016934015,48.0946021824269,1,1,1.47733324836137,1.47733324836137,3,0
diff --git a/variable-info/update-outcome-info/toTabs.sh b/variable-info/update-outcome-info/toTabs.sh
old mode 100644
new mode 100755

From d4a99d3e5a36d5fc76cd3302a25f1a167f0e0fa7 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 10:29:50 -0400
Subject: [PATCH 02/27] Code refactor: make an R package first

---
 .Rhistory                                     |   2 +
 .gitignore                                    |   1 +
 PHESANT/.Rbuildignore                         |   2 +
 PHESANT/.Rhistory                             |   0
 PHESANT/DESCRIPTION                           |   9 ++
 PHESANT/NAMESPACE                             |   1 +
 PHESANT.Rproj => PHESANT/PHESANT.Rproj        |   4 +
 PHESANT/R/PHESANT-internal.R                  | 127 ++++++++++++++++++
 {WAS => PHESANT/R}/addToCounts.r              |   0
 {WAS => PHESANT/R}/binaryLogisticRegression.r |   0
 {WAS => PHESANT/R}/equalSizedBins.r           |   0
 {WAS => PHESANT/R}/fixOddFieldsToCatMul.r     |   0
 {WAS => PHESANT/R}/getIsCatMultExposure.r     |   0
 {WAS => PHESANT/R}/getIsExposure.r            |   0
 .../R}/getNumValuesCatMultExposure.r          |   0
 {WAS => PHESANT/R}/incrementCounter.r         |   0
 {WAS => PHESANT/R}/initFunctions.r            |  33 -----
 {WAS => PHESANT/R}/loadConfounders.r          |   0
 {WAS => PHESANT/R}/loadData.r                 |   0
 {WAS => PHESANT/R}/loadIndicatorFields.r      |   0
 {WAS => PHESANT/R}/loadPhenotypes.r           |   0
 {WAS => PHESANT/R}/loadTraitOfInterest.r      |   0
 {WAS => PHESANT/R}/makeTestDataFrame.r        |   0
 PHESANT/R/option_list.R                       |  20 +++
 {WAS => PHESANT/R}/processArgs.r              |   0
 {WAS => PHESANT/R}/reassignValue.r            |   0
 {WAS => PHESANT/R}/replaceMissingCodes.r      |   0
 {WAS => PHESANT/R}/replaceNaN.r               |   0
 {WAS => PHESANT/R}/saveCounts.r               |   0
 {WAS => PHESANT/R}/storeNewVar.r              |   0
 {WAS => PHESANT/R}/testAssociations.r         |   0
 {WAS => PHESANT/R}/testCatMultiple.r          |   0
 {WAS => PHESANT/R}/testCatSingle.r            |   0
 {WAS => PHESANT/R}/testCategoricalOrdered.r   |   0
 {WAS => PHESANT/R}/testCategoricalUnordered.r |   0
 {WAS => PHESANT/R}/testContinuous.r           |   0
 {WAS => PHESANT/R}/testInteger.r              |   0
 {WAS => PHESANT/R}/testNumExamples.r          |   0
 {WAS => PHESANT/R}/unittests/run-tests.sh     |   0
 .../R}/unittests/test_equalSizedBins.r        |   0
 .../R}/unittests/test_reassignValue.r         |   0
 .../R}/unittests/test_testCatMultiple.r       |   0
 .../R}/unittests/test_testCatSingle.r         |   0
 {WAS => PHESANT/R}/validatePhenotypeInput.r   |   0
 {WAS => PHESANT/R}/validateTraitInput.r       |   0
 PHESANT/man/PHESANT-package.Rd                |  34 +++++
 PHESANT/opt_only.RData                        | Bin 0 -> 330 bytes
 {WAS => PHESANT}/phenomeScan.r                |  38 ++----
 PHESANT/test.sh                               |   3 +
 opt.RData                                     | Bin 0 -> 4811 bytes
 50 files changed, 214 insertions(+), 60 deletions(-)
 create mode 100644 .Rhistory
 create mode 100644 PHESANT/.Rbuildignore
 create mode 100644 PHESANT/.Rhistory
 create mode 100644 PHESANT/DESCRIPTION
 create mode 100644 PHESANT/NAMESPACE
 rename PHESANT.Rproj => PHESANT/PHESANT.Rproj (67%)
 create mode 100644 PHESANT/R/PHESANT-internal.R
 rename {WAS => PHESANT/R}/addToCounts.r (100%)
 rename {WAS => PHESANT/R}/binaryLogisticRegression.r (100%)
 rename {WAS => PHESANT/R}/equalSizedBins.r (100%)
 rename {WAS => PHESANT/R}/fixOddFieldsToCatMul.r (100%)
 rename {WAS => PHESANT/R}/getIsCatMultExposure.r (100%)
 rename {WAS => PHESANT/R}/getIsExposure.r (100%)
 rename {WAS => PHESANT/R}/getNumValuesCatMultExposure.r (100%)
 rename {WAS => PHESANT/R}/incrementCounter.r (100%)
 rename {WAS => PHESANT/R}/initFunctions.r (76%)
 rename {WAS => PHESANT/R}/loadConfounders.r (100%)
 rename {WAS => PHESANT/R}/loadData.r (100%)
 rename {WAS => PHESANT/R}/loadIndicatorFields.r (100%)
 rename {WAS => PHESANT/R}/loadPhenotypes.r (100%)
 rename {WAS => PHESANT/R}/loadTraitOfInterest.r (100%)
 rename {WAS => PHESANT/R}/makeTestDataFrame.r (100%)
 create mode 100644 PHESANT/R/option_list.R
 rename {WAS => PHESANT/R}/processArgs.r (100%)
 rename {WAS => PHESANT/R}/reassignValue.r (100%)
 rename {WAS => PHESANT/R}/replaceMissingCodes.r (100%)
 rename {WAS => PHESANT/R}/replaceNaN.r (100%)
 rename {WAS => PHESANT/R}/saveCounts.r (100%)
 rename {WAS => PHESANT/R}/storeNewVar.r (100%)
 rename {WAS => PHESANT/R}/testAssociations.r (100%)
 rename {WAS => PHESANT/R}/testCatMultiple.r (100%)
 rename {WAS => PHESANT/R}/testCatSingle.r (100%)
 rename {WAS => PHESANT/R}/testCategoricalOrdered.r (100%)
 rename {WAS => PHESANT/R}/testCategoricalUnordered.r (100%)
 rename {WAS => PHESANT/R}/testContinuous.r (100%)
 rename {WAS => PHESANT/R}/testInteger.r (100%)
 rename {WAS => PHESANT/R}/testNumExamples.r (100%)
 rename {WAS => PHESANT/R}/unittests/run-tests.sh (100%)
 rename {WAS => PHESANT/R}/unittests/test_equalSizedBins.r (100%)
 rename {WAS => PHESANT/R}/unittests/test_reassignValue.r (100%)
 rename {WAS => PHESANT/R}/unittests/test_testCatMultiple.r (100%)
 rename {WAS => PHESANT/R}/unittests/test_testCatSingle.r (100%)
 rename {WAS => PHESANT/R}/validatePhenotypeInput.r (100%)
 rename {WAS => PHESANT/R}/validateTraitInput.r (100%)
 create mode 100644 PHESANT/man/PHESANT-package.Rd
 create mode 100644 PHESANT/opt_only.RData
 rename {WAS => PHESANT}/phenomeScan.r (62%)
 create mode 100755 PHESANT/test.sh
 create mode 100644 opt.RData

diff --git a/.Rhistory b/.Rhistory
new file mode 100644
index 0000000..591e1ef
--- /dev/null
+++ b/.Rhistory
@@ -0,0 +1,2 @@
+package.skeleton(name = "PHESANT")
+package.skeleton(name = "PHESANT")
diff --git a/.gitignore b/.gitignore
index 9bea433..85c0afc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 
 .DS_Store
+.Rproj.user
diff --git a/PHESANT/.Rbuildignore b/PHESANT/.Rbuildignore
new file mode 100644
index 0000000..91114bf
--- /dev/null
+++ b/PHESANT/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/PHESANT/.Rhistory b/PHESANT/.Rhistory
new file mode 100644
index 0000000..e69de29
diff --git a/PHESANT/DESCRIPTION b/PHESANT/DESCRIPTION
new file mode 100644
index 0000000..4769eb6
--- /dev/null
+++ b/PHESANT/DESCRIPTION
@@ -0,0 +1,9 @@
+Package: PHESANT
+Type: Package
+Title: What the package does (short line)
+Version: 1.0
+Date: 2019-03-14
+Author: Who wrote it
+Maintainer: Who to complain to <yourfault@somewhere.net>
+Description: More about what it does (maybe more than one line)
+License: What license is it under?
diff --git a/PHESANT/NAMESPACE b/PHESANT/NAMESPACE
new file mode 100644
index 0000000..d75f824
--- /dev/null
+++ b/PHESANT/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/PHESANT.Rproj b/PHESANT/PHESANT.Rproj
similarity index 67%
rename from PHESANT.Rproj
rename to PHESANT/PHESANT.Rproj
index 8e3c2eb..21a4da0 100644
--- a/PHESANT.Rproj
+++ b/PHESANT/PHESANT.Rproj
@@ -11,3 +11,7 @@ Encoding: UTF-8
 
 RnwWeave: Sweave
 LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/PHESANT/R/PHESANT-internal.R b/PHESANT/R/PHESANT-internal.R
new file mode 100644
index 0000000..3b1d509
--- /dev/null
+++ b/PHESANT/R/PHESANT-internal.R
@@ -0,0 +1,127 @@
+.Random.seed <-
+c(403L, 10L, -2055377151L, -1261003152L, -653149682L, 1435497081L, 
+1855348235L, -576966854L, 341399100L, 778157439L, -1613771051L, 
+1531682540L, 412921922L, 1086953373L, 325854743L, -1838982274L, 
+-1413973704L, -1064806885L, 323434361L, 1468101064L, -205967882L, 
+-631271375L, -660989341L, -707934670L, 2008175492L, -1116261945L, 
+385783965L, -399175660L, -563583302L, 625440517L, 1609831647L, 
+1445418342L, -1909055248L, 1313776659L, 1749100977L, -1356585888L, 
+1455157758L, 1388664201L, 2074088123L, -1846870390L, -1243661972L, 
+343572207L, 1950927141L, 1541089244L, -359217742L, -1348217683L, 
+413594727L, 726502286L, 1026280552L, -622679957L, 1120416265L, 
+1076264088L, -2134790202L, 2005156641L, 902238131L, -1253660894L, 
+770078996L, 1084119447L, -1439872627L, 590254244L, -1844983158L, 
+-1204420363L, -1441798321L, 1447714902L, 1974291200L, -2095841725L, 
+-585513503L, -1731227312L, 937445294L, 934640985L, -1011479701L, 
+913148634L, 1909599132L, 1373632415L, 178208245L, -821178228L, 
+-2087284382L, -1997959875L, 1589640631L, 764332638L, 1962285976L, 
+261272315L, -1048315239L, -201784536L, -1601408554L, -2012176815L, 
+668146947L, -1514712942L, 30025252L, -873383705L, 764988733L, 
+-731944524L, 1454405146L, 904033637L, -33915905L, -515351930L, 
+2009025040L, 617304243L, 158108497L, -1127143680L, -258595362L, 
+-1664987607L, 1106011611L, 724134634L, -2001639284L, 262526927L, 
+499109765L, 1971379260L, 1522683154L, -712324595L, -9821881L, 
+-1493900498L, 744160904L, -1168410741L, 426661737L, 1753729336L, 
+-490466330L, 555131777L, 1754946323L, -813456190L, -416563340L, 
+1511400311L, 1291424877L, 2086259460L, 1989440042L, 735829909L, 
+-250657745L, -759055050L, -2138913440L, 601457571L, 2098922689L, 
+1814889648L, -126393394L, 2014160697L, 1337096011L, 2010817786L, 
+94510588L, -753603009L, -1139018987L, 612225068L, 1440518402L, 
+1251727837L, -859002665L, 1183175614L, -1121028616L, -388640293L, 
+850886713L, -2082842232L, -286673610L, -1376702607L, 289504675L, 
+1370276082L, 2064014404L, 1143236359L, 244238045L, -1750603820L, 
+2003793658L, 251030085L, -1643296097L, 813770790L, 1153809584L, 
+368946515L, -1173546895L, 2066941344L, 2125658430L, -1106106295L, 
+-1619944197L, 56489162L, -1429099988L, -702894545L, 2097114853L, 
+-1469324388L, -1305795086L, 1942503533L, -783513177L, 979614670L, 
+-464289496L, -2063539413L, -1102053047L, -571165096L, -1266678010L, 
+1462313697L, 866272883L, 863080162L, -2029530540L, -1653605545L, 
+-1657831603L, -1705729308L, -581874998L, -1662598347L, 317032847L, 
+-1302323562L, -710869440L, 2045748995L, 1157095201L, 1237002256L, 
+609251310L, 481136537L, -2050820053L, -149807846L, -1435701284L, 
+712831455L, 124114357L, 1680823116L, 408859042L, -1960140547L, 
+-134820361L, -212387042L, -1368816168L, 1844444987L, 1914468569L, 
+1771902056L, -1979266666L, -1699702511L, 427268803L, 1609642706L, 
+-2067868828L, 137279143L, -1601814531L, -127387148L, 59483866L, 
+1411659813L, 223425855L, -1177511226L, 1897576272L, 414222195L, 
+1107216785L, -426198080L, 904228254L, -675370654L, 1407568616L, 
+-1648012084L, -773699076L, 1481535986L, 1360236672L, 1007529460L, 
+-1342715256L, -1851472070L, -949733040L, 729675884L, -570554988L, 
+1833331794L, 2089554400L, -318619444L, 193276576L, 1429041538L, 
+-2126949144L, -306398484L, 1763143996L, -1277055886L, 1554374256L, 
+421975588L, 2032640056L, 211760410L, 1924362448L, -1268650692L, 
+-1928376428L, 1611904898L, -603361344L, -1687638468L, -1350393456L, 
+1079395362L, 248160520L, 1618027660L, -1988603108L, 665800850L, 
+1214202240L, -187286508L, -2088061144L, 545226298L, -1092967984L, 
+-1418993300L, 567848884L, 746534418L, -772762656L, 1200707020L, 
+1496597728L, 1776060450L, -568946904L, -433355252L, -639612100L, 
+368270322L, 706859760L, 1953071684L, -1698060712L, -1286869190L, 
+-834147344L, -415867204L, 1347345684L, -740441918L, -1348860832L, 
+284594556L, 1966673872L, -535664350L, 656744104L, 677991308L, 
+-464448324L, -598055118L, -1857457600L, 548719796L, -660904888L, 
+123257274L, 2067885584L, -2115319572L, 1984745812L, 381529874L, 
+-298453600L, 2077870796L, -1096423840L, -1618591806L, 1879972648L, 
+-1549703764L, 2022029628L, 206182258L, 1161427248L, 2028039972L, 
+1369963832L, 1577974490L, 1466738576L, 1352827644L, -1536618604L, 
+-1325287358L, 1242152192L, 574403644L, -1861933744L, 1895827618L, 
+1347786568L, -1926394100L, 604801756L, 1377291090L, -2066652544L, 
+-127154092L, -1934056472L, 1591219962L, -180620976L, 189311788L, 
+1286340468L, -37364846L, 1854013024L, 2103385996L, 1819648864L, 
+543637474L, -1091715160L, -1094826804L, -477037060L, 1789887410L, 
+255111664L, -654716476L, -1160838824L, 146863482L, -1985572432L, 
+894295996L, -496635820L, -159435838L, 685579296L, -314557252L, 
+-8424880L, 438237154L, -1256534424L, -1636173236L, -235171588L, 
+8093682L, -1930136320L, 695349108L, 1842151432L, -1619477062L, 
+-851016240L, 881351660L, -1716387948L, -115207214L, -1791137952L, 
+-33112756L, 324058400L, 157033346L, 208630504L, -594262804L, 
+1896161852L, 265798514L, -1382278544L, 1861693476L, -602374344L, 
+1108468890L, 484203216L, 484362428L, -709216620L, -1557066110L, 
+-1252221760L, 683766076L, -2027837296L, -1045503454L, -228385272L, 
+2107578636L, 1733003932L, -1349392878L, -558103168L, -496880108L, 
+1038675496L, 1056519482L, 2093201360L, -2010307988L, -1508511948L, 
+-377087342L, -1249011360L, -1160312500L, 1870274528L, -232228958L, 
+1435512744L, -1369185396L, -243099972L, 1927674738L, -2106238736L, 
+-554463164L, 1986329944L, -1455619014L, 1574239088L, -1919521732L, 
+914670356L, 445555138L, 2084602080L, -778729604L, -1345243440L, 
+199575202L, -1920988120L, 1905594636L, -253809732L, -1626538702L, 
+1548744640L, -677629388L, -2139472952L, 519623866L, -1041137776L, 
+1637101036L, 2136944724L, -1615245678L, 2128794400L, -59988532L, 
+-1540817952L, 1147640514L, -44053976L, -882973012L, 1048963772L, 
+698567154L, -1162995408L, -453493596L, -1281574216L, -413576358L, 
+846591376L, 1607352700L, -716217068L, 72880450L, 930200320L, 
+291130300L, -596200624L, 568531234L, 1941716296L, 23336972L, 
+1518573143L, 1694002593L, 1330335334L, -367935900L, -591315563L, 
+459321023L, -743172520L, -1153875338L, 1744498307L, -1595034939L, 
+-2103107614L, -546796976L, -202584199L, 390984875L, 2084477980L, 
+1812200266L, -1649591489L, -1192012407L, 1048710558L, 440534092L, 
+222078493L, 670313383L, -1613058352L, 1954390126L, -688183525L, 
+1469139069L, 1553757802L, -316926200L, -1165826127L, -1885802461L, 
+1929285924L, -477141550L, -2023655897L, 2073149553L, -958355786L, 
+-1588544940L, -616261915L, 311055087L, 1339211304L, -692618618L, 
+-890616653L, -1587672555L, 1473830002L, 1794930880L, -1751978711L, 
+-706341989L, -174078868L, 1009748090L, -577465937L, -1331312455L, 
+954243790L, -898918372L, -550806643L, 1301792247L, -1906050816L, 
+-471410850L, 243503147L, -964325587L, 2040009626L, 165040472L, 
+4277441L, -1684012173L, -2112323436L, 707749154L, 1246311479L, 
+1815489793L, -1161228282L, -106256828L, 953040757L, -1505234209L, 
+-1434948040L, -2082533866L, -1478655453L, 1141738981L, 620266498L, 
+181872240L, -1541364711L, -1821358581L, -1404373764L, -1721304790L, 
+430858079L, 403881705L, 62217726L, 1339273708L, -1570424515L, 
+1231956231L, -1980606224L, 458605710L, -1919336773L, 1922633373L, 
+-2114897334L, -53110680L, 313559121L, 954907459L, -1235980732L, 
+-683520782L, -1188665145L, 595888785L, 787797014L, 181722740L, 
+-550557435L, -172300465L, 1876414792L, -1850278682L, -642551277L, 
+-1898592907L, -657802926L, 1363939744L, 1281517705L, 1436188859L, 
+-1660478708L, 526215834L, -359997425L, -1166039975L, 401238254L, 
+-391058564L, -739133459L, 28807319L, 686529376L, -1191093442L, 
+-252455925L, -1116826483L, -1645730374L, -1123120136L, 1970423713L, 
+243630419L, 640639092L, -818695678L, -795186025L, -2076447135L, 
+1631671974L, -1814995292L, -831882411L, -1899210753L, -1279039848L, 
+185248694L, 455811139L, -1657661051L, -1742312926L, -1304084208L, 
+1057480889L, -1839299221L, 2022677596L, 1940418954L, -2044848769L, 
+1693505737L, 134421854L, 1079041548L, -855240227L, -1810362137L, 
+-98392816L, -1493717970L, -373067045L, 798565309L, -890258134L, 
+-1339358776L, 374128369L, -1103474589L, -1752112028L, -1493973102L, 
+746695783L, -1192483791L, -800576138L, 303344276L, 1196185765L, 
+-1719902801L, 1792706152L, 1735704134L, -396461069L, -1485459313L
+)
diff --git a/WAS/addToCounts.r b/PHESANT/R/addToCounts.r
similarity index 100%
rename from WAS/addToCounts.r
rename to PHESANT/R/addToCounts.r
diff --git a/WAS/binaryLogisticRegression.r b/PHESANT/R/binaryLogisticRegression.r
similarity index 100%
rename from WAS/binaryLogisticRegression.r
rename to PHESANT/R/binaryLogisticRegression.r
diff --git a/WAS/equalSizedBins.r b/PHESANT/R/equalSizedBins.r
similarity index 100%
rename from WAS/equalSizedBins.r
rename to PHESANT/R/equalSizedBins.r
diff --git a/WAS/fixOddFieldsToCatMul.r b/PHESANT/R/fixOddFieldsToCatMul.r
similarity index 100%
rename from WAS/fixOddFieldsToCatMul.r
rename to PHESANT/R/fixOddFieldsToCatMul.r
diff --git a/WAS/getIsCatMultExposure.r b/PHESANT/R/getIsCatMultExposure.r
similarity index 100%
rename from WAS/getIsCatMultExposure.r
rename to PHESANT/R/getIsCatMultExposure.r
diff --git a/WAS/getIsExposure.r b/PHESANT/R/getIsExposure.r
similarity index 100%
rename from WAS/getIsExposure.r
rename to PHESANT/R/getIsExposure.r
diff --git a/WAS/getNumValuesCatMultExposure.r b/PHESANT/R/getNumValuesCatMultExposure.r
similarity index 100%
rename from WAS/getNumValuesCatMultExposure.r
rename to PHESANT/R/getNumValuesCatMultExposure.r
diff --git a/WAS/incrementCounter.r b/PHESANT/R/incrementCounter.r
similarity index 100%
rename from WAS/incrementCounter.r
rename to PHESANT/R/incrementCounter.r
diff --git a/WAS/initFunctions.r b/PHESANT/R/initFunctions.r
similarity index 76%
rename from WAS/initFunctions.r
rename to PHESANT/R/initFunctions.r
index e412c6e..03c0f23 100644
--- a/WAS/initFunctions.r
+++ b/PHESANT/R/initFunctions.r
@@ -16,39 +16,6 @@
 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-# load the required r files
-loadSource <- function() {
-	source("loadData.r")
-	source("reassignValue.r")
-	source("validatePhenotypeInput.r")
-	source("validateTraitInput.r")
-	source("testNumExamples.r")
-	source("binaryLogisticRegression.r")
-	source("equalSizedBins.r")
-	source("fixOddFieldsToCatMul.r")
-	source("replaceMissingCodes.r")
-	source("replaceNaN.r")
-	source("testAssociations.r")
-	source("testCatMultiple.r")
-	source("testCatSingle.r")
-	source("testContinuous.r")
-	source("testInteger.r")
-	source("testCategoricalOrdered.r")
-	source("testCategoricalUnordered.r")
-	source("saveCounts.r")
-	source("incrementCounter.r")
-	source("getIsCatMultExposure.r")
-	source("getIsExposure.r")
-	source("addToCounts.r")
-	source("getNumValuesCatMultExposure.r")	
-	source("storeNewVar.r")
-	source("loadPhenotypes.r")
-	source("loadTraitOfInterest.r")	
-	source("loadConfounders.r")
-	source("makeTestDataFrame.r")
-	source("loadIndicatorFields.r")
-}
-
 # init the counters used to determine how many variables took each path in the variable processing flow.
 initCounters <- function() {
 	counters = data.frame(name=character(),countValue=integer(), stringsAsFactors=FALSE)
diff --git a/WAS/loadConfounders.r b/PHESANT/R/loadConfounders.r
similarity index 100%
rename from WAS/loadConfounders.r
rename to PHESANT/R/loadConfounders.r
diff --git a/WAS/loadData.r b/PHESANT/R/loadData.r
similarity index 100%
rename from WAS/loadData.r
rename to PHESANT/R/loadData.r
diff --git a/WAS/loadIndicatorFields.r b/PHESANT/R/loadIndicatorFields.r
similarity index 100%
rename from WAS/loadIndicatorFields.r
rename to PHESANT/R/loadIndicatorFields.r
diff --git a/WAS/loadPhenotypes.r b/PHESANT/R/loadPhenotypes.r
similarity index 100%
rename from WAS/loadPhenotypes.r
rename to PHESANT/R/loadPhenotypes.r
diff --git a/WAS/loadTraitOfInterest.r b/PHESANT/R/loadTraitOfInterest.r
similarity index 100%
rename from WAS/loadTraitOfInterest.r
rename to PHESANT/R/loadTraitOfInterest.r
diff --git a/WAS/makeTestDataFrame.r b/PHESANT/R/makeTestDataFrame.r
similarity index 100%
rename from WAS/makeTestDataFrame.r
rename to PHESANT/R/makeTestDataFrame.r
diff --git a/PHESANT/R/option_list.R b/PHESANT/R/option_list.R
new file mode 100644
index 0000000..203393e
--- /dev/null
+++ b/PHESANT/R/option_list.R
@@ -0,0 +1,20 @@
+library("optparse")
+
+option_list = list(
+  make_option(c("-f", "--phenofile"), type="character", default=NULL, help="Phenotype dataset file name", metavar="character"),
+  make_option(c("-g", "--traitofinterestfile"), type="character", default=NULL, help="Trait of interest dataset file name", metavar="character"),
+  make_option(c("-v", "--variablelistfile"), type="character", default=NULL, help="variablelistfile file name (should be tab separated)", metavar="character"),
+  make_option(c("-d", "--datacodingfile"), type="character", default=NULL, help="datacodingfile file name (should be comma separated)", metavar="character"),
+  make_option(c("-e", "--traitofinterest"), type="character", default=NULL, help="traitofinterest option should specify trait of interest variable name", metavar="character"),
+  make_option(c("-r", "--resDir"), type="character", default=NULL, help="resDir option should specify directory where results files should be stored", metavar="character"),
+  make_option(c("-u", "--userId"), type="character", default="userId", help="userId option should specify user ID column in trait of interest and phenotype files [default= %default]", metavar="character"),
+  make_option(c("-t", "--test"), action="store_true", default=FALSE, help="Run test phenome scan on test data (see test subfolder) [default= %default]"),
+  make_option(c("-s", "--sensitivity"), action="store_true", default=FALSE, help="Run sensitivity phenome scan [default= %default]"),
+  make_option(c("-a", "--partIdx"), type="integer", default=NULL, help="Part index of phenotype (used to parellise)"),
+  make_option(c("-b", "--numParts"), type="integer", default=NULL, help="Number of phenotype parts (used to parellise)"),
+  make_option(c("-j", "--genetic"), action="store", default=TRUE, help="Trait of interest is genetic, e.g. a SNP or genetic risk score [default= %default]"),
+  make_option(c("-z", "--save"), action="store_true", default=FALSE, help="Save generated phenotypes to a file rather than testing associations [default= %default]"),
+  make_option(c("-c", "--confounderfile"), type="character", default=NULL, help="Confounder file name", metavar="character"),
+  make_option(c("-i", "--confidenceintervals"), type="logical", default=TRUE, help="Whether confidence intervals should be calculated [default= %default]"),
+  make_option(c("-k", "--standardise"), action="store", default=TRUE, help="Trait of interest is standardised to have mean=0 and std=1 [default= %default]")
+)
\ No newline at end of file
diff --git a/WAS/processArgs.r b/PHESANT/R/processArgs.r
similarity index 100%
rename from WAS/processArgs.r
rename to PHESANT/R/processArgs.r
diff --git a/WAS/reassignValue.r b/PHESANT/R/reassignValue.r
similarity index 100%
rename from WAS/reassignValue.r
rename to PHESANT/R/reassignValue.r
diff --git a/WAS/replaceMissingCodes.r b/PHESANT/R/replaceMissingCodes.r
similarity index 100%
rename from WAS/replaceMissingCodes.r
rename to PHESANT/R/replaceMissingCodes.r
diff --git a/WAS/replaceNaN.r b/PHESANT/R/replaceNaN.r
similarity index 100%
rename from WAS/replaceNaN.r
rename to PHESANT/R/replaceNaN.r
diff --git a/WAS/saveCounts.r b/PHESANT/R/saveCounts.r
similarity index 100%
rename from WAS/saveCounts.r
rename to PHESANT/R/saveCounts.r
diff --git a/WAS/storeNewVar.r b/PHESANT/R/storeNewVar.r
similarity index 100%
rename from WAS/storeNewVar.r
rename to PHESANT/R/storeNewVar.r
diff --git a/WAS/testAssociations.r b/PHESANT/R/testAssociations.r
similarity index 100%
rename from WAS/testAssociations.r
rename to PHESANT/R/testAssociations.r
diff --git a/WAS/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
similarity index 100%
rename from WAS/testCatMultiple.r
rename to PHESANT/R/testCatMultiple.r
diff --git a/WAS/testCatSingle.r b/PHESANT/R/testCatSingle.r
similarity index 100%
rename from WAS/testCatSingle.r
rename to PHESANT/R/testCatSingle.r
diff --git a/WAS/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
similarity index 100%
rename from WAS/testCategoricalOrdered.r
rename to PHESANT/R/testCategoricalOrdered.r
diff --git a/WAS/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
similarity index 100%
rename from WAS/testCategoricalUnordered.r
rename to PHESANT/R/testCategoricalUnordered.r
diff --git a/WAS/testContinuous.r b/PHESANT/R/testContinuous.r
similarity index 100%
rename from WAS/testContinuous.r
rename to PHESANT/R/testContinuous.r
diff --git a/WAS/testInteger.r b/PHESANT/R/testInteger.r
similarity index 100%
rename from WAS/testInteger.r
rename to PHESANT/R/testInteger.r
diff --git a/WAS/testNumExamples.r b/PHESANT/R/testNumExamples.r
similarity index 100%
rename from WAS/testNumExamples.r
rename to PHESANT/R/testNumExamples.r
diff --git a/WAS/unittests/run-tests.sh b/PHESANT/R/unittests/run-tests.sh
similarity index 100%
rename from WAS/unittests/run-tests.sh
rename to PHESANT/R/unittests/run-tests.sh
diff --git a/WAS/unittests/test_equalSizedBins.r b/PHESANT/R/unittests/test_equalSizedBins.r
similarity index 100%
rename from WAS/unittests/test_equalSizedBins.r
rename to PHESANT/R/unittests/test_equalSizedBins.r
diff --git a/WAS/unittests/test_reassignValue.r b/PHESANT/R/unittests/test_reassignValue.r
similarity index 100%
rename from WAS/unittests/test_reassignValue.r
rename to PHESANT/R/unittests/test_reassignValue.r
diff --git a/WAS/unittests/test_testCatMultiple.r b/PHESANT/R/unittests/test_testCatMultiple.r
similarity index 100%
rename from WAS/unittests/test_testCatMultiple.r
rename to PHESANT/R/unittests/test_testCatMultiple.r
diff --git a/WAS/unittests/test_testCatSingle.r b/PHESANT/R/unittests/test_testCatSingle.r
similarity index 100%
rename from WAS/unittests/test_testCatSingle.r
rename to PHESANT/R/unittests/test_testCatSingle.r
diff --git a/WAS/validatePhenotypeInput.r b/PHESANT/R/validatePhenotypeInput.r
similarity index 100%
rename from WAS/validatePhenotypeInput.r
rename to PHESANT/R/validatePhenotypeInput.r
diff --git a/WAS/validateTraitInput.r b/PHESANT/R/validateTraitInput.r
similarity index 100%
rename from WAS/validateTraitInput.r
rename to PHESANT/R/validateTraitInput.r
diff --git a/PHESANT/man/PHESANT-package.Rd b/PHESANT/man/PHESANT-package.Rd
new file mode 100644
index 0000000..0178cc4
--- /dev/null
+++ b/PHESANT/man/PHESANT-package.Rd
@@ -0,0 +1,34 @@
+\name{PHESANT-package}
+\alias{PHESANT-package}
+\alias{PHESANT}
+\docType{package}
+\title{
+\packageTitle{PHESANT}
+}
+\description{
+\packageDescription{PHESANT}
+}
+\details{
+
+The DESCRIPTION file:
+\packageDESCRIPTION{PHESANT}
+\packageIndices{PHESANT}
+~~ An overview of how to use the package, including the most important functions ~~
+}
+\author{
+\packageAuthor{PHESANT}
+
+Maintainer: \packageMaintainer{PHESANT}
+}
+\references{
+~~ Literature or other references for background information ~~
+}
+~~ Optionally other standard keywords, one per line, from file KEYWORDS in the R documentation directory ~~
+\keyword{ package }
+\seealso{
+~~ Optional links to other man pages, e.g. ~~
+~~ \code{\link[<pkg>:<pkg>-package]{<pkg>}} ~~
+}
+\examples{
+~~ simple examples of the most important functions ~~
+}
diff --git a/PHESANT/opt_only.RData b/PHESANT/opt_only.RData
new file mode 100644
index 0000000000000000000000000000000000000000..f4d8fa863434c0806fb59d18003a41eb5eb661f2
GIT binary patch
literal 330
zcmV-Q0k!@giwFP!000001C3HqOT#b_?$%CS1v}WQ_ydHt;Dhgjz4)qvdyVaN3uH;j
zUCZ|8#cS8j5GPnj^5x5yyWDquU2mS^O&kQlGze!rhYX`BPm>@D=DdccCgzt66CMla
z=poB;0;G@CTV4thdA$Q|=vV`01-730-dDq+HrQCWpnY_-t*}yjDoB+?!rU}em_4LY
zS0<xhH=qB$*(FU2Q%b!}jpaqCA<BWgC^zHVJbvj!w}6e}u%6_qL9j0+qqt{J-z7Zx
z2k&d#OrcQ9Yt~GQJ>Z~A26EoFrIHH9D#!{+%xD6xf}Eqp=vil(!{2r0oxF3{#0xZ9
zz<4dKXUJ(6(U0yHL8BzuN;(eI^%gXcEJiho_WbI?=t`EL3+VH;Lg6_O30(^ADURxB
c2dW-$%tgKPU%9ed-hFlB8#^ETW%vRB0GivN*Z=?k

literal 0
HcmV?d00001

diff --git a/WAS/phenomeScan.r b/PHESANT/phenomeScan.r
similarity index 62%
rename from WAS/phenomeScan.r
rename to PHESANT/phenomeScan.r
index 2be21ca..85fc6a9 100644
--- a/WAS/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -19,35 +19,19 @@
 ##
 ## main phenome scan file
 
+## Updated by Quanli Wang 
+
+library(PHESANT)
 library("optparse")
 
-option_list = list(
-  make_option(c("-f", "--phenofile"), type="character", default=NULL, help="Phenotype dataset file name", metavar="character"),
-  make_option(c("-g", "--traitofinterestfile"), type="character", default=NULL, help="Trait of interest dataset file name", metavar="character"),
-  make_option(c("-v", "--variablelistfile"), type="character", default=NULL, help="variablelistfile file name (should be tab separated)", metavar="character"),
-  make_option(c("-d", "--datacodingfile"), type="character", default=NULL, help="datacodingfile file name (should be comma separated)", metavar="character"),
-  make_option(c("-e", "--traitofinterest"), type="character", default=NULL, help="traitofinterest option should specify trait of interest variable name", metavar="character"),
-  make_option(c("-r", "--resDir"), type="character", default=NULL, help="resDir option should specify directory where results files should be stored", metavar="character"),
-  make_option(c("-u", "--userId"), type="character", default="userId", help="userId option should specify user ID column in trait of interest and phenotype files [default= %default]", metavar="character"),
-  make_option(c("-t", "--test"), action="store_true", default=FALSE, help="Run test phenome scan on test data (see test subfolder) [default= %default]"),
-  make_option(c("-s", "--sensitivity"), action="store_true", default=FALSE, help="Run sensitivity phenome scan [default= %default]"),
-  make_option(c("-a", "--partIdx"), type="integer", default=NULL, help="Part index of phenotype (used to parellise)"),
-  make_option(c("-b", "--numParts"), type="integer", default=NULL, help="Number of phenotype parts (used to parellise)"),
-  make_option(c("-j", "--genetic"), action="store", default=TRUE, help="Trait of interest is genetic, e.g. a SNP or genetic risk score [default= %default]"),
-  make_option(c("-z", "--save"), action="store_true", default=FALSE, help="Save generated phenotypes to a file rather than testing associations [default= %default]"),
-  make_option(c("-c", "--confounderfile"), type="character", default=NULL, help="Confounder file name", metavar="character"),
-  make_option(c("-i", "--confidenceintervals"), type="logical", default=TRUE, help="Whether confidence intervals should be calculated [default= %default]"),
-  make_option(c("-k", "--standardise"), action="store", default=TRUE, help="Trait of interest is standardised to have mean=0 and std=1 [default= %default]")
-);
-opt_parser = OptionParser(option_list=option_list);
-opt = parse_args(opt_parser);
-
-save.image(file = "opt.RData")
-source("processArgs.r")
-processArgs();
-
-source("initFunctions.r")
-loadSource();
+args <- commandArgs(T)
+if (length(args) == 0) {
+  load(file = "opt_only.RData")
+} else {
+  opt_parser = OptionParser(option_list=option_list);
+  opt = parse_args(opt_parser);
+  processArgs();
+}
 
 ## load the files we write to and use
 counters=initCounters();
diff --git a/PHESANT/test.sh b/PHESANT/test.sh
new file mode 100755
index 0000000..17d4de0
--- /dev/null
+++ b/PHESANT/test.sh
@@ -0,0 +1,3 @@
+testDir="~/Documents/Github/PHESANT/testWAS/"
+Rscript phenomeScan.r --phenofile="${testDir}data/phenotypes.csv" --traitofinterestfile="${testDir}data/exposure.csv" --variablelistfile="${testDir}variable-lists/outcome-info.tsv" --datacodingfile="${testDir}variable-lists/data-coding-ordinal-info.txt" --traitofinterest="exposure" --resDir="${testDir}results/" --userId="userId"
+
diff --git a/opt.RData b/opt.RData
new file mode 100644
index 0000000000000000000000000000000000000000..fc4e5ffe212e4a0852f27489b7b9f2e116da4e25
GIT binary patch
literal 4811
zcmV;+5;W}}iwFP!000001MMAYa}>w5S2q$M0S1gYG?G9F!AjyZ#>f~OY;zc61O^OP
z9_@~#HLKmt&a4o=LT(>9o%^VKP4XX7sr-hdlF#{=5BZQPCm&L&Sm~a5v-9R{ckj$f
zE2OGb)l=_Izkc_7-80iWbM4%%Bb~Q88DmYXxt0HK<{w*{_`d{eVe5F?nJ+k#1+!>d
zMV4sZL3&$DwmD<*4?XDMo*UxeDWfoJ<??gZM9R#K6pcHaP$r+V?-3I1aGZbK!eNty
zOQ$`XFFKRcS#yT4YD!KM+;!P}ZbpXMl}x$<O=q$e8PjH_ga~9ntL@~A6t%^9SfKi@
zOU;@^PJ&yAAg3+cp~xyB+SAswS;{(CuEdoamoFzM)mbZBAXIl=6;y?6M%r{t+j0!S
z!pNC(6@2C_$DB8dQI+UOnc3`=nYz#Yut@qg_<fURamM!`jiA<4*0gzN6K1#hHP`D`
z-N5obQ#lKqZe?(F@NjX%k$?Weg>t;KELKFvjMvjjd$uN%PSMOb+z~mhGIvwpkv58q
z1EHOEQoD6fge^)&>lOo(cF|!2*Tpa+KW+E}Ri0g6O{5Ko&&I{Dm%10DI;MxRiRX!V
zg(o&AlROwR=2X_oX6opM9yu4S5023>f$5DQUXx1Mv@vBFjyYx67B_|ISm^_6$th`t
zQ+kp~5p1b^I+L5Jn@cvzc<3Dx9wB!}DnB=8u2F|r3Wsc}s8@CINVk-U)+Oi35W}0!
z4e*Cuuu_@nhlW#8&HUV2bCs;9@KHxH$rU@7Db~d`ZJ4&!BbPi(g(K3LqLt!;JTxB6
zaz_{(nJ+qQH<fH7$RbzR^jh*uN$r;s=X4=mAHQJQ>$JTUo|^S~6`m5E#)Wgdo@GmO
zIi5I@Br<bpZ<*~b{X8GufraF_u@64oTT5<q6mD$eB~V@WI$bADI>nO8lkM63Oop#l
z6@F}SLiLVyx_xE!E3@oci91C208VoZo1f<xc{nI0Dn2P$miN^zO-<*sX{&f3f_vC1
z@`kPO#)f3lwsLmH$;@Y*hxKwwR|Stqz8Mp~K|{kXT3Ze<6%JU(PiM`db0NJ@OId3d
z6=X(P)Y?O&wX@AZuCc=lQrcP&l_@AjL%e>a4JU7KT2_{?pe@x(EX-|HSba)i_0D86
zSDF(l+jTH|N0_$O?#U~qxhXzDglsMhX~(cUVYn(x|BgD~XRMs%WKuO*USDY1L!?7R
zCZTY^waOFpjBNm&LxwdnGh&#=#FeW?zUUt|iW&PpuR>hT7}IoExK-h#ch!@$ZO*Gs
z7neRs2gD;y1h*zQzH5B<R4~XCTcn5Soj>v}z7jT^S<{=l`N^eW+IBvbG2IK3Yiz<u
zDO|(P=~MaKbiTw(b+LZu^fDgWLiWUq<q>Q1%2q~U@U5P~GifWAvfLUmZ)R(oHuPDs
zMad>oAv=F_)^al-AY+v2pff@)K&q5=^C)`lQp>g~e0N{7ymk1=m03(@?3yyA>dKaq
zkL0T>(n$x=a_9Y7QT665GdDiwo*~*!di-z<r&S8KDx5W|o^-rhB6XT{0^*S-o!*Jr
z`~yCnaL0&|+!408ALe?(TF5w$dG2uW0#~OkJ5|gSB=^4<LG7>#8E&Z(y(6O%ay=HZ
zR`CkOe2Uxetb2Q@MckeIAE0jKkQS-R*&YP9JOQq`AmW9Cz~9W_sK{3M-p1kAAVxcf
zZ9)v80~`}{-Ud1sg&H0YX2LNaubbd)0$vzzJIJ&f`1Hdu(LO0cg#(XwHVFD)dOJDn
zlJMO_@C*)p61)L|w-5MWyv+oMpC_<kf`e^(h(nTju_<gi3N{_%@EOtf2<UlKq3>bB
z`#6Ua5<N~5yi**WC3ss1&T|~jRMGVX4lg1c5idUkKl;8q54yYz?JFSrOCElc@Y#D{
zo8J&yT?GGK0vn0;_dxeD(7OWkUPU}#1ADy=J`*;(26$IN&OZb01kkww{nx=ZZvfAm
z9R7sp3f$Wgy0;KrU;k=;7h~>l_y^eFE{DI9areNdmq4z!fd3sB|0ak35MAB|nI@5+
zZv(E-iJzJ$+KEnSiB2g`r$k3gou)ZTALum0Kkp{v_$<USNT+u|zG)cGOIc!s=#&MS
z?!%Zl7-NA>y!0kc6P*eYo${d5IMPYT`6YhrWg;gpxo%zX<B!+R#9bCW{z?-54)C{$
zeh;939`suPo)0;0k?8fFgzmeDF4pUPe(VFH*9S1h*Xu*x<=37ldOecx|1$9Z9ntHn
z(Ek<C>ubRC>m2vDM6Yj3=zasyjT3|4;wXOv8+^n+|ACDA7~~N)xb5M)a(%*4{u09$
z-<HVyDaia+;*0O_V}B!Ze3yB1qwkCF@veWz;Qs>&|L=SJo7NLw{1Ey-1KocFJb%n_
zyNEA-Dxv!mL^o=USNY3dQ~V5M_&KLL1oVCZy8R01{1V0sp55?StUts$M66>N3zj>H
z_dFrpj(<U?46!LQO)wKG%<*x8*`#8=7a`(~X1+*MbirK{_wNgWgU)3t`Vs#p7Kwb#
zOd%gu1b4KHn=NT^Z#2~1gHU=#Mn;9GH_uLt3hYsdOoDfdf_L-j(@DAb+p+lk?@97K
zI7{(w;7W~!h24ULh1;_#d0WH6?83}z64Jk4Syf0i=D&dVa>|8w%}E)yGr8kh3#RQ@
zVX8YR7p&LD;2Pmb%AN<zava!JK{Cky`vSFCx{hTwhh#HT1z#%TFp4UFcv6hFNAo2o
zm7lYcncQ@K#Br6DX|ZmZwW!$tQ&oo(Y8?cvq(>~7FY?CBf<6nf0wwh6TxNab*Se%i
z>1v}8tt%|TR$3$1hx@nD{tY#%02kP`GBZiPuAXmPZZk_aR7%a*9cEz=lG;MB-dZT+
z?NZSS%0S<u1P_}-CPU9Mt6#ZCtAZ^RvBOtIA>yWG7BPI=UMik0n1o21mszCX9-3Ah
zoR74{;%#B_>1I7#uY#MZp)pU?Dth_$xKxy_HIha5eyggiOYdz=U#hk%Ox2#TM{=cX
zmY_cwPn}*rCF6>{Lm^gYp~y39686|5PKiYc2SMp61*I0-$rBo(tQ;t>X$-SCQ<}4K
zjxkrV9e4L#sZhvf#QwX`@}pJkw;usovJ{fZx0C4|v`ZnsdldvD?p7dn2^~hP1u+kk
zA$!=;y-9V_d9^p(KT7)zrkxN%{@GIq{iedZIa&4@J*(<lf~G17x{Y++vJEbT;aZnP
zn}s;en2)0i5|x34I{i!^Gy_Z?3r&NA1aof~^QfYdZ>zDw?^7AF+6=QV%;zxGa4_7T
zr2YHj$vLPCZJB&SRpdJm?mt5NhwCVxE(H%VeF`38(Nl1&a>r|sijrj@P0HjFZ19+q
z600%7^vO8N)F~$V$29$qGX)PbeTW}n`V>4GPtM@PlWtNeOQ&iv*JD+rKF;)sc_N<F
z$CfKGpJw{RJjtRb=H9@9C?msWrOEir67*h0$#{zClkr)m58Bi5%+O!U0v8=0A@r2X
z*m0%~5AoN-(6ywBAI4Q#!A7z+mgc!CRzAb@N%%a|C*fJPbT3f3b;<XF#=4^aMHW5z
z_LqM;CCo3jE4nVAKB4({ZV4){!WHUyrjL-9m_83*j%SX6mCD0cSoAy`BEN@R?kv1e
z#l{zzJ_|1~eHLD3OZNelTbG5eYHTa|udwJ@=*@ec@x25LJA<a*YfI2|6{g=+rVowR
z!~Lsf&3TRK%fduFIrc9lBx@-T*ID%PaDe_ccRA{VOsE@79}I6Wz0q&7SVrHyQnmP2
z3}YMcv)kpa!MCf}_D!a*!FQOx2H#~%w>_0xSA*|qY%2QSV$o~x4*%E7!LOP<FVNyU
zxdbIwS@A_`4A)pZ56aYwG5%1TQ&M66x0ybMO|~+VLfow=Q{jHJt3cc&VCvWypD^de
zx*+_6BlkMFah+FUTU^sZKQb~NWU^T|l!~QXUj*wWK>N=@2>yM?B4~=W{T#KvkXTUJ
z#+U@$85LuiC1Vu(Qv|wl|IFUd{+W<1hn=~?FssbJCX6(1W=sBsnh3HWGr6>oOedn`
zKL37FmdI7uQA-7zWQ1|ZW*sWX)8zXeogkIlZxS#fhN|&77=B!c{Wjvxqu6L5obK<w
zXC9s#H#m;kJ=|s&|51R4e(WNCKoE{iYoWl6Y>OY^T@XJoR#>@m%MQf=ZhgKqZSg$O
zi|yoVgRIgU)9zY&RQ7oLb_6xD+o+m${kkZqZP!o&sK&5K9i*U^tbO;8uqxU8RL!ab
zI(?SDk0}8hVpzD&5(yiT9aYtAw3kXGo2L_i7emuXbcMVdykq<5%*mjCJ+xiWcC&a@
zubb7c>e2m4g5Sn?_8+8ms5@-;!I*Z}(xdX@O-Y}#ihWYmWZ$ogX4!_P1dNHHPi!7S
z53-r6njS-{SoQxv_Mw=vN1`j_-5gKe0}*rfKumd~(jE3=OKidNbFrOO{{D}#^2*rF
z)z5=|mv;irgZ%t#f8>q9JkejQT#YN&*05ZM>Lr)pwH?|gq21=u9r5kDgGH)w35>s7
zH7)_y0|ectSY^$KxMeB<r`QR&+#$&AuE8yP!fxrUr(1SGyA#@8BySJMyB&D-gRHy3
z9sMA)Xj@sZ3u+L#tpn&9ELg}>^H$&S6xV`%D`Y{ciR9W0MFG}*Sfrjum2$-Vi|zOU
zX!k-p2<?7o_dzT6`@}W>FbkP~u$}=t1Z_XG2U&dr_z+VEa4q|V;@UL1LN=wkt)Y+}
zg?0?uBhVi8<U8iq<ipU49W!y=aDs*0aIBu8eGJ+WXrE^F3GI_i9okWM+{CrxGb?0A
zs?Qn<=Vzfk4ecpt$0ahy+ZiLMJkR1g?JNs9t-qeZe+Js;pnZYWC-`4v>fn#D<0Y;s
z&#jOtsY+`o{$GOjWoXYs`-(*7I6G1V4Y5NcuFEd7kjn<@8T}Wby#Vd2tUl3yg{h-I
z%1)EGHhgV`Y)ExkLt+0qwAY}Wfc7f1*Cn#Y+HoO>yaDY^Xm3D!OF}%#4g)b(-0_ba
z4|iE89(LC=r{0A2HnjIxeRAq8rp_tt&Vjhbn_MB|Q6<(;GMUi6?a6c8KLwbQ$h_p6
z=b&iQ!AE5SxX^?^_^y=&H)VZj2Uv%|zctVWun|EQf^Gm`p)Ck{5NwSJPXX*k@DzeQ
z0KEYH2zn6=0PF?Wk6<qXVVfa<g9wHY_%=O+K%5_q0*oOz921TKJdNNOf|CHx0Gvkf
z3<4p~IKcA=#t{fz&jOr7;2#?!@KP1<<LCkc-&Pk9h~t+KysCk#5x}Q&T>)<Z+(d9I
z0$}`0th(RtAh?UbA9oLdIQ|xZKjuvYlL{~ae7q?HsR%Gdpqft82xbuY<7N?v<L?0Y
zV=M&s6)*?j<K+<KBS4P8mk{`J77>W!HUeGzIT668Gp~RJfQJa)jQ|*5(0?C+uh$0%
z#PJUi==Az>1n}v6MFC#}_&S1bL;#HcA*&vP-$L*a0)O1c2tGo%pCHiL;?oH534!k*
z@a6a}0&)C%2z30v9|3$ipDEx+06#|XlL&zE1^p;BUgj?`m;4Oi=LmkO0fZN4Q6+H2
zdb3HaExO5=J#<r_bc2+Sy^CU#jRhDt-bNfevi1!om<qn>j*lz$<zbXyKO4RiF8iE_
zaGc^`PR(XEOP*ufDjsTX170^AL=UynJlO`qpyG*7u=kBTLrL+B7%%}mf-Gzhcv^)M
z;Dw&wLY|-EKifh)CkUjLJ@&{agdQXfBi$!!z_T{EQrCG@o-NJ<QC+rKm-kwv$*gx~
z<1M0f&xf8?JRiF9&7aHs(0lq$Pwn&WtG}vqZ9MkA@oLT0;)!{MCmOHTY`j{t@oG)q
zA(p}+jaO@~9v>AIK5D#L^YQwnr1neW)tYO^jgG>NjaO?v4sX~BZ!})5xk3&w6%J^;
zTC<*3pHf)8@oLSwoBkbjz&Bp4xmuj`u6mMgyjt_|x+bM?P2<&?jaO^R&)^%c)@;05
zvx;yl`{KvpMb_fKMp1kZ>VLs|#QcBB@wS}*kof-sr8e}Sls5D*mp1g2ueRVJPtPvG
zICA{-Vj!8{hw1J@pZy2Flz%i@p7!V+B)1pa!MhoUnH)!i%TE`=Lx%Y|PEJf!4>HcI
z!N+@72W)YjAbdsOlv>1ItR7KwYXmc4Q~1xTN)Vh;ne=CbBh`CE#kHxN;W;(H5|Lis
zL@NA0M%DmPR5-nCDp2v{N`w_%k_tiHdjEaNQTOG1zLP|!*!h<@^H0VQ>54(mg$Gp&
z_*f=79#Y;eTWHF19$4Hqhu!J~cFg2b*^-^{*LT$n6EzFRiJ%4!Wh!tY8CXFG(MJoG
lM3AbPx+#>Y;uK9IK~sJjIfC|XS$cL$;s11|weDh*002)0Pgno|

literal 0
HcmV?d00001


From 9fee34e163cacd0ad5956b2348433a40528f6fd0 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 12:16:54 -0400
Subject: [PATCH 03/27] Code Refactor: remove global variable vl

---
 PHESANT/DESCRIPTION                     | 12 ++--
 PHESANT/NAMESPACE                       |  4 ++
 PHESANT/R/addToCounts.r                 | 33 ----------
 PHESANT/R/counters.r                    | 56 +++++++++++++++++
 PHESANT/R/fixOddFieldsToCatMul.r        | 49 ---------------
 PHESANT/R/getIsCatMultExposure.r        | 61 -------------------
 PHESANT/R/getIsExposure.r               | 80 ++++++++++++++++++++++++-
 PHESANT/R/getNumValuesCatMultExposure.r | 53 ----------------
 PHESANT/R/incrementCounter.r            | 34 -----------
 PHESANT/R/initFunctions.r               |  5 --
 PHESANT/R/loadData.r                    | 26 +++++++-
 PHESANT/R/loadIndicatorFields.r         |  6 +-
 PHESANT/R/reassignValue.r               |  6 +-
 PHESANT/R/saveCounts.r                  | 31 ----------
 PHESANT/R/testAssociations.r            | 14 ++---
 PHESANT/R/testCatMultiple.r             | 14 ++---
 PHESANT/R/testCatSingle.r               | 10 ++--
 PHESANT/R/testCategoricalOrdered.r      |  4 +-
 PHESANT/R/testCategoricalUnordered.r    |  4 +-
 PHESANT/R/testContinuous.r              | 12 ++--
 PHESANT/R/testInteger.r                 | 10 ++--
 PHESANT/phenomeScan.r                   |  6 +-
 22 files changed, 211 insertions(+), 319 deletions(-)
 delete mode 100644 PHESANT/R/addToCounts.r
 create mode 100644 PHESANT/R/counters.r
 delete mode 100644 PHESANT/R/fixOddFieldsToCatMul.r
 delete mode 100644 PHESANT/R/getIsCatMultExposure.r
 delete mode 100644 PHESANT/R/getNumValuesCatMultExposure.r
 delete mode 100644 PHESANT/R/incrementCounter.r
 delete mode 100644 PHESANT/R/saveCounts.r

diff --git a/PHESANT/DESCRIPTION b/PHESANT/DESCRIPTION
index 4769eb6..62068ae 100644
--- a/PHESANT/DESCRIPTION
+++ b/PHESANT/DESCRIPTION
@@ -1,9 +1,9 @@
 Package: PHESANT
 Type: Package
-Title: What the package does (short line)
-Version: 1.0
+Title: Repliments PHESANT
+Version: 0.1
 Date: 2019-03-14
-Author: Who wrote it
-Maintainer: Who to complain to <yourfault@somewhere.net>
-Description: More about what it does (maybe more than one line)
-License: What license is it under?
+Author: Quanli Wang
+Maintainer: Quanli Wang <quanliwang20@hotmail.com>
+Description: This will replement the original PHESANT in a package and potentially with some optimizations
+License: GPL (>= 3)
diff --git a/PHESANT/NAMESPACE b/PHESANT/NAMESPACE
index d75f824..404475d 100644
--- a/PHESANT/NAMESPACE
+++ b/PHESANT/NAMESPACE
@@ -1 +1,5 @@
 exportPattern("^[[:alpha:]]+")
+importFrom("stats", "coef", "complete.cases", "confint", "end", "glm",
+             "lm", "model.matrix", "na.omit", "pnorm", "qnorm",
+             "quantile", "relevel")
+importFrom("utils", "read.table", "type.convert", "write.table")
diff --git a/PHESANT/R/addToCounts.r b/PHESANT/R/addToCounts.r
deleted file mode 100644
index e41420c..0000000
--- a/PHESANT/R/addToCounts.r
+++ /dev/null
@@ -1,33 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# adds given value to counter, that are used to count how many variables go down each route in the data flow
-addToCounts <- function(countName, num) {
-		
-	idx = which(counters$name==countName)
-
-	if (length(idx)==0) {
-		# counter does not exist so add with countValue 1
-		counters <<- rbind(counters, data.frame(name=countName, countValue=num))
-	}
-	else {
-		# add to counter that already exists
-		counters$countValue[idx] <<- counters$countValue[idx]+num
-	}
-
-}
diff --git a/PHESANT/R/counters.r b/PHESANT/R/counters.r
new file mode 100644
index 0000000..15fdc47
--- /dev/null
+++ b/PHESANT/R/counters.r
@@ -0,0 +1,56 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# adds given value to counter, that are used to count how many variables go down each route in the data flow
+addToCounts <- function(countName, num) {
+    idx = which(counters$name==countName)
+    if (length(idx)==0) {
+        # counter does not exist so add with countValue 1
+        counters <<- rbind(counters, data.frame(name=countName, countValue=num))
+    } else {
+        # add to counter that already exists
+        counters$countValue[idx] <<- counters$countValue[idx]+num
+    }
+}
+
+# increments counters used to count how many variables go down each route in the data flow
+incrementCounter <- function(countName) {
+    idx = which(counters$name==countName)
+    if (length(idx)==0) {
+        # counter does not exist so add with countValue 1
+        counters <<- rbind(counters, data.frame(name=countName, countValue=1))
+    } else {
+        # increment counter that already exists
+        counters$countValue[idx] <<- counters$countValue[idx]+1
+    }
+}
+
+# Saves the counters stored in count variables, to a file in results directory
+saveCounts <- function() {
+    countFile = paste(opt$resDir,"variable-flow-counts-",opt$varTypeArg,".txt",sep="")
+    # sort on counter name
+    sortIdx = order(as.character(counters[,"name"]))
+    counters <<- counters[sortIdx,]
+    write.table(counters, file=countFile, sep=",", quote=FALSE, row.names=FALSE)
+}
+
+# init the counters used to determine how many variables took each path in the variable processing flow.
+initCounters <- function() {
+    counters = data.frame(name=character(),countValue=integer(), stringsAsFactors=FALSE)
+    return(counters)
+}
\ No newline at end of file
diff --git a/PHESANT/R/fixOddFieldsToCatMul.r b/PHESANT/R/fixOddFieldsToCatMul.r
deleted file mode 100644
index 2fb25c4..0000000
--- a/PHESANT/R/fixOddFieldsToCatMul.r
+++ /dev/null
@@ -1,49 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# Changes variable name from instances to arrays
-# 
-# This function changes the format of variable names from instance format to array format (i.e. we treat
-# the instances as arrays), for a small subset of Biobank variables.
-#
-# Some variables are stored in Biobank as categorical (single) fields with the data stored as set of instances,
-# but we want to treat these instead as categorical (multiple) with a set of arrays.
-# These are indicated by the value "YES-INSTANCES" in the CAT_SINGLE_TO_CAT_MULT column of the variable info file.
-# This function changes the format of these variable names from VARID_0_0, VARID_1_0, VARID_2_0 etc (which
-# is instance format), to VARID_0_0, VARID_0_1, VARID_0_2 etc. (array format) so they can be treated as categorical (multiple) fields. 
-fixOddFieldsToCatMul <- function(data) {
-  	# examples are variables: 40006, 40011, 40012, 40013
-  	# get all variables that need their instances changing to arrays
-  	dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
-  	for (i in 1:nrow(dataPheno)) {
-    		varID = dataPheno[i,]$FieldID;		
-    		varidString = paste("x",varID,"_", sep="");			
-    			
-    		# get all columns in data dataframe for this variable	
-    		colIdxs = which(grepl(varidString,names(data)));
-    		
-    		# change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
-    		count = 0;
-    		for (j in colIdxs) {	
-    			  colnames(data)[j] <- paste(varidString, "0_", count, sep="")
-    			  count = count + 1;
-    		}				
-	}
-	return(data)
-}
diff --git a/PHESANT/R/getIsCatMultExposure.r b/PHESANT/R/getIsCatMultExposure.r
deleted file mode 100644
index 560263f..0000000
--- a/PHESANT/R/getIsCatMultExposure.r
+++ /dev/null
@@ -1,61 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# looks up categorical multiple field in the variable info file, return
-# whether field has YES in TRAIT_OF_INTEREST column (i.e. all values in 
-# this field denote the exposure), or whether varName has varValue stated 
-# as a trait of interest in the TRAIT_OF_INTEREST column (multiple values are
-# separated by "|" in this field
-getIsCatMultExposure <- function(varName, varValue) {
-	
-	# get row index of field in variable information file
-	idx=which(vl$phenoInfo$FieldID==varName)
-
-	# may be empty of may contain VALUE1|VALUE2 etc .. to denote those
-	# cat mult values denoting exposure variable
-	isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
-        
-        if (!is.na(isExposure) & isExposure!="") {
-		
-		isExposure = as.character(isExposure)
-		
-		## first check if value is YES, then all values are exposure traits
-		if (isExposure == "YES") {
-			cat("IS_CM_ALL_EXPOSURE || ")
-			return(TRUE)
-		}
-
-		## try to split by |, to set particular values as exposure
-
-		# split into variable Values
-		exposureValues = unlist(strsplit(isExposure,"\\|"))
-
-		# for each value stated, check whether it is varValue
-		for (thisVal in exposureValues) {
-			if (thisVal == varValue) {
-				cat("IS_CM_EXPOSURE || ")
-				return(TRUE)
-			}
-		}
-	}
-
-	# varValue is not in list of exposure values
-	return(FALSE)
-
-}
diff --git a/PHESANT/R/getIsExposure.r b/PHESANT/R/getIsExposure.r
index c8df69e..a05d561 100644
--- a/PHESANT/R/getIsExposure.r
+++ b/PHESANT/R/getIsExposure.r
@@ -20,7 +20,7 @@
 # returns boolean var - whether this field denotes the trait of interest, as specified
 # in the variable information file
 # to determine if values of cat mult fields (not the whole field) are exposure values, use getIsCatMultExposure function instead.
-getIsExposure <- function(varName) {
+getIsExposure <- function(vl, varName) {
 
 	idx=which(vl$phenoInfo$FieldID==varName)
         isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
@@ -29,3 +29,81 @@ getIsExposure <- function(varName) {
     	}
 	return(FALSE)
 }
+
+# looks up categorical multiple field in the variable info file, return
+# whether field has YES in TRAIT_OF_INTEREST column (i.e. all values in 
+# this field denote the exposure), or whether varName has varValue stated 
+# as a trait of interest in the TRAIT_OF_INTEREST column (multiple values are
+# separated by "|" in this field
+getIsCatMultExposure <- function(vl, varName, varValue) {
+  
+  # get row index of field in variable information file
+  idx=which(vl$phenoInfo$FieldID==varName)
+  
+  # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
+  # cat mult values denoting exposure variable
+  isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+  
+  if (!is.na(isExposure) & isExposure!="") {
+    
+    isExposure = as.character(isExposure)
+    
+    ## first check if value is YES, then all values are exposure traits
+    if (isExposure == "YES") {
+      cat("IS_CM_ALL_EXPOSURE || ")
+      return(TRUE)
+    }
+    
+    ## try to split by |, to set particular values as exposure
+    
+    # split into variable Values
+    exposureValues = unlist(strsplit(isExposure,"\\|"))
+    
+    # for each value stated, check whether it is varValue
+    for (thisVal in exposureValues) {
+      if (thisVal == varValue) {
+        cat("IS_CM_EXPOSURE || ")
+        return(TRUE)
+      }
+    }
+  }
+  
+  # varValue is not in list of exposure values
+  return(FALSE)
+  
+}
+
+# looks up categorical multiple field in the variable info file, return
+# number of values denoted as trait of interest.
+# returns zero if whole field is denoted trait of interest, not particular values.
+getNumValuesCatMultExposure <- function(vl, varName) {
+  
+  # get row index of field in variable information file
+  idx=which(vl$phenoInfo$FieldID==varName)
+  
+  # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
+  # cat mult values denoting exposure variable
+  isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+  
+  if (!is.na(isExposure) & isExposure!="") {
+    
+    isExposure = as.character(isExposure)
+    
+    ## first check if value is YES, then no partic values are traits of interest
+    if (isExposure == "YES") {
+      return(0)
+    }
+    
+    ## try to split by |, to set particular values as exposure
+    
+    # split into variable Values
+    exposureValues = unlist(strsplit(isExposure,"\\|"))
+    
+    return(length(exposureValues))
+    
+  }
+  
+  # varValue is not in list of exposure values
+  return(0)
+  
+}
diff --git a/PHESANT/R/getNumValuesCatMultExposure.r b/PHESANT/R/getNumValuesCatMultExposure.r
deleted file mode 100644
index 4cae9a9..0000000
--- a/PHESANT/R/getNumValuesCatMultExposure.r
+++ /dev/null
@@ -1,53 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# looks up categorical multiple field in the variable info file, return
-# number of values denoted as trait of interest.
-# returns zero if whole field is denoted trait of interest, not particular values.
-getNumValuesCatMultExposure <- function(varName) {
-	
-	# get row index of field in variable information file
-	idx=which(vl$phenoInfo$FieldID==varName)
-
-	# may be empty of may contain VALUE1|VALUE2 etc .. to denote those
-	# cat mult values denoting exposure variable
-	isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
-        
-        if (!is.na(isExposure) & isExposure!="") {
-		
-		isExposure = as.character(isExposure)
-		
-		## first check if value is YES, then no partic values are traits of interest
-		if (isExposure == "YES") {
-			return(0)
-		}
-
-		## try to split by |, to set particular values as exposure
-
-		# split into variable Values
-		exposureValues = unlist(strsplit(isExposure,"\\|"))
-
-		return(length(exposureValues))
-
-	}
-
-	# varValue is not in list of exposure values
-	return(0)
-
-}
diff --git a/PHESANT/R/incrementCounter.r b/PHESANT/R/incrementCounter.r
deleted file mode 100644
index c430a24..0000000
--- a/PHESANT/R/incrementCounter.r
+++ /dev/null
@@ -1,34 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# increments counters used to count how many variables go down each route in the data flow
-incrementCounter <- function(countName) {
-		
-	idx = which(counters$name==countName)
-
-	if (length(idx)==0) {
-		# counter does not exist so add with countValue 1
-		counters <<- rbind(counters, data.frame(name=countName, countValue=1))
-	}
-	else {
-		# increment counter that already exists
-		counters$countValue[idx] <<- counters$countValue[idx]+1
-	}
-
-}
diff --git a/PHESANT/R/initFunctions.r b/PHESANT/R/initFunctions.r
index 03c0f23..38a04b5 100644
--- a/PHESANT/R/initFunctions.r
+++ b/PHESANT/R/initFunctions.r
@@ -16,11 +16,6 @@
 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-# init the counters used to determine how many variables took each path in the variable processing flow.
-initCounters <- function() {
-	counters = data.frame(name=character(),countValue=integer(), stringsAsFactors=FALSE)
-	return(counters);
-}
 
 # create new results files and headers
 initResultsFiles <- function() {
diff --git a/PHESANT/R/loadData.r b/PHESANT/R/loadData.r
index 68d4eed..450b103 100644
--- a/PHESANT/R/loadData.r
+++ b/PHESANT/R/loadData.r
@@ -21,7 +21,7 @@
 # creates phenotype / trait of interest data frame
 # creates confounder data frame
 # returns an object holding these two data frames
-loadData <- function() {
+loadData <- function(vl) {
   	library(data.table)
 
   
@@ -69,10 +69,30 @@ loadData <- function() {
   	}
   
   	# some fields are fixed that have a field type as cat single but we want to treat them like cat mult
-  	phenotype = fixOddFieldsToCatMul(phenotype)
-  	indFields = loadIndicatorFields(colnames(phenotype))
+  	phenotype = fixOddFieldsToCatMul(vl, phenotype)
+  	indFields = loadIndicatorFields(vl, colnames(phenotype))
   	d = list(datax=phenotype, confounders=conf, inds=indFields)
   	return(d)
 }
 
+fixOddFieldsToCatMul <- function(vl, data) {
+  # examples are variables: 40006, 40011, 40012, 40013
+  # get all variables that need their instances changing to arrays
+  dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
+  for (i in 1:nrow(dataPheno)) {
+    varID = dataPheno[i,]$FieldID;		
+    varidString = paste("x",varID,"_", sep="");			
+    
+    # get all columns in data dataframe for this variable	
+    colIdxs = which(grepl(varidString,names(data)));
+    
+    # change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
+    count = 0;
+    for (j in colIdxs) {	
+      colnames(data)[j] <- paste(varidString, "0_", count, sep="")
+      count = count + 1;
+    }				
+  }
+  return(data)
+}
 
diff --git a/PHESANT/R/loadIndicatorFields.r b/PHESANT/R/loadIndicatorFields.r
index e069c8e..8159ddb 100644
--- a/PHESANT/R/loadIndicatorFields.r
+++ b/PHESANT/R/loadIndicatorFields.r
@@ -18,7 +18,7 @@
 
 ##
 ## load data used for data code default value related field, and categorical multiple indicator field
-loadIndicatorFields <- function(phenosToTest) {
+loadIndicatorFields <- function(vl, phenosToTest) {
   	print("Loading indicator fields from phenotypes file ...")
   
   	# read pheno file column names
@@ -27,7 +27,7 @@ loadIndicatorFields <- function(phenosToTest) {
   	indVars = c(opt$userId)
   
   	## add indicator variables to pheno data
-  	indVars = addIndicatorVariables(indVars, phenosToTest, phenoVarsAll)
+  	indVars = addIndicatorVariables(vl, indVars, phenosToTest, phenoVarsAll)
   	if (length(indVars)>1) {
     		# not just user id column
     		print("Loading required related variable(s):")
@@ -44,7 +44,7 @@ loadIndicatorFields <- function(phenosToTest) {
 }
 
 
-addIndicatorVariables <- function(indVars, phenosToTest, phenoVarsAll) {
+addIndicatorVariables <- function(vl, indVars, phenosToTest, phenoVarsAll) {
 
   	##### default value related fields for data codes
   	# get list of all indicator variables from outcome info file
diff --git a/PHESANT/R/reassignValue.r b/PHESANT/R/reassignValue.r
index ed9701c..c9bc289 100644
--- a/PHESANT/R/reassignValue.r
+++ b/PHESANT/R/reassignValue.r
@@ -18,7 +18,7 @@
 
 
 # Reassigns values as specified in data coding info file
-reassignValue <- function(pheno, varName) {
+reassignValue <- function(vl, pheno, varName) {
 
 	# get data code info - whether this data code is ordinal or not and any reordering and resassignments
         dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
@@ -38,12 +38,12 @@ reassignValue <- function(pheno, varName) {
 	dataDataCode = vl$dataCodeInfo[dataCodeRow,];
         reassignments = as.character(dataDataCode$reassignments);
 	
-	return(reassignValue2(pheno, reassignments))
+	return(reassignValue2(vl, pheno, reassignments))
 	
 }
 
 # Reassigns values in pheno, as specified in resassignments argument
-reassignValue2 <- function(pheno, reassignments) {
+reassignValue2 <- function(vl, pheno, reassignments) {
 
 	# can be NA if row not included in data coding info file
 	if (!is.na(reassignments) && nchar(reassignments)>0) {
diff --git a/PHESANT/R/saveCounts.r b/PHESANT/R/saveCounts.r
deleted file mode 100644
index 5a72916..0000000
--- a/PHESANT/R/saveCounts.r
+++ /dev/null
@@ -1,31 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# Saves the counters stored in count variables, to a file in results directory
-saveCounts <- function() {
-
-	countFile = paste(opt$resDir,"variable-flow-counts-",opt$varTypeArg,".txt",sep="")
-
-	# sort on counter name
-	sortIdx = order(as.character(counters[,"name"]))
-	counters <<- counters[sortIdx,]
-
-	write.table(counters, file=countFile, sep=",", quote=FALSE, row.names=FALSE)
-}
-
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index 71592ab..8a2aba1 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -18,7 +18,7 @@
 
 
 # Tests the association of a field, determined by its field type
-testAssociations <- function(currentVar, currentVarShort, thisdata) {
+testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
 
 	## call file for variable type
 
@@ -38,7 +38,7 @@ testAssociations <- function(currentVar, currentVarShort, thisdata) {
 		excluded = vl$phenoInfo$EXCLUDED[idx]
 		catSinToMult = vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT[idx]
 		fieldType = vl$phenoInfo$ValueType[idx]
-		isExposure = getIsExposure(currentVarShort) #vl$phenoInfo$EXPOSURE_PHENOTYPE[idx]
+		isExposure = getIsExposure(vl, currentVarShort) #vl$phenoInfo$EXPOSURE_PHENOTYPE[idx]
 
 		if (fieldType=="Integer") {		
 
@@ -55,7 +55,7 @@ testAssociations <- function(currentVar, currentVarShort, thisdata) {
 					incrementCounter("start.exposure.int")
 				}
 
-			    	testInteger(currentVarShort, "INTEGER", thisdata);
+			    	testInteger(vl, currentVarShort, "INTEGER", thisdata);
 			}
 			cat("\n");
 	    	}
@@ -73,7 +73,7 @@ testAssociations <- function(currentVar, currentVarShort, thisdata) {
 				if (isExposure==TRUE) {
                                         incrementCounter("start.exposure.cont")
                                 }
-				testContinuous(currentVarShort, "CONTINUOUS", thisdata);
+				testContinuous(vl, currentVarShort, "CONTINUOUS", thisdata);
 	        	}
 	        	cat("\n");
 		}
@@ -91,7 +91,7 @@ testAssociations <- function(currentVar, currentVarShort, thisdata) {
 				if (isExposure==TRUE) {
                                         incrementCounter("start.exposure.catSin")
                                 }
-			    	testCategoricalSingle(currentVarShort, "CAT-SIN", thisdata);
+			    	testCategoricalSingle(vl, currentVarShort, "CAT-SIN", thisdata);
 			}
 			cat("\n");
 	  	}
@@ -117,12 +117,12 @@ testAssociations <- function(currentVar, currentVarShort, thisdata) {
                                 }
 				else {
 					# get number of cat mult values denoting trait of interest
-	                                numVals = getNumValuesCatMultExposure(currentVarShort)
+	                                numVals = getNumValuesCatMultExposure(vl, currentVarShort)
 					if (numVals>0) {
 		                                addToCounts("start.exposure.catMulvalues", numVals)
 					}
 				}
-		        	testCategoricalMultiple(currentVarShort, "CAT-MUL", thisdata);
+		        	testCategoricalMultiple(vl, currentVarShort, "CAT-MUL", thisdata);
 			}
 			cat("\n");
 		}
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index 8aaf974..bf0768f 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -23,11 +23,11 @@
 # in CAT_MULT_INDICATOR_FIELDS field of variable info file (either NO_NAN, ALL or a field ID)
 # 3) Checking derived variable has at least 10 cases in each group
 # 4) Calling binaryLogisticRegression function for this derived binary variable
-testCategoricalMultiple <- function(varName, varType, thisdata) {
+testCategoricalMultiple <- function(vl, varName, varType, thisdata) {
 	cat("CAT-MULTIPLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
-	pheno = reassignValue(pheno, varName)
+	pheno = reassignValue(vl, pheno, varName)
 
 	## get unique values from all columns of this variable
 	uniqueValues = unique(na.omit(pheno[,1]));
@@ -65,7 +65,7 @@ testCategoricalMultiple <- function(varName, varType, thisdata) {
 		newthisdata = cbind.data.frame(thisdata[,1:numPreceedingCols], varBinaryFactor)
 
 		## one of 3 ways to decide which examples are negative
-        	idxsToRemove = restrictSample(varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
+        	idxsToRemove = restrictSample(vl, varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
 
 		if (!is.null(idxsToRemove) & length(idxsToRemove) > 0) {
 			newthisdata = newthisdata[-idxsToRemove,]
@@ -80,7 +80,7 @@ testCategoricalMultiple <- function(varName, varType, thisdata) {
 			incrementCounter("catMul.10")
 	        }
 		else {
-			isExposure = getIsCatMultExposure(varName, variableVal)
+			isExposure = getIsCatMultExposure(vl, varName, variableVal)
 
 			incrementCounter("catMul.over10")
 		     	# binary - so logistic regression
@@ -92,16 +92,16 @@ testCategoricalMultiple <- function(varName, varType, thisdata) {
 # restricts sample based on value in CAT_MULT_INDICATOR_FIELDS column of variable info file,
 # either NO_NAN, ALL or a field ID
 # returns idx's that should be removed from the sample
-restrictSample <- function(varName,pheno,variableVal, userID) {
+restrictSample <- function(vl, varName,pheno,variableVal, userID) {
 
 	# get definition for sample for this variable either NO_NAN, ALL or a variable ID
 	varIndicator = vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS[which(vl$phenoInfo$FieldID==varName)]
 
-	return(restrictSample2(varName,pheno,varIndicator,variableVal, userID))
+	return(restrictSample2(vl, varName,pheno,varIndicator,variableVal, userID))
 }
 
 
-restrictSample2 <- function(varName,pheno, varIndicator,variableVal, userID) {
+restrictSample2 <- function(vl, varName,pheno, varIndicator,variableVal, userID) {
 	
 	if (varIndicator=="NO_NAN") { # remove NAs
 		## remove all people with no value for this variable
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index 109f88b..e801e44 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -23,16 +23,16 @@
 # 3) Replacing missing codes - we assume values < 0 are missing for categorical (single) variables
 # 4) Remove values with <10 cases
 # 5) Deterimine correct test to perform, either binary, ordered or unordered.
-testCategoricalSingle <- function(varName, varType, thisdata) {
+testCategoricalSingle <- function(vl, varName, varType, thisdata) {
 	cat("CAT-SINGLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-	isExposure = getIsExposure(varName)
+	isExposure = getIsExposure(vl, varName)
 
 	# assert variable has only one column
 	if (!is.null(dim(pheno))) stop("More than one column for categorical single")
 
-	pheno = reassignValue(pheno, varName)
+	pheno = reassignValue(vl, pheno, varName)
 
 	# get data code info - whether this data code is ordinal or not and any reordering
         dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
@@ -94,7 +94,7 @@ testCategoricalSingle <- function(varName, varType, thisdata) {
 			incrementCounter("catSin.case2")
 
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			testCategoricalUnordered(varName, varType, thisdatanew);
+			testCategoricalUnordered(vl, varName, varType, thisdatanew);
 			
 		}
 		else if (ordered == 1) {
@@ -105,7 +105,7 @@ testCategoricalSingle <- function(varName, varType, thisdata) {
 
 			## reorder variable values into increasing order
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			testCategoricalOrdered(varName, varType, thisdatanew, order)
+			testCategoricalOrdered(vl, varName, varType, thisdatanew, order)
 		
 		}
 		else if (ordered == -2) {
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index 78debf6..38f86bf 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -18,7 +18,7 @@
 
 
 # Performs ordered logistic regression test and saves results in ordered logistic results file
-testCategoricalOrdered <- function(varName, varType, thisdata, orderStr="") {
+testCategoricalOrdered <- function(vl, varName, varType, thisdata, orderStr="") {
 
 	
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -98,7 +98,7 @@ testCategoricalOrdered <- function(varName, varType, thisdata, orderStr="") {
 		cat("SUCCESS results-ordered-logistic");
 		incrementCounter("success.ordCat")
 
-		isExposure = getIsExposure(varName)
+		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
                         incrementCounter("success.exposure.ordCat")
                 }
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index eae9c73..6667e5c 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -19,7 +19,7 @@
 
 # Tests an unordered categorical phenotype with multinomial regression
 # and saves this result in the multinomial logistic results file
-testCategoricalUnordered <- function(varName, varType, thisdata) {
+testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	#geno = thisdata[,"geno"]
@@ -131,7 +131,7 @@ testCategoricalUnordered <- function(varName, varType, thisdata) {
 		cat("SUCCESS results-notordered-logistic ");
 		incrementCounter("success.unordCat")
 
-		isExposure = getIsExposure(varName)
+		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
                         incrementCounter("success.exposure.unordCat")
                 }
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 1532eba..2cc159a 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -18,28 +18,28 @@
 
 
 # Main function called for continuous fields
-testContinuous <- function(varName, varType, thisdata) {
+testContinuous <- function(vl, varName, varType, thisdata) {
 
 	cat("CONTINUOUS MAIN || ");	
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 
 	# reassign values
-        pheno = reassignValue(pheno, varName)
+        pheno = reassignValue(vl, pheno, varName)
 
 	thisdata[,phenoStartIdx:ncol(thisdata)] = pheno
 
-	testContinuous2(varName, varType, thisdata)
+	testContinuous2(vl, varName, varType, thisdata)
 	
 }
 
 # Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
 # This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
-testContinuous2 <- function(varName, varType, thisdata) {
+testContinuous2 <- function(vl, varName, varType, thisdata) {
 	cat("CONTINUOUS || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-	isExposure = getIsExposure(varName)
+	isExposure = getIsExposure(vl, varName)
 
 	if (!is.null(dim(pheno))) {
 		phenoAvg = rowMeans(pheno, na.rm=TRUE)
@@ -113,7 +113,7 @@ testContinuous2 <- function(varName, varType, thisdata) {
 
 				incrementCounter("cont.ordcattry.ordcat")
 			        thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned);
-				testCategoricalOrdered(varName, varType, thisdatanew);
+				testCategoricalOrdered(vl, varName, varType, thisdatanew);
 			}
 			else {
 				# try to treat as binary because not enough examples in each bin
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 9f87511..0738de0 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -22,18 +22,18 @@
 # 2) Generate a single value if there are several values (arrays) by taking the mean
 # 3) Treating this field as continuous if at least 20 distinct values.
 # Otherwise treat as binary or ordered categorical if 2 or more than two values. 
-testInteger <- function(varName, varType, thisdata) {
+testInteger <- function(vl, varName, varType, thisdata) {
 	cat("INTEGER || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-	isExposure = getIsExposure(varName)
+	isExposure = getIsExposure(vl, varName)
 
 	if (!is.numeric(as.matrix(pheno))) {
 		cat("SKIP Integer type but not numeric",sep="");
 		return(NULL)
 	}
 
-	pheno = reassignValue(pheno, varName)
+	pheno = reassignValue(vl, pheno, varName)
 
 	## average if multiple columns
 	if (!is.null(dim(pheno))) {
@@ -52,7 +52,7 @@ testInteger <- function(varName, varType, thisdata) {
 	if (length(uniqVar)>=20) {
 		
 		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoAvg);
-		testContinuous2(varName, varType, thisdatanew)
+		testContinuous2(vl, varName, varType, thisdatanew)
 		incrementCounter("int.continuous")
 	}
 	else {
@@ -82,7 +82,7 @@ testInteger <- function(varName, varType, thisdata) {
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
 
 			# treat as ordinal categorical
-			testCategoricalOrdered(varName, varType, thisdatanew);
+			testCategoricalOrdered(vl, varName, varType, thisdatanew);
 		}
 	}
 }
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 85fc6a9..6435475 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -41,7 +41,7 @@ if (opt$save==FALSE) {
 vl=initVariableLists();
 
 ## load data
-d <- loadData()
+d <- loadData(vl)
 data=d$datax
 confounders=d$confounders
 indicatorFields=d$inds
@@ -105,7 +105,7 @@ for (var in phenoVars) {
 		if (first==FALSE) {
 
 			thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-			testAssociations(currentVar, currentVarShort, thisdata)
+			testAssociations(vl, currentVar, currentVarShort, thisdata)
 		}
 		
 		first=FALSE;
@@ -124,7 +124,7 @@ for (var in phenoVars) {
 if (phenoIdx>0){
 	# last variable so test association
 	thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-	testAssociations(currentVar, currentVarShort, thisdata)
+	testAssociations(vl, currentVar, currentVarShort, thisdata)
 }
 
 sink()

From 8da1087f0a595e05cde1d415874d6212a7c0908e Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 12:37:51 -0400
Subject: [PATCH 04/27] Fix library imports

---
 PHESANT/DESCRIPTION                  | 3 ++-
 PHESANT/NAMESPACE                    | 4 ++++
 PHESANT/R/loadData.r                 | 2 --
 PHESANT/R/testCategoricalOrdered.r   | 5 +----
 PHESANT/R/testCategoricalUnordered.r | 2 --
 5 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/PHESANT/DESCRIPTION b/PHESANT/DESCRIPTION
index 62068ae..5677e5f 100644
--- a/PHESANT/DESCRIPTION
+++ b/PHESANT/DESCRIPTION
@@ -5,5 +5,6 @@ Version: 0.1
 Date: 2019-03-14
 Author: Quanli Wang
 Maintainer: Quanli Wang <quanliwang20@hotmail.com>
-Description: This will replement the original PHESANT in a package and potentially with some optimizations
+Description: This will replement the original PHESANT in a package and potentially with some optimizations.
 License: GPL (>= 3)
+Depends: MASS, lmtest, data.table, nnet
diff --git a/PHESANT/NAMESPACE b/PHESANT/NAMESPACE
index 404475d..7331d97 100644
--- a/PHESANT/NAMESPACE
+++ b/PHESANT/NAMESPACE
@@ -1,4 +1,8 @@
 exportPattern("^[[:alpha:]]+")
+import(MASS)
+import(lmtest)
+import(data.table)
+import(nnet)
 importFrom("stats", "coef", "complete.cases", "confint", "end", "glm",
              "lm", "model.matrix", "na.omit", "pnorm", "qnorm",
              "quantile", "relevel")
diff --git a/PHESANT/R/loadData.r b/PHESANT/R/loadData.r
index 450b103..39f24a2 100644
--- a/PHESANT/R/loadData.r
+++ b/PHESANT/R/loadData.r
@@ -22,8 +22,6 @@
 # creates confounder data frame
 # returns an object holding these two data frames
 loadData <- function(vl) {
-  	library(data.table)
-
   
   	##### validating data
   	## check phenotype file headers
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index 38f86bf..ba025bd 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -61,10 +61,7 @@ testCategoricalOrdered <- function(vl, varName, varType, thisdata, orderStr="")
 		sink(modelFitLogFile, append=TRUE)
 		print("--------------")
 		print(varName)
-
-		require(MASS)
-		require(lmtest)
-
+		
 		### BEGIN TRYCATCH
 		tryCatch({
 		confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index 6667e5c..c0bb914 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -60,7 +60,6 @@ testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
 		print("--------------")
                 print(varName)
 
-		require(nnet)
 		if (opt$standardise==TRUE) {
                 	geno = scale(thisdata[,"geno"])
                 }
@@ -80,7 +79,6 @@ testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
 		fitB <- multinom(phenoFactor ~ ., data=confounders, maxit=1000)
 
 		## compare model to baseline model
-		require(lmtest)
 		lres = lrtest(fit, fitB)
 		modelP = lres[2,"Pr(>Chisq)"];
 		

From 0315f17f492094ede96cb797b8004381fd2cef58 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 13:45:16 -0400
Subject: [PATCH 05/27] Code refactor: remove global variables counters for
 now: might need a better solution later on

---
 PHESANT/R/binaryLogisticRegression.r | 19 ++++++------
 PHESANT/R/counters.r                 | 16 ++++++-----
 PHESANT/R/testAssociations.r         | 42 +++++++++++++--------------
 PHESANT/R/testCatMultiple.r          | 11 +++----
 PHESANT/R/testCatSingle.r            | 20 ++++++-------
 PHESANT/R/testCategoricalOrdered.r   | 15 +++++-----
 PHESANT/R/testCategoricalUnordered.r | 17 +++++------
 PHESANT/R/testContinuous.r           | 43 ++++++++++++++--------------
 PHESANT/R/testInteger.r              | 17 +++++------
 PHESANT/phenomeScan.r                |  8 +++---
 10 files changed, 108 insertions(+), 100 deletions(-)

diff --git a/PHESANT/R/binaryLogisticRegression.r b/PHESANT/R/binaryLogisticRegression.r
index 6fbcefd..0ff9ecc 100644
--- a/PHESANT/R/binaryLogisticRegression.r
+++ b/PHESANT/R/binaryLogisticRegression.r
@@ -21,7 +21,7 @@
 #
 # Performs binary logistic regression on the phenotype stored in thisdata 
 # and stores result in 'results-logistic-binary' results file.
-binaryLogisticRegression <- function(varName, varType, thisdata, isExposure) {
+binaryLogisticRegression <- function(varName, counters, varType, thisdata, isExposure) {
 
         phenoFactor = factor(thisdata[,phenoStartIdx])
 
@@ -31,7 +31,7 @@ binaryLogisticRegression <- function(varName, varType, thisdata, isExposure) {
 	if (length(facLevels)!=2) {
 		#stop(paste("Not 2 levels: ", length(facLevels), " || ", sep=""))
 		cat("BINARY-NOT2LEVELS- (", length(facLevels), ") || ",sep="");
-                incrementCounter("binary.nottwolevels")
+                counters <- incrementCounter(counters, "binary.nottwolevels")
 	}
 
 	idxTrue = length(which(phenoFactor==facLevels[1]))
@@ -40,11 +40,11 @@ binaryLogisticRegression <- function(varName, varType, thisdata, isExposure) {
   
         if (idxTrue<10 || idxFalse<10) {
 		cat("BINARY-LOGISTIC-SKIP-10 (", idxTrue, "/", idxFalse, ") || ", sep="")
-		incrementCounter("binary.10")
+          counters <- incrementCounter(counters, "binary.10")
 	}
 	else if (numNotNA<500) {	
 		cat("BINARY-LOGISTIC-SKIP-500 (", numNotNA, ") || ",sep="");
-		incrementCounter("binary.500")
+	  counters <- incrementCounter(counters, "binary.500")
        	}
 	else {
 
@@ -54,7 +54,7 @@ binaryLogisticRegression <- function(varName, varType, thisdata, isExposure) {
 			# add pheno to dataframe
 			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'bin')
 			cat("SUCCESS results-logistic-binary ");			
-			incrementCounter("success.binary")
+			counters <- incrementCounter(counters, "success.binary")
 		}
 		else {
 
@@ -102,10 +102,10 @@ binaryLogisticRegression <- function(varName, varType, thisdata, isExposure) {
                 write(paste(varName,varType,paste(idxTrue,"/",idxFalse,"(",numNotNA,")",sep=""), beta,lower,upper,pvalue, sep=","), file=paste(opt$resDir,"results-logistic-binary-",opt$varTypeArg,".txt",sep=""), append="TRUE");
                 cat("SUCCESS results-logistic-binary ");
                 
-		incrementCounter("success.binary")
+                counters <- incrementCounter(counters, "success.binary")
 
 		if (isExposure==TRUE) {
-	            	incrementCounter("success.exposure.binary")
+		  counters <-incrementCounter(counters,"success.exposure.binary")
 	        }	
 
 		## END TRYCATCH
@@ -113,9 +113,10 @@ binaryLogisticRegression <- function(varName, varType, thisdata, isExposure) {
                         sink()
                         sink(resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        incrementCounter("binary.error")
+                        counters <- incrementCounter(counters, "binary.error")
                 })
 		}	
-        }
+	}
+	return(counters)
 }
 
diff --git a/PHESANT/R/counters.r b/PHESANT/R/counters.r
index 15fdc47..a6e22a7 100644
--- a/PHESANT/R/counters.r
+++ b/PHESANT/R/counters.r
@@ -17,31 +17,33 @@
 # DEALINGS IN THE SOFTWARE.
 
 # adds given value to counter, that are used to count how many variables go down each route in the data flow
-addToCounts <- function(countName, num) {
+addToCounts <- function(counters, countName, num) {
     idx = which(counters$name==countName)
     if (length(idx)==0) {
         # counter does not exist so add with countValue 1
-        counters <<- rbind(counters, data.frame(name=countName, countValue=num))
+        counters <- rbind(counters, data.frame(name=countName, countValue=num))
     } else {
         # add to counter that already exists
-        counters$countValue[idx] <<- counters$countValue[idx]+num
+        counters$countValue[idx] <- counters$countValue[idx]+num
     }
+    return(counters)
 }
 
 # increments counters used to count how many variables go down each route in the data flow
-incrementCounter <- function(countName) {
+incrementCounter <- function(counters, countName) {
     idx = which(counters$name==countName)
     if (length(idx)==0) {
         # counter does not exist so add with countValue 1
-        counters <<- rbind(counters, data.frame(name=countName, countValue=1))
+        counters <- rbind(counters, data.frame(name=countName, countValue=1))
     } else {
         # increment counter that already exists
-        counters$countValue[idx] <<- counters$countValue[idx]+1
+        counters$countValue[idx] <- counters$countValue[idx]+1
     }
+    return(counters)
 }
 
 # Saves the counters stored in count variables, to a file in results directory
-saveCounts <- function() {
+saveCounts <- function(counters) {
     countFile = paste(opt$resDir,"variable-flow-counts-",opt$varTypeArg,".txt",sep="")
     # sort on counter name
     sortIdx = order(as.character(counters[,"name"]))
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index 8a2aba1..1ecb5eb 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -18,7 +18,7 @@
 
 
 # Tests the association of a field, determined by its field type
-testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
+testAssociations <- function(vl, counters, currentVar, currentVarShort, thisdata) {
 
 	## call file for variable type
 
@@ -30,7 +30,7 @@ testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
 		# check if variable info is found for this field
 		if (length(idx)==0) {
 			cat(paste(currentVar, " || Variable could not be found in pheno info file. \n", sep=""))			
-			incrementCounter("notinphenofile")
+		  counters <- incrementCounter(counters, "notinphenofile")
 		}
 		else {
 
@@ -47,15 +47,15 @@ testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
 			
 			if (excluded!="") {
 				cat(paste("Excluded integer: ", excluded, " || ", sep=""))
-				incrementCounter("excluded.int")
+			  counters <- incrementCounter(counters, "excluded.int")
 			}
 			else {
-				incrementCounter("start.int")
+			  counters <- incrementCounter(counters, "start.int")
 				if (isExposure==TRUE) {
-					incrementCounter("start.exposure.int")
+				  counters <- incrementCounter(counters, "start.exposure.int")
 				}
 
-			    	testInteger(vl, currentVarShort, "INTEGER", thisdata);
+			    	counters <- testInteger(vl, couinters, currentVarShort, "INTEGER", thisdata);
 			}
 			cat("\n");
 	    	}
@@ -66,14 +66,14 @@ testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
 
 		    	if (excluded!="") {
 				cat(paste("Excluded continuous: ", excluded, " || ", sep=""))
-				incrementCounter("excluded.cont")
+		    	  counters <- incrementCounter(counters, "excluded.cont")
 		    	}
 			else {
-				incrementCounter("start.cont")
+			  counters <- incrementCounter(counters, "start.cont")
 				if (isExposure==TRUE) {
-                                        incrementCounter("start.exposure.cont")
+				  counters<- incrementCounter(counters, "start.exposure.cont")
                                 }
-				testContinuous(vl, currentVarShort, "CONTINUOUS", thisdata);
+				counters <- testContinuous(vl, counters, currentVarShort, "CONTINUOUS", thisdata);
 	        	}
 	        	cat("\n");
 		}
@@ -84,14 +84,14 @@ testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
 	
 	    		if (excluded!="") {
 				cat(paste("Excluded cat-single: ", excluded, " || ", sep=""))
-				incrementCounter("excluded.catSin")
+	    		  counters <- incrementCounter(counters, "excluded.catSin")
 			}
 			else {
-				incrementCounter("start.catSin")
+			  counters <- incrementCounter(counters, "start.catSin")
 				if (isExposure==TRUE) {
-                                        incrementCounter("start.exposure.catSin")
+				  counters <-incrementCounter(counters, "start.exposure.catSin")
                                 }
-			    	testCategoricalSingle(vl, currentVarShort, "CAT-SIN", thisdata);
+			  counters <- testCategoricalSingle(vl, counters, currentVarShort, "CAT-SIN", thisdata);
 			}
 			cat("\n");
 	  	}
@@ -102,27 +102,27 @@ testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
 
 			if (excluded!="") {
 				cat(paste("Excluded cat-multiple: ", excluded, " || ", sep=""))
-				incrementCounter("excluded.catMul")
+			  counters <- incrementCounter(counters, "excluded.catMul")
 			}
 			else {
 
 				if (catSinToMult!="") {
 					cat("cat-single to cat-multiple || ", sep="")
-					incrementCounter("catSinToCatMul")
+				  counters <- incrementCounter(counters, "catSinToCatMul")
 				}
 
-				incrementCounter("start.catMul")	
+			  counters <- incrementCounter(counters, "start.catMul")	
 				if (isExposure==TRUE) {
-                                        incrementCounter("start.exposure.catMul")
+				  counters <- incrementCounter(counters, "start.exposure.catMul")
                                 }
 				else {
 					# get number of cat mult values denoting trait of interest
 	                                numVals = getNumValuesCatMultExposure(vl, currentVarShort)
 					if (numVals>0) {
-		                                addToCounts("start.exposure.catMulvalues", numVals)
+		                                counters <- addToCounts(addToCounts, "start.exposure.catMulvalues", numVals)
 					}
 				}
-		        	testCategoricalMultiple(vl, currentVarShort, "CAT-MUL", thisdata);
+		        	counters <- testCategoricalMultiple(vl, counters, currentVarShort, "CAT-MUL", thisdata);
 			}
 			cat("\n");
 		}
@@ -134,7 +134,7 @@ testAssociations <- function(vl, currentVar, currentVarShort, thisdata) {
 	}, error = function(e) {
 		print(paste("ERROR:", currentVar,e))
 	})
-
+  return(counters)
 }
 
 
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index bf0768f..4df64fe 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -23,7 +23,7 @@
 # in CAT_MULT_INDICATOR_FIELDS field of variable info file (either NO_NAN, ALL or a field ID)
 # 3) Checking derived variable has at least 10 cases in each group
 # 4) Calling binaryLogisticRegression function for this derived binary variable
-testCategoricalMultiple <- function(vl, varName, varType, thisdata) {
+testCategoricalMultiple <- function(vl, counters, varName, varType, thisdata) {
 	cat("CAT-MULTIPLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
@@ -54,7 +54,7 @@ testCategoricalMultiple <- function(vl, varName, varType, thisdata) {
 		idxsTrue = idxForVar[,"row"]
 
 		cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="");
-		incrementCounter("catMul.binary")
+		counters <- incrementCounter(counters, "catMul.binary")
 		
 		# make zero vector and set 1s for those with this variable value
 		varBinary = rep.int(0,numRows);
@@ -77,16 +77,17 @@ testCategoricalMultiple <- function(vl, varName, varType, thisdata) {
                 
 	        if (idxTrue<10 || idxFalse<10) {
 	                cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="");
-			incrementCounter("catMul.10")
+			counters <- incrementCounter(counters, "catMul.10")
 	        }
 		else {
 			isExposure = getIsCatMultExposure(vl, varName, variableVal)
 
-			incrementCounter("catMul.over10")
+			counters <- incrementCounter(counters, "catMul.over10")
 		     	# binary - so logistic regression
-			binaryLogisticRegression(paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure)
+			counters <- binaryLogisticRegression(paste(varName, variableVal,sep="#"), counters, varType, newthisdata, isExposure)
 		}
 	}
+	return(counters)
 }
 
 # restricts sample based on value in CAT_MULT_INDICATOR_FIELDS column of variable info file,
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index e801e44..664622d 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -23,7 +23,7 @@
 # 3) Replacing missing codes - we assume values < 0 are missing for categorical (single) variables
 # 4) Remove values with <10 cases
 # 5) Deterimine correct test to perform, either binary, ordered or unordered.
-testCategoricalSingle <- function(vl, varName, varType, thisdata) {
+testCategoricalSingle <- function(vl, counters, varName, varType, thisdata) {
 	cat("CAT-SINGLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -68,17 +68,17 @@ testCategoricalSingle <- function(vl, varName, varType, thisdata) {
 
 	if (length(uniqVar)<=1) {
 		cat("SKIP (only one value) || ");
-		incrementCounter("catSin.onevalue")
+		counters <- incrementCounter(counters, "catSin.onevalue")
 	}
 	else if (length(uniqVar)==2) {		
 		cat("CAT-SINGLE-BINARY || ");
-		incrementCounter("catSin.case3")
+	  counters <- incrementCounter(counters, "catSin.case3")
 		# binary so logistic regression
 
 		phenoFactor = factor(pheno)
 		# binary - so logistic regression
 		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
-	        binaryLogisticRegression(varName, varType, thisdatanew, isExposure)	
+		counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure)	
 	}
 	else {
 		# > 2 categories
@@ -91,33 +91,33 @@ testCategoricalSingle <- function(vl, varName, varType, thisdata) {
 		if (ordered == 0) {
 			
 			cat("CAT-SINGLE-UNORDERED || ")
-			incrementCounter("catSin.case2")
+		  counters <- incrementCounter(counters, "catSin.case2")
 
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			testCategoricalUnordered(vl, varName, varType, thisdatanew);
+			counters <- testCategoricalUnordered(vl, counters, varName, varType, thisdatanew);
 			
 		}
 		else if (ordered == 1) {
 		
 			## ordered
 			cat("ordered || ");
-			incrementCounter("catSin.case1")
+		  counters <- incrementCounter(counters, "catSin.case1")
 
 			## reorder variable values into increasing order
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			testCategoricalOrdered(vl, varName, varType, thisdatanew, order)
+			counters <- testCategoricalOrdered(vl, counters, varName, varType, thisdatanew, order)
 		
 		}
 		else if (ordered == -2) {
 			cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
-			incrementCounter("catSin.binaryorexcluded")
+		  counters <- incrementCounter(counters, "catSin.binaryorexcluded")
 		}
 		else {
 			print(paste("ERROR", varName, varType, dataCode));
 		}
 		}
 	}
-
+  return(counters)
 }
 
 ## values are reordered and assigned values 1:N for N categories
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index ba025bd..efc8386 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -18,14 +18,14 @@
 
 
 # Performs ordered logistic regression test and saves results in ordered logistic results file
-testCategoricalOrdered <- function(vl, varName, varType, thisdata, orderStr="") {
+testCategoricalOrdered <- function(vl, counters, varName, varType, thisdata, orderStr="") {
 
 	
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	geno = thisdata[,"geno"]
 
 	cat("CAT-ORD || ");
-	incrementCounter("ordCat")
+	counters <- incrementCounter(counters, "ordCat")
 
 	doCatOrdAssertions(pheno)
 
@@ -39,7 +39,7 @@ testCategoricalOrdered <- function(vl, varName, varType, thisdata, orderStr="")
 	numNotNA = length(which(!is.na(pheno)))
 	if (numNotNA<500) {
 		cat("CATORD-SKIP-500 (", numNotNA, ") || ",sep="");
-		incrementCounter("ordCat.500")
+	  counters <- incrementCounter(counters, "ordCat.500")
 	}
 	else {
 		# test this cat ordered variable with ordered logistic regression	
@@ -52,7 +52,7 @@ testCategoricalOrdered <- function(vl, varName, varType, thisdata, orderStr="")
 			# add pheno to dataframe
 			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
 			cat("SUCCESS results-ordered-logistic");
-			incrementCounter("success.ordCat")
+			counters <- incrementCounter(counters, "success.ordCat")
                 }
                 else {
 
@@ -93,11 +93,11 @@ testCategoricalOrdered <- function(vl, varName, varType, thisdata, orderStr="")
 
 		write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE");
 		cat("SUCCESS results-ordered-logistic");
-		incrementCounter("success.ordCat")
+		counters <- incrementCounter(counters, "success.ordCat")
 
 		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
-                        incrementCounter("success.exposure.ordCat")
+                  counters <- incrementCounter(counters, "success.exposure.ordCat")
                 }
 
 		### END TRYCATCH
@@ -105,11 +105,12 @@ testCategoricalOrdered <- function(vl, varName, varType, thisdata, orderStr="")
 			sink()
                         sink(resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        incrementCounter("ordCat.error")
+                        counters <- incrementCounter(counters, "ordCat.error")
                 })
 		}
 
 	}
+	return(counters)
 }
 
 # check that the phenotype is valid - that there are more than two categories
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index c0bb914..561ffa3 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -19,7 +19,7 @@
 
 # Tests an unordered categorical phenotype with multinomial regression
 # and saves this result in the multinomial logistic results file
-testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
+testCategoricalUnordered <- function(vl, counters, varName, varType, thisdata) {
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	#geno = thisdata[,"geno"]
@@ -27,7 +27,7 @@ testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
 	numNotNA = length(which(!is.na(pheno)))
 	if (numNotNA<500) {
 		cat("CATUNORD-SKIP-500 (", numNotNA, ") || ",sep="");
-		incrementCounter("unordCat.500")
+	  counters <- incrementCounter(counters, "unordCat.500")
 	}
 	else {
 
@@ -38,8 +38,8 @@ testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
 		numWeights=(numUnique-1)*((numPreceedingCols-2)+1+1)
 		if (numWeights>1000) {
 			cat("Too many weights in model: ", numWeights, " > 1000, (num outcomes values: ", numUnique, ") || SKIP ", sep="")
-			incrementCounter("unordCat.cats")
-			return(NULL)
+		  counters <-incrementCounter(counters, "unordCat.cats")
+			return(counters)
 		}
 
 		phenoFactor = chooseReferenceCategory(pheno);
@@ -48,7 +48,7 @@ testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
 			# add pheno to dataframe
 			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catUnord')
 			cat("SUCCESS results-notordered-logistic ");
-	                incrementCounter("success.unordCat")
+			counters <- incrementCounter(counters, "success.unordCat")
 		}
                 else {
 		
@@ -127,11 +127,11 @@ testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
 		}
 
 		cat("SUCCESS results-notordered-logistic ");
-		incrementCounter("success.unordCat")
+		counters <- incrementCounter(counters, "success.unordCat")
 
 		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
-                        incrementCounter("success.exposure.unordCat")
+                  counters <- incrementCounter(counters, "success.exposure.unordCat")
                 }
 		
 		## END TRYCATCH
@@ -139,10 +139,11 @@ testCategoricalUnordered <- function(vl, varName, varType, thisdata) {
                         sink()
                         sink(resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                	incrementCounter("unordCat.error")
+                        counters <- incrementCounter(counters, "unordCat.error")
 		})
 		}
 	}
+	return(counters)
 }
 
 # find reference category - category with most number of examples
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 2cc159a..865c021 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -18,7 +18,7 @@
 
 
 # Main function called for continuous fields
-testContinuous <- function(vl, varName, varType, thisdata) {
+testContinuous <- function(vl, counters, varName, varType, thisdata) {
 
 	cat("CONTINUOUS MAIN || ");	
 
@@ -29,13 +29,13 @@ testContinuous <- function(vl, varName, varType, thisdata) {
 
 	thisdata[,phenoStartIdx:ncol(thisdata)] = pheno
 
-	testContinuous2(vl, varName, varType, thisdata)
+	counters <- testContinuous2(vl, counters, varName, varType, thisdata)
 	
 }
 
 # Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
 # This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
-testContinuous2 <- function(vl, varName, varType, thisdata) {
+testContinuous2 <- function(vl, counters, varName, varType, thisdata) {
 	cat("CONTINUOUS || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -86,19 +86,19 @@ testContinuous2 <- function(vl, varName, varType, thisdata) {
 
         		if (numLevels<=1) {
        				cat("SKIP (number of levels: ",numLevels,")",sep="")
-				incrementCounter("cont.onevalue")
+        		  counters <- incrementCounter(counters,"cont.onevalue")
         		}
         		else if (numLevels==2) {
 	        		# binary
-				incrementCounter("cont.binary")
+        		  counters <- incrementCounter(counters, "cont.binary")
         			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);			
-        			binaryLogisticRegression(varName, varType, thisdatanew, isExposure);
+        			counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure);
         		}
         	}
 		else {
 			## try to treat as ordered categorical
 
-			incrementCounter("cont.ordcattry")
+		  counters <- incrementCounter(counters, "cont.ordcattry")
 			## equal sized bins
 			phenoBinned = equalSizedBins(phenoAvg);
 
@@ -111,9 +111,9 @@ testContinuous2 <- function(vl, varName, varType, thisdata) {
 
 				# successful binning. >=10 examples in each of the 3 bins
 
-				incrementCounter("cont.ordcattry.ordcat")
+			  counters <- incrementCounter(counters, "cont.ordcattry.ordcat")
 			        thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned);
-				testCategoricalOrdered(vl, varName, varType, thisdatanew);
+				counters <- testCategoricalOrdered(vl, counters, varName, varType, thisdatanew);
 			}
 			else {
 				# try to treat as binary because not enough examples in each bin
@@ -122,36 +122,36 @@ testContinuous2 <- function(vl, varName, varType, thisdata) {
 					## skip - not possible to create binary variable because first and third bins are too small
 					## ie. could merge bin1 with bin 2 but then bin3 still too small etc
 					cat("SKIP 2 bins are too small || ")
-	                                incrementCounter("cont.ordcattry.smallbins")
+				  counters <-incrementCounter(counters, "cont.ordcattry.smallbins")
 				} 
 				else if ((bin0Num<10 | bin1Num<10) & (bin0Num+bin1Num)>=10) {
 
 					# combine first and second bin to create binary variable
-					incrementCounter("cont.ordcattry.binsbinary")
+				  counters <- incrementCounter(counters, "cont.ordcattry.binsbinary")
 					cat("Combine first two bins and treat as binary || ")
 					phenoBinned[which(phenoBinned==0)] = 1	
 
 					# test binary
 					thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
-	                                binaryLogisticRegression(varName, varType, thisdatanew, isExposure);
+	                                counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure);
 				}
 				else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
 
 					# combine second and last bin to create binary variable
-					incrementCounter("cont.ordcattry.binsbinary")
+				  counters <- incrementCounter(counters, "cont.ordcattry.binsbinary")
 					cat("Combine last two bins and treat as binary || ")
                                         phenoBinned[which(phenoBinned==2)] = 1
 					
                                         # test binary
                                         thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
-                                        binaryLogisticRegression(varName, varType, thisdatanew, isExposure)
+                                        counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure)
                                 }
 				
 
 				else {
 					## skip - not possible to create binary variable because combining bins would still be too small
 					cat("SKIP 2 bins are too small(2) || ")
-                                        incrementCounter("cont.ordcattry.smallbins2")
+				  counters <-incrementCounter(counters, "cont.ordcattry.smallbins2")
 				}
 
 			}
@@ -160,13 +160,13 @@ testContinuous2 <- function(vl, varName, varType, thisdata) {
 	}
 	else {		
 		cat("IRNT || ");
-		incrementCounter("cont.main")
+	  counters <- incrementCounter(counters, "cont.main")
 
 		# check there are at least 500 examples
 		numNotNA = length(which(!is.na(phenoAvg)))
 		if (numNotNA<500) {
 			cat("CONTINUOUS-SKIP-500 (", numNotNA, ") || ",sep="");
-			incrementCounter("cont.main.500")
+		  counters <- incrementCounter(counters, "cont.main.500")
 		}
 		else {
 			## inverse rank normal transformation
@@ -176,7 +176,7 @@ testContinuous2 <- function(vl, varName, varType, thisdata) {
 				# add pheno to dataframe
 				storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
 				cat("SUCCESS results-linear");
-	                        incrementCounter("success.continuous")
+				counters <- incrementCounter(counters, "success.continuous")
                         }
                         else {
 		
@@ -223,9 +223,9 @@ testContinuous2 <- function(vl, varName, varType, thisdata) {
 			write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt", sep=""), append="TRUE");
 			cat("SUCCESS results-linear");
 
-			incrementCounter("success.continuous")
+			counters <- incrementCounter(counters, "success.continuous")
                 	if (isExposure == TRUE) {
-                	        incrementCounter("success.exposure.continuous")
+                	  counters <- incrementCounter(counters, "success.exposure.continuous")
                 	}
 
 			## END TRYCATCH
@@ -233,11 +233,12 @@ testContinuous2 <- function(vl, varName, varType, thisdata) {
                 	        sink()
                 	        sink(resLogFile, append=TRUE)
                 	        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                	        incrementCounter("continuous.error")
+                	        counters <-incrementCounter(counters, "continuous.error")
                 	})
 			}			
 		}
 	}
+	return(counters)
 }
 
 irnt <- function(pheno) {
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 0738de0..02791bc 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -22,7 +22,7 @@
 # 2) Generate a single value if there are several values (arrays) by taking the mean
 # 3) Treating this field as continuous if at least 20 distinct values.
 # Otherwise treat as binary or ordered categorical if 2 or more than two values. 
-testInteger <- function(vl, varName, varType, thisdata) {
+testInteger <- function(vl, counters, varName, varType, thisdata) {
 	cat("INTEGER || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -52,8 +52,8 @@ testInteger <- function(vl, varName, varType, thisdata) {
 	if (length(uniqVar)>=20) {
 		
 		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoAvg);
-		testContinuous2(vl, varName, varType, thisdatanew)
-		incrementCounter("int.continuous")
+		counters <- testContinuous2(vl, counters, varName, varType, thisdatanew)
+		counters <- incrementCounter(counters, "int.continuous")
 	}
 	else {
 		
@@ -65,24 +65,25 @@ testInteger <- function(vl, varName, varType, thisdata) {
 		numLevels = length(levels(phenoFactor))
 		if (numLevels<=1) {
 			cat("SKIP (number of levels: ",numLevels,")",sep="");
-			incrementCounter("int.onevalue")
+		  counters <- incrementCounter(counters, "int.onevalue")
 		}
 		else if (numLevels==2) {
-			incrementCounter("int.binary")
+		  counters <- incrementCounter(counters, "int.binary")
 
 			# binary
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
-			binaryLogisticRegression(varName, varType, thisdatanew, isExposure);
+			counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure);
 		}
 		else {
-			incrementCounter("int.catord")
+		  counters <- incrementCounter(counters, "int.catord")
 			cat("3-20 values || ")
 
 			# we don't use equal sized bins just the original integers (that have >=10 examples) as categories
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
 
 			# treat as ordinal categorical
-			testCategoricalOrdered(vl, varName, varType, thisdatanew);
+			counters <- testCategoricalOrdered(vl, counters, varName, varType, thisdatanew);
 		}
 	}
+	return(counters)
 }
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 6435475..33c2ef5 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -34,7 +34,7 @@ if (length(args) == 0) {
 }
 
 ## load the files we write to and use
-counters=initCounters();
+counters <- initCounters()
 if (opt$save==FALSE) {
 	initResultsFiles();
 }
@@ -105,7 +105,7 @@ for (var in phenoVars) {
 		if (first==FALSE) {
 
 			thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-			testAssociations(vl, currentVar, currentVarShort, thisdata)
+			counters <- testAssociations(vl, counters, currentVar, currentVarShort, thisdata)
 		}
 		
 		first=FALSE;
@@ -124,13 +124,13 @@ for (var in phenoVars) {
 if (phenoIdx>0){
 	# last variable so test association
 	thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-	testAssociations(vl, currentVar, currentVarShort, thisdata)
+	counters <- testAssociations(vl, counters, currentVar, currentVarShort, thisdata)
 }
 
 sink()
 
 # save counters of each path in variable flow
-saveCounts()
+saveCounts(counters)
 
 if (opt$save == TRUE) {
 	write.table(derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);

From c40962fc1c78f86ceebc78b86ef9bc9f737807af Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 14:37:23 -0400
Subject: [PATCH 06/27] Code refactor: remove global variable opt

---
 PHESANT/R/binaryLogisticRegression.r |   2 +-
 PHESANT/R/counters.r                 |   2 +-
 PHESANT/R/initFunctions.r            |   4 +-
 PHESANT/R/loadConfounders.r          |   6 +-
 PHESANT/R/loadData.r                 |  14 +--
 PHESANT/R/loadIndicatorFields.r      |   2 +-
 PHESANT/R/loadPhenotypes.r           |   2 +-
 PHESANT/R/loadTraitOfInterest.r      |   2 +-
 PHESANT/R/processArgs.r              | 158 ++++++++++++---------------
 PHESANT/R/testAssociations.r         |  10 +-
 PHESANT/R/testCatMultiple.r          |   4 +-
 PHESANT/R/testCatSingle.r            |   8 +-
 PHESANT/R/testCategoricalOrdered.r   |   2 +-
 PHESANT/R/testCategoricalUnordered.r |   2 +-
 PHESANT/R/testContinuous.r           |  14 +--
 PHESANT/R/testInteger.r              |   8 +-
 PHESANT/R/validatePhenotypeInput.r   |   2 +-
 PHESANT/R/validateTraitInput.r       |   2 +-
 PHESANT/man/PHESANT-package.Rd       |  10 +-
 PHESANT/phenomeScan.r                |  20 ++--
 20 files changed, 128 insertions(+), 146 deletions(-)

diff --git a/PHESANT/R/binaryLogisticRegression.r b/PHESANT/R/binaryLogisticRegression.r
index 0ff9ecc..a5f9d4f 100644
--- a/PHESANT/R/binaryLogisticRegression.r
+++ b/PHESANT/R/binaryLogisticRegression.r
@@ -21,7 +21,7 @@
 #
 # Performs binary logistic regression on the phenotype stored in thisdata 
 # and stores result in 'results-logistic-binary' results file.
-binaryLogisticRegression <- function(varName, counters, varType, thisdata, isExposure) {
+binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata, isExposure) {
 
         phenoFactor = factor(thisdata[,phenoStartIdx])
 
diff --git a/PHESANT/R/counters.r b/PHESANT/R/counters.r
index a6e22a7..ef706b9 100644
--- a/PHESANT/R/counters.r
+++ b/PHESANT/R/counters.r
@@ -43,7 +43,7 @@ incrementCounter <- function(counters, countName) {
 }
 
 # Saves the counters stored in count variables, to a file in results directory
-saveCounts <- function(counters) {
+saveCounts <- function(opt, counters) {
     countFile = paste(opt$resDir,"variable-flow-counts-",opt$varTypeArg,".txt",sep="")
     # sort on counter name
     sortIdx = order(as.character(counters[,"name"]))
diff --git a/PHESANT/R/initFunctions.r b/PHESANT/R/initFunctions.r
index 38a04b5..7e9f996 100644
--- a/PHESANT/R/initFunctions.r
+++ b/PHESANT/R/initFunctions.r
@@ -18,7 +18,7 @@
 
 
 # create new results files and headers
-initResultsFiles <- function() {
+initResultsFiles <- function(opt) {
 
 	## only linear and continuous fields can create linear results
 	file.create(paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt",sep=""));
@@ -39,7 +39,7 @@ initResultsFiles <- function() {
 }
 
 # load the variable information and data code information files
-initVariableLists <- function() {
+initVariableLists <- function(opt) {
 
 	phenoInfo=read.table(opt$variablelistfile,sep="\t",header=1,comment.char="",quote="");
 
diff --git a/PHESANT/R/loadConfounders.r b/PHESANT/R/loadConfounders.r
index 72c5f28..b2cdda2 100644
--- a/PHESANT/R/loadConfounders.r
+++ b/PHESANT/R/loadConfounders.r
@@ -19,7 +19,7 @@
 ##
 ## loads confounder variables from phenotype file
 
-loadConfounders <- function(phenotypes) {
+loadConfounders <- function(opt, phenotypes) {
     if (opt$save==TRUE) { 
 	      # saving not running tests so we add a fake confounder
 	      numRows = nrow(phenotypes)
@@ -40,7 +40,7 @@ loadConfounders <- function(phenotypes) {
       		    colnames(confs)[1] <- "userID"
     	  } else {
     	        print("Loading confounders from phenotypes file ...")
-              confNames = getConfounderNames()
+              confNames = getConfounderNames(opt)
               #####
     	        ##### extract confounders from data file
               confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
@@ -90,7 +90,7 @@ loadConfounders <- function(phenotypes) {
     }
 }
 
-getConfounderNames <- function() {
+getConfounderNames <- function(opt) {
     #####
 	  ##### first get vector of confounder names
     # age and sex
diff --git a/PHESANT/R/loadData.r b/PHESANT/R/loadData.r
index 39f24a2..e73ca27 100644
--- a/PHESANT/R/loadData.r
+++ b/PHESANT/R/loadData.r
@@ -21,25 +21,25 @@
 # creates phenotype / trait of interest data frame
 # creates confounder data frame
 # returns an object holding these two data frames
-loadData <- function(vl) {
+loadData <- function(opt, vl) {
   
   	##### validating data
   	## check phenotype file headers
-  	validatePhenotypeInput()
+  	validatePhenotypeInput(opt)
   
   	## check trait of interest file headers
-  	validateTraitInput()
+  	validateTraitInput(opt)
   
   	##### load data
   	## load phenotype
   	print("Loading phenotypes ...")
-  	phenotype = loadPhenotypes()
+  	phenotype = loadPhenotypes(opt)
   
   	## load trait of interest
-    toi <- loadTraitOfInterest(phenotype)
+    toi <- loadTraitOfInterest(opt, phenotype)
   
     ## load confounders
-    conf <- loadConfounders(phenotype)
+    conf <- loadConfounders(opt, phenotype)
   
   	## add trait of interest to phenotype data frame and remove rows with no trait of interest
   	## merge in toi with phenotype - keep id list from phenotypes file
@@ -68,7 +68,7 @@ loadData <- function(vl) {
   
   	# some fields are fixed that have a field type as cat single but we want to treat them like cat mult
   	phenotype = fixOddFieldsToCatMul(vl, phenotype)
-  	indFields = loadIndicatorFields(vl, colnames(phenotype))
+  	indFields = loadIndicatorFields(opt, vl, colnames(phenotype))
   	d = list(datax=phenotype, confounders=conf, inds=indFields)
   	return(d)
 }
diff --git a/PHESANT/R/loadIndicatorFields.r b/PHESANT/R/loadIndicatorFields.r
index 8159ddb..1a815cd 100644
--- a/PHESANT/R/loadIndicatorFields.r
+++ b/PHESANT/R/loadIndicatorFields.r
@@ -18,7 +18,7 @@
 
 ##
 ## load data used for data code default value related field, and categorical multiple indicator field
-loadIndicatorFields <- function(vl, phenosToTest) {
+loadIndicatorFields <- function(opt, vl, phenosToTest) {
   	print("Loading indicator fields from phenotypes file ...")
   
   	# read pheno file column names
diff --git a/PHESANT/R/loadPhenotypes.r b/PHESANT/R/loadPhenotypes.r
index 4276242..0a55079 100644
--- a/PHESANT/R/loadPhenotypes.r
+++ b/PHESANT/R/loadPhenotypes.r
@@ -19,7 +19,7 @@
 ##
 ## load phenotypes from phenotype file
 
-loadPhenotypes <- function() {
+loadPhenotypes <- function(opt) {
     ## is not running 'all' then we determine the start and end idxs of phenotypes that we test, so that we can parallelise into multiple jobs
     if (opt$varTypeArg!="all") {
 		      # read pheno file column names
diff --git a/PHESANT/R/loadTraitOfInterest.r b/PHESANT/R/loadTraitOfInterest.r
index 52cf5b5..7f637f2 100644
--- a/PHESANT/R/loadTraitOfInterest.r
+++ b/PHESANT/R/loadTraitOfInterest.r
@@ -20,7 +20,7 @@
 ##
 ## load trait of interest, either from separate trait of interest file, or from phenotype file
 
-loadTraitOfInterest <- function(phenotypes) {
+loadTraitOfInterest <- function(opt, phenotypes) {
     if (opt$save==TRUE) { 
 	      # saving not running tests so we don't have a trait of interest
         # add pretend trait of interest so other code doesn't break
diff --git a/PHESANT/R/processArgs.r b/PHESANT/R/processArgs.r
index 2b9a438..0413c78 100644
--- a/PHESANT/R/processArgs.r
+++ b/PHESANT/R/processArgs.r
@@ -20,96 +20,80 @@
 # Parse the arguments input by the user
 # if argument 'test' is used then run test phenome scan
 processArgs <- function() {
-
-if (opt$test==TRUE) {
-
-	# set up the test phenome scan settings
-	datadir='../testWAS/data/';
-	opt$resDir <<- '../testWAS/results/';
-        opt$userId <<- 'userId';
-	opt$phenofile <<-  paste(datadir,'phenotypes.csv', sep="");
-        opt$variablelistfile <<- '../testWAS/variable-lists/outcome-info.tsv';
-        opt$datacodingfile <<- '../testWAS/variable-lists/data-coding-ordinal-info.txt';
-	opt$confidenceintervals <<- TRUE	
-
-	if(opt$save == FALSE) {
-		opt$traitofinterestfile <<- paste(datadir,'exposure.csv', sep="");
-		opt$traitofinterest <<- 'exposure';
-		opt$sensitivity <<- FALSE;
-		opt$genetic <<- TRUE;
-	}
-
-	processParts(opt$partIdx, opt$numParts);	
-}
-else {
-
-	## check arguments are supplied correctly
-
-	if (is.null(opt$phenofile)){
-	  print_help(opt_parser)
-	  stop("phenofile argument must be supplied", call.=FALSE)
-	}
-	else if (!file.exists(opt$phenofile)) {
-                stop(paste("phenotype data file phenofile=", opt$phenofile, " does not exist", sep=""), call.=FALSE)
-        }
-
-#	if (is.null(opt$traitofinterestfile)){
-#	  print_help(opt_parser)
-#	  stop("traitofinterestfile argument must be supplied", call.=FALSE)
-#	}
-	if (opt$save==FALSE && !is.null(opt$traitofinterestfile) && !file.exists(opt$traitofinterestfile)) {
-               stop(paste("trait of interest data file traitofinterestfile=", opt$traitofinterestfile, " does not exist", sep=""), call.=FALSE)
-        }
-
-	if (opt$save==FALSE && !is.null(opt$confounderfile) && !file.exists(opt$confounderfile)) {
-               stop(paste("confounder data file confounderfile=", opt$confounderfile, " does not exist", sep=""), call.=FALSE)
-        }
-
-	if (is.null(opt$variablelistfile)){
-	  print_help(opt_parser)
-	  stop("variablelistfile argument must be supplied", call.=FALSE)
-	}
-	else if (!file.exists(opt$variablelistfile)) {
-                stop(paste("variable listing file variablelistfile=", opt$variablelistfile, " does not exist", sep=""), call.=FALSE)
-        }
-
-	if (is.null(opt$datacodingfile)){
-	  print_help(opt_parser)
-	  stop("datacodingfile argument must be supplied", call.=FALSE)
-	}
-	else if (!file.exists(opt$datacodingfile)) {
-                stop(paste("data coding file datacodingfile=", opt$datacodingfile, " does not exist", sep=""), call.=FALSE)
-        }
-
-	if (opt$save==FALSE && is.null(opt$traitofinterest)){
-	  print_help(opt_parser)
-	  stop("traitofinterest argument must be supplied", call.=FALSE)
-	}
-
-	if (is.null(opt$resDir)){
-	  print_help(opt_parser)
-	  stop("resDir argument must be supplied", call.=FALSE)
-	}
-	else if (!file.exists(opt$resDir)) {
-		stop(paste("results directory resDir=", opt$resDir, " does not exist", sep=""), call.=FALSE)
-	}
-
-
-	processParts(opt$partIdx, opt$numParts);
-}
-	
-
-if (opt$save==TRUE) {
-	print("Saving phenotypes to file. Tests of association will not run!")
-}
-
+    if (opt$test==TRUE) {
+      	# set up the test phenome scan settings
+      	datadir='../testWAS/data/';
+      	opt$resDir <- '../testWAS/results/';
+              opt$userId <- 'userId';
+      	opt$phenofile <-  paste(datadir,'phenotypes.csv', sep="");
+              opt$variablelistfile <- '../testWAS/variable-lists/outcome-info.tsv';
+              opt$datacodingfile <- '../testWAS/variable-lists/data-coding-ordinal-info.txt';
+      	opt$confidenceintervals <- TRUE	
+      
+      	if(opt$save == FALSE) {
+      		opt$traitofinterestfile <- paste(datadir,'exposure.csv', sep="");
+      		opt$traitofinterest <- 'exposure';
+      		opt$sensitivity <- FALSE;
+      		opt$genetic <- TRUE;
+      	}
+    	  opt <- processParts(opt, opt$partIdx, opt$numParts)
+    }
+    else {
+    	## check arguments are supplied correctly
+    	if (is.null(opt$phenofile)){
+      	  print_help(opt_parser)
+      	  stop("phenofile argument must be supplied", call.=FALSE)
+    	} else if (!file.exists(opt$phenofile)) {
+          stop(paste("phenotype data file phenofile=", opt$phenofile, " does not exist", sep=""), call.=FALSE)
+      }
+    
+    	if (opt$save==FALSE && !is.null(opt$traitofinterestfile) && !file.exists(opt$traitofinterestfile)) {
+          stop(paste("trait of interest data file traitofinterestfile=", opt$traitofinterestfile, " does not exist", sep=""), call.=FALSE)
+      }
+    
+    	if (opt$save==FALSE && !is.null(opt$confounderfile) && !file.exists(opt$confounderfile)) {
+          stop(paste("confounder data file confounderfile=", opt$confounderfile, " does not exist", sep=""), call.=FALSE)
+      }
+    
+    	if (is.null(opt$variablelistfile)){
+      	  print_help(opt_parser)
+      	  stop("variablelistfile argument must be supplied", call.=FALSE)
+    	} else if (!file.exists(opt$variablelistfile)) {
+          stop(paste("variable listing file variablelistfile=", opt$variablelistfile, " does not exist", sep=""), call.=FALSE)
+      }
+    
+    	if (is.null(opt$datacodingfile)){
+      	  print_help(opt_parser)
+      	  stop("datacodingfile argument must be supplied", call.=FALSE)
+    	} else if (!file.exists(opt$datacodingfile)) {
+          stop(paste("data coding file datacodingfile=", opt$datacodingfile, " does not exist", sep=""), call.=FALSE)
+      }
+    
+    	if (opt$save==FALSE && is.null(opt$traitofinterest)){
+      	  print_help(opt_parser)
+      	  stop("traitofinterest argument must be supplied", call.=FALSE)
+    	}
+    
+    	if (is.null(opt$resDir)){
+      	  print_help(opt_parser)
+      	  stop("resDir argument must be supplied", call.=FALSE)
+    	}
+    	else if (!file.exists(opt$resDir)) {
+    		  stop(paste("results directory resDir=", opt$resDir, " does not exist", sep=""), call.=FALSE)
+    	}
+    	opt <-processParts(opt, opt$partIdx, opt$numParts);
+    }
+    if (opt$save==TRUE) {
+    	  print("Saving phenotypes to file. Tests of association will not run!")
+    }
+    return(opt)
 }
 
 # Parse the 'part' arguments and check they are valid
-processParts <- function(pIdx, nParts) {
+processParts <- function(opt, pIdx, nParts) {
 
 	if (is.null(pIdx) && is.null(nParts)) {
-                opt$varTypeArg <<- "all";
+                opt$varTypeArg <- "all";
 		print(paste("Running with all traits in phenotype file:", opt$phenofile));
         }
 	else if (is.null(pIdx)) {
@@ -125,8 +109,8 @@ processParts <- function(pIdx, nParts) {
                 stop("pIdx arguments must be between 1 and nParts inclusive", call.=FALSE)
 	}
 	else {
-                opt$varTypeArg <<- paste(pIdx, "-", nParts, sep="");
+                opt$varTypeArg <- paste(pIdx, "-", nParts, sep="");
         	print(paste("Running with part",pIdx,"of",nParts," in phenotype file:", opt$phenofile));
 	}
-
+  return(opt)
 }
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index 1ecb5eb..d15c64c 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -18,7 +18,7 @@
 
 
 # Tests the association of a field, determined by its field type
-testAssociations <- function(vl, counters, currentVar, currentVarShort, thisdata) {
+testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thisdata) {
 
 	## call file for variable type
 
@@ -55,7 +55,7 @@ testAssociations <- function(vl, counters, currentVar, currentVarShort, thisdata
 				  counters <- incrementCounter(counters, "start.exposure.int")
 				}
 
-			    	counters <- testInteger(vl, couinters, currentVarShort, "INTEGER", thisdata);
+			    	counters <- testInteger(opt, vl, couinters, currentVarShort, "INTEGER", thisdata);
 			}
 			cat("\n");
 	    	}
@@ -73,7 +73,7 @@ testAssociations <- function(vl, counters, currentVar, currentVarShort, thisdata
 				if (isExposure==TRUE) {
 				  counters<- incrementCounter(counters, "start.exposure.cont")
                                 }
-				counters <- testContinuous(vl, counters, currentVarShort, "CONTINUOUS", thisdata);
+				counters <- testContinuous(opt, vl, counters, currentVarShort, "CONTINUOUS", thisdata);
 	        	}
 	        	cat("\n");
 		}
@@ -91,7 +91,7 @@ testAssociations <- function(vl, counters, currentVar, currentVarShort, thisdata
 				if (isExposure==TRUE) {
 				  counters <-incrementCounter(counters, "start.exposure.catSin")
                                 }
-			  counters <- testCategoricalSingle(vl, counters, currentVarShort, "CAT-SIN", thisdata);
+			  counters <- testCategoricalSingle(opt, vl, counters, currentVarShort, "CAT-SIN", thisdata);
 			}
 			cat("\n");
 	  	}
@@ -122,7 +122,7 @@ testAssociations <- function(vl, counters, currentVar, currentVarShort, thisdata
 		                                counters <- addToCounts(addToCounts, "start.exposure.catMulvalues", numVals)
 					}
 				}
-		        	counters <- testCategoricalMultiple(vl, counters, currentVarShort, "CAT-MUL", thisdata);
+		        	counters <- testCategoricalMultiple(opt, vl, counters, currentVarShort, "CAT-MUL", thisdata);
 			}
 			cat("\n");
 		}
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index 4df64fe..8719511 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -23,7 +23,7 @@
 # in CAT_MULT_INDICATOR_FIELDS field of variable info file (either NO_NAN, ALL or a field ID)
 # 3) Checking derived variable has at least 10 cases in each group
 # 4) Calling binaryLogisticRegression function for this derived binary variable
-testCategoricalMultiple <- function(vl, counters, varName, varType, thisdata) {
+testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdata) {
 	cat("CAT-MULTIPLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
@@ -84,7 +84,7 @@ testCategoricalMultiple <- function(vl, counters, varName, varType, thisdata) {
 
 			counters <- incrementCounter(counters, "catMul.over10")
 		     	# binary - so logistic regression
-			counters <- binaryLogisticRegression(paste(varName, variableVal,sep="#"), counters, varType, newthisdata, isExposure)
+			counters <- binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), counters, varType, newthisdata, isExposure)
 		}
 	}
 	return(counters)
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index 664622d..1c595f9 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -23,7 +23,7 @@
 # 3) Replacing missing codes - we assume values < 0 are missing for categorical (single) variables
 # 4) Remove values with <10 cases
 # 5) Deterimine correct test to perform, either binary, ordered or unordered.
-testCategoricalSingle <- function(vl, counters, varName, varType, thisdata) {
+testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata) {
 	cat("CAT-SINGLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -78,7 +78,7 @@ testCategoricalSingle <- function(vl, counters, varName, varType, thisdata) {
 		phenoFactor = factor(pheno)
 		# binary - so logistic regression
 		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
-		counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure)	
+		counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure)	
 	}
 	else {
 		# > 2 categories
@@ -94,7 +94,7 @@ testCategoricalSingle <- function(vl, counters, varName, varType, thisdata) {
 		  counters <- incrementCounter(counters, "catSin.case2")
 
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			counters <- testCategoricalUnordered(vl, counters, varName, varType, thisdatanew);
+			counters <- testCategoricalUnordered(opt, vl, counters, varName, varType, thisdatanew);
 			
 		}
 		else if (ordered == 1) {
@@ -105,7 +105,7 @@ testCategoricalSingle <- function(vl, counters, varName, varType, thisdata) {
 
 			## reorder variable values into increasing order
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			counters <- testCategoricalOrdered(vl, counters, varName, varType, thisdatanew, order)
+			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, order)
 		
 		}
 		else if (ordered == -2) {
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index efc8386..b4a87f5 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -18,7 +18,7 @@
 
 
 # Performs ordered logistic regression test and saves results in ordered logistic results file
-testCategoricalOrdered <- function(vl, counters, varName, varType, thisdata, orderStr="") {
+testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata, orderStr="") {
 
 	
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index 561ffa3..b32d4a4 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -19,7 +19,7 @@
 
 # Tests an unordered categorical phenotype with multinomial regression
 # and saves this result in the multinomial logistic results file
-testCategoricalUnordered <- function(vl, counters, varName, varType, thisdata) {
+testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisdata) {
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	#geno = thisdata[,"geno"]
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 865c021..0134787 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -18,7 +18,7 @@
 
 
 # Main function called for continuous fields
-testContinuous <- function(vl, counters, varName, varType, thisdata) {
+testContinuous <- function(opt, vl, counters, varName, varType, thisdata) {
 
 	cat("CONTINUOUS MAIN || ");	
 
@@ -29,13 +29,13 @@ testContinuous <- function(vl, counters, varName, varType, thisdata) {
 
 	thisdata[,phenoStartIdx:ncol(thisdata)] = pheno
 
-	counters <- testContinuous2(vl, counters, varName, varType, thisdata)
+	counters <- testContinuous2(opt, vl, counters, varName, varType, thisdata)
 	
 }
 
 # Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
 # This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
-testContinuous2 <- function(vl, counters, varName, varType, thisdata) {
+testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
 	cat("CONTINUOUS || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -92,7 +92,7 @@ testContinuous2 <- function(vl, counters, varName, varType, thisdata) {
 	        		# binary
         		  counters <- incrementCounter(counters, "cont.binary")
         			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);			
-        			counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure);
+        			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure);
         		}
         	}
 		else {
@@ -113,7 +113,7 @@ testContinuous2 <- function(vl, counters, varName, varType, thisdata) {
 
 			  counters <- incrementCounter(counters, "cont.ordcattry.ordcat")
 			        thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned);
-				counters <- testCategoricalOrdered(vl, counters, varName, varType, thisdatanew);
+				counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew);
 			}
 			else {
 				# try to treat as binary because not enough examples in each bin
@@ -133,7 +133,7 @@ testContinuous2 <- function(vl, counters, varName, varType, thisdata) {
 
 					# test binary
 					thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
-	                                counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure);
+	                                counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure);
 				}
 				else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
 
@@ -144,7 +144,7 @@ testContinuous2 <- function(vl, counters, varName, varType, thisdata) {
 					
                                         # test binary
                                         thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
-                                        counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure)
+                                        counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure)
                                 }
 				
 
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 02791bc..e003e01 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -22,7 +22,7 @@
 # 2) Generate a single value if there are several values (arrays) by taking the mean
 # 3) Treating this field as continuous if at least 20 distinct values.
 # Otherwise treat as binary or ordered categorical if 2 or more than two values. 
-testInteger <- function(vl, counters, varName, varType, thisdata) {
+testInteger <- function(opt, vl, counters, varName, varType, thisdata) {
 	cat("INTEGER || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -52,7 +52,7 @@ testInteger <- function(vl, counters, varName, varType, thisdata) {
 	if (length(uniqVar)>=20) {
 		
 		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoAvg);
-		counters <- testContinuous2(vl, counters, varName, varType, thisdatanew)
+		counters <- testContinuous2(opt, vl, counters, varName, varType, thisdatanew)
 		counters <- incrementCounter(counters, "int.continuous")
 	}
 	else {
@@ -72,7 +72,7 @@ testInteger <- function(vl, counters, varName, varType, thisdata) {
 
 			# binary
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
-			counters <- binaryLogisticRegression(varName, counters, varType, thisdatanew, isExposure);
+			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure);
 		}
 		else {
 		  counters <- incrementCounter(counters, "int.catord")
@@ -82,7 +82,7 @@ testInteger <- function(vl, counters, varName, varType, thisdata) {
 			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
 
 			# treat as ordinal categorical
-			counters <- testCategoricalOrdered(vl, counters, varName, varType, thisdatanew);
+			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew);
 		}
 	}
 	return(counters)
diff --git a/PHESANT/R/validatePhenotypeInput.r b/PHESANT/R/validatePhenotypeInput.r
index 9b95f71..cc3ca87 100644
--- a/PHESANT/R/validatePhenotypeInput.r
+++ b/PHESANT/R/validatePhenotypeInput.r
@@ -18,7 +18,7 @@
 
 
 # Validate the contents of the phenotype file
-validatePhenotypeInput <- function() {
+validatePhenotypeInput <- function(opt) {
 
 	print("Validating phenotype data ...")
 
diff --git a/PHESANT/R/validateTraitInput.r b/PHESANT/R/validateTraitInput.r
index 16a35f4..a42c28b 100644
--- a/PHESANT/R/validateTraitInput.r
+++ b/PHESANT/R/validateTraitInput.r
@@ -18,7 +18,7 @@
 
 
 # Validate the contents of the trait of interest file
-validateTraitInput <- function(snpIn) {
+validateTraitInput <- function(opt) {
 
 if (opt$save!=TRUE) {
 
diff --git a/PHESANT/man/PHESANT-package.Rd b/PHESANT/man/PHESANT-package.Rd
index 0178cc4..3625692 100644
--- a/PHESANT/man/PHESANT-package.Rd
+++ b/PHESANT/man/PHESANT-package.Rd
@@ -13,7 +13,7 @@
 The DESCRIPTION file:
 \packageDESCRIPTION{PHESANT}
 \packageIndices{PHESANT}
-~~ An overview of how to use the package, including the most important functions ~~
+An overview of how to use the package, including the most important functions
 }
 \author{
 \packageAuthor{PHESANT}
@@ -21,14 +21,12 @@ The DESCRIPTION file:
 Maintainer: \packageMaintainer{PHESANT}
 }
 \references{
-~~ Literature or other references for background information ~~
+Literature or other references for background information 
 }
-~~ Optionally other standard keywords, one per line, from file KEYWORDS in the R documentation directory ~~
 \keyword{ package }
 \seealso{
-~~ Optional links to other man pages, e.g. ~~
-~~ \code{\link[<pkg>:<pkg>-package]{<pkg>}} ~~
+To be done
 }
 \examples{
-~~ simple examples of the most important functions ~~
+
 }
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 33c2ef5..57d82ab 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -26,22 +26,22 @@ library("optparse")
 
 args <- commandArgs(T)
 if (length(args) == 0) {
-  load(file = "opt_only.RData")
+    load(file = "opt_only.RData")
 } else {
-  opt_parser = OptionParser(option_list=option_list);
-  opt = parse_args(opt_parser);
-  processArgs();
+    opt_parser <- OptionParser(option_list=option_list)
+    opt <- parse_args(opt_parser)
+    opt <- processArgs(opt)
 }
 
 ## load the files we write to and use
 counters <- initCounters()
 if (opt$save==FALSE) {
-	initResultsFiles();
+	initResultsFiles(opt)
 }
-vl=initVariableLists();
+vl <- initVariableLists(opt)
 
 ## load data
-d <- loadData(vl)
+d <- loadData(opt, vl)
 data=d$datax
 confounders=d$confounders
 indicatorFields=d$inds
@@ -105,7 +105,7 @@ for (var in phenoVars) {
 		if (first==FALSE) {
 
 			thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-			counters <- testAssociations(vl, counters, currentVar, currentVarShort, thisdata)
+			counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata)
 		}
 		
 		first=FALSE;
@@ -124,13 +124,13 @@ for (var in phenoVars) {
 if (phenoIdx>0){
 	# last variable so test association
 	thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-	counters <- testAssociations(vl, counters, currentVar, currentVarShort, thisdata)
+	counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata)
 }
 
 sink()
 
 # save counters of each path in variable flow
-saveCounts(counters)
+saveCounts(opt, counters)
 
 if (opt$save == TRUE) {
 	write.table(derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);

From 620ea3707a50f9aed6bc9496f8a2818fbe3ca8fe Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 14:37:58 -0400
Subject: [PATCH 07/27] ginire history

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 85c0afc..5fc7a4a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 
 .DS_Store
 .Rproj.user
+PHESANT/.Rhistory
+PHESANT/.Rhistory

From 5eea081f7b502abe9307dbc8141fd50d9ba694aa Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 16:01:29 -0400
Subject: [PATCH 08/27] hide some global varibles

---
 PHESANT/R/binaryLogisticRegression.r | 10 +++----
 PHESANT/R/option_list.R              |  4 ++-
 PHESANT/R/storeNewVar.r              | 31 +++++++++-------------
 PHESANT/R/testAssociations.r         | 10 +++----
 PHESANT/R/testCatMultiple.r          |  6 ++---
 PHESANT/R/testCatSingle.r            | 14 +++++-----
 PHESANT/R/testCategoricalOrdered.r   | 10 +++----
 PHESANT/R/testCategoricalUnordered.r | 12 ++++-----
 PHESANT/R/testContinuous.r           | 30 ++++++++++-----------
 PHESANT/R/testInteger.r              | 14 +++++-----
 PHESANT/phenomeScan.r                | 39 +++++++++++++---------------
 11 files changed, 86 insertions(+), 94 deletions(-)

diff --git a/PHESANT/R/binaryLogisticRegression.r b/PHESANT/R/binaryLogisticRegression.r
index a5f9d4f..48ccf85 100644
--- a/PHESANT/R/binaryLogisticRegression.r
+++ b/PHESANT/R/binaryLogisticRegression.r
@@ -21,7 +21,7 @@
 #
 # Performs binary logistic regression on the phenotype stored in thisdata 
 # and stores result in 'results-logistic-binary' results file.
-binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata, isExposure) {
+binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata, isExposure, phenoStartIdx) {
 
         phenoFactor = factor(thisdata[,phenoStartIdx])
 
@@ -65,10 +65,10 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
 		else {
 			geno = thisdata[,"geno"]
 		}
-                confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+    confounders=thisdata[,3:(phenoStartIdx -1) , drop = FALSE]
 		
 		sink()
-		sink(modelFitLogFile, append=TRUE)
+		sink(pkg.env$modelFitLogFile, append=TRUE)
 		print("--------------")
 		print(varName)
 
@@ -78,7 +78,7 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
 		mylogit <- glm(phenoFactor ~ geno + ., data=confounders, family="binomial")
 
 		sink()
-             	sink(resLogFile, append=TRUE)
+             	sink(pkg.env$resLogFile, append=TRUE)
 		
                 sumx = summary(mylogit)
 
@@ -111,7 +111,7 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
 		## END TRYCATCH
                 }, error = function(e) {
                         sink()
-                        sink(resLogFile, append=TRUE)
+                        sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
                         counters <- incrementCounter(counters, "binary.error")
                 })
diff --git a/PHESANT/R/option_list.R b/PHESANT/R/option_list.R
index 203393e..255bfbe 100644
--- a/PHESANT/R/option_list.R
+++ b/PHESANT/R/option_list.R
@@ -17,4 +17,6 @@ option_list = list(
   make_option(c("-c", "--confounderfile"), type="character", default=NULL, help="Confounder file name", metavar="character"),
   make_option(c("-i", "--confidenceintervals"), type="logical", default=TRUE, help="Whether confidence intervals should be calculated [default= %default]"),
   make_option(c("-k", "--standardise"), action="store", default=TRUE, help="Trait of interest is standardised to have mean=0 and std=1 [default= %default]")
-)
\ No newline at end of file
+)
+
+pkg.env <- new.env(parent = emptyenv())
\ No newline at end of file
diff --git a/PHESANT/R/storeNewVar.r b/PHESANT/R/storeNewVar.r
index 20c4d5a..e2f640a 100644
--- a/PHESANT/R/storeNewVar.r
+++ b/PHESANT/R/storeNewVar.r
@@ -1,21 +1,14 @@
-
-
 storeNewVar <- function(userIDData, phenoData, varName, type) {
-
-        # add pheno to dataframe
-	newdata = data.frame(userID=userIDData, newvar=phenoData)
-        names(newdata)[names(newdata)=="newvar"] = varName
-
-	if (type == "bin") {
-	        derivedBinary <<- merge(derivedBinary, newdata, by="userID", all=TRUE);
-	} else if (type == "cont") {
-		derivedCont <<- merge(derivedCont, newdata, by="userID", all=TRUE);
-	} else if (type == "catOrd") {
-		derivedCatOrd <<- merge(derivedCatOrd, newdata, by="userID", all=TRUE);
-	} else if (type == "catUnord") {
-		derivedCatUnord <<- merge(derivedCatUnord, newdata, by="userID", all=TRUE);
-	}
-
-	#write.table(phenoFactor, file=paste(opt$resDir, "data-binary-", varName, ".csv", sep=""), row.names=FALSE, col.names=FALSE, na="", quote=FALSE);
-
+    # add pheno to dataframe
+  	newdata = data.frame(userID=userIDData, newvar=phenoData)
+    names(newdata)[names(newdata)=="newvar"] = varName
+  	if (type == "bin") {
+  	    pkg.env$derivedBinary <- merge(pkg.env$derivedBinary, newdata, by="userID", all=TRUE);
+  	} else if (type == "cont") {
+  	    pkg.env$derivedCont <- merge(pkg.env$derivedCont, newdata, by="userID", all=TRUE);
+  	} else if (type == "catOrd") {
+  	    pkg.env$derivedCatOrd <- merge(pkg.env$derivedCatOrd, newdata, by="userID", all=TRUE);
+  	} else if (type == "catUnord") {
+  	    pkg.env$derivedCatUnord <- merge(pkg.env$derivedCatUnord, newdata, by="userID", all=TRUE);
+  	}
 }
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index d15c64c..04dda72 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -18,7 +18,7 @@
 
 
 # Tests the association of a field, determined by its field type
-testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thisdata) {
+testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thisdata, phenoStartIdx) {
 
 	## call file for variable type
 
@@ -55,7 +55,7 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 				  counters <- incrementCounter(counters, "start.exposure.int")
 				}
 
-			    	counters <- testInteger(opt, vl, couinters, currentVarShort, "INTEGER", thisdata);
+			    	counters <- testInteger(opt, vl, counters, currentVarShort, "INTEGER", thisdata, phenoStartIdx);
 			}
 			cat("\n");
 	    	}
@@ -73,7 +73,7 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 				if (isExposure==TRUE) {
 				  counters<- incrementCounter(counters, "start.exposure.cont")
                                 }
-				counters <- testContinuous(opt, vl, counters, currentVarShort, "CONTINUOUS", thisdata);
+				counters <- testContinuous(opt, vl, counters, currentVarShort, "CONTINUOUS", thisdata, phenoStartIdx);
 	        	}
 	        	cat("\n");
 		}
@@ -91,7 +91,7 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 				if (isExposure==TRUE) {
 				  counters <-incrementCounter(counters, "start.exposure.catSin")
                                 }
-			  counters <- testCategoricalSingle(opt, vl, counters, currentVarShort, "CAT-SIN", thisdata);
+			  counters <- testCategoricalSingle(opt, vl, counters, currentVarShort, "CAT-SIN", thisdata, phenoStartIdx);
 			}
 			cat("\n");
 	  	}
@@ -122,7 +122,7 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 		                                counters <- addToCounts(addToCounts, "start.exposure.catMulvalues", numVals)
 					}
 				}
-		        	counters <- testCategoricalMultiple(opt, vl, counters, currentVarShort, "CAT-MUL", thisdata);
+		        	counters <- testCategoricalMultiple(opt, vl, counters, currentVarShort, "CAT-MUL", thisdata, phenoStartIdx);
 			}
 			cat("\n");
 		}
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index 8719511..3a0c9df 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -23,7 +23,7 @@
 # in CAT_MULT_INDICATOR_FIELDS field of variable info file (either NO_NAN, ALL or a field ID)
 # 3) Checking derived variable has at least 10 cases in each group
 # 4) Calling binaryLogisticRegression function for this derived binary variable
-testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdata) {
+testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
 	cat("CAT-MULTIPLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
@@ -62,7 +62,7 @@ testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdat
 		varBinaryFactor = factor(varBinary)
 
 		## data for this new binary variable
-		newthisdata = cbind.data.frame(thisdata[,1:numPreceedingCols], varBinaryFactor)
+		newthisdata = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], varBinaryFactor)
 
 		## one of 3 ways to decide which examples are negative
         	idxsToRemove = restrictSample(vl, varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
@@ -84,7 +84,7 @@ testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdat
 
 			counters <- incrementCounter(counters, "catMul.over10")
 		     	# binary - so logistic regression
-			counters <- binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), counters, varType, newthisdata, isExposure)
+			counters <- binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), counters, varType, newthisdata, isExposure, phenoStartIdx)
 		}
 	}
 	return(counters)
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index 1c595f9..cf8b121 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -23,7 +23,7 @@
 # 3) Replacing missing codes - we assume values < 0 are missing for categorical (single) variables
 # 4) Remove values with <10 cases
 # 5) Deterimine correct test to perform, either binary, ordered or unordered.
-testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata) {
+testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
 	cat("CAT-SINGLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -77,8 +77,8 @@ testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata)
 
 		phenoFactor = factor(pheno)
 		# binary - so logistic regression
-		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
-		counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure)	
+		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
+		counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx)	
 	}
 	else {
 		# > 2 categories
@@ -93,8 +93,8 @@ testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata)
 			cat("CAT-SINGLE-UNORDERED || ")
 		  counters <- incrementCounter(counters, "catSin.case2")
 
-			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			counters <- testCategoricalUnordered(opt, vl, counters, varName, varType, thisdatanew);
+			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
+			counters <- testCategoricalUnordered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx);
 			
 		}
 		else if (ordered == 1) {
@@ -104,8 +104,8 @@ testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata)
 		  counters <- incrementCounter(counters, "catSin.case1")
 
 			## reorder variable values into increasing order
-			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
-			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, order)
+			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
+			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx, order)
 		
 		}
 		else if (ordered == -2) {
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index b4a87f5..d0b972d 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -18,7 +18,7 @@
 
 
 # Performs ordered logistic regression test and saves results in ordered logistic results file
-testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata, orderStr="") {
+testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx, orderStr="") {
 
 	
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -58,13 +58,13 @@ testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata
 
 		# ordinal logistic regression
 		sink()
-		sink(modelFitLogFile, append=TRUE)
+		sink(pkg.env$modelFitLogFile, append=TRUE)
 		print("--------------")
 		print(varName)
 		
 		### BEGIN TRYCATCH
 		tryCatch({
-		confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+		confounders=thisdata[,3:(phenoStartIdx -1), drop = FALSE]
 
 
 		if (opt$standardise==TRUE) {
@@ -75,7 +75,7 @@ testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata
 
 		ctable <- coef(summary(fit))
 		sink()
-		sink(resLogFile, append=TRUE)
+		sink(pkg.env$resLogFile, append=TRUE)
 
 		ct = coeftest(fit)
 		pvalue = ct["geno","Pr(>|t|)"]
@@ -103,7 +103,7 @@ testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata
 		### END TRYCATCH
 		}, error = function(e) {
 			sink()
-                        sink(resLogFile, append=TRUE)
+                        sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
                         counters <- incrementCounter(counters, "ordCat.error")
                 })
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index b32d4a4..8807b26 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -19,7 +19,7 @@
 
 # Tests an unordered categorical phenotype with multinomial regression
 # and saves this result in the multinomial logistic results file
-testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisdata) {
+testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	#geno = thisdata[,"geno"]
@@ -35,7 +35,7 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 		numUnique = length(unique(na.omit(pheno)))
 
 		# num outcome values * (num confounders and trait of interest and bias term)
-		numWeights=(numUnique-1)*((numPreceedingCols-2)+1+1)
+		numWeights=(numUnique-1)*(((phenoStartIdx -1)-2)+1+1)
 		if (numWeights>1000) {
 			cat("Too many weights in model: ", numWeights, " > 1000, (num outcomes values: ", numUnique, ") || SKIP ", sep="")
 		  counters <-incrementCounter(counters, "unordCat.cats")
@@ -56,7 +56,7 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 		reference = levels(phenoFactor)[1];
 
 		sink()
-		sink(modelFitLogFile, append=TRUE) # hide output of model fitting
+		sink(pkg.env$modelFitLogFile, append=TRUE) # hide output of model fitting
 		print("--------------")
                 print(varName)
 
@@ -68,7 +68,7 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
                 }
 		#cat("genoMean=", mean(geno), " genoSD=", sd(geno), " || ", sep="")
 		
-		confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+		confounders=thisdata[,3:(phenoStartIdx -1), drop = FALSE]
 
 		###### BEGIN TRYCATCH
 		tryCatch({
@@ -88,7 +88,7 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 	    	write(paste(paste(varName,"-",reference,sep=""), varType, paste(maxFreq,"/",numNotNA,sep=""), -999, -999, -999, modelP, sep=","), file=paste(opt$resDir,"results-multinomial-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE")
 
 		sink()
-		sink(resLogFile, append=TRUE)	
+		sink(pkg.env$resLogFile, append=TRUE)	
 		
 		sumx <- summary(fit)
 		
@@ -137,7 +137,7 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 		## END TRYCATCH
 		}, error = function(e) {
                         sink()
-                        sink(resLogFile, append=TRUE)
+                        sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
                         counters <- incrementCounter(counters, "unordCat.error")
 		})
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 0134787..73f6f90 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -18,7 +18,7 @@
 
 
 # Main function called for continuous fields
-testContinuous <- function(opt, vl, counters, varName, varType, thisdata) {
+testContinuous <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
 
 	cat("CONTINUOUS MAIN || ");	
 
@@ -29,13 +29,13 @@ testContinuous <- function(opt, vl, counters, varName, varType, thisdata) {
 
 	thisdata[,phenoStartIdx:ncol(thisdata)] = pheno
 
-	counters <- testContinuous2(opt, vl, counters, varName, varType, thisdata)
+	counters <- testContinuous2(opt, vl, counters, varName, varType, thisdata, phenoStartIdx)
 	
 }
 
 # Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
 # This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
-testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
+testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
 	cat("CONTINUOUS || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -91,8 +91,8 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
         		else if (numLevels==2) {
 	        		# binary
         		  counters <- incrementCounter(counters, "cont.binary")
-        			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);			
-        			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure);
+        			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);			
+        			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx);
         		}
         	}
 		else {
@@ -112,8 +112,8 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
 				# successful binning. >=10 examples in each of the 3 bins
 
 			  counters <- incrementCounter(counters, "cont.ordcattry.ordcat")
-			        thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned);
-				counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew);
+			        thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned);
+				counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx);
 			}
 			else {
 				# try to treat as binary because not enough examples in each bin
@@ -132,8 +132,8 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
 					phenoBinned[which(phenoBinned==0)] = 1	
 
 					# test binary
-					thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
-	                                counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure);
+					thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
+	                                counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx);
 				}
 				else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
 
@@ -143,8 +143,8 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
                                         phenoBinned[which(phenoBinned==2)] = 1
 					
                                         # test binary
-                                        thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
-                                        counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure)
+                                        thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
+                                        counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx)
                                 }
 				
 
@@ -187,10 +187,10 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
 			else {
                 	        geno = thisdata[,"geno"] 
                 	}
-			confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+			confounders=thisdata[,3:(phenoStartIdx -1), drop = FALSE]
 
 			sink()
-			sink(modelFitLogFile, append=TRUE)
+			sink(pkg.env$modelFitLogFile, append=TRUE)
 			print("--------------")			
 			print(varName)
 
@@ -200,7 +200,7 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
 			fit <- lm(phenoIRNT ~ geno + ., data=confounders)
 			
 			sink()
-			sink(resLogFile, append=TRUE)
+			sink(pkg.env$resLogFile, append=TRUE)
 			
 			sumx = summary(fit)
 
@@ -231,7 +231,7 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata) {
 			## END TRYCATCH
                 	}, error = function(e) {
                 	        sink()
-                	        sink(resLogFile, append=TRUE)
+                	        sink(pkg.env$resLogFile, append=TRUE)
                 	        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
                 	        counters <-incrementCounter(counters, "continuous.error")
                 	})
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index e003e01..190c732 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -22,7 +22,7 @@
 # 2) Generate a single value if there are several values (arrays) by taking the mean
 # 3) Treating this field as continuous if at least 20 distinct values.
 # Otherwise treat as binary or ordered categorical if 2 or more than two values. 
-testInteger <- function(opt, vl, counters, varName, varType, thisdata) {
+testInteger <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
 	cat("INTEGER || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -51,8 +51,8 @@ testInteger <- function(opt, vl, counters, varName, varType, thisdata) {
 	# if >=20 separate values then treat as continuous
 	if (length(uniqVar)>=20) {
 		
-		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoAvg);
-		counters <- testContinuous2(opt, vl, counters, varName, varType, thisdatanew)
+		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoAvg);
+		counters <- testContinuous2(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx)
 		counters <- incrementCounter(counters, "int.continuous")
 	}
 	else {
@@ -71,18 +71,18 @@ testInteger <- function(opt, vl, counters, varName, varType, thisdata) {
 		  counters <- incrementCounter(counters, "int.binary")
 
 			# binary
-			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
-			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure);
+			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
+			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure,phenoStartIdx);
 		}
 		else {
 		  counters <- incrementCounter(counters, "int.catord")
 			cat("3-20 values || ")
 
 			# we don't use equal sized bins just the original integers (that have >=10 examples) as categories
-			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
+			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
 
 			# treat as ordinal categorical
-			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew);
+			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx);
 		}
 	}
 	return(counters)
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 57d82ab..593ebd6 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -60,21 +60,18 @@ currentVarShort="";
 first=TRUE;
 
 if (opt$save == TRUE) {
-
-	derivedBinary <- data.frame(userID=data$userID)
-        derivedCont <- data.frame(userID=data$userID)
-        derivedCatOrd <- data.frame(userID=data$userID)
-        derivedCatUnord <- data.frame(userID=data$userID)
-
-	resLogFile = paste(opt$resDir,"data-log-",opt$varTypeArg,".txt",sep="")
-        sink(resLogFile)
+    pkg.env$derivedBinary <- data.frame(userID=data$userID)
+    pkg.env$derivedCont <- data.frame(userID=data$userID)
+    pkg.env$derivedCatOrd <- data.frame(userID=data$userID)
+    pkg.env$derivedCatUnord <- data.frame(userID=data$userID)
+    pkg.env$resLogFile = paste(opt$resDir,"data-log-",opt$varTypeArg,".txt",sep="")
+    sink(pkg.env$resLogFile)
 } else {
-	modelFitLogFile = paste(opt$resDir,"modelfit-log-",opt$varTypeArg,".txt",sep="")
-	sink(modelFitLogFile)
-	sink()
-
-	resLogFile = paste(opt$resDir,"results-log-",opt$varTypeArg,".txt",sep="")
-	sink(resLogFile)
+    pkg.env$modelFitLogFile = paste(opt$resDir,"modelfit-log-",opt$varTypeArg,".txt",sep="")
+  	sink(pkg.env$modelFitLogFile)
+  	sink()
+  	pkg.env$resLogFile = paste(opt$resDir,"results-log-",opt$varTypeArg,".txt",sep="")
+  	sink(pkg.env$resLogFile)
 }
 
 
@@ -84,7 +81,7 @@ for (var in phenoVars) {
 
 	sink()
 #	print(var)
-	sink(resLogFile, append=TRUE)
+	sink(pkg.env$resLogFile, append=TRUE)
 
 	varx = gsub("^x", "", var);
         varx = gsub("_[0-9]+$", "", varx);
@@ -105,7 +102,7 @@ for (var in phenoVars) {
 		if (first==FALSE) {
 
 			thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-			counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata)
+			counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata, phenoStartIdx)
 		}
 		
 		first=FALSE;
@@ -124,7 +121,7 @@ for (var in phenoVars) {
 if (phenoIdx>0){
 	# last variable so test association
 	thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-	counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata)
+	counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata, phenoStartIdx)
 }
 
 sink()
@@ -133,10 +130,10 @@ sink()
 saveCounts(opt, counters)
 
 if (opt$save == TRUE) {
-	write.table(derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-	write.table(derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-	write.table(derivedCatOrd, file=paste(opt$resDir,"data-catord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-	write.table(derivedCatUnord, file=paste(opt$resDir,"data-catunord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+	write.table(pkg.env$derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+	write.table(pkg.env$derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+	write.table(pkg.env$derivedCatOrd, file=paste(opt$resDir,"data-catord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+	write.table(pkg.env$derivedCatUnord, file=paste(opt$resDir,"data-catunord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
 }
 
 warnings()

From 8a9a3c60737dc475260e297de54c21f0976d5c6f Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 16:27:44 -0400
Subject: [PATCH 09/27] Use package env for counters

---
 PHESANT/R/binaryLogisticRegression.r | 17 ++++++-----
 PHESANT/R/counters.r                 | 27 ++++++++---------
 PHESANT/R/testAssociations.r         | 41 +++++++++++++-------------
 PHESANT/R/testCatMultiple.r          | 11 ++++---
 PHESANT/R/testCatSingle.r            | 19 ++++++------
 PHESANT/R/testCategoricalOrdered.r   | 15 +++++-----
 PHESANT/R/testCategoricalUnordered.r | 17 ++++++-----
 PHESANT/R/testContinuous.r           | 43 ++++++++++++++--------------
 PHESANT/R/testInteger.r              | 17 ++++++-----
 PHESANT/phenomeScan.r                |  8 +++---
 10 files changed, 102 insertions(+), 113 deletions(-)

diff --git a/PHESANT/R/binaryLogisticRegression.r b/PHESANT/R/binaryLogisticRegression.r
index 48ccf85..666b8b5 100644
--- a/PHESANT/R/binaryLogisticRegression.r
+++ b/PHESANT/R/binaryLogisticRegression.r
@@ -21,7 +21,7 @@
 #
 # Performs binary logistic regression on the phenotype stored in thisdata 
 # and stores result in 'results-logistic-binary' results file.
-binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata, isExposure, phenoStartIdx) {
+binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure, phenoStartIdx) {
 
         phenoFactor = factor(thisdata[,phenoStartIdx])
 
@@ -31,7 +31,7 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
 	if (length(facLevels)!=2) {
 		#stop(paste("Not 2 levels: ", length(facLevels), " || ", sep=""))
 		cat("BINARY-NOT2LEVELS- (", length(facLevels), ") || ",sep="");
-                counters <- incrementCounter(counters, "binary.nottwolevels")
+                incrementCounter("binary.nottwolevels")
 	}
 
 	idxTrue = length(which(phenoFactor==facLevels[1]))
@@ -40,11 +40,11 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
   
         if (idxTrue<10 || idxFalse<10) {
 		cat("BINARY-LOGISTIC-SKIP-10 (", idxTrue, "/", idxFalse, ") || ", sep="")
-          counters <- incrementCounter(counters, "binary.10")
+          incrementCounter("binary.10")
 	}
 	else if (numNotNA<500) {	
 		cat("BINARY-LOGISTIC-SKIP-500 (", numNotNA, ") || ",sep="");
-	  counters <- incrementCounter(counters, "binary.500")
+	  incrementCounter("binary.500")
        	}
 	else {
 
@@ -54,7 +54,7 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
 			# add pheno to dataframe
 			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'bin')
 			cat("SUCCESS results-logistic-binary ");			
-			counters <- incrementCounter(counters, "success.binary")
+			incrementCounter("success.binary")
 		}
 		else {
 
@@ -102,10 +102,10 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
                 write(paste(varName,varType,paste(idxTrue,"/",idxFalse,"(",numNotNA,")",sep=""), beta,lower,upper,pvalue, sep=","), file=paste(opt$resDir,"results-logistic-binary-",opt$varTypeArg,".txt",sep=""), append="TRUE");
                 cat("SUCCESS results-logistic-binary ");
                 
-                counters <- incrementCounter(counters, "success.binary")
+                incrementCounter("success.binary")
 
 		if (isExposure==TRUE) {
-		  counters <-incrementCounter(counters,"success.exposure.binary")
+		  incrementCounter("success.exposure.binary")
 	        }	
 
 		## END TRYCATCH
@@ -113,10 +113,9 @@ binaryLogisticRegression <- function(opt, varName, counters, varType, thisdata,
                         sink()
                         sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        counters <- incrementCounter(counters, "binary.error")
+                        incrementCounter("binary.error")
                 })
 		}	
 	}
-	return(counters)
 }
 
diff --git a/PHESANT/R/counters.r b/PHESANT/R/counters.r
index ef706b9..31a586b 100644
--- a/PHESANT/R/counters.r
+++ b/PHESANT/R/counters.r
@@ -17,42 +17,39 @@
 # DEALINGS IN THE SOFTWARE.
 
 # adds given value to counter, that are used to count how many variables go down each route in the data flow
-addToCounts <- function(counters, countName, num) {
-    idx = which(counters$name==countName)
+addToCounts <- function(countName, num) {
+    idx = which(pkg.env$counters$name==countName)
     if (length(idx)==0) {
         # counter does not exist so add with countValue 1
-        counters <- rbind(counters, data.frame(name=countName, countValue=num))
+        pkg.env$counters <- rbind(pkg.env$counters, data.frame(name=countName, countValue=num))
     } else {
         # add to counter that already exists
-        counters$countValue[idx] <- counters$countValue[idx]+num
+        pkg.env$counters$countValue[idx] <- pkg.env$counters$countValue[idx]+num
     }
-    return(counters)
 }
 
 # increments counters used to count how many variables go down each route in the data flow
-incrementCounter <- function(counters, countName) {
-    idx = which(counters$name==countName)
+incrementCounter <- function(countName) {
+    idx = which(pkg.env$counters$name==countName)
     if (length(idx)==0) {
         # counter does not exist so add with countValue 1
-        counters <- rbind(counters, data.frame(name=countName, countValue=1))
+        pkg.env$counters <- rbind(pkg.env$counters, data.frame(name=countName, countValue=1))
     } else {
         # increment counter that already exists
-        counters$countValue[idx] <- counters$countValue[idx]+1
+        pkg.env$counters$countValue[idx] <- pkg.env$counters$countValue[idx]+1
     }
-    return(counters)
 }
 
 # Saves the counters stored in count variables, to a file in results directory
-saveCounts <- function(opt, counters) {
+saveCounts <- function(opt) {
     countFile = paste(opt$resDir,"variable-flow-counts-",opt$varTypeArg,".txt",sep="")
     # sort on counter name
-    sortIdx = order(as.character(counters[,"name"]))
-    counters <<- counters[sortIdx,]
+    sortIdx = order(as.character(pkg.env$counters[,"name"]))
+    counters <- pkg.env$counters[sortIdx,]
     write.table(counters, file=countFile, sep=",", quote=FALSE, row.names=FALSE)
 }
 
 # init the counters used to determine how many variables took each path in the variable processing flow.
 initCounters <- function() {
-    counters = data.frame(name=character(),countValue=integer(), stringsAsFactors=FALSE)
-    return(counters)
+  pkg.env$counters = data.frame(name=character(),countValue=integer(), stringsAsFactors=FALSE)
 }
\ No newline at end of file
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index 04dda72..7386217 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -18,7 +18,7 @@
 
 
 # Tests the association of a field, determined by its field type
-testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thisdata, phenoStartIdx) {
+testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx) {
 
 	## call file for variable type
 
@@ -30,7 +30,7 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 		# check if variable info is found for this field
 		if (length(idx)==0) {
 			cat(paste(currentVar, " || Variable could not be found in pheno info file. \n", sep=""))			
-		  counters <- incrementCounter(counters, "notinphenofile")
+		  incrementCounter("notinphenofile")
 		}
 		else {
 
@@ -47,15 +47,15 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 			
 			if (excluded!="") {
 				cat(paste("Excluded integer: ", excluded, " || ", sep=""))
-			  counters <- incrementCounter(counters, "excluded.int")
+			  incrementCounter("excluded.int")
 			}
 			else {
-			  counters <- incrementCounter(counters, "start.int")
+			  incrementCounter("start.int")
 				if (isExposure==TRUE) {
-				  counters <- incrementCounter(counters, "start.exposure.int")
+				  incrementCounter("start.exposure.int")
 				}
 
-			    	counters <- testInteger(opt, vl, counters, currentVarShort, "INTEGER", thisdata, phenoStartIdx);
+			    	testInteger(opt, vl, currentVarShort, "INTEGER", thisdata, phenoStartIdx);
 			}
 			cat("\n");
 	    	}
@@ -66,14 +66,14 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 
 		    	if (excluded!="") {
 				cat(paste("Excluded continuous: ", excluded, " || ", sep=""))
-		    	  counters <- incrementCounter(counters, "excluded.cont")
+		    	  incrementCounter("excluded.cont")
 		    	}
 			else {
-			  counters <- incrementCounter(counters, "start.cont")
+			  incrementCounter("start.cont")
 				if (isExposure==TRUE) {
-				  counters<- incrementCounter(counters, "start.exposure.cont")
+				  incrementCounter("start.exposure.cont")
                                 }
-				counters <- testContinuous(opt, vl, counters, currentVarShort, "CONTINUOUS", thisdata, phenoStartIdx);
+				testContinuous(opt, vl, currentVarShort, "CONTINUOUS", thisdata, phenoStartIdx);
 	        	}
 	        	cat("\n");
 		}
@@ -84,14 +84,14 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 	
 	    		if (excluded!="") {
 				cat(paste("Excluded cat-single: ", excluded, " || ", sep=""))
-	    		  counters <- incrementCounter(counters, "excluded.catSin")
+	    		  incrementCounter("excluded.catSin")
 			}
 			else {
-			  counters <- incrementCounter(counters, "start.catSin")
+			  incrementCounter("start.catSin")
 				if (isExposure==TRUE) {
-				  counters <-incrementCounter(counters, "start.exposure.catSin")
+				  incrementCounter("start.exposure.catSin")
                                 }
-			  counters <- testCategoricalSingle(opt, vl, counters, currentVarShort, "CAT-SIN", thisdata, phenoStartIdx);
+			    testCategoricalSingle(opt, vl, currentVarShort, "CAT-SIN", thisdata, phenoStartIdx);
 			}
 			cat("\n");
 	  	}
@@ -102,27 +102,27 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 
 			if (excluded!="") {
 				cat(paste("Excluded cat-multiple: ", excluded, " || ", sep=""))
-			  counters <- incrementCounter(counters, "excluded.catMul")
+			  incrementCounter("excluded.catMul")
 			}
 			else {
 
 				if (catSinToMult!="") {
 					cat("cat-single to cat-multiple || ", sep="")
-				  counters <- incrementCounter(counters, "catSinToCatMul")
+				  incrementCounter("catSinToCatMul")
 				}
 
-			  counters <- incrementCounter(counters, "start.catMul")	
+			  incrementCounter("start.catMul")	
 				if (isExposure==TRUE) {
-				  counters <- incrementCounter(counters, "start.exposure.catMul")
+				  incrementCounter("start.exposure.catMul")
                                 }
 				else {
 					# get number of cat mult values denoting trait of interest
 	                                numVals = getNumValuesCatMultExposure(vl, currentVarShort)
 					if (numVals>0) {
-		                                counters <- addToCounts(addToCounts, "start.exposure.catMulvalues", numVals)
+		                                addToCounts(addToCounts, "start.exposure.catMulvalues", numVals)
 					}
 				}
-		        	counters <- testCategoricalMultiple(opt, vl, counters, currentVarShort, "CAT-MUL", thisdata, phenoStartIdx);
+		        	testCategoricalMultiple(opt, vl, currentVarShort, "CAT-MUL", thisdata, phenoStartIdx);
 			}
 			cat("\n");
 		}
@@ -134,7 +134,6 @@ testAssociations <- function(opt, vl, counters, currentVar, currentVarShort, thi
 	}, error = function(e) {
 		print(paste("ERROR:", currentVar,e))
 	})
-  return(counters)
 }
 
 
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index 3a0c9df..b396c4c 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -23,7 +23,7 @@
 # in CAT_MULT_INDICATOR_FIELDS field of variable info file (either NO_NAN, ALL or a field ID)
 # 3) Checking derived variable has at least 10 cases in each group
 # 4) Calling binaryLogisticRegression function for this derived binary variable
-testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
+testCategoricalMultiple <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 	cat("CAT-MULTIPLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
@@ -54,7 +54,7 @@ testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdat
 		idxsTrue = idxForVar[,"row"]
 
 		cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="");
-		counters <- incrementCounter(counters, "catMul.binary")
+		incrementCounter("catMul.binary")
 		
 		# make zero vector and set 1s for those with this variable value
 		varBinary = rep.int(0,numRows);
@@ -77,17 +77,16 @@ testCategoricalMultiple <- function(opt, vl, counters, varName, varType, thisdat
                 
 	        if (idxTrue<10 || idxFalse<10) {
 	                cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="");
-			counters <- incrementCounter(counters, "catMul.10")
+			incrementCounter("catMul.10")
 	        }
 		else {
 			isExposure = getIsCatMultExposure(vl, varName, variableVal)
 
-			counters <- incrementCounter(counters, "catMul.over10")
+			incrementCounter("catMul.over10")
 		     	# binary - so logistic regression
-			counters <- binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), counters, varType, newthisdata, isExposure, phenoStartIdx)
+			binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure, phenoStartIdx)
 		}
 	}
-	return(counters)
 }
 
 # restricts sample based on value in CAT_MULT_INDICATOR_FIELDS column of variable info file,
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index cf8b121..fa69be2 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -23,7 +23,7 @@
 # 3) Replacing missing codes - we assume values < 0 are missing for categorical (single) variables
 # 4) Remove values with <10 cases
 # 5) Deterimine correct test to perform, either binary, ordered or unordered.
-testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
+testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 	cat("CAT-SINGLE || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -68,17 +68,17 @@ testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata,
 
 	if (length(uniqVar)<=1) {
 		cat("SKIP (only one value) || ");
-		counters <- incrementCounter(counters, "catSin.onevalue")
+		incrementCounter("catSin.onevalue")
 	}
 	else if (length(uniqVar)==2) {		
 		cat("CAT-SINGLE-BINARY || ");
-	  counters <- incrementCounter(counters, "catSin.case3")
+	  incrementCounter("catSin.case3")
 		# binary so logistic regression
 
 		phenoFactor = factor(pheno)
 		# binary - so logistic regression
 		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
-		counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx)	
+		binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx)	
 	}
 	else {
 		# > 2 categories
@@ -91,33 +91,32 @@ testCategoricalSingle <- function(opt, vl, counters, varName, varType, thisdata,
 		if (ordered == 0) {
 			
 			cat("CAT-SINGLE-UNORDERED || ")
-		  counters <- incrementCounter(counters, "catSin.case2")
+		  incrementCounter("catSin.case2")
 
 			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
-			counters <- testCategoricalUnordered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx);
+			testCategoricalUnordered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
 			
 		}
 		else if (ordered == 1) {
 		
 			## ordered
 			cat("ordered || ");
-		  counters <- incrementCounter(counters, "catSin.case1")
+		  incrementCounter("catSin.case1")
 
 			## reorder variable values into increasing order
 			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
-			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx, order)
+			testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx, order)
 		
 		}
 		else if (ordered == -2) {
 			cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
-		  counters <- incrementCounter(counters, "catSin.binaryorexcluded")
+		  incrementCounter( "catSin.binaryorexcluded")
 		}
 		else {
 			print(paste("ERROR", varName, varType, dataCode));
 		}
 		}
 	}
-  return(counters)
 }
 
 ## values are reordered and assigned values 1:N for N categories
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index d0b972d..1f208db 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -18,14 +18,14 @@
 
 
 # Performs ordered logistic regression test and saves results in ordered logistic results file
-testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx, orderStr="") {
+testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoStartIdx, orderStr="") {
 
 	
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	geno = thisdata[,"geno"]
 
 	cat("CAT-ORD || ");
-	counters <- incrementCounter(counters, "ordCat")
+	incrementCounter("ordCat")
 
 	doCatOrdAssertions(pheno)
 
@@ -39,7 +39,7 @@ testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata
 	numNotNA = length(which(!is.na(pheno)))
 	if (numNotNA<500) {
 		cat("CATORD-SKIP-500 (", numNotNA, ") || ",sep="");
-	  counters <- incrementCounter(counters, "ordCat.500")
+	  incrementCounter("ordCat.500")
 	}
 	else {
 		# test this cat ordered variable with ordered logistic regression	
@@ -52,7 +52,7 @@ testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata
 			# add pheno to dataframe
 			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
 			cat("SUCCESS results-ordered-logistic");
-			counters <- incrementCounter(counters, "success.ordCat")
+			incrementCounter("success.ordCat")
                 }
                 else {
 
@@ -93,11 +93,11 @@ testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata
 
 		write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE");
 		cat("SUCCESS results-ordered-logistic");
-		counters <- incrementCounter(counters, "success.ordCat")
+		incrementCounter("success.ordCat")
 
 		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
-                  counters <- incrementCounter(counters, "success.exposure.ordCat")
+                  incrementCounter("success.exposure.ordCat")
                 }
 
 		### END TRYCATCH
@@ -105,12 +105,11 @@ testCategoricalOrdered <- function(opt, vl, counters, varName, varType, thisdata
 			sink()
                         sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        counters <- incrementCounter(counters, "ordCat.error")
+                        incrementCounter("ordCat.error")
                 })
 		}
 
 	}
-	return(counters)
 }
 
 # check that the phenotype is valid - that there are more than two categories
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index 8807b26..dce7cb0 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -19,7 +19,7 @@
 
 # Tests an unordered categorical phenotype with multinomial regression
 # and saves this result in the multinomial logistic results file
-testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
+testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	#geno = thisdata[,"geno"]
@@ -27,7 +27,7 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 	numNotNA = length(which(!is.na(pheno)))
 	if (numNotNA<500) {
 		cat("CATUNORD-SKIP-500 (", numNotNA, ") || ",sep="");
-	  counters <- incrementCounter(counters, "unordCat.500")
+	  incrementCounter("unordCat.500")
 	}
 	else {
 
@@ -38,8 +38,8 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 		numWeights=(numUnique-1)*(((phenoStartIdx -1)-2)+1+1)
 		if (numWeights>1000) {
 			cat("Too many weights in model: ", numWeights, " > 1000, (num outcomes values: ", numUnique, ") || SKIP ", sep="")
-		  counters <-incrementCounter(counters, "unordCat.cats")
-			return(counters)
+		  incrementCounter("unordCat.cats")
+			return(NULL)
 		}
 
 		phenoFactor = chooseReferenceCategory(pheno);
@@ -48,7 +48,7 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 			# add pheno to dataframe
 			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catUnord')
 			cat("SUCCESS results-notordered-logistic ");
-			counters <- incrementCounter(counters, "success.unordCat")
+			incrementCounter("success.unordCat")
 		}
                 else {
 		
@@ -127,11 +127,11 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
 		}
 
 		cat("SUCCESS results-notordered-logistic ");
-		counters <- incrementCounter(counters, "success.unordCat")
+		incrementCounter("success.unordCat")
 
 		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
-                  counters <- incrementCounter(counters, "success.exposure.unordCat")
+                  incrementCounter("success.exposure.unordCat")
                 }
 		
 		## END TRYCATCH
@@ -139,11 +139,10 @@ testCategoricalUnordered <- function(opt, vl, counters, varName, varType, thisda
                         sink()
                         sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        counters <- incrementCounter(counters, "unordCat.error")
+                        incrementCounter("unordCat.error")
 		})
 		}
 	}
-	return(counters)
 }
 
 # find reference category - category with most number of examples
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 73f6f90..0f0b7aa 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -18,7 +18,7 @@
 
 
 # Main function called for continuous fields
-testContinuous <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
+testContinuous <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 
 	cat("CONTINUOUS MAIN || ");	
 
@@ -29,13 +29,13 @@ testContinuous <- function(opt, vl, counters, varName, varType, thisdata, phenoS
 
 	thisdata[,phenoStartIdx:ncol(thisdata)] = pheno
 
-	counters <- testContinuous2(opt, vl, counters, varName, varType, thisdata, phenoStartIdx)
+	testContinuous2(opt, vl, varName, varType, thisdata, phenoStartIdx)
 	
 }
 
 # Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
 # This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
-testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
+testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 	cat("CONTINUOUS || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -86,19 +86,19 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, pheno
 
         		if (numLevels<=1) {
        				cat("SKIP (number of levels: ",numLevels,")",sep="")
-        		  counters <- incrementCounter(counters,"cont.onevalue")
+        		  incrementCounter("cont.onevalue")
         		}
         		else if (numLevels==2) {
 	        		# binary
-        		  counters <- incrementCounter(counters, "cont.binary")
+        		  incrementCounter("cont.binary")
         			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);			
-        			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx);
+        			binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx);
         		}
         	}
 		else {
 			## try to treat as ordered categorical
 
-		  counters <- incrementCounter(counters, "cont.ordcattry")
+		  incrementCounter("cont.ordcattry")
 			## equal sized bins
 			phenoBinned = equalSizedBins(phenoAvg);
 
@@ -111,9 +111,9 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, pheno
 
 				# successful binning. >=10 examples in each of the 3 bins
 
-			  counters <- incrementCounter(counters, "cont.ordcattry.ordcat")
+			  incrementCounter("cont.ordcattry.ordcat")
 			        thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned);
-				counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx);
+				testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
 			}
 			else {
 				# try to treat as binary because not enough examples in each bin
@@ -122,36 +122,36 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, pheno
 					## skip - not possible to create binary variable because first and third bins are too small
 					## ie. could merge bin1 with bin 2 but then bin3 still too small etc
 					cat("SKIP 2 bins are too small || ")
-				  counters <-incrementCounter(counters, "cont.ordcattry.smallbins")
+				  incrementCounter("cont.ordcattry.smallbins")
 				} 
 				else if ((bin0Num<10 | bin1Num<10) & (bin0Num+bin1Num)>=10) {
 
 					# combine first and second bin to create binary variable
-				  counters <- incrementCounter(counters, "cont.ordcattry.binsbinary")
+				  incrementCounter("cont.ordcattry.binsbinary")
 					cat("Combine first two bins and treat as binary || ")
 					phenoBinned[which(phenoBinned==0)] = 1	
 
 					# test binary
 					thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
-	                                counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx);
+	                                binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx);
 				}
 				else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
 
 					# combine second and last bin to create binary variable
-				  counters <- incrementCounter(counters, "cont.ordcattry.binsbinary")
+				  incrementCounter("cont.ordcattry.binsbinary")
 					cat("Combine last two bins and treat as binary || ")
                                         phenoBinned[which(phenoBinned==2)] = 1
 					
                                         # test binary
                                         thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
-                                        counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure, phenoStartIdx)
+                                        binaryLogisticRegression(opt, varName,varType, thisdatanew, isExposure, phenoStartIdx)
                                 }
 				
 
 				else {
 					## skip - not possible to create binary variable because combining bins would still be too small
 					cat("SKIP 2 bins are too small(2) || ")
-				  counters <-incrementCounter(counters, "cont.ordcattry.smallbins2")
+				  incrementCounter("cont.ordcattry.smallbins2")
 				}
 
 			}
@@ -160,13 +160,13 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, pheno
 	}
 	else {		
 		cat("IRNT || ");
-	  counters <- incrementCounter(counters, "cont.main")
+	  incrementCounter("cont.main")
 
 		# check there are at least 500 examples
 		numNotNA = length(which(!is.na(phenoAvg)))
 		if (numNotNA<500) {
 			cat("CONTINUOUS-SKIP-500 (", numNotNA, ") || ",sep="");
-		  counters <- incrementCounter(counters, "cont.main.500")
+		  incrementCounter("cont.main.500")
 		}
 		else {
 			## inverse rank normal transformation
@@ -176,7 +176,7 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, pheno
 				# add pheno to dataframe
 				storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
 				cat("SUCCESS results-linear");
-				counters <- incrementCounter(counters, "success.continuous")
+				incrementCounter("success.continuous")
                         }
                         else {
 		
@@ -223,9 +223,9 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, pheno
 			write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt", sep=""), append="TRUE");
 			cat("SUCCESS results-linear");
 
-			counters <- incrementCounter(counters, "success.continuous")
+			incrementCounter("success.continuous")
                 	if (isExposure == TRUE) {
-                	  counters <- incrementCounter(counters, "success.exposure.continuous")
+                	  incrementCounter("success.exposure.continuous")
                 	}
 
 			## END TRYCATCH
@@ -233,12 +233,11 @@ testContinuous2 <- function(opt, vl, counters, varName, varType, thisdata, pheno
                 	        sink()
                 	        sink(pkg.env$resLogFile, append=TRUE)
                 	        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                	        counters <-incrementCounter(counters, "continuous.error")
+                	        incrementCounter("continuous.error")
                 	})
 			}			
 		}
 	}
-	return(counters)
 }
 
 irnt <- function(pheno) {
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 190c732..5ffac76 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -22,7 +22,7 @@
 # 2) Generate a single value if there are several values (arrays) by taking the mean
 # 3) Treating this field as continuous if at least 20 distinct values.
 # Otherwise treat as binary or ordered categorical if 2 or more than two values. 
-testInteger <- function(opt, vl, counters, varName, varType, thisdata, phenoStartIdx) {
+testInteger <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 	cat("INTEGER || ");
 
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
@@ -52,8 +52,8 @@ testInteger <- function(opt, vl, counters, varName, varType, thisdata, phenoStar
 	if (length(uniqVar)>=20) {
 		
 		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoAvg);
-		counters <- testContinuous2(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx)
-		counters <- incrementCounter(counters, "int.continuous")
+		testContinuous2(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
+		incrementCounter( "int.continuous")
 	}
 	else {
 		
@@ -65,25 +65,24 @@ testInteger <- function(opt, vl, counters, varName, varType, thisdata, phenoStar
 		numLevels = length(levels(phenoFactor))
 		if (numLevels<=1) {
 			cat("SKIP (number of levels: ",numLevels,")",sep="");
-		  counters <- incrementCounter(counters, "int.onevalue")
+		  incrementCounter("int.onevalue")
 		}
 		else if (numLevels==2) {
-		  counters <- incrementCounter(counters, "int.binary")
+		  incrementCounter("int.binary")
 
 			# binary
 			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
-			counters <- binaryLogisticRegression(opt, varName, counters, varType, thisdatanew, isExposure,phenoStartIdx);
+			binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure,phenoStartIdx);
 		}
 		else {
-		  counters <- incrementCounter(counters, "int.catord")
+		  incrementCounter("int.catord")
 			cat("3-20 values || ")
 
 			# we don't use equal sized bins just the original integers (that have >=10 examples) as categories
 			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
 
 			# treat as ordinal categorical
-			counters <- testCategoricalOrdered(opt, vl, counters, varName, varType, thisdatanew, phenoStartIdx);
+			testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
 		}
 	}
-	return(counters)
 }
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 593ebd6..4e492f2 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -34,7 +34,7 @@ if (length(args) == 0) {
 }
 
 ## load the files we write to and use
-counters <- initCounters()
+initCounters()
 if (opt$save==FALSE) {
 	initResultsFiles(opt)
 }
@@ -102,7 +102,7 @@ for (var in phenoVars) {
 		if (first==FALSE) {
 
 			thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-			counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata, phenoStartIdx)
+			testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
 		}
 		
 		first=FALSE;
@@ -121,13 +121,13 @@ for (var in phenoVars) {
 if (phenoIdx>0){
 	# last variable so test association
 	thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-	counters <- testAssociations(opt, vl, counters, currentVar, currentVarShort, thisdata, phenoStartIdx)
+	testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
 }
 
 sink()
 
 # save counters of each path in variable flow
-saveCounts(opt, counters)
+saveCounts(opt)
 
 if (opt$save == TRUE) {
 	write.table(pkg.env$derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);

From 2d3a28fe7db647ade20aae91ae3707983fd02027 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 16:46:16 -0400
Subject: [PATCH 10/27] Fix all global variables

---
 PHESANT/DESCRIPTION            | 2 +-
 PHESANT/NAMESPACE              | 1 +
 PHESANT/R/processArgs.r        | 8 ++++----
 PHESANT/R/testAssociations.r   | 2 +-
 PHESANT/R/testCatMultiple.r    | 2 +-
 PHESANT/R/testCatSingle.r      | 8 ++++----
 PHESANT/man/PHESANT-package.Rd | 2 +-
 PHESANT/phenomeScan.r          | 4 ++--
 8 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/PHESANT/DESCRIPTION b/PHESANT/DESCRIPTION
index 5677e5f..0c25a93 100644
--- a/PHESANT/DESCRIPTION
+++ b/PHESANT/DESCRIPTION
@@ -7,4 +7,4 @@ Author: Quanli Wang
 Maintainer: Quanli Wang <quanliwang20@hotmail.com>
 Description: This will replement the original PHESANT in a package and potentially with some optimizations.
 License: GPL (>= 3)
-Depends: MASS, lmtest, data.table, nnet
+Depends: MASS, lmtest, data.table, nnet, optparse
diff --git a/PHESANT/NAMESPACE b/PHESANT/NAMESPACE
index 7331d97..289fcf4 100644
--- a/PHESANT/NAMESPACE
+++ b/PHESANT/NAMESPACE
@@ -3,6 +3,7 @@ import(MASS)
 import(lmtest)
 import(data.table)
 import(nnet)
+import(optparse)
 importFrom("stats", "coef", "complete.cases", "confint", "end", "glm",
              "lm", "model.matrix", "na.omit", "pnorm", "qnorm",
              "quantile", "relevel")
diff --git a/PHESANT/R/processArgs.r b/PHESANT/R/processArgs.r
index 0413c78..bca4872 100644
--- a/PHESANT/R/processArgs.r
+++ b/PHESANT/R/processArgs.r
@@ -19,7 +19,7 @@
 
 # Parse the arguments input by the user
 # if argument 'test' is used then run test phenome scan
-processArgs <- function() {
+processArgs <- function(opt, opt_parser) {
     if (opt$test==TRUE) {
       	# set up the test phenome scan settings
       	datadir='../testWAS/data/';
@@ -36,7 +36,7 @@ processArgs <- function() {
       		opt$sensitivity <- FALSE;
       		opt$genetic <- TRUE;
       	}
-    	  opt <- processParts(opt, opt$partIdx, opt$numParts)
+    	  opt <- processParts(opt, opt_parser,opt$partIdx, opt$numParts)
     }
     else {
     	## check arguments are supplied correctly
@@ -81,7 +81,7 @@ processArgs <- function() {
     	else if (!file.exists(opt$resDir)) {
     		  stop(paste("results directory resDir=", opt$resDir, " does not exist", sep=""), call.=FALSE)
     	}
-    	opt <-processParts(opt, opt$partIdx, opt$numParts);
+    	opt <-processParts(opt, opt_parser,opt$partIdx, opt$numParts);
     }
     if (opt$save==TRUE) {
     	  print("Saving phenotypes to file. Tests of association will not run!")
@@ -90,7 +90,7 @@ processArgs <- function() {
 }
 
 # Parse the 'part' arguments and check they are valid
-processParts <- function(opt, pIdx, nParts) {
+processParts <- function(opt, opt_parser, pIdx, nParts) {
 
 	if (is.null(pIdx) && is.null(nParts)) {
                 opt$varTypeArg <- "all";
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index 7386217..8309b13 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -119,7 +119,7 @@ testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phe
 					# get number of cat mult values denoting trait of interest
 	                                numVals = getNumValuesCatMultExposure(vl, currentVarShort)
 					if (numVals>0) {
-		                                addToCounts(addToCounts, "start.exposure.catMulvalues", numVals)
+		                                addToCounts("start.exposure.catMulvalues", numVals)
 					}
 				}
 		        	testCategoricalMultiple(opt, vl, currentVarShort, "CAT-MUL", thisdata, phenoStartIdx);
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index b396c4c..8184ee3 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -122,7 +122,7 @@ restrictSample2 <- function(vl, varName,pheno, varIndicator,variableVal, userID)
 		# remove people who have no value for indicator variable
 		indName = paste("x",varIndicator,"_0_0",sep="");
 		cat("Indicator name ", indName, " || ", sep="");
-		indvarx = merge(userID, indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)		
+		indvarx = merge(userID, vl$indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)		
 		indicatorVar = indvarx[,indName]
 
 		# remove participants with NA value in this related field
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index fa69be2..2f8724c 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -55,7 +55,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
 	## this is used where there is no zero option e.g. field 100200
 	defaultValue = dataDataCode$default_value
 	defaultRelatedID = dataDataCode$default_related_field
-	pheno = setDefaultValue(pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
+	pheno = setDefaultValue(vl, pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
 
         ## all categories coded as <0 we assume are `missing' values
         pheno = replaceMissingCodes(pheno)
@@ -155,7 +155,7 @@ reorderOrderedCategory <- function(pheno,order) {
 ## sets default value for people with no value in pheno, but with a value in the
 ## field specified in the default_value_related_field column in the data coding info file.
 ## the default value is specified in the default_value column in the data coding info file.
-setDefaultValue <- function(pheno, defaultValue, defaultRelatedID, userID) {
+setDefaultValue <- function(vl, pheno, defaultValue, defaultRelatedID, userID) {
 
 
 	if (!is.na(defaultValue) && nchar(defaultValue)>0) {
@@ -164,8 +164,8 @@ setDefaultValue <- function(pheno, defaultValue, defaultRelatedID, userID) {
 	       	indName = paste("x",defaultRelatedID,"_0_0",sep="");
 
 	     	cat("Default related field: ", indName, " || ", sep="");
-		indicatorVar = indicatorFields[,indName]
-		indvarx = merge(userID, indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
+		indicatorVar = vl$indicatorFields[,indName]
+		indvarx = merge(userID, vl$indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
                 indicatorVar = indvarx[,indName]
 
 	    	# remove participants with NA value in this related field
diff --git a/PHESANT/man/PHESANT-package.Rd b/PHESANT/man/PHESANT-package.Rd
index 3625692..fd5d054 100644
--- a/PHESANT/man/PHESANT-package.Rd
+++ b/PHESANT/man/PHESANT-package.Rd
@@ -28,5 +28,5 @@ Literature or other references for background information
 To be done
 }
 \examples{
-
+#example here 
 }
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 4e492f2..3aa1ff5 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -30,7 +30,7 @@ if (length(args) == 0) {
 } else {
     opt_parser <- OptionParser(option_list=option_list)
     opt <- parse_args(opt_parser)
-    opt <- processArgs(opt)
+    opt <- processArgs(opt, opt_parser)
 }
 
 ## load the files we write to and use
@@ -44,7 +44,7 @@ vl <- initVariableLists(opt)
 d <- loadData(opt, vl)
 data=d$datax
 confounders=d$confounders
-indicatorFields=d$inds
+vl$indicatorFields=d$inds
 
 numPreceedingCols = ncol(confounders)-1+2; # confounders,minus id column, plus trait of interest and user ID
 phenoStartIdx = numPreceedingCols+1;

From b13aa1dd77037d82589f36e6518147d0829f4113 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 16:52:56 -0400
Subject: [PATCH 11/27] remove unwanted file

---
 PHESANT/.Rhistory | 215 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 215 insertions(+)

diff --git a/PHESANT/.Rhistory b/PHESANT/.Rhistory
index e69de29..0e542d1 100644
--- a/PHESANT/.Rhistory
+++ b/PHESANT/.Rhistory
@@ -0,0 +1,215 @@
+library(PHESANT)
+rm(list = ls())
+rm(list = ls())
+library(PHESANT)
+load(file = "opt.RData")
+getwd()
+load(file = "../opt.RData")
+save(opt, file = "opt_only.RData")
+rm(list = ls())
+library(PHESANT)
+load(file = "opt_only.RData")
+counters=initCounters();
+if (opt$save==FALSE) {
+initResultsFiles();
+}
+vl=initVariableLists();
+## load data
+d <- loadData()
+data=d$datax
+confounders=d$confounders
+indicatorFields=d$inds
+numPreceedingCols = ncol(confounders)-1+2; # confounders,minus id column, plus trait of interest and user ID
+phenoStartIdx = numPreceedingCols+1;
+print("LOADING DONE")
+phenoVars=colnames(data);
+# remove user id and age and sex columns
+phenoVars = phenoVars[-c(1,2)]; # first and second columns are the id and snpScore, respectively, as determined in loadData.r
+currentVar="";
+currentVarShort="";
+first=TRUE;
+if (opt$save == TRUE) {
+derivedBinary <- data.frame(userID=data$userID)
+derivedCont <- data.frame(userID=data$userID)
+derivedCatOrd <- data.frame(userID=data$userID)
+derivedCatUnord <- data.frame(userID=data$userID)
+resLogFile = paste(opt$resDir,"data-log-",opt$varTypeArg,".txt",sep="")
+sink(resLogFile)
+} else {
+modelFitLogFile = paste(opt$resDir,"modelfit-log-",opt$varTypeArg,".txt",sep="")
+sink(modelFitLogFile)
+sink()
+resLogFile = paste(opt$resDir,"results-log-",opt$varTypeArg,".txt",sep="")
+sink(resLogFile)
+}
+phenoIdx=0; # zero because then the idx is the position of the previous variable, i.e. the var in currentVar
+for (var in phenoVars) {
+sink()
+#	print(var)
+sink(resLogFile, append=TRUE)
+varx = gsub("^x", "", var);
+varx = gsub("_[0-9]+$", "", varx);
+varxShort = gsub("^x", "", var);
+varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
+## test this variable
+if (currentVar == varx) {
+thisCol = data[,eval(var)]
+thisCol = replaceNaN(thisCol)
+currentVarValues = cbind.data.frame(currentVarValues, thisCol);
+}
+else if (currentVarShort == varxShort) {
+## different time point of this var so skip
+}
+else {
+## new variable so run test for previous (we have collected all the columns now)
+if (first==FALSE) {
+thisdata = makeTestDataFrame(data, confounders, currentVarValues)
+testAssociations(currentVar, currentVarShort, thisdata)
+}
+first=FALSE;
+## new variable so set values
+currentVar = varx;
+currentVarShort = varxShort;
+currentVarValues = data[,eval(var)]
+currentVarValues = replaceNaN(currentVarValues)
+}
+phenoIdx = phenoIdx + 1;
+}
+if (phenoIdx>0){
+# last variable so test association
+thisdata = makeTestDataFrame(data, confounders, currentVarValues)
+testAssociations(currentVar, currentVarShort, thisdata)
+}
+sink()
+# save counters of each path in variable flow
+saveCounts()
+if (opt$save == TRUE) {
+write.table(derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+write.table(derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+write.table(derivedCatOrd, file=paste(opt$resDir,"data-catord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+write.table(derivedCatUnord, file=paste(opt$resDir,"data-catunord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+}
+warnings()
+library(PHESANT)
+rm(list = ls())
+library(PHESANT)
+library("optparse")
+option_list
+library(PHESANT)
+rm(list = ls())
+args <- commandArgs(T)
+args
+commandArgs
+? commandArgs
+len(args)
+lenth(args)
+size(args)
+class(args)
+args[1]
+dim(args)
+length(args)
+rm(list = ls())
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+args <- commandArgs(T)
+length(args) == 0
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/R/testAssociations.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+rm(list = ls())
+## load data
+d <- loadData(vl)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/R/loadData.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+opt$save
+View(data)
+library(PHESANT)
+library(PHESANT)
+pkg.env
+pkg.env$a
+pkg.env$derivedBinary <- data.frame(userID=data$userID)
+write.table(pkg.env$derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/R/testContinuous.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+pkg.env$modelFitLogFile
+source('~/Documents/GitHub/PHESANT/PHESANT/R/testCategoricalOrdered.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+pkg.env
+names(pkg.env)
+pkg.env$counters
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/R/storeNewVar.r')
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+? print_help
+vl
+vl$indicatorFields=d$inds
+vl
+names(vl)
+library(PHESANT)
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
+source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')

From f947f6f882d2ac57fdb104b6f6bd99dc44852de1 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 16:55:28 -0400
Subject: [PATCH 12/27] remove histtory file

---
 .gitignore        |   2 -
 PHESANT/.Rhistory | 215 ----------------------------------------------
 2 files changed, 217 deletions(-)
 delete mode 100644 PHESANT/.Rhistory

diff --git a/.gitignore b/.gitignore
index 5fc7a4a..85c0afc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,3 @@
 
 .DS_Store
 .Rproj.user
-PHESANT/.Rhistory
-PHESANT/.Rhistory
diff --git a/PHESANT/.Rhistory b/PHESANT/.Rhistory
deleted file mode 100644
index 0e542d1..0000000
--- a/PHESANT/.Rhistory
+++ /dev/null
@@ -1,215 +0,0 @@
-library(PHESANT)
-rm(list = ls())
-rm(list = ls())
-library(PHESANT)
-load(file = "opt.RData")
-getwd()
-load(file = "../opt.RData")
-save(opt, file = "opt_only.RData")
-rm(list = ls())
-library(PHESANT)
-load(file = "opt_only.RData")
-counters=initCounters();
-if (opt$save==FALSE) {
-initResultsFiles();
-}
-vl=initVariableLists();
-## load data
-d <- loadData()
-data=d$datax
-confounders=d$confounders
-indicatorFields=d$inds
-numPreceedingCols = ncol(confounders)-1+2; # confounders,minus id column, plus trait of interest and user ID
-phenoStartIdx = numPreceedingCols+1;
-print("LOADING DONE")
-phenoVars=colnames(data);
-# remove user id and age and sex columns
-phenoVars = phenoVars[-c(1,2)]; # first and second columns are the id and snpScore, respectively, as determined in loadData.r
-currentVar="";
-currentVarShort="";
-first=TRUE;
-if (opt$save == TRUE) {
-derivedBinary <- data.frame(userID=data$userID)
-derivedCont <- data.frame(userID=data$userID)
-derivedCatOrd <- data.frame(userID=data$userID)
-derivedCatUnord <- data.frame(userID=data$userID)
-resLogFile = paste(opt$resDir,"data-log-",opt$varTypeArg,".txt",sep="")
-sink(resLogFile)
-} else {
-modelFitLogFile = paste(opt$resDir,"modelfit-log-",opt$varTypeArg,".txt",sep="")
-sink(modelFitLogFile)
-sink()
-resLogFile = paste(opt$resDir,"results-log-",opt$varTypeArg,".txt",sep="")
-sink(resLogFile)
-}
-phenoIdx=0; # zero because then the idx is the position of the previous variable, i.e. the var in currentVar
-for (var in phenoVars) {
-sink()
-#	print(var)
-sink(resLogFile, append=TRUE)
-varx = gsub("^x", "", var);
-varx = gsub("_[0-9]+$", "", varx);
-varxShort = gsub("^x", "", var);
-varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
-## test this variable
-if (currentVar == varx) {
-thisCol = data[,eval(var)]
-thisCol = replaceNaN(thisCol)
-currentVarValues = cbind.data.frame(currentVarValues, thisCol);
-}
-else if (currentVarShort == varxShort) {
-## different time point of this var so skip
-}
-else {
-## new variable so run test for previous (we have collected all the columns now)
-if (first==FALSE) {
-thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-testAssociations(currentVar, currentVarShort, thisdata)
-}
-first=FALSE;
-## new variable so set values
-currentVar = varx;
-currentVarShort = varxShort;
-currentVarValues = data[,eval(var)]
-currentVarValues = replaceNaN(currentVarValues)
-}
-phenoIdx = phenoIdx + 1;
-}
-if (phenoIdx>0){
-# last variable so test association
-thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-testAssociations(currentVar, currentVarShort, thisdata)
-}
-sink()
-# save counters of each path in variable flow
-saveCounts()
-if (opt$save == TRUE) {
-write.table(derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-write.table(derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-write.table(derivedCatOrd, file=paste(opt$resDir,"data-catord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-write.table(derivedCatUnord, file=paste(opt$resDir,"data-catunord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-}
-warnings()
-library(PHESANT)
-rm(list = ls())
-library(PHESANT)
-library("optparse")
-option_list
-library(PHESANT)
-rm(list = ls())
-args <- commandArgs(T)
-args
-commandArgs
-? commandArgs
-len(args)
-lenth(args)
-size(args)
-class(args)
-args[1]
-dim(args)
-length(args)
-rm(list = ls())
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-args <- commandArgs(T)
-length(args) == 0
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/R/testAssociations.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-rm(list = ls())
-## load data
-d <- loadData(vl)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/R/loadData.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-opt$save
-View(data)
-library(PHESANT)
-library(PHESANT)
-pkg.env
-pkg.env$a
-pkg.env$derivedBinary <- data.frame(userID=data$userID)
-write.table(pkg.env$derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/R/testContinuous.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-pkg.env$modelFitLogFile
-source('~/Documents/GitHub/PHESANT/PHESANT/R/testCategoricalOrdered.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-pkg.env
-names(pkg.env)
-pkg.env$counters
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/R/storeNewVar.r')
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-? print_help
-vl
-vl$indicatorFields=d$inds
-vl
-names(vl)
-library(PHESANT)
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')
-source('~/Documents/GitHub/PHESANT/PHESANT/phenomeScan.r')

From ef7fcb73812922b636c1a20d4c66b20e0cc7aecc Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 16:58:35 -0400
Subject: [PATCH 13/27] ignore Rhistory

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 85c0afc..e97dfd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 
 .DS_Store
 .Rproj.user
+PHESANT/.Rhistory

From e5ddaad45382039fd0d12509a0ea9c8dd86f9686 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 17:00:55 -0400
Subject: [PATCH 14/27] remove Rhisitory at the top level

---
 .Rhistory | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 .Rhistory

diff --git a/.Rhistory b/.Rhistory
deleted file mode 100644
index 591e1ef..0000000
--- a/.Rhistory
+++ /dev/null
@@ -1,2 +0,0 @@
-package.skeleton(name = "PHESANT")
-package.skeleton(name = "PHESANT")

From 174c5d4469bb07bb811627d979b4a357722f5f12 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Thu, 14 Mar 2019 17:45:41 -0400
Subject: [PATCH 15/27] Code fefactor

---
 PHESANT/R/{replaceNaN.r => cleanData.R} | 39 +++++++++----------------
 PHESANT/R/replaceMissingCodes.r         | 37 -----------------------
 PHESANT/R/testCatSingle.r               |  8 ++++-
 PHESANT/R/testContinuous.r              | 12 ++++----
 PHESANT/man/irnt.Rd                     | 24 +++++++++++++++
 5 files changed, 51 insertions(+), 69 deletions(-)
 rename PHESANT/R/{replaceNaN.r => cleanData.R} (73%)
 delete mode 100644 PHESANT/R/replaceMissingCodes.r
 create mode 100644 PHESANT/man/irnt.Rd

diff --git a/PHESANT/R/replaceNaN.r b/PHESANT/R/cleanData.R
similarity index 73%
rename from PHESANT/R/replaceNaN.r
rename to PHESANT/R/cleanData.R
index dc3dfd4..5848f2d 100644
--- a/PHESANT/R/replaceNaN.r
+++ b/PHESANT/R/cleanData.R
@@ -19,29 +19,18 @@
 
 # Replace NaN and empty values with NA in pheno
 replaceNaN <- function(pheno) {
-
-	if (is.factor(pheno)) {
-		
-		phenoReplaced = pheno
-
-		nanStr  = which(phenoReplaced=="NaN")
-        	phenoReplaced[nanStr]=NA 
-		
-		emptyx  = which(phenoReplaced=="")
-       		phenoReplaced[emptyx]=NA
-
-	}
-	else {
-
-		phenoReplaced = pheno
-		nanx  = which(is.nan(phenoReplaced))
-		phenoReplaced[nanx] = NA;
-
-		emptyStr  = which(phenoReplaced=="")	
-		phenoReplaced[emptyStr] = NA;
-	    
-	}
-
-	return(phenoReplaced)
-
+  	if (is.factor(pheno)) {
+    		phenoReplaced <- pheno
+    		nanStr  <- which(phenoReplaced=="NaN")
+        phenoReplaced[nanStr] <- NA 
+    		emptyx  <- which(phenoReplaced=="")
+        phenoReplaced[emptyx] <- NA
+  	} else {
+    	  phenoReplaced <- pheno
+    		nanx  <-  which(is.nan(phenoReplaced))
+    		phenoReplaced[nanx] <- NA
+    		emptyStr  <- which(phenoReplaced=="")	
+    		phenoReplaced[emptyStr] <- NA
+  	}
+  	return(phenoReplaced)
 }
diff --git a/PHESANT/R/replaceMissingCodes.r b/PHESANT/R/replaceMissingCodes.r
deleted file mode 100644
index f792374..0000000
--- a/PHESANT/R/replaceMissingCodes.r
+++ /dev/null
@@ -1,37 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# Replace negative values with NA as these are assumed to be missing
-replaceMissingCodes <- function(pheno) {
-
-	phenoReplaced = pheno;
-
-	uniqVar = unique(na.omit(phenoReplaced))
-
-	# variable values <0 are `missing' codes
-        for (u in uniqVar) {
-		if (u<0) {
-		    	idxU = which(phenoReplaced==u)
-                       	phenoReplaced[idxU]=NA
-                }
-        }
-
-	return(phenoReplaced)
-
-}
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index 2f8724c..bd32e4d 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -58,7 +58,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
 	pheno = setDefaultValue(vl, pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
 
         ## all categories coded as <0 we assume are `missing' values
-        pheno = replaceMissingCodes(pheno)
+        pheno = .replaceMissingCodes(pheno)
 
 	## remove categories if < 10 examples
 	pheno = testNumExamples(pheno)
@@ -190,3 +190,9 @@ setDefaultValue <- function(vl, pheno, defaultValue, defaultRelatedID, userID) {
 }
 
 
+# Replace negative values with NA as these are assumed to be missing
+.replaceMissingCodes <- function(pheno) {
+  phenoReplaced <- pheno
+  phenoReplaced[phenoReplaced <0 ] <- NA
+  return(phenoReplaced)
+}
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 0f0b7aa..0f5171d 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -240,11 +240,11 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 	}
 }
 
-irnt <- function(pheno) {
-	set.seed(1234)
-	numPhenos = length(which(!is.na(pheno)))
-	quantilePheno = (rank(pheno, na.last="keep", ties.method="random")-0.5)/numPhenos
-	phenoIRNT = qnorm(quantilePheno)	
-	return(phenoIRNT);
+irnt <- function(pheno, seed = 1234) {
+  	set.seed(seed)
+  	numPhenos <- length(which(!is.na(pheno)))
+  	quantilePheno <- (rank(pheno, na.last="keep", ties.method="random")-0.5)/numPhenos
+  	phenoIRNT <- qnorm(quantilePheno)	
+  	return(phenoIRNT)
 }
 
diff --git a/PHESANT/man/irnt.Rd b/PHESANT/man/irnt.Rd
new file mode 100644
index 0000000..ed0323d
--- /dev/null
+++ b/PHESANT/man/irnt.Rd
@@ -0,0 +1,24 @@
+\name{irnt}
+\alias{irnt}
+\title{
+ Inverse Rank Normal Transformation
+}
+\description{
+  Perform an inverse rank normal transformation for input phenotypes
+}
+\usage{
+irnt(pheno,seed)
+}
+\arguments{
+  \item{pheno}{
+  A vector of numeric vlues with NA allowed.
+}
+\item{seed}{
+  Random seed for tie breakers, default to 1234.
+}
+}
+
+\value{
+  A vector of the transformed values.
+}
+

From 5c2725b5d70b38a4971ba9d447534948f3306b2f Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 09:38:56 -0400
Subject: [PATCH 16/27] Code Refactor: reorgnize files/functions based on
 objects

---
 PHESANT/R/binaryLogisticRegression.r          |   2 +-
 PHESANT/R/cleanData.R                         |  66 +++++++++-
 PHESANT/R/confounders.r                       | 121 ++++++++++++++++++
 PHESANT/R/equalSizedBins.r                    | 118 -----------------
 PHESANT/R/{getIsExposure.r => exposure.r}     |   0
 ...oadIndicatorFields.r => indicatorFields.r} |   4 +-
 PHESANT/R/{initFunctions.r => init.r}         |  37 ++++++
 PHESANT/R/loadConfounders.r                   |   4 +-
 PHESANT/R/loadData.r                          |  41 +-----
 PHESANT/R/makeTestDataFrame.r                 |  30 -----
 PHESANT/R/{loadPhenotypes.r => phenotypes.r}  |  56 ++++++++
 PHESANT/R/reassignValue.r                     |  73 -----------
 PHESANT/R/run.r                               |  68 ++++++++++
 PHESANT/R/storeNewVar.r                       |  14 --
 PHESANT/R/testCatMultiple.r                   |   2 +-
 PHESANT/R/testCatSingle.r                     |   2 +-
 PHESANT/R/testCategoricalOrdered.r            |   2 +-
 PHESANT/R/testCategoricalUnordered.r          |   2 +-
 PHESANT/R/testContinuous.r                    |  92 ++++++++++++-
 PHESANT/R/testInteger.r                       |   2 +-
 ...oadTraitOfInterest.r => traitOfInterest.r} |  38 ++++++
 PHESANT/R/{processArgs.r => utils.r}          |  41 +++---
 PHESANT/R/validatePhenotypeInput.r            |  85 ------------
 PHESANT/R/validateTraitInput.r                |  69 ----------
 PHESANT/phenomeScan.r                         | 109 +---------------
 .../R/unittests => unittests}/run-tests.sh    |   0
 .../test_equalSizedBins.r                     |   0
 .../test_reassignValue.r                      |   0
 .../test_testCatMultiple.r                    |   0
 .../test_testCatSingle.r                      |   0
 30 files changed, 514 insertions(+), 564 deletions(-)
 create mode 100644 PHESANT/R/confounders.r
 delete mode 100644 PHESANT/R/equalSizedBins.r
 rename PHESANT/R/{getIsExposure.r => exposure.r} (100%)
 rename PHESANT/R/{loadIndicatorFields.r => indicatorFields.r} (97%)
 rename PHESANT/R/{initFunctions.r => init.r} (65%)
 delete mode 100644 PHESANT/R/makeTestDataFrame.r
 rename PHESANT/R/{loadPhenotypes.r => phenotypes.r} (69%)
 delete mode 100644 PHESANT/R/reassignValue.r
 create mode 100644 PHESANT/R/run.r
 delete mode 100644 PHESANT/R/storeNewVar.r
 rename PHESANT/R/{loadTraitOfInterest.r => traitOfInterest.r} (58%)
 rename PHESANT/R/{processArgs.r => utils.r} (77%)
 delete mode 100644 PHESANT/R/validatePhenotypeInput.r
 delete mode 100644 PHESANT/R/validateTraitInput.r
 rename {PHESANT/R/unittests => unittests}/run-tests.sh (100%)
 rename {PHESANT/R/unittests => unittests}/test_equalSizedBins.r (100%)
 rename {PHESANT/R/unittests => unittests}/test_reassignValue.r (100%)
 rename {PHESANT/R/unittests => unittests}/test_testCatMultiple.r (100%)
 rename {PHESANT/R/unittests => unittests}/test_testCatSingle.r (100%)

diff --git a/PHESANT/R/binaryLogisticRegression.r b/PHESANT/R/binaryLogisticRegression.r
index 666b8b5..1112377 100644
--- a/PHESANT/R/binaryLogisticRegression.r
+++ b/PHESANT/R/binaryLogisticRegression.r
@@ -52,7 +52,7 @@ binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure
 
 		if (opt$save == TRUE) {
 			# add pheno to dataframe
-			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'bin')
+			.storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'bin')
 			cat("SUCCESS results-logistic-binary ");			
 			incrementCounter("success.binary")
 		}
diff --git a/PHESANT/R/cleanData.R b/PHESANT/R/cleanData.R
index 5848f2d..7f27c4d 100644
--- a/PHESANT/R/cleanData.R
+++ b/PHESANT/R/cleanData.R
@@ -16,9 +16,73 @@
 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
+# Reassigns values as specified in data coding info file
+reassignValue <- function(vl, pheno, varName) {
+  	# get data code info - whether this data code is ordinal or not and any reordering and resassignments
+    dataPheno <- vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),]
+    dataCode <- dataPheno$DATA_CODING
+  
+  	# not all variables will have a data code info row
+    dataCodeRow <- which(vl$dataCodeInfo$dataCode==dataCode)
+    if (length(dataCodeRow)==0) { #do nothing 
+    } else if (length(dataCodeRow)==1) {
+          dataDataCode <- vl$dataCodeInfo[dataCodeRow,]
+          reassignments <- as.character(dataDataCode$reassignments)
+          
+          # Reassigns values in pheno, as specified in resassignments argument
+          # can be NA if row not included in data coding info file
+          if (!is.na(reassignments) && nchar(reassignments)>0) {
+                reassignParts <- unlist(strsplit(reassignments,"\\|"))
+                cat(paste("reassignments: ", reassignments, " || ", sep=""))
+                
+                # do each reassignment
+                for(i in reassignParts) {
+                      reassignParts <- unlist(strsplit(i,"="))
+                      
+                      # matrix version
+                      idx <- which(pheno==reassignParts[1],arr.ind=TRUE)
+                      pheno[idx] <- strtoi(reassignParts[2])
+                }
+                
+                ## see if type has changed (this happens for field 216 (X changed to -1))
+                ## as.numeric will set non numeric to NA so we know if it's ok to do this by seeing if there are extra NA's after the conversion
+                pNum = as.numeric(unlist(pheno))
+                isNum = length(which(is.na(pheno), arr.ind=TRUE))==length(which(is.na(pNum), arr.ind=TRUE))
+                if (isNum) {
+                      pheno = pNum
+                }
+          }
+    } else {
+          cat("WARNING: >1 ROWS IN DATA CODE INFO FILE || ")
+          
+    }
+    return(pheno)
+}
+
+fixOddFieldsToCatMul <- function(vl, data) {
+    # examples are variables: 40006, 40011, 40012, 40013
+    # get all variables that need their instances changing to arrays
+    dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
+    for (i in 1:nrow(dataPheno)) {
+      varID = dataPheno[i,]$FieldID;		
+      varidString = paste("x",varID,"_", sep="");			
+      
+      # get all columns in data dataframe for this variable	
+      colIdxs = which(grepl(varidString,names(data)));
+      
+      # change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
+      count = 0;
+      for (j in colIdxs) {	
+        colnames(data)[j] <- paste(varidString, "0_", count, sep="")
+        count = count + 1;
+      }				
+    }
+    return(data)
+}
+
 
 # Replace NaN and empty values with NA in pheno
-replaceNaN <- function(pheno) {
+.replaceNaN <- function(pheno) {
   	if (is.factor(pheno)) {
     		phenoReplaced <- pheno
     		nanStr  <- which(phenoReplaced=="NaN")
diff --git a/PHESANT/R/confounders.r b/PHESANT/R/confounders.r
new file mode 100644
index 0000000..9fc1c10
--- /dev/null
+++ b/PHESANT/R/confounders.r
@@ -0,0 +1,121 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
+# documentation files (the "Software"), to deal in the Software without restriction, including without 
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions 
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+# DEALINGS IN THE SOFTWARE.
+
+##
+## loads confounder variables from phenotype file
+
+loadConfounders <- function(opt, phenotypes) {
+    if (opt$save==TRUE) { 
+	      # saving not running tests so we add a fake confounder
+	      numRows = nrow(phenotypes)
+        data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
+        colnames(data)[1] <- "userID"
+        colnames(data)[2] <- "conf1"
+        return(data)
+    } else {
+      	if (!is.null(opt$confounderfile)) {
+      		    print("Loading confounders from confounder file ...")
+      		    confs = fread(opt$confounderfile, sep=',', header=TRUE, data.table=FALSE)
+      	      confs = lapply(confs,function(x) type.convert(as.character(x)))
+      	      confs = as.data.frame(confs)
+      
+      		    ## find userID column and change name to userID
+      		    idx = which(colnames(confs) == opt$userId)
+      		    confs = confs[,c(opt$userID,setdiff(colnames(confs),opt$userID))]
+      		    colnames(confs)[1] <- "userID"
+    	  } else {
+    	        print("Loading confounders from phenotypes file ...")
+              confNames = .getConfounderNames(opt)
+              #####
+    	        ##### extract confounders from data file
+              confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
+    	        confs = lapply(confs,function(x) type.convert(as.character(x)))
+    	        confs = as.data.frame(confs)
+    
+              #####
+    	        ##### process genetic batch to create genetic chip variable
+              if (opt$genetic == TRUE) {
+                    genoBatch = confs[,"x22000_0_0"]
+                    # chip comes from batch field 22000
+                    genoChip = rep.int(NA,nrow(confs));
+                    idxForVar = which(genoBatch<0);
+                    genoChip[idxForVar] = 0;
+                    idxForVar = which(genoBatch>=0 & genoBatch<2000);
+                    genoChip[idxForVar] = 1;
+    
+    		            # remove geno batch from and add geno chip to confounders
+                    confs = confs[,-which(names(confs) == "x22000_0_0")]
+                    confs = cbind.data.frame(confs, genoChip)
+              }
+
+	            #####
+            	##### Convert assessment centre to an indicator variable
+	            if (opt$sensitivity==TRUE) {
+		                confs$x54_0_0 = as.factor(confs$x54_0_0)
+		                assCentre = model.matrix(~confs$x54_0_0)
+		                assCentre = assCentre[,2:ncol(assCentre)]
+		                confs = cbind(confs, assCentre)
+		                confs$x54_0_0 = NULL
+	            }
+	            colnames(confs)[1] <- "userID"
+
+	      }
+
+	      # remove any rows with no values
+	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))
+	      confsComp = complete.cases(confs)
+	      print(paste("Number of INCOMPLETE rows removed from confounder data: ", length(which(confsComp==FALSE)),sep=""))
+	      confs = confs[confsComp==TRUE,]
+	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))	
+
+	      print("Confounder columns:")
+	      print(names(confs))
+
+	      return(confs)
+    }
+}
+
+.getConfounderNames <- function(opt) {
+    #####
+	  ##### first get vector of confounder names
+    # age and sex
+    confNames = c(opt$userId, "x21022_0_0", "x31_0_0")
+    
+    # if genetic trait of interest then adjust for genotype chip
+    # and also let user choose sensitivity analysis that also adjusts for top 10 genetic principal components and assessment centre
+    if (opt$genetic == TRUE) {
+          confNames = append(confNames, "x22000_0_0")
+          if (opt$sensitivity==TRUE) {
+                confNames = append(confNames, c("x22009_0_1", "x22009_0_2", "x22009_0_3", "x22009_0_4", "x22009_0_5", "x22009_0_6", "x22009_0_7", "x22009_0_8", "x22009_0_9", "x22009_0_10", "x54_0_0"))
+                print("Adjusting for age, sex, genotype chip, top 10 genetic principal components and assessment centre")
+          } else {
+                print("Adjusting for age, sex and genotype chip")
+          }
+    } else {
+          # non genetic trait of interest, then sensitivity adjusts for assessment center
+          if (opt$sensitivity==TRUE) {
+                confNames = append(confNames, "x54_0_0")
+                print("Adjusting for age, sex and assessment centre")
+          } else {
+                print("Adjusting for age and sex")
+          }
+    }
+	  return(confNames)
+}
+
+
diff --git a/PHESANT/R/equalSizedBins.r b/PHESANT/R/equalSizedBins.r
deleted file mode 100644
index 15570cd..0000000
--- a/PHESANT/R/equalSizedBins.r
+++ /dev/null
@@ -1,118 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# splits the pheno into 3 bins with the cut points between values rather at the exact value for the quantile
-equalSizedBins <- function(phenoAvg) {
-
-	## equal sized bins 
-        q = quantile(phenoAvg, probs=c(1/3,2/3), na.rm=TRUE)
-
-	minX = min(phenoAvg, na.rm=TRUE)
-	maxX = max(phenoAvg, na.rm=TRUE)
-
-	phenoBinned = phenoAvg;
-	if (q[1]==minX) {
-		# edge case - quantile value is lowest value
-
-		# assign min value as cat1
-		idx1 = which(phenoAvg==q[1]);
-		phenoBinned[idx1] = 0;
-
-		# divide remaining values into cat2 and cat3
-		phenoAvgRemaining = phenoAvg[which(phenoAvg!=q[1])];
-		qx = quantile(phenoAvgRemaining, probs=c(0.5), na.rm=TRUE)
-		minXX = min(phenoAvgRemaining, na.rm=TRUE)
-		maxXX =	max(phenoAvgRemaining, na.rm=TRUE)
-
-		if (qx[1]==minXX) {
-			# edge case again - quantile value is lowest value
-			idx2 = which(phenoAvg==qx[1]);
-			idx3 = which(phenoAvg>qx[1]);
-		}
-		else if (qx[1]==maxXX) {
-			# edge case again - quantile value is max value
-			idx2 = which(phenoAvg<qx[1] & phenoAvg>q[1]);
-			idx3 = which(phenoAvg==qx[1]);
-		}
-		else {
-			idx2 = which(phenoAvg<qx[1] & phenoAvg>q[1]);
-			idx3 = which(phenoAvg>=qx[1]);
-		}
-		phenoBinned[idx2] = 1;
-                phenoBinned[idx3] = 2;
-	}
-	else if (q[2]==maxX) {
-		# edge case - quantile value is highest value
-
-		# assign max value as cat3
-		idx3 = which(phenoAvg==q[2]);
-		phenoBinned[idx3] = 2;
-
-		# divide remaining values into cat1 and cat2
-		phenoAvgRemaining = phenoAvg[which(phenoAvg!=q[2])];
-                qx = quantile(phenoAvgRemaining, probs=c(0.5), na.rm=TRUE)
-		minXX = min(phenoAvgRemaining, na.rm=TRUE)
-                maxXX = max(phenoAvgRemaining, na.rm=TRUE)
-
-		if (qx[1]==minXX) {
-			# edge case again - quantile value is lowest value
-                        idx1 = which(phenoAvg==qx[1]);
-                        idx2 = which(phenoAvg>qx[1] & phenoAvg<q[2]);
-                }
-                else if	(qx[1]==maxXX) {
-			# edge case again - quantile value is max value
-                        idx1 = which(phenoAvg<qx[1]);
-                        idx2 = which(phenoAvg==qx[1]);
-                }
-                else {
-	                idx1 = which(phenoAvg<qx[1]);  
-			idx2 = which(phenoAvg>=qx[1] & phenoAvg<q[2]);
-		}
-
-                phenoBinned[idx1] = 0;
-                phenoBinned[idx2] = 1;
-	}
-       else if (q[1] == q[2]) {
-		# both quantiles correspond to the same value so set 
-		# cat1 as < this value, cat2 as exactly this value and
-		# cat3 as > this value
-              	phenoBinned = phenoAvg;
-        	idx1 = which(phenoAvg<q[1]);
-                idx2 = which(phenoAvg==q[2]);
-                idx3 = which(phenoAvg>q[2]);
-                phenoBinned[idx1] = 0;
-                phenoBinned[idx2] = 1;
-                phenoBinned[idx3] = 2;
-       	}
-        else {
-		# standard case - split the data into three roughly equal parts where
-		# cat1<q1, cat2 between q1 and q2, and cat3>=q2
-         	phenoBinned = phenoAvg;
-                idx1 = which(phenoAvg<q[1]);
-                idx2 = which(phenoAvg>=q[1] & phenoAvg<q[2]);
-               	idx3 = which(phenoAvg>=q[2]);
-                phenoBinned[idx1] = 0;
-                phenoBinned[idx2] = 1;
-                phenoBinned[idx3] = 2;
-	}
-
-	cat("cat N: ", length(idx1),", ",length(idx2),", ",length(idx3), " || ", sep="");
-	
-	return(phenoBinned);
-}
-
diff --git a/PHESANT/R/getIsExposure.r b/PHESANT/R/exposure.r
similarity index 100%
rename from PHESANT/R/getIsExposure.r
rename to PHESANT/R/exposure.r
diff --git a/PHESANT/R/loadIndicatorFields.r b/PHESANT/R/indicatorFields.r
similarity index 97%
rename from PHESANT/R/loadIndicatorFields.r
rename to PHESANT/R/indicatorFields.r
index 1a815cd..04c860a 100644
--- a/PHESANT/R/loadIndicatorFields.r
+++ b/PHESANT/R/indicatorFields.r
@@ -27,7 +27,7 @@ loadIndicatorFields <- function(opt, vl, phenosToTest) {
   	indVars = c(opt$userId)
   
   	## add indicator variables to pheno data
-  	indVars = addIndicatorVariables(vl, indVars, phenosToTest, phenoVarsAll)
+  	indVars = .addIndicatorVariables(vl, indVars, phenosToTest, phenoVarsAll)
   	if (length(indVars)>1) {
     		# not just user id column
     		print("Loading required related variable(s):")
@@ -44,7 +44,7 @@ loadIndicatorFields <- function(opt, vl, phenosToTest) {
 }
 
 
-addIndicatorVariables <- function(vl, indVars, phenosToTest, phenoVarsAll) {
+.addIndicatorVariables <- function(vl, indVars, phenosToTest, phenoVarsAll) {
 
   	##### default value related fields for data codes
   	# get list of all indicator variables from outcome info file
diff --git a/PHESANT/R/initFunctions.r b/PHESANT/R/init.r
similarity index 65%
rename from PHESANT/R/initFunctions.r
rename to PHESANT/R/init.r
index 7e9f996..abcce78 100644
--- a/PHESANT/R/initFunctions.r
+++ b/PHESANT/R/init.r
@@ -17,6 +17,43 @@
 # DEALINGS IN THE SOFTWARE.
 
 
+initData <-function(opt) {
+    ## load the files we write to and use
+    initCounters()
+    if (opt$save==FALSE) {
+      initResultsFiles(opt)
+    }
+    vl <- initVariableLists(opt)
+    
+    ## load data
+    d <- loadData(opt, vl)
+    data <- d$datax
+    confounders <- d$confounders
+    vl$indicatorFields <- d$inds
+    
+    numPreceedingCols <- ncol(confounders)-1+2 
+    phenoStartIdx <- numPreceedingCols + 1; # confounders,minus id column, plus trait of interest and user ID
+    phenoVars <- colnames(data)[-c(1,2)] # first and second columns are the id and snpScore, respectively
+    
+    return(list(data = data, vl = vl, confounders = confounders, phenoStartIdx = phenoStartIdx, phenoVars = phenoVars))
+}
+
+initEnv <- function(opt, data) {
+    if (opt$save == TRUE) {
+        pkg.env$derivedBinary <- data.frame(userID=data$userID)
+        pkg.env$derivedCont <- data.frame(userID=data$userID)
+        pkg.env$derivedCatOrd <- data.frame(userID=data$userID)
+        pkg.env$derivedCatUnord <- data.frame(userID=data$userID)
+        pkg.env$resLogFile = paste(opt$resDir,"data-log-",opt$varTypeArg,".txt",sep="")
+        sink(pkg.env$resLogFile)
+    } else {
+        pkg.env$modelFitLogFile = paste(opt$resDir,"modelfit-log-",opt$varTypeArg,".txt",sep="")
+        sink(pkg.env$modelFitLogFile)
+        sink()
+        pkg.env$resLogFile = paste(opt$resDir,"results-log-",opt$varTypeArg,".txt",sep="")
+        sink(pkg.env$resLogFile)
+    }
+}
 # create new results files and headers
 initResultsFiles <- function(opt) {
 
diff --git a/PHESANT/R/loadConfounders.r b/PHESANT/R/loadConfounders.r
index b2cdda2..9fc1c10 100644
--- a/PHESANT/R/loadConfounders.r
+++ b/PHESANT/R/loadConfounders.r
@@ -40,7 +40,7 @@ loadConfounders <- function(opt, phenotypes) {
       		    colnames(confs)[1] <- "userID"
     	  } else {
     	        print("Loading confounders from phenotypes file ...")
-              confNames = getConfounderNames(opt)
+              confNames = .getConfounderNames(opt)
               #####
     	        ##### extract confounders from data file
               confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
@@ -90,7 +90,7 @@ loadConfounders <- function(opt, phenotypes) {
     }
 }
 
-getConfounderNames <- function(opt) {
+.getConfounderNames <- function(opt) {
     #####
 	  ##### first get vector of confounder names
     # age and sex
diff --git a/PHESANT/R/loadData.r b/PHESANT/R/loadData.r
index e73ca27..405af81 100644
--- a/PHESANT/R/loadData.r
+++ b/PHESANT/R/loadData.r
@@ -23,22 +23,12 @@
 # returns an object holding these two data frames
 loadData <- function(opt, vl) {
   
-  	##### validating data
-  	## check phenotype file headers
-  	validatePhenotypeInput(opt)
+  	validatePhenotypeInputHeader(opt)
+  	validateTraitInputHeader(opt)
   
-  	## check trait of interest file headers
-  	validateTraitInput(opt)
-  
-  	##### load data
-  	## load phenotype
   	print("Loading phenotypes ...")
   	phenotype = loadPhenotypes(opt)
-  
-  	## load trait of interest
     toi <- loadTraitOfInterest(opt, phenotype)
-  
-    ## load confounders
     conf <- loadConfounders(opt, phenotype)
   
   	## add trait of interest to phenotype data frame and remove rows with no trait of interest
@@ -47,13 +37,11 @@ loadData <- function(opt, vl) {
   	
     ## remove any rows with no trait of interest
     idxNotEmpty = which(!is.na(phenotype[,"geno"]))
-  
   	if (opt$save == TRUE) {
   	      print(paste("Phenotype file has ", nrow(phenotype), " rows.", sep=""))
   	} else {
   		    print(paste("Phenotype file has ", nrow(phenotype), " rows with ", length(idxNotEmpty), " not NA for trait of interest (",opt$traitofinterest,").", sep=""))
   	}
-  
     phenotype = phenotype[idxNotEmpty,]
   
   	# match ids from not empty phenotypes list
@@ -61,9 +49,9 @@ loadData <- function(opt, vl) {
     conf = conf[confsIdx,]
   
   	if (nrow(phenotype)==0) {
-  	      stop("No examples with row in both trait of interest and phenotype files", call.=FALSE)
+  	    stop("No examples with row in both trait of interest and phenotype files", call.=FALSE)
   	} else {
-  	     print(paste("Phenotype and trait of interest data files merged, with", nrow(phenotype),"examples"))
+  	    print(paste("Phenotype and trait of interest data files merged, with", nrow(phenotype),"examples"))
   	}
   
   	# some fields are fixed that have a field type as cat single but we want to treat them like cat mult
@@ -73,24 +61,3 @@ loadData <- function(opt, vl) {
   	return(d)
 }
 
-fixOddFieldsToCatMul <- function(vl, data) {
-  # examples are variables: 40006, 40011, 40012, 40013
-  # get all variables that need their instances changing to arrays
-  dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
-  for (i in 1:nrow(dataPheno)) {
-    varID = dataPheno[i,]$FieldID;		
-    varidString = paste("x",varID,"_", sep="");			
-    
-    # get all columns in data dataframe for this variable	
-    colIdxs = which(grepl(varidString,names(data)));
-    
-    # change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
-    count = 0;
-    for (j in colIdxs) {	
-      colnames(data)[j] <- paste(varidString, "0_", count, sep="")
-      count = count + 1;
-    }				
-  }
-  return(data)
-}
-
diff --git a/PHESANT/R/makeTestDataFrame.r b/PHESANT/R/makeTestDataFrame.r
deleted file mode 100644
index 39963b2..0000000
--- a/PHESANT/R/makeTestDataFrame.r
+++ /dev/null
@@ -1,30 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-## makes a smaller data frame containing the data for a particular test
-makeTestDataFrame <- function(datax, confounders, currentVarValues) {
-
-			thisdata = datax[,c("geno", "userID")]
-			thisdata = merge(thisdata, confounders, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
-			currentVarValues = cbind.data.frame(datax$userID, currentVarValues)
-                        colnames(currentVarValues)[1] = "userID"
-			thisdata = merge(thisdata, currentVarValues, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
-			return(thisdata)
-
-}
diff --git a/PHESANT/R/loadPhenotypes.r b/PHESANT/R/phenotypes.r
similarity index 69%
rename from PHESANT/R/loadPhenotypes.r
rename to PHESANT/R/phenotypes.r
index 0a55079..16b28e8 100644
--- a/PHESANT/R/loadPhenotypes.r
+++ b/PHESANT/R/phenotypes.r
@@ -108,4 +108,60 @@ loadPhenotypes <- function(opt) {
 
 }
 
+# Validate the contents of the phenotype file
+validatePhenotypeInputHeader <- function(opt) {
+  	print("Validating phenotype data ...")
+  
+  	## get just first row so we can check the column names
+  	phenoIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
+  	
+  	### pheno file validation
+  	print(paste("Number of columns in phenotype file: ", ncol(phenoIn),sep=""))
+  	## check user id exists in pheno file
+  	idx1 = which(names(phenoIn) == opt$userId);
+  	if (length(idx1)==0) {
+        stop(paste("phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+    }
+  
+  	# we only need the confounders if we are actually running the tests
+  	if (opt$save==FALSE & is.null(opt$confounderfile)) {
+      	## confounder variables exist in pheno file
+      	idx = which(names(phenoIn) == "x21022_0_0");
+      	if (length(idx)==0) {
+            stop("phenotype file doesn't contain required age colunn: x21022_0_0", call.=FALSE)
+        }
+      
+      	idx = which(names(phenoIn) == "x31_0_0");
+        if (length(idx)==0) {
+            stop("phenotype file doesn't contain required sex colunn: x31_0_0", call.=FALSE)
+        }
+      
+      	if (opt$genetic ==TRUE) {
+      		  idx = which(names(phenoIn) == "x22000_0_0");
+          	if (length(idx)==0) {
+          	    stop("phenotype file doesn't contain required genetic batch colunn: x22000_0_0", call.=FALSE)
+          	}	
+      	}
+      
+      	## if running with sensitivity option then check extra columns exist in pheno file (genetic PCs and assessment centre)
+      	if (opt$sensitivity==TRUE) {
+        		if (opt$genetic ==TRUE) {
+          			## check first 10 genetic PCs exist
+          			for (i in 1:10) {
+            				idx = which(names(phenoIn) == paste("x22009_0_", i, sep=""));
+            				if (length(idx)==0) {
+               			    stop(paste("phenotype file doesn't contain required genetic principal component colunn: x22009_0_", i, sep=""), call.=FALSE)
+              			}
+          			}
+        		}
+        		## assessment centre field
+        		idx = which(names(phenoIn) == "x54_0_0");
+          	if (length(idx)==0) {
+          	    stop("phenotype file doesn't contain required assessment centre colunn: x54_0_0", call.=FALSE)
+          	}
+        }
+  
+  	}
+  	print("Phenotype file validated")
+}
 
diff --git a/PHESANT/R/reassignValue.r b/PHESANT/R/reassignValue.r
deleted file mode 100644
index c9bc289..0000000
--- a/PHESANT/R/reassignValue.r
+++ /dev/null
@@ -1,73 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# Reassigns values as specified in data coding info file
-reassignValue <- function(vl, pheno, varName) {
-
-	# get data code info - whether this data code is ordinal or not and any reordering and resassignments
-        dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
-        dataCode = dataPheno$DATA_CODING;
-
-	# not all variables will have a data code info row
-        dataCodeRow = which(vl$dataCodeInfo$dataCode==dataCode);
-
-        if (length(dataCodeRow)==0) {
-                return(pheno);
-        }
-	else if (length(dataCodeRow)>1) {
-                cat("WARNING: >1 ROWS IN DATA CODE INFO FILE || ")
-		return(pheno);
-        }
-
-	dataDataCode = vl$dataCodeInfo[dataCodeRow,];
-        reassignments = as.character(dataDataCode$reassignments);
-	
-	return(reassignValue2(vl, pheno, reassignments))
-	
-}
-
-# Reassigns values in pheno, as specified in resassignments argument
-reassignValue2 <- function(vl, pheno, reassignments) {
-
-	# can be NA if row not included in data coding info file
-	if (!is.na(reassignments) && nchar(reassignments)>0) {
-
-		reassignParts = unlist(strsplit(reassignments,"\\|"));
-		cat(paste("reassignments: ", reassignments, " || ", sep=""));
-
-		# do each reassignment
-		for(i in reassignParts) {
-			reassignParts = unlist(strsplit(i,"="));
-
-			# matrix version
-			idx = which(pheno==reassignParts[1],arr.ind=TRUE)
-			pheno[idx]=strtoi(reassignParts[2]);
-		}
-		
-		## see if type has changed (this happens for field 216 (X changed to -1))
-		## as.numeric will set non numeric to NA so we know if it's ok to do this by seeing if there are extra NA's after the conversion
-		pNum = as.numeric(unlist(pheno))
-		isNum = length(which(is.na(pheno), arr.ind=TRUE))==length(which(is.na(pNum), arr.ind=TRUE))
-		if (isNum) {
-			pheno = pNum
-		}
-	}
-
-	return(pheno)
-}
diff --git a/PHESANT/R/run.r b/PHESANT/R/run.r
new file mode 100644
index 0000000..70ed305
--- /dev/null
+++ b/PHESANT/R/run.r
@@ -0,0 +1,68 @@
+run <- function(opt) {
+    input <- initData(opt)
+    data <- input$data
+    vl <- input$vl
+    confounders <- input$confounders
+    phenoStartIdx <- input$phenoStartIdx
+    phenoVars <- input$phenoVars
+    print("LOADING DONE")
+    
+    initEnv(opt, data)
+    
+    currentVar <- ""
+    currentVarShort <- ""
+    first <- TRUE
+    
+    phenoIdx=0; # zero because then the idx is the position of the previous variable, i.e. the var in currentVar
+    for (var in phenoVars) { 
+        sink()
+        sink(pkg.env$resLogFile, append=TRUE)
+        
+        varx <- gsub("^x", "", var)
+        varx <- gsub("_[0-9]+$", "", varx)
+        
+        varxShort <- gsub("^x", "", var)
+        varxShort <- gsub("_[0-9]+_[0-9]+$", "", varxShort)
+        
+        ## test this variable
+        if (currentVar == varx) {
+          thisCol <- data[,eval(var)]
+          thisCol <- .replaceNaN(thisCol)
+          currentVarValues <- cbind.data.frame(currentVarValues, thisCol)
+        } else if (currentVarShort == varxShort) {
+          ## different time point of this var so skip
+        } else {
+          ## new variable so run test for previous (we have collected all the columns now)
+          if (first==FALSE) {
+            thisdata <- makeTestDataFrame(data, confounders, currentVarValues)
+            testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
+          }
+          first <- FALSE
+          
+          ## new variable so set values
+          currentVar <- varx
+          currentVarShort <- varxShort
+          
+          currentVarValues <- data[,eval(var)]
+          currentVarValues <- .replaceNaN(currentVarValues)
+        }
+        phenoIdx <- phenoIdx + 1
+    }
+    
+    if (phenoIdx>0){
+        # last variable so test association
+        thisdata = makeTestDataFrame(data, confounders, currentVarValues)
+        testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
+    }
+    sink()
+    
+    # save counters of each path in variable flow
+    saveCounts(opt)
+    if (opt$save == TRUE) {
+        write.table(pkg.env$derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+        write.table(pkg.env$derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+        write.table(pkg.env$derivedCatOrd, file=paste(opt$resDir,"data-catord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+        write.table(pkg.env$derivedCatUnord, file=paste(opt$resDir,"data-catunord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+    }
+  
+}
\ No newline at end of file
diff --git a/PHESANT/R/storeNewVar.r b/PHESANT/R/storeNewVar.r
deleted file mode 100644
index e2f640a..0000000
--- a/PHESANT/R/storeNewVar.r
+++ /dev/null
@@ -1,14 +0,0 @@
-storeNewVar <- function(userIDData, phenoData, varName, type) {
-    # add pheno to dataframe
-  	newdata = data.frame(userID=userIDData, newvar=phenoData)
-    names(newdata)[names(newdata)=="newvar"] = varName
-  	if (type == "bin") {
-  	    pkg.env$derivedBinary <- merge(pkg.env$derivedBinary, newdata, by="userID", all=TRUE);
-  	} else if (type == "cont") {
-  	    pkg.env$derivedCont <- merge(pkg.env$derivedCont, newdata, by="userID", all=TRUE);
-  	} else if (type == "catOrd") {
-  	    pkg.env$derivedCatOrd <- merge(pkg.env$derivedCatOrd, newdata, by="userID", all=TRUE);
-  	} else if (type == "catUnord") {
-  	    pkg.env$derivedCatUnord <- merge(pkg.env$derivedCatUnord, newdata, by="userID", all=TRUE);
-  	}
-}
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index 8184ee3..508c4e0 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -126,7 +126,7 @@ restrictSample2 <- function(vl, varName,pheno, varIndicator,variableVal, userID)
 		indicatorVar = indvarx[,indName]
 
 		# remove participants with NA value in this related field
-		indicatorVar = replaceNaN(indicatorVar)
+		indicatorVar = .replaceNaN(indicatorVar)
 		naIdxs = which(is.na(indicatorVar))
 
 		cat("Remove indicator var NAs: ", length(naIdxs), " || ", sep="");
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index bd32e4d..d840a22 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -169,7 +169,7 @@ setDefaultValue <- function(vl, pheno, defaultValue, defaultRelatedID, userID) {
                 indicatorVar = indvarx[,indName]
 
 	    	# remove participants with NA value in this related field
-	    	indicatorVar = replaceNaN(indicatorVar)
+	    	indicatorVar = .replaceNaN(indicatorVar)
 
 		# check if there are already examples with default value and if so display warning
 		numWithDefault = length(which(pheno==defaultValue))
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index 1f208db..78c8ea2 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -50,7 +50,7 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 
 		if (opt$save == TRUE) {
 			# add pheno to dataframe
-			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
+			.storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
 			cat("SUCCESS results-ordered-logistic");
 			incrementCounter("success.ordCat")
                 }
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index dce7cb0..320f522 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -46,7 +46,7 @@ testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoS
 
 		if (opt$save == TRUE) {
 			# add pheno to dataframe
-			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catUnord')
+			.storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catUnord')
 			cat("SUCCESS results-notordered-logistic ");
 			incrementCounter("success.unordCat")
 		}
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 0f5171d..883ea75 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -100,7 +100,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 
 		  incrementCounter("cont.ordcattry")
 			## equal sized bins
-			phenoBinned = equalSizedBins(phenoAvg);
+			phenoBinned = .equalSizedBins(phenoAvg)
 
 			# check number of people in each bin
 			bin0Num = length(which(phenoBinned==0))
@@ -174,7 +174,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 
 			if (opt$save == TRUE) {
 				# add pheno to dataframe
-				storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
+				.storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
 				cat("SUCCESS results-linear");
 				incrementCounter("success.continuous")
                         }
@@ -248,3 +248,91 @@ irnt <- function(pheno, seed = 1234) {
   	return(phenoIRNT)
 }
 
+# splits the pheno into 3 bins with the cut points between values rather at the exact value for the quantile
+.equalSizedBins <- function(phenoAvg) {
+    ## equal sized bins 
+    q <- quantile(phenoAvg, probs=c(1/3,2/3), na.rm=TRUE)
+    
+    minX <- min(phenoAvg, na.rm=TRUE)
+    maxX <- max(phenoAvg, na.rm=TRUE)
+    
+    phenoBinned <- phenoAvg
+    if (q[1]==minX) {
+        # edge case - quantile value is lowest value
+        # assign min value as cat1
+        idx1 <- which(phenoAvg==q[1])
+        phenoBinned[idx1] <- 0
+        
+        # divide remaining values into cat2 and cat3
+        phenoAvgRemaining <- phenoAvg[which(phenoAvg!=q[1])]
+        qx <- quantile(phenoAvgRemaining, probs=c(0.5), na.rm=TRUE)
+        minXX <- min(phenoAvgRemaining, na.rm=TRUE)
+        maxXX <-	max(phenoAvgRemaining, na.rm=TRUE)
+        
+        if (qx[1]==minXX) {
+            # edge case again - quantile value is lowest value
+            idx2 <- which(phenoAvg==qx[1])
+            idx3 <- which(phenoAvg>qx[1])
+        } else if (qx[1]==maxXX) {
+            # edge case again - quantile value is max value
+            idx2 <- which(phenoAvg<qx[1] & phenoAvg>q[1])
+            idx3 <- which(phenoAvg==qx[1])
+        } else {
+            idx2 <- which(phenoAvg<qx[1] & phenoAvg>q[1])
+            idx3 <- which(phenoAvg>=qx[1])
+        }
+        phenoBinned[idx2] <- 1
+        phenoBinned[idx3] <- 2
+    } else if (q[2]==maxX) {
+        # edge case - quantile value is highest value
+        # assign max value as cat3
+        idx3 <- which(phenoAvg==q[2])
+        phenoBinned[idx3] <- 2
+        
+        # divide remaining values into cat1 and cat2
+        phenoAvgRemaining <- phenoAvg[which(phenoAvg!=q[2])]
+        qx <- quantile(phenoAvgRemaining, probs=c(0.5), na.rm=TRUE)
+        minXX <- min(phenoAvgRemaining, na.rm=TRUE)
+        maxXX <- max(phenoAvgRemaining, na.rm=TRUE)
+        
+        if (qx[1]==minXX) {
+            # edge case again - quantile value is lowest value
+            idx1 <- which(phenoAvg==qx[1])
+            idx2 <- which(phenoAvg>qx[1] & phenoAvg<q[2])
+        } else if	(qx[1]==maxXX) {
+            # edge case again - quantile value is max value
+            idx1 <- which(phenoAvg<qx[1])
+            idx2 <- which(phenoAvg==qx[1])
+        } else {
+            idx1 <- which(phenoAvg<qx[1])  
+            idx2 <- which(phenoAvg>=qx[1] & phenoAvg<q[2])
+        }
+        
+        phenoBinned[idx1] <- 0
+        phenoBinned[idx2] <- 1
+    } else if (q[1] == q[2]) {
+        # both quantiles correspond to the same value so set 
+        # cat1 as < this value, cat2 as exactly this value and
+        # cat3 as > this value
+        phenoBinned <- phenoAvg
+        idx1 <- which(phenoAvg<q[1])
+        idx2 <- which(phenoAvg==q[2])
+        idx3 <- which(phenoAvg>q[2])
+        phenoBinned[idx1] <- 0
+        phenoBinned[idx2] <- 1
+        phenoBinned[idx3] <- 2
+    } else {
+        # standard case - split the data into three roughly equal parts where
+        # cat1<q1, cat2 between q1 and q2, and cat3>=q2
+        phenoBinned <- phenoAvg
+        idx1 <- which(phenoAvg<q[1])
+        idx2 <- which(phenoAvg>=q[1] & phenoAvg<q[2])
+        idx3 <- which(phenoAvg>=q[2])
+        phenoBinned[idx1] <- 0
+        phenoBinned[idx2] <- 1
+        phenoBinned[idx3] <- 2
+    }
+    
+    cat("cat N: ", length(idx1),", ",length(idx2),", ",length(idx3), " || ", sep="")
+    return(phenoBinned)
+}
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 5ffac76..8591435 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -40,7 +40,7 @@ testInteger <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
                 phenoAvg = rowMeans(pheno, na.rm=TRUE)
 		
 		# if participant only has NA values then NaN is generated so we convert back to NA
-		phenoAvg = replaceNaN(phenoAvg)
+		phenoAvg = .replaceNaN(phenoAvg)
         }
 	else {
                 phenoAvg = pheno
diff --git a/PHESANT/R/loadTraitOfInterest.r b/PHESANT/R/traitOfInterest.r
similarity index 58%
rename from PHESANT/R/loadTraitOfInterest.r
rename to PHESANT/R/traitOfInterest.r
index 7f637f2..537686e 100644
--- a/PHESANT/R/loadTraitOfInterest.r
+++ b/PHESANT/R/traitOfInterest.r
@@ -43,3 +43,41 @@ loadTraitOfInterest <- function(opt, phenotypes) {
 
     return(data)
 }
+
+# Validate the contents of the trait of interest file
+validateTraitInputHeader <- function(opt) {
+    if (opt$save!=TRUE) {
+      
+    	### get header of trait of interest file or pheno file (if no trait of interest file is specified
+      print("Validating trait of interest data ...")
+    	if (is.null(opt$traitofinterestfile)) {
+    		  snpIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
+    	} else {
+    		  snpIn = read.table(opt$traitofinterestfile, header=1, nrows=1, sep=',')	
+    	}
+    
+    	### trait of interest file validation
+    	print(paste("Number of columns in trait of interest file:", ncol(snpIn),sep=""))
+    	## check user id exists in snp file
+    	idx1 = which(names(snpIn) == opt$userId);
+    	if (length(idx1)==0) {
+      		if (is.null(opt$traitofinterestfile)) {
+      			stop(paste("Phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+      		} else {
+      			stop(paste("Trait of interest file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+      		}
+    	}
+    	
+    	## check trait of interest exists in trait of interest file
+    	idx2 = which(names(snpIn) == opt$traitofinterest);
+    	if (length(idx2)==0) {
+      		if (is.null(opt$traitofinterestfile)) {
+      			  stop(paste("No trait of interest file specified, and phenotypes file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
+      		} else {
+      			  stop(paste("Trait of interest file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
+      		}
+    	}
+    	print("Trait of interest file validated")
+    }
+}
+
diff --git a/PHESANT/R/processArgs.r b/PHESANT/R/utils.r
similarity index 77%
rename from PHESANT/R/processArgs.r
rename to PHESANT/R/utils.r
index bca4872..2e560fb 100644
--- a/PHESANT/R/processArgs.r
+++ b/PHESANT/R/utils.r
@@ -1,21 +1,28 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
 
+.storeNewVar <- function(userIDData, phenoData, varName, type) {
+  # add pheno to dataframe
+  newdata = data.frame(userID=userIDData, newvar=phenoData)
+  names(newdata)[names(newdata)=="newvar"] = varName
+  if (type == "bin") {
+    pkg.env$derivedBinary <- merge(pkg.env$derivedBinary, newdata, by="userID", all=TRUE);
+  } else if (type == "cont") {
+    pkg.env$derivedCont <- merge(pkg.env$derivedCont, newdata, by="userID", all=TRUE);
+  } else if (type == "catOrd") {
+    pkg.env$derivedCatOrd <- merge(pkg.env$derivedCatOrd, newdata, by="userID", all=TRUE);
+  } else if (type == "catUnord") {
+    pkg.env$derivedCatUnord <- merge(pkg.env$derivedCatUnord, newdata, by="userID", all=TRUE);
+  }
+}
+
+## makes a smaller data frame containing the data for a particular test
+makeTestDataFrame <- function(datax, confounders, currentVarValues) {
+			thisdata <- datax[,c("geno", "userID")]
+			thisdata <- merge(thisdata, confounders, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
+			currentVarValues <- cbind.data.frame(datax$userID, currentVarValues)
+      colnames(currentVarValues)[1] <- "userID"
+			thisdata <- merge(thisdata, currentVarValues, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
+			return(thisdata)
+}
 
 # Parse the arguments input by the user
 # if argument 'test' is used then run test phenome scan
diff --git a/PHESANT/R/validatePhenotypeInput.r b/PHESANT/R/validatePhenotypeInput.r
deleted file mode 100644
index cc3ca87..0000000
--- a/PHESANT/R/validatePhenotypeInput.r
+++ /dev/null
@@ -1,85 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# Validate the contents of the phenotype file
-validatePhenotypeInput <- function(opt) {
-
-	print("Validating phenotype data ...")
-
-	## get just first row so we can check the column names
-	phenoIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
-	
-	###
-	### pheno file validation
-
-	print(paste("Number of columns in phenotype file: ", ncol(phenoIn),sep=""))
-
-	## check user id exists in pheno file
-	idx1 = which(names(phenoIn) == opt$userId);
-	if (length(idx1)==0) {
-                stop(paste("phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
-        }
-
-	# we only need the confounders if we are actually running the tests
-	if (opt$save==FALSE & is.null(opt$confounderfile)) {
-
-	## confounder variables exist in pheno file
-	idx = which(names(phenoIn) == "x21022_0_0");
-	if (length(idx)==0) {
-                stop("phenotype file doesn't contain required age colunn: x21022_0_0", call.=FALSE)
-        }
-
-	idx = which(names(phenoIn) == "x31_0_0");
-        if (length(idx)==0) {
-                stop("phenotype file doesn't contain required sex colunn: x31_0_0", call.=FALSE)
-        }
-
-	
-	if (opt$genetic ==TRUE) {
-		idx = which(names(phenoIn) == "x22000_0_0");
-        	if (length(idx)==0) {
-        	        stop("phenotype file doesn't contain required genetic batch colunn: x22000_0_0", call.=FALSE)
-        	}	
-	}
-
-	## if running with sensitivity option then check extra columns exist in pheno file (genetic PCs and assessment centre)
-	if (opt$sensitivity==TRUE) {
-
-		if (opt$genetic ==TRUE) {
-			## check first 10 genetic PCs exist
-			for (i in 1:10) {
-				idx = which(names(phenoIn) == paste("x22009_0_", i, sep=""));
-				if (length(idx)==0) {
-         			       stop(paste("phenotype file doesn't contain required genetic principal component colunn: x22009_0_", i, sep=""), call.=FALSE)
-        			}
-			}
-		}
-
-		## assessment centre field
-		idx = which(names(phenoIn) == "x54_0_0");
-        	if (length(idx)==0) {
-        	        stop("phenotype file doesn't contain required assessment centre colunn: x54_0_0", call.=FALSE)
-        	}
-	}
-
-	}
-
-	print("Phenotype file validated")
-
-}
diff --git a/PHESANT/R/validateTraitInput.r b/PHESANT/R/validateTraitInput.r
deleted file mode 100644
index a42c28b..0000000
--- a/PHESANT/R/validateTraitInput.r
+++ /dev/null
@@ -1,69 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# Validate the contents of the trait of interest file
-validateTraitInput <- function(opt) {
-
-if (opt$save!=TRUE) {
-
-	print("Validating trait of interest data ...")
-
-	###
-	### get header of trait of interest file or pheno file (if no trait of interest file is specified
-
-	if (is.null(opt$traitofinterestfile)) {
-		snpIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
-	}
-	else {
-		snpIn = read.table(opt$traitofinterestfile, header=1, nrows=1, sep=',')	
-	}
-
-	###
-	### trait of interest file validation
-
-	print(paste("Number of columns in trait of interest file:", ncol(snpIn),sep=""))
-
-	## check user id exists in snp file
-	idx1 = which(names(snpIn) == opt$userId);
-	if (length(idx1)==0) {
-		if (is.null(opt$traitofinterestfile)) {
-			stop(paste("Phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
-		} else {
-			stop(paste("Trait of interest file doesn't contain userID colunn:", opt$userId), call.=FALSE)
-		}
-
-	}
-	
-	## check trait of interest exists in trait of interest file
-	idx2 = which(names(snpIn) == opt$traitofinterest);
-	if (length(idx2)==0) {
-
-		if (is.null(opt$traitofinterestfile)) {
-			stop(paste("No trait of interest file specified, and phenotypes file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
-		}
-		else {
-			stop(paste("Trait of interest file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
-		}
-	}
-
-	print("Trait of interest file validated")
-
-}
-
-}
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 3aa1ff5..0803a90 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -18,7 +18,6 @@
 
 ##
 ## main phenome scan file
-
 ## Updated by Quanli Wang 
 
 library(PHESANT)
@@ -33,110 +32,4 @@ if (length(args) == 0) {
     opt <- processArgs(opt, opt_parser)
 }
 
-## load the files we write to and use
-initCounters()
-if (opt$save==FALSE) {
-	initResultsFiles(opt)
-}
-vl <- initVariableLists(opt)
-
-## load data
-d <- loadData(opt, vl)
-data=d$datax
-confounders=d$confounders
-vl$indicatorFields=d$inds
-
-numPreceedingCols = ncol(confounders)-1+2; # confounders,minus id column, plus trait of interest and user ID
-phenoStartIdx = numPreceedingCols+1;
-
-print("LOADING DONE")
-
-phenoVars=colnames(data);
-# remove user id and age and sex columns
-phenoVars = phenoVars[-c(1,2)]; # first and second columns are the id and snpScore, respectively, as determined in loadData.r
-
-currentVar="";
-currentVarShort="";
-first=TRUE;
-
-if (opt$save == TRUE) {
-    pkg.env$derivedBinary <- data.frame(userID=data$userID)
-    pkg.env$derivedCont <- data.frame(userID=data$userID)
-    pkg.env$derivedCatOrd <- data.frame(userID=data$userID)
-    pkg.env$derivedCatUnord <- data.frame(userID=data$userID)
-    pkg.env$resLogFile = paste(opt$resDir,"data-log-",opt$varTypeArg,".txt",sep="")
-    sink(pkg.env$resLogFile)
-} else {
-    pkg.env$modelFitLogFile = paste(opt$resDir,"modelfit-log-",opt$varTypeArg,".txt",sep="")
-  	sink(pkg.env$modelFitLogFile)
-  	sink()
-  	pkg.env$resLogFile = paste(opt$resDir,"results-log-",opt$varTypeArg,".txt",sep="")
-  	sink(pkg.env$resLogFile)
-}
-
-
-phenoIdx=0; # zero because then the idx is the position of the previous variable, i.e. the var in currentVar
-for (var in phenoVars) { 
-
-
-	sink()
-#	print(var)
-	sink(pkg.env$resLogFile, append=TRUE)
-
-	varx = gsub("^x", "", var);
-        varx = gsub("_[0-9]+$", "", varx);
-	varxShort = gsub("^x", "", var);
-        varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
-
-	## test this variable
-	if (currentVar == varx) {
-		thisCol = data[,eval(var)]
-		thisCol = replaceNaN(thisCol)
-		currentVarValues = cbind.data.frame(currentVarValues, thisCol);
-	}
-	else if (currentVarShort == varxShort) {
-		## different time point of this var so skip
-	}
-	else {
-		## new variable so run test for previous (we have collected all the columns now)
-		if (first==FALSE) {
-
-			thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-			testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
-		}
-		
-		first=FALSE;
-		
-		## new variable so set values
-		currentVar = varx;
-		currentVarShort = varxShort;
-
-		currentVarValues = data[,eval(var)]
-		currentVarValues = replaceNaN(currentVarValues)
-	}
-
-	phenoIdx = phenoIdx + 1;
-}
-
-if (phenoIdx>0){
-	# last variable so test association
-	thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-	testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
-}
-
-sink()
-
-# save counters of each path in variable flow
-saveCounts(opt)
-
-if (opt$save == TRUE) {
-	write.table(pkg.env$derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-	write.table(pkg.env$derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-	write.table(pkg.env$derivedCatOrd, file=paste(opt$resDir,"data-catord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-	write.table(pkg.env$derivedCatUnord, file=paste(opt$resDir,"data-catunord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
-}
-
-warnings()
-
-
-
+run(opt)
diff --git a/PHESANT/R/unittests/run-tests.sh b/unittests/run-tests.sh
similarity index 100%
rename from PHESANT/R/unittests/run-tests.sh
rename to unittests/run-tests.sh
diff --git a/PHESANT/R/unittests/test_equalSizedBins.r b/unittests/test_equalSizedBins.r
similarity index 100%
rename from PHESANT/R/unittests/test_equalSizedBins.r
rename to unittests/test_equalSizedBins.r
diff --git a/PHESANT/R/unittests/test_reassignValue.r b/unittests/test_reassignValue.r
similarity index 100%
rename from PHESANT/R/unittests/test_reassignValue.r
rename to unittests/test_reassignValue.r
diff --git a/PHESANT/R/unittests/test_testCatMultiple.r b/unittests/test_testCatMultiple.r
similarity index 100%
rename from PHESANT/R/unittests/test_testCatMultiple.r
rename to unittests/test_testCatMultiple.r
diff --git a/PHESANT/R/unittests/test_testCatSingle.r b/unittests/test_testCatSingle.r
similarity index 100%
rename from PHESANT/R/unittests/test_testCatSingle.r
rename to unittests/test_testCatSingle.r

From 2fadc11ded4e216e633c076c25f4bbb6183d3ce9 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 09:41:32 -0400
Subject: [PATCH 17/27] remove duplicate file

---
 PHESANT/R/loadConfounders.r | 121 ------------------------------------
 1 file changed, 121 deletions(-)
 delete mode 100644 PHESANT/R/loadConfounders.r

diff --git a/PHESANT/R/loadConfounders.r b/PHESANT/R/loadConfounders.r
deleted file mode 100644
index 9fc1c10..0000000
--- a/PHESANT/R/loadConfounders.r
+++ /dev/null
@@ -1,121 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
-# documentation files (the "Software"), to deal in the Software without restriction, including without 
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions 
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-# DEALINGS IN THE SOFTWARE.
-
-##
-## loads confounder variables from phenotype file
-
-loadConfounders <- function(opt, phenotypes) {
-    if (opt$save==TRUE) { 
-	      # saving not running tests so we add a fake confounder
-	      numRows = nrow(phenotypes)
-        data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
-        colnames(data)[1] <- "userID"
-        colnames(data)[2] <- "conf1"
-        return(data)
-    } else {
-      	if (!is.null(opt$confounderfile)) {
-      		    print("Loading confounders from confounder file ...")
-      		    confs = fread(opt$confounderfile, sep=',', header=TRUE, data.table=FALSE)
-      	      confs = lapply(confs,function(x) type.convert(as.character(x)))
-      	      confs = as.data.frame(confs)
-      
-      		    ## find userID column and change name to userID
-      		    idx = which(colnames(confs) == opt$userId)
-      		    confs = confs[,c(opt$userID,setdiff(colnames(confs),opt$userID))]
-      		    colnames(confs)[1] <- "userID"
-    	  } else {
-    	        print("Loading confounders from phenotypes file ...")
-              confNames = .getConfounderNames(opt)
-              #####
-    	        ##### extract confounders from data file
-              confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
-    	        confs = lapply(confs,function(x) type.convert(as.character(x)))
-    	        confs = as.data.frame(confs)
-    
-              #####
-    	        ##### process genetic batch to create genetic chip variable
-              if (opt$genetic == TRUE) {
-                    genoBatch = confs[,"x22000_0_0"]
-                    # chip comes from batch field 22000
-                    genoChip = rep.int(NA,nrow(confs));
-                    idxForVar = which(genoBatch<0);
-                    genoChip[idxForVar] = 0;
-                    idxForVar = which(genoBatch>=0 & genoBatch<2000);
-                    genoChip[idxForVar] = 1;
-    
-    		            # remove geno batch from and add geno chip to confounders
-                    confs = confs[,-which(names(confs) == "x22000_0_0")]
-                    confs = cbind.data.frame(confs, genoChip)
-              }
-
-	            #####
-            	##### Convert assessment centre to an indicator variable
-	            if (opt$sensitivity==TRUE) {
-		                confs$x54_0_0 = as.factor(confs$x54_0_0)
-		                assCentre = model.matrix(~confs$x54_0_0)
-		                assCentre = assCentre[,2:ncol(assCentre)]
-		                confs = cbind(confs, assCentre)
-		                confs$x54_0_0 = NULL
-	            }
-	            colnames(confs)[1] <- "userID"
-
-	      }
-
-	      # remove any rows with no values
-	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))
-	      confsComp = complete.cases(confs)
-	      print(paste("Number of INCOMPLETE rows removed from confounder data: ", length(which(confsComp==FALSE)),sep=""))
-	      confs = confs[confsComp==TRUE,]
-	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))	
-
-	      print("Confounder columns:")
-	      print(names(confs))
-
-	      return(confs)
-    }
-}
-
-.getConfounderNames <- function(opt) {
-    #####
-	  ##### first get vector of confounder names
-    # age and sex
-    confNames = c(opt$userId, "x21022_0_0", "x31_0_0")
-    
-    # if genetic trait of interest then adjust for genotype chip
-    # and also let user choose sensitivity analysis that also adjusts for top 10 genetic principal components and assessment centre
-    if (opt$genetic == TRUE) {
-          confNames = append(confNames, "x22000_0_0")
-          if (opt$sensitivity==TRUE) {
-                confNames = append(confNames, c("x22009_0_1", "x22009_0_2", "x22009_0_3", "x22009_0_4", "x22009_0_5", "x22009_0_6", "x22009_0_7", "x22009_0_8", "x22009_0_9", "x22009_0_10", "x54_0_0"))
-                print("Adjusting for age, sex, genotype chip, top 10 genetic principal components and assessment centre")
-          } else {
-                print("Adjusting for age, sex and genotype chip")
-          }
-    } else {
-          # non genetic trait of interest, then sensitivity adjusts for assessment center
-          if (opt$sensitivity==TRUE) {
-                confNames = append(confNames, "x54_0_0")
-                print("Adjusting for age, sex and assessment centre")
-          } else {
-                print("Adjusting for age and sex")
-          }
-    }
-	  return(confNames)
-}
-
-

From f9c36bacf7cb93322422053ff125ca3dc2daaca2 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 11:10:35 -0400
Subject: [PATCH 18/27] Code clean and Rd for input file heders

---
 PHESANT/R/counters.r                        |  2 +-
 PHESANT/R/init.r                            | 23 ++++----
 PHESANT/R/phenotypes.r                      | 22 +++-----
 PHESANT/R/run.r                             | 22 +++-----
 PHESANT/R/traitOfInterest.r                 | 61 ++++++++++-----------
 PHESANT/man/initVariableLists.Rd            | 31 +++++++++++
 PHESANT/man/validatePhenotypeInputHeader.Rd | 36 ++++++++++++
 PHESANT/man/validateTraitInputHeader.Rd     | 28 ++++++++++
 8 files changed, 154 insertions(+), 71 deletions(-)
 create mode 100644 PHESANT/man/initVariableLists.Rd
 create mode 100644 PHESANT/man/validatePhenotypeInputHeader.Rd
 create mode 100644 PHESANT/man/validateTraitInputHeader.Rd

diff --git a/PHESANT/R/counters.r b/PHESANT/R/counters.r
index 31a586b..ec47a29 100644
--- a/PHESANT/R/counters.r
+++ b/PHESANT/R/counters.r
@@ -50,6 +50,6 @@ saveCounts <- function(opt) {
 }
 
 # init the counters used to determine how many variables took each path in the variable processing flow.
-initCounters <- function() {
+.initCounters <- function() {
   pkg.env$counters = data.frame(name=character(),countValue=integer(), stringsAsFactors=FALSE)
 }
\ No newline at end of file
diff --git a/PHESANT/R/init.r b/PHESANT/R/init.r
index abcce78..05d66ba 100644
--- a/PHESANT/R/init.r
+++ b/PHESANT/R/init.r
@@ -19,11 +19,11 @@
 
 initData <-function(opt) {
     ## load the files we write to and use
-    initCounters()
+    .initCounters()
     if (opt$save==FALSE) {
-      initResultsFiles(opt)
+        .initResultsFiles(opt)
     }
-    vl <- initVariableLists(opt)
+    vl <- initVariableLists(opt$variablelistfile, opt$datacodingfile)
     
     ## load data
     d <- loadData(opt, vl)
@@ -38,7 +38,7 @@ initData <-function(opt) {
     return(list(data = data, vl = vl, confounders = confounders, phenoStartIdx = phenoStartIdx, phenoVars = phenoVars))
 }
 
-initEnv <- function(opt, data) {
+.initEnv <- function(opt, data) {
     if (opt$save == TRUE) {
         pkg.env$derivedBinary <- data.frame(userID=data$userID)
         pkg.env$derivedCont <- data.frame(userID=data$userID)
@@ -55,7 +55,7 @@ initEnv <- function(opt, data) {
     }
 }
 # create new results files and headers
-initResultsFiles <- function(opt) {
+.initResultsFiles <- function(opt) {
 
 	## only linear and continuous fields can create linear results
 	file.create(paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt",sep=""));
@@ -76,14 +76,11 @@ initResultsFiles <- function(opt) {
 }
 
 # load the variable information and data code information files
-initVariableLists <- function(opt) {
-
-	phenoInfo=read.table(opt$variablelistfile,sep="\t",header=1,comment.char="",quote="");
-
-	dataCodeInfo=read.table(opt$datacodingfile,sep=",", header=1);
-
-	vars=list(phenoInfo=phenoInfo, dataCodeInfo=dataCodeInfo);
-	return(vars);
+initVariableLists <- function(variablelistfile, datacodingfile) {
+	  phenoInfo <- read.table(variablelistfile,sep="\t",header=1,comment.char="",quote="")
+	  dataCodeInfo <- read.table(datacodingfile,sep=",", header=1)
+	  vars <- list(phenoInfo=phenoInfo, dataCodeInfo=dataCodeInfo)
+	  return(vars)
 }
 
 
diff --git a/PHESANT/R/phenotypes.r b/PHESANT/R/phenotypes.r
index 16b28e8..06a970c 100644
--- a/PHESANT/R/phenotypes.r
+++ b/PHESANT/R/phenotypes.r
@@ -16,9 +16,7 @@
 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 # DEALINGS IN THE SOFTWARE.
 
-##
 ## load phenotypes from phenotype file
-
 loadPhenotypes <- function(opt) {
     ## is not running 'all' then we determine the start and end idxs of phenotypes that we test, so that we can parallelise into multiple jobs
     if (opt$varTypeArg!="all") {
@@ -111,14 +109,13 @@ loadPhenotypes <- function(opt) {
 # Validate the contents of the phenotype file
 validatePhenotypeInputHeader <- function(opt) {
   	print("Validating phenotype data ...")
-  
   	## get just first row so we can check the column names
-  	phenoIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
+  	phenoIn <- read.table(opt$phenofile, header=1, nrows=1, sep=',')
   	
   	### pheno file validation
   	print(paste("Number of columns in phenotype file: ", ncol(phenoIn),sep=""))
   	## check user id exists in pheno file
-  	idx1 = which(names(phenoIn) == opt$userId);
+  	idx1 <- which(names(phenoIn) == opt$userId)
   	if (length(idx1)==0) {
         stop(paste("phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
     }
@@ -126,18 +123,18 @@ validatePhenotypeInputHeader <- function(opt) {
   	# we only need the confounders if we are actually running the tests
   	if (opt$save==FALSE & is.null(opt$confounderfile)) {
       	## confounder variables exist in pheno file
-      	idx = which(names(phenoIn) == "x21022_0_0");
+      	idx <- which(names(phenoIn) == "x21022_0_0")
       	if (length(idx)==0) {
             stop("phenotype file doesn't contain required age colunn: x21022_0_0", call.=FALSE)
-        }
-      
-      	idx = which(names(phenoIn) == "x31_0_0");
+      	}
+      	
+      	idx <- which(names(phenoIn) == "x31_0_0")
         if (length(idx)==0) {
             stop("phenotype file doesn't contain required sex colunn: x31_0_0", call.=FALSE)
         }
       
       	if (opt$genetic ==TRUE) {
-      		  idx = which(names(phenoIn) == "x22000_0_0");
+      		  idx <- which(names(phenoIn) == "x22000_0_0")
           	if (length(idx)==0) {
           	    stop("phenotype file doesn't contain required genetic batch colunn: x22000_0_0", call.=FALSE)
           	}	
@@ -148,19 +145,18 @@ validatePhenotypeInputHeader <- function(opt) {
         		if (opt$genetic ==TRUE) {
           			## check first 10 genetic PCs exist
           			for (i in 1:10) {
-            				idx = which(names(phenoIn) == paste("x22009_0_", i, sep=""));
+            				idx <- which(names(phenoIn) == paste("x22009_0_", i, sep=""))
             				if (length(idx)==0) {
                			    stop(paste("phenotype file doesn't contain required genetic principal component colunn: x22009_0_", i, sep=""), call.=FALSE)
               			}
           			}
         		}
         		## assessment centre field
-        		idx = which(names(phenoIn) == "x54_0_0");
+        		idx <- which(names(phenoIn) == "x54_0_0")
           	if (length(idx)==0) {
           	    stop("phenotype file doesn't contain required assessment centre colunn: x54_0_0", call.=FALSE)
           	}
         }
-  
   	}
   	print("Phenotype file validated")
 }
diff --git a/PHESANT/R/run.r b/PHESANT/R/run.r
index 70ed305..cda7a06 100644
--- a/PHESANT/R/run.r
+++ b/PHESANT/R/run.r
@@ -1,20 +1,16 @@
 run <- function(opt) {
     input <- initData(opt)
-    data <- input$data
-    vl <- input$vl
-    confounders <- input$confounders
-    phenoStartIdx <- input$phenoStartIdx
-    phenoVars <- input$phenoVars
     print("LOADING DONE")
     
-    initEnv(opt, data)
+    #initilize package level variables, primarily for passing variables between functions and logging
+    .initEnv(opt, input$data)
     
     currentVar <- ""
     currentVarShort <- ""
     first <- TRUE
     
     phenoIdx=0; # zero because then the idx is the position of the previous variable, i.e. the var in currentVar
-    for (var in phenoVars) { 
+    for (var in input$phenoVars) { 
         sink()
         sink(pkg.env$resLogFile, append=TRUE)
         
@@ -26,7 +22,7 @@ run <- function(opt) {
         
         ## test this variable
         if (currentVar == varx) {
-          thisCol <- data[,eval(var)]
+          thisCol <- input$data[,eval(var)]
           thisCol <- .replaceNaN(thisCol)
           currentVarValues <- cbind.data.frame(currentVarValues, thisCol)
         } else if (currentVarShort == varxShort) {
@@ -34,8 +30,8 @@ run <- function(opt) {
         } else {
           ## new variable so run test for previous (we have collected all the columns now)
           if (first==FALSE) {
-            thisdata <- makeTestDataFrame(data, confounders, currentVarValues)
-            testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
+            thisdata <- makeTestDataFrame(input$data, input$confounders, currentVarValues)
+            testAssociations(opt, input$vl, currentVar, currentVarShort, thisdata, input$phenoStartIdx)
           }
           first <- FALSE
           
@@ -43,7 +39,7 @@ run <- function(opt) {
           currentVar <- varx
           currentVarShort <- varxShort
           
-          currentVarValues <- data[,eval(var)]
+          currentVarValues <- input$data[,eval(var)]
           currentVarValues <- .replaceNaN(currentVarValues)
         }
         phenoIdx <- phenoIdx + 1
@@ -51,8 +47,8 @@ run <- function(opt) {
     
     if (phenoIdx>0){
         # last variable so test association
-        thisdata = makeTestDataFrame(data, confounders, currentVarValues)
-        testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
+        thisdata = makeTestDataFrame(input$data, input$confounders, currentVarValues)
+        testAssociations(opt, input$vl, currentVar, currentVarShort, thisdata, input$phenoStartIdx)
     }
     sink()
     
diff --git a/PHESANT/R/traitOfInterest.r b/PHESANT/R/traitOfInterest.r
index 537686e..8d6ea10 100644
--- a/PHESANT/R/traitOfInterest.r
+++ b/PHESANT/R/traitOfInterest.r
@@ -47,37 +47,36 @@ loadTraitOfInterest <- function(opt, phenotypes) {
 # Validate the contents of the trait of interest file
 validateTraitInputHeader <- function(opt) {
     if (opt$save!=TRUE) {
+      	### get header of trait of interest file or pheno file (if no trait of interest file is specified
+        print("Validating trait of interest data ...")
+      	if (is.null(opt$traitofinterestfile)) {
+      		  snpIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
+      	} else {
+      		  snpIn = read.table(opt$traitofinterestfile, header=1, nrows=1, sep=',')	
+      	}
       
-    	### get header of trait of interest file or pheno file (if no trait of interest file is specified
-      print("Validating trait of interest data ...")
-    	if (is.null(opt$traitofinterestfile)) {
-    		  snpIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
-    	} else {
-    		  snpIn = read.table(opt$traitofinterestfile, header=1, nrows=1, sep=',')	
-    	}
-    
-    	### trait of interest file validation
-    	print(paste("Number of columns in trait of interest file:", ncol(snpIn),sep=""))
-    	## check user id exists in snp file
-    	idx1 = which(names(snpIn) == opt$userId);
-    	if (length(idx1)==0) {
-      		if (is.null(opt$traitofinterestfile)) {
-      			stop(paste("Phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
-      		} else {
-      			stop(paste("Trait of interest file doesn't contain userID colunn:", opt$userId), call.=FALSE)
-      		}
-    	}
-    	
-    	## check trait of interest exists in trait of interest file
-    	idx2 = which(names(snpIn) == opt$traitofinterest);
-    	if (length(idx2)==0) {
-      		if (is.null(opt$traitofinterestfile)) {
-      			  stop(paste("No trait of interest file specified, and phenotypes file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
-      		} else {
-      			  stop(paste("Trait of interest file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
-      		}
-    	}
-    	print("Trait of interest file validated")
-    }
+      	### trait of interest file validation
+      	print(paste("Number of columns in trait of interest file:", ncol(snpIn),sep=""))
+      	## check user id exists in snp file
+      	idx1 = which(names(snpIn) == opt$userId);
+      	if (length(idx1)==0) {
+        		if (is.null(opt$traitofinterestfile)) {
+        			stop(paste("Phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+        		} else {
+        			stop(paste("Trait of interest file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+        		}
+      	}
+      	
+      	## check trait of interest exists in trait of interest file
+      	idx2 = which(names(snpIn) == opt$traitofinterest);
+      	if (length(idx2)==0) {
+        		if (is.null(opt$traitofinterestfile)) {
+        			  stop(paste("No trait of interest file specified, and phenotypes file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
+        		} else {
+        			  stop(paste("Trait of interest file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
+        		}
+      	}
+      	print("Trait of interest file validated")
+      }
 }
 
diff --git a/PHESANT/man/initVariableLists.Rd b/PHESANT/man/initVariableLists.Rd
new file mode 100644
index 0000000..26a7e5a
--- /dev/null
+++ b/PHESANT/man/initVariableLists.Rd
@@ -0,0 +1,31 @@
+\name{initVariableLists}
+\alias{initVariableLists}
+\title{
+Load the variable information and data code information files
+}
+\description{
+Load the variable information and data code information files
+}
+\usage{
+initVariableLists(variablelistfile, datacodingfile)
+}
+\arguments{
+  \item{variablelistfile}{
+  the input phenotype variable list file
+}
+  \item{datacodingfile}{
+  the input data coding file for the DATA_CODING field for phenotypre
+}
+}
+\details{
+Load the variable information and data code information files
+}
+\value{
+  return a list contain with two variables:
+  \item{phenoInfo }{A data frame that holds information about phenotype varibles from UKB}
+  \item{dataCodeInfo }{A data frame that provides further information about the DATA_CODING field of a phenotype from phenoInfo }
+}
+
+\examples{
+  #vl <- initVariableLists(opt$variablelistfile, opt$datacodingfile)
+}
diff --git a/PHESANT/man/validatePhenotypeInputHeader.Rd b/PHESANT/man/validatePhenotypeInputHeader.Rd
new file mode 100644
index 0000000..c4a8e4e
--- /dev/null
+++ b/PHESANT/man/validatePhenotypeInputHeader.Rd
@@ -0,0 +1,36 @@
+\name{validatePhenotypeInputHeader}
+\alias{validatePhenotypeInputHeader}
+\title{
+Validate the contents of the phenotype file
+}
+\description{
+Validate the contents of the phenotype file
+}
+\usage{
+validatePhenotypeInputHeader(opt)
+}
+\arguments{
+  \item{opt}{
+  The list of input options provided by user.
+}
+}
+\details{
+This function validates the input phenotype file based on user options. 
+\enumerate{
+    \item check if user id field exists in pheno file
+    \item check if phenotype file contains the required age colunn: x21022_0_0
+    \item check if phenotype file contains the required sex colunn: x31_0_0
+    \item check if phenotype file contains the required genetic batch colunn: x22000_0_0, when genetic option is used
+    \item check if phenotype file contains the required genetic principal component colunns(1 to 10): x22009_0_, when sensitivity and genetic options are used
+    \item check if phenotype file contains the required assessment centre colunn: x54_0_0, when sensitivity option is used
+    
+  }
+}
+\value{
+  No return values and reports error and stops the program if the validation fails.
+}
+
+
+\examples{
+#validatePhenotypeInputHeader(opt)
+}
diff --git a/PHESANT/man/validateTraitInputHeader.Rd b/PHESANT/man/validateTraitInputHeader.Rd
new file mode 100644
index 0000000..74b640d
--- /dev/null
+++ b/PHESANT/man/validateTraitInputHeader.Rd
@@ -0,0 +1,28 @@
+\name{validateTraitInputHeader}
+\alias{validateTraitInputHeader}
+\title{
+Validate the contents of the trait of interest file
+}
+\description{
+Validate the contents of the trait of interest file.
+}
+\usage{
+validateTraitInputHeader(opt)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+}
+\details{
+This function validates the contents of the trait of interest file based on user options. A trait of interest ca nbe either from phrenotype file or trait of interest file.
+\enumerate{
+    \item check if user id field exists in pheno file or traint of interest file if the trait of interest file is provided.
+    \item check if phenotype file or trait of interest file contains the required trait of interest variable column.
+  }
+}
+\value{
+No return values and reports error and stops the program if the validation fails.
+}
+
+\examples{
+#validateTraitInputHeader(opt)
+}

From e7d6ecfe92b023217327b54da4addbd001cfbbf9 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 14:19:39 -0400
Subject: [PATCH 19/27] Code refactor and documentation

---
 PHESANT/R/binaryLogisticRegression.r | 14 ++---
 PHESANT/R/cleanData.R                | 39 ++++++------
 PHESANT/R/confounders.r              | 68 ++++++++++-----------
 PHESANT/R/counters.r                 |  6 +-
 PHESANT/R/indicatorFields.r          | 86 +++++++++++++-------------
 PHESANT/R/init.r                     |  2 +-
 PHESANT/R/loadData.r                 |  9 +--
 PHESANT/R/phenotypes.r               | 90 ++++++++++++++++------------
 PHESANT/R/run.r                      |  2 +-
 PHESANT/R/testAssociations.r         | 30 +++++-----
 PHESANT/R/testCatMultiple.r          |  6 +-
 PHESANT/R/testCatSingle.r            | 12 ++--
 PHESANT/R/testCategoricalOrdered.r   | 12 ++--
 PHESANT/R/testCategoricalUnordered.r | 12 ++--
 PHESANT/R/testContinuous.r           | 30 +++++-----
 PHESANT/R/testInteger.r              | 10 ++--
 PHESANT/R/testNumExamples.r          | 37 ------------
 PHESANT/R/traitOfInterest.r          |  7 +--
 PHESANT/man/initData.Rd              | 29 +++++++++
 PHESANT/man/loadConfounders.Rd       | 32 ++++++++++
 PHESANT/man/loadData.Rd              | 37 ++++++++++++
 PHESANT/man/loadPhenotypes.Rd        | 25 ++++++++
 PHESANT/man/loadTraitOfInterest.Rd   | 25 ++++++++
 23 files changed, 365 insertions(+), 255 deletions(-)
 delete mode 100644 PHESANT/R/testNumExamples.r
 create mode 100644 PHESANT/man/initData.Rd
 create mode 100644 PHESANT/man/loadConfounders.Rd
 create mode 100644 PHESANT/man/loadData.Rd
 create mode 100644 PHESANT/man/loadPhenotypes.Rd
 create mode 100644 PHESANT/man/loadTraitOfInterest.Rd

diff --git a/PHESANT/R/binaryLogisticRegression.r b/PHESANT/R/binaryLogisticRegression.r
index 1112377..705b9b1 100644
--- a/PHESANT/R/binaryLogisticRegression.r
+++ b/PHESANT/R/binaryLogisticRegression.r
@@ -31,7 +31,7 @@ binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure
 	if (length(facLevels)!=2) {
 		#stop(paste("Not 2 levels: ", length(facLevels), " || ", sep=""))
 		cat("BINARY-NOT2LEVELS- (", length(facLevels), ") || ",sep="");
-                incrementCounter("binary.nottwolevels")
+                .incrementCounter("binary.nottwolevels")
 	}
 
 	idxTrue = length(which(phenoFactor==facLevels[1]))
@@ -40,11 +40,11 @@ binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure
   
         if (idxTrue<10 || idxFalse<10) {
 		cat("BINARY-LOGISTIC-SKIP-10 (", idxTrue, "/", idxFalse, ") || ", sep="")
-          incrementCounter("binary.10")
+          .incrementCounter("binary.10")
 	}
 	else if (numNotNA<500) {	
 		cat("BINARY-LOGISTIC-SKIP-500 (", numNotNA, ") || ",sep="");
-	  incrementCounter("binary.500")
+	  .incrementCounter("binary.500")
        	}
 	else {
 
@@ -54,7 +54,7 @@ binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure
 			# add pheno to dataframe
 			.storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'bin')
 			cat("SUCCESS results-logistic-binary ");			
-			incrementCounter("success.binary")
+			.incrementCounter("success.binary")
 		}
 		else {
 
@@ -102,10 +102,10 @@ binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure
                 write(paste(varName,varType,paste(idxTrue,"/",idxFalse,"(",numNotNA,")",sep=""), beta,lower,upper,pvalue, sep=","), file=paste(opt$resDir,"results-logistic-binary-",opt$varTypeArg,".txt",sep=""), append="TRUE");
                 cat("SUCCESS results-logistic-binary ");
                 
-                incrementCounter("success.binary")
+                .incrementCounter("success.binary")
 
 		if (isExposure==TRUE) {
-		  incrementCounter("success.exposure.binary")
+		  .incrementCounter("success.exposure.binary")
 	        }	
 
 		## END TRYCATCH
@@ -113,7 +113,7 @@ binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure
                         sink()
                         sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        incrementCounter("binary.error")
+                        .incrementCounter("binary.error")
                 })
 		}	
 	}
diff --git a/PHESANT/R/cleanData.R b/PHESANT/R/cleanData.R
index 7f27c4d..adf98ae 100644
--- a/PHESANT/R/cleanData.R
+++ b/PHESANT/R/cleanData.R
@@ -59,28 +59,6 @@ reassignValue <- function(vl, pheno, varName) {
     return(pheno)
 }
 
-fixOddFieldsToCatMul <- function(vl, data) {
-    # examples are variables: 40006, 40011, 40012, 40013
-    # get all variables that need their instances changing to arrays
-    dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
-    for (i in 1:nrow(dataPheno)) {
-      varID = dataPheno[i,]$FieldID;		
-      varidString = paste("x",varID,"_", sep="");			
-      
-      # get all columns in data dataframe for this variable	
-      colIdxs = which(grepl(varidString,names(data)));
-      
-      # change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
-      count = 0;
-      for (j in colIdxs) {	
-        colnames(data)[j] <- paste(varidString, "0_", count, sep="")
-        count = count + 1;
-      }				
-    }
-    return(data)
-}
-
-
 # Replace NaN and empty values with NA in pheno
 .replaceNaN <- function(pheno) {
   	if (is.factor(pheno)) {
@@ -98,3 +76,20 @@ fixOddFieldsToCatMul <- function(vl, data) {
   	}
   	return(phenoReplaced)
 }
+
+.testNumExamples <- function(pheno) {
+  	## loop through values and remove if has < 10 examples
+    uniqVar <- unique(na.omit(pheno))
+    for (u in uniqVar) {
+        withValIdx <- which(pheno==u)
+        numWithVal <- length(withValIdx);
+        if (numWithVal<10) {
+            pheno[withValIdx] <- NA
+        	  cat(paste("Removed ",u ,": ", numWithVal, "<10 examples || ", sep=""));
+        } else {
+        	  cat(paste("Inc(>=10): ", u, "(", numWithVal, ") || ", sep=""));
+        }
+    }
+  	return(pheno)
+}
+
diff --git a/PHESANT/R/confounders.r b/PHESANT/R/confounders.r
index 9fc1c10..c2fa2ce 100644
--- a/PHESANT/R/confounders.r
+++ b/PHESANT/R/confounders.r
@@ -22,86 +22,80 @@
 loadConfounders <- function(opt, phenotypes) {
     if (opt$save==TRUE) { 
 	      # saving not running tests so we add a fake confounder
-	      numRows = nrow(phenotypes)
-        data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
+	      numRows <- nrow(phenotypes)
+        data <- cbind.data.frame(phenotypes$userID, rep(-1, numRows))
         colnames(data)[1] <- "userID"
         colnames(data)[2] <- "conf1"
         return(data)
     } else {
       	if (!is.null(opt$confounderfile)) {
       		    print("Loading confounders from confounder file ...")
-      		    confs = fread(opt$confounderfile, sep=',', header=TRUE, data.table=FALSE)
-      	      confs = lapply(confs,function(x) type.convert(as.character(x)))
-      	      confs = as.data.frame(confs)
+      		    confs <- fread(opt$confounderfile, sep=',', header=TRUE, data.table=FALSE)
+      	      confs <- lapply(confs,function(x) type.convert(as.character(x)))
+      	      confs <- as.data.frame(confs)
       
       		    ## find userID column and change name to userID
-      		    idx = which(colnames(confs) == opt$userId)
-      		    confs = confs[,c(opt$userID,setdiff(colnames(confs),opt$userID))]
+      		    idx <- which(colnames(confs) == opt$userId)
+      		    confs <- confs[,c(opt$userID,setdiff(colnames(confs),opt$userID))]
       		    colnames(confs)[1] <- "userID"
     	  } else {
     	        print("Loading confounders from phenotypes file ...")
-              confNames = .getConfounderNames(opt)
-              #####
+              confNames <- .getConfounderNames(opt)
+              
     	        ##### extract confounders from data file
-              confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
-    	        confs = lapply(confs,function(x) type.convert(as.character(x)))
-    	        confs = as.data.frame(confs)
-    
-              #####
+              confs <- fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
+    	        confs <- lapply(confs,function(x) type.convert(as.character(x)))
+    	        confs <- as.data.frame(confs)
+
     	        ##### process genetic batch to create genetic chip variable
               if (opt$genetic == TRUE) {
-                    genoBatch = confs[,"x22000_0_0"]
+                    genoBatch <- confs[,"x22000_0_0"]
                     # chip comes from batch field 22000
-                    genoChip = rep.int(NA,nrow(confs));
-                    idxForVar = which(genoBatch<0);
-                    genoChip[idxForVar] = 0;
-                    idxForVar = which(genoBatch>=0 & genoBatch<2000);
-                    genoChip[idxForVar] = 1;
+                    genoChip <- rep.int(NA,nrow(confs))
+                    idxForVar <- which(genoBatch<0)
+                    genoChip[idxForVar] <- 0
+                    idxForVar <- which(genoBatch>=0 & genoBatch<2000)
+                    genoChip[idxForVar] <- 1
     
     		            # remove geno batch from and add geno chip to confounders
-                    confs = confs[,-which(names(confs) == "x22000_0_0")]
-                    confs = cbind.data.frame(confs, genoChip)
+                    confs <- confs[,-which(names(confs) == "x22000_0_0")]
+                    confs <- cbind.data.frame(confs, genoChip)
               }
 
-	            #####
             	##### Convert assessment centre to an indicator variable
 	            if (opt$sensitivity==TRUE) {
-		                confs$x54_0_0 = as.factor(confs$x54_0_0)
-		                assCentre = model.matrix(~confs$x54_0_0)
-		                assCentre = assCentre[,2:ncol(assCentre)]
-		                confs = cbind(confs, assCentre)
+		                confs$x54_0_0 <- as.factor(confs$x54_0_0)
+		                assCentre <- model.matrix(~confs$x54_0_0)
+		                assCentre <- assCentre[,2:ncol(assCentre)]
+		                confs <- cbind(confs, assCentre)
 		                confs$x54_0_0 = NULL
 	            }
 	            colnames(confs)[1] <- "userID"
-
 	      }
 
 	      # remove any rows with no values
 	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))
-	      confsComp = complete.cases(confs)
+	      confsComp <- complete.cases(confs)
 	      print(paste("Number of INCOMPLETE rows removed from confounder data: ", length(which(confsComp==FALSE)),sep=""))
-	      confs = confs[confsComp==TRUE,]
+	      confs <- confs[confsComp==TRUE,]
 	      print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))	
 
 	      print("Confounder columns:")
 	      print(names(confs))
-
 	      return(confs)
     }
 }
 
 .getConfounderNames <- function(opt) {
-    #####
 	  ##### first get vector of confounder names
     # age and sex
-    confNames = c(opt$userId, "x21022_0_0", "x31_0_0")
-    
+    confNames <- c(opt$userId, "x21022_0_0", "x31_0_0")
     # if genetic trait of interest then adjust for genotype chip
     # and also let user choose sensitivity analysis that also adjusts for top 10 genetic principal components and assessment centre
     if (opt$genetic == TRUE) {
-          confNames = append(confNames, "x22000_0_0")
+          confNames <- append(confNames, "x22000_0_0")
           if (opt$sensitivity==TRUE) {
-                confNames = append(confNames, c("x22009_0_1", "x22009_0_2", "x22009_0_3", "x22009_0_4", "x22009_0_5", "x22009_0_6", "x22009_0_7", "x22009_0_8", "x22009_0_9", "x22009_0_10", "x54_0_0"))
+                confNames <- append(confNames, c("x22009_0_1", "x22009_0_2", "x22009_0_3", "x22009_0_4", "x22009_0_5", "x22009_0_6", "x22009_0_7", "x22009_0_8", "x22009_0_9", "x22009_0_10", "x54_0_0"))
                 print("Adjusting for age, sex, genotype chip, top 10 genetic principal components and assessment centre")
           } else {
                 print("Adjusting for age, sex and genotype chip")
@@ -109,7 +103,7 @@ loadConfounders <- function(opt, phenotypes) {
     } else {
           # non genetic trait of interest, then sensitivity adjusts for assessment center
           if (opt$sensitivity==TRUE) {
-                confNames = append(confNames, "x54_0_0")
+                confNames <- append(confNames, "x54_0_0")
                 print("Adjusting for age, sex and assessment centre")
           } else {
                 print("Adjusting for age and sex")
diff --git a/PHESANT/R/counters.r b/PHESANT/R/counters.r
index ec47a29..d5a061d 100644
--- a/PHESANT/R/counters.r
+++ b/PHESANT/R/counters.r
@@ -17,7 +17,7 @@
 # DEALINGS IN THE SOFTWARE.
 
 # adds given value to counter, that are used to count how many variables go down each route in the data flow
-addToCounts <- function(countName, num) {
+.addToCounts <- function(countName, num) {
     idx = which(pkg.env$counters$name==countName)
     if (length(idx)==0) {
         # counter does not exist so add with countValue 1
@@ -29,7 +29,7 @@ addToCounts <- function(countName, num) {
 }
 
 # increments counters used to count how many variables go down each route in the data flow
-incrementCounter <- function(countName) {
+.incrementCounter <- function(countName) {
     idx = which(pkg.env$counters$name==countName)
     if (length(idx)==0) {
         # counter does not exist so add with countValue 1
@@ -41,7 +41,7 @@ incrementCounter <- function(countName) {
 }
 
 # Saves the counters stored in count variables, to a file in results directory
-saveCounts <- function(opt) {
+.saveCounts <- function(opt) {
     countFile = paste(opt$resDir,"variable-flow-counts-",opt$varTypeArg,".txt",sep="")
     # sort on counter name
     sortIdx = order(as.character(pkg.env$counters[,"name"]))
diff --git a/PHESANT/R/indicatorFields.r b/PHESANT/R/indicatorFields.r
index 04c860a..2a0178b 100644
--- a/PHESANT/R/indicatorFields.r
+++ b/PHESANT/R/indicatorFields.r
@@ -18,13 +18,12 @@
 
 ##
 ## load data used for data code default value related field, and categorical multiple indicator field
-loadIndicatorFields <- function(opt, vl, phenosToTest) {
+.loadIndicatorFields <- function(opt, vl, phenosToTest) {
   	print("Loading indicator fields from phenotypes file ...")
-  
   	# read pheno file column names
-    phenoVarsAll = colnames(read.table(opt$phenofile, header=1, nrows=1, sep=','))
-    phenoVarsAll = phenoVarsAll[which(phenoVarsAll!=opt$userId)]
-  	indVars = c(opt$userId)
+    phenoVarsAll <- colnames(read.table(opt$phenofile, header=1, nrows=1, sep=','))
+    phenoVarsAll <- phenoVarsAll[which(phenoVarsAll!=opt$userId)]
+  	indVars <- c(opt$userId)
   
   	## add indicator variables to pheno data
   	indVars = .addIndicatorVariables(vl, indVars, phenosToTest, phenoVarsAll)
@@ -37,8 +36,8 @@ loadIndicatorFields <- function(opt, vl, phenosToTest) {
   	}
   
     ## read in the right table columns
-    data = fread(opt$phenofile, select=indVars, sep=',', header=TRUE, data.table=FALSE)
-  	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+    data <- fread(opt$phenofile, select=indVars, sep=',', header=TRUE, data.table=FALSE)
+  	data <- data.frame(lapply(data,function(x) type.convert(as.character(x))))
   	colnames(data)[1] <- "userID"
   	return(data)
 }
@@ -48,30 +47,29 @@ loadIndicatorFields <- function(opt, vl, phenosToTest) {
 
   	##### default value related fields for data codes
   	# get list of all indicator variables from outcome info file
-  
   	# get datacodes with an indicator variable
-  	dataCodeIdx = which(!is.na(vl$dataCodeInfo$default_related_field) & vl$dataCodeInfo$default_related_field!="")
+  	dataCodeIdx <- which(!is.na(vl$dataCodeInfo$default_related_field) & vl$dataCodeInfo$default_related_field!="")
   
   	# whether there are any related fields that are not in the phenotype data file when they should be
-    hasIssue=FALSE
+    hasIssue <- FALSE
   
   	if (length(dataCodeIdx)>0) {
-      	dataCodeWithRF = vl$dataCodeInfo[dataCodeIdx,]
-      	defaultFields = c()
+      	dataCodeWithRF <- vl$dataCodeInfo[dataCodeIdx,]
+      	defaultFields <- c()
       
       	# check there is a field in the phenotypes data for each data code and if so then add this data codes related field to the phenosToTest list
       	if (nrow(dataCodeWithRF)>0) {
           	for (i in 1:nrow(dataCodeWithRF)) {
-            		dc = dataCodeWithRF$dataCode[i]
+            		dc <- dataCodeWithRF$dataCode[i]
             
             		# get all fields with this datacode
-            		fieldsIdx = which(vl$phenoInfo$DATA_CODING == dc)
-            		fieldIDs = vl$phenoInfo$FieldID[fieldsIdx]
-            		fieldIDs = paste("x",fieldIDs,"_0_0", sep="")
+            		fieldsIdx <- which(vl$phenoInfo$DATA_CODING == dc)
+            		fieldIDs <- vl$phenoInfo$FieldID[fieldsIdx]
+            		fieldIDs <- paste("x",fieldIDs,"_0_0", sep="")
             
             		# datacode related field
-            		rf = dataCodeWithRF$default_related_field[i]
-            		rf = paste("x",rf,"_0_0", sep="")
+            		rf <- dataCodeWithRF$default_related_field[i]
+            		rf <- paste("x",rf,"_0_0", sep="")
             
             		# if one of these field IDs are in phenotypeColumns then data code related field is needed
             		if (length(intersect(fieldIDs, phenosToTest))>0) {
@@ -80,15 +78,15 @@ loadIndicatorFields <- function(opt, vl, phenosToTest) {
       	    }
   	    }
   
-  	    defaultFields = unique(defaultFields)
-  	    indVars = append(indVars, defaultFields)
+  	    defaultFields <- unique(defaultFields)
+  	    indVars <- append(indVars, defaultFields)
   
   	    ##### check these required variables exist in phenotype file
         if(length(defaultFields)>0) {
             for (i in 1:length(defaultFields)) {
                 if (!(defaultFields[i] %in% phenoVarsAll)) {
                     print(paste("Required variable: Field ",defaultFields[i],"is a data code related field (default_related_field column in data code information file) but was not found in phenotype data"))
-                    hasIssue=TRUE
+                    hasIssue <- TRUE
   			        }
             }
         }
@@ -96,45 +94,43 @@ loadIndicatorFields <- function(opt, vl, phenosToTest) {
   
   	##### categorical multiple indicator fields
   	# get field info, for fields with cat mult indicator fields
-          fieldsIdx = which(!is.na(vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS))
+          fieldsIdx <- which(!is.na(vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS))
   
   	if (length(fieldsIdx)>0) {
-        fieldsWithCMIF = vl$phenoInfo[fieldsIdx,]
-        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "NO_NAN"),]
-        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "ALL"),]
-        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == ""),]
+        fieldsWithCMIF <- vl$phenoInfo[fieldsIdx,]
+        fieldsWithCMIF <- fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "NO_NAN"),]
+        fieldsWithCMIF <- fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "ALL"),]
+        fieldsWithCMIF <- fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == ""),]
   
   		  if (nrow(fieldsWithCMIF)>0) {
         		# turn into variable format not field ID
-        		fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS = paste("x",fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS,"_0_0", sep="")
-        		fieldsWithCMIF$FieldID = paste("x",fieldsWithCMIF$FieldID,"_", sep="")
-        		phenosToTestIds = sub("_.*", "_", phenosToTest)
+        		fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS <- paste("x",fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS,"_0_0", sep="")
+        		fieldsWithCMIF$FieldID <- paste("x",fieldsWithCMIF$FieldID,"_", sep="")
+        		phenosToTestIds <- sub("_.*", "_", phenosToTest)
         	
         		# remove rows where the field isn't in the phenotypes list
-        		idxIn = which(fieldsWithCMIF$FieldID %in% phenosToTestIds)
-        		fieldsWithCMIF = fieldsWithCMIF[idxIn,]
+        		idxIn <- which(fieldsWithCMIF$FieldID %in% phenosToTestIds)
+        		fieldsWithCMIF <- fieldsWithCMIF[idxIn,]
         
-        		defaultFields = fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS
-        		defaultFields = unique(defaultFields)
+        		defaultFields <- fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS
+        		defaultFields <- unique(defaultFields)
         
-        	        #####
         		##### remove those that already exist in phenotypes 'part'
-        		idxExists = which(defaultFields %in% indVars)
+        		idxExists <- which(defaultFields %in% indVars)
         		if (length(idxExists>0)) {
-        		        defaultFields = defaultFields[-idxExists]
+        		    defaultFields <- defaultFields[-idxExists]
         		}
         	
-        		indVars = unique(append(indVars, defaultFields))
+        		indVars <- unique(append(indVars, defaultFields))
         
-        		#####
         		##### check these required variables exist in phenotype file
         		if(length(defaultFields)>0) {
-        			for (i in 1:length(defaultFields)) {
-        				if (!(defaultFields[i] %in% phenoVarsAll)) {
-        					print(paste("Required variable: Field ",defaultFields[i],"is a categorical multiple indicator field (CAT_MULT_INDICATOR_FIELDS column in variable information file) but was not found in phenotype data")) 
-        					hasIssue=TRUE
-        				}
-        			}
+          			for (i in 1:length(defaultFields)) {
+            				if (!(defaultFields[i] %in% phenoVarsAll)) {
+            					print(paste("Required variable: Field ",defaultFields[i],"is a categorical multiple indicator field (CAT_MULT_INDICATOR_FIELDS column in variable information file) but was not found in phenotype data")) 
+            					hasIssue <- TRUE
+            				}
+          			}
         		}
   
   		}
@@ -145,8 +141,6 @@ loadIndicatorFields <- function(opt, vl, phenosToTest) {
   		  print("!!! PHESANT has stopped - add required variables to phenotype file or remove relevant phenotypes (so that required variables are not needed).")
   		  quit()
   	}
-  	
   	return(indVars)
-
 }
 
diff --git a/PHESANT/R/init.r b/PHESANT/R/init.r
index 05d66ba..830a907 100644
--- a/PHESANT/R/init.r
+++ b/PHESANT/R/init.r
@@ -27,7 +27,7 @@ initData <-function(opt) {
     
     ## load data
     d <- loadData(opt, vl)
-    data <- d$datax
+    data <- d$phenotype
     confounders <- d$confounders
     vl$indicatorFields <- d$inds
     
diff --git a/PHESANT/R/loadData.r b/PHESANT/R/loadData.r
index 405af81..2a09f76 100644
--- a/PHESANT/R/loadData.r
+++ b/PHESANT/R/loadData.r
@@ -20,7 +20,8 @@
 # loads phenotype and trait of interest data files
 # creates phenotype / trait of interest data frame
 # creates confounder data frame
-# returns an object holding these two data frames
+# create indicator fields data frame
+# returns an object holding these three data frames
 loadData <- function(opt, vl) {
   
   	validatePhenotypeInputHeader(opt)
@@ -55,9 +56,9 @@ loadData <- function(opt, vl) {
   	}
   
   	# some fields are fixed that have a field type as cat single but we want to treat them like cat mult
-  	phenotype = fixOddFieldsToCatMul(vl, phenotype)
-  	indFields = loadIndicatorFields(opt, vl, colnames(phenotype))
-  	d = list(datax=phenotype, confounders=conf, inds=indFields)
+  	phenotype = .fixOddFieldsToCatMul(vl, phenotype)
+  	indFields = .loadIndicatorFields(opt, vl, colnames(phenotype))
+  	d = list(phenotype=phenotype, confounders=conf, inds=indFields)
   	return(d)
 }
 
diff --git a/PHESANT/R/phenotypes.r b/PHESANT/R/phenotypes.r
index 06a970c..d87f16d 100644
--- a/PHESANT/R/phenotypes.r
+++ b/PHESANT/R/phenotypes.r
@@ -19,88 +19,83 @@
 ## load phenotypes from phenotype file
 loadPhenotypes <- function(opt) {
     ## is not running 'all' then we determine the start and end idxs of phenotypes that we test, so that we can parallelise into multiple jobs
-    if (opt$varTypeArg!="all") {
+    if (opt$varTypeArg != "all") {
 		      # read pheno file column names
-	        phenoVars = read.table(opt$phenofile, header=0, nrows=1, sep=',')
-	        phenoVars = phenoVars[which(phenoVars!=opt$userId)]
+	        phenoVars <- read.table(opt$phenofile, header=0, nrows=1, sep=',')
+	        phenoVars <- phenoVars[which(phenoVars!=opt$userId)]
 
-		      #####
 		      ##### calculate part start and end
-          partSize = ceiling(length(phenoVars)/opt$numParts);
-          partStart = (opt$partIdx-1)*partSize + 1;
+          partSize <- ceiling(length(phenoVars)/opt$numParts)
+          partStart <- (opt$partIdx-1)*partSize + 1
           if (opt$partIdx == opt$numParts) {
-                  partEnd = length(phenoVars);
+                  partEnd <- length(phenoVars)
           } else {
-                  partEnd = partStart + partSize - 1;
+                  partEnd <- partStart + partSize - 1
           }
-          print(paste(partStart, '-', partEnd));
+          print(paste(partStart, '-', partEnd))
 
-
-          #####
           ##### find range of columns to read in
-
       		## This is more complicated than just reading in a column range, because we need to determine cut points
       		## such that all columns of a particular field are loaded.
       		## A field is included in a 'part' if its last column is within the part range.
       		## e.g. for part 2 of 5 parts and for 100 columns, then fields having their last column at position 21 - 40 (i.e. its column index) are included in this part.
 
           ## user ID always included
-          phenosToTest = c(opt$userId)
+          phenosToTest <- c(opt$userId)
 
-      		currentVar=""
-      		currentVarLong=""
-      		currentVarShort=""
-      		first=TRUE
-      		phenoIdx=0
+      		currentVar <- ""
+      		currentVarLong <- ""
+      		currentVarShort <- ""
+      		first <- TRUE
+      		phenoIdx <- 0
 
 		      # all columns for a particular field
-		      thisPhenoToTest = c()
+		      thisPhenoToTest <- c()
           for (var in phenoVars) {
-              varx = gsub("^x", "", var);
-              varx = gsub("_[0-9]+$", "", varx);
-              varxShort = gsub("^x", "", var);
-              varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
-			        currentVarLong = var
+              varx <- gsub("^x", "", var)
+              varx <- gsub("_[0-9]+$", "", varx)
+              varxShort <- gsub("^x", "", var)
+              varxShort <- gsub("_[0-9]+_[0-9]+$", "", varxShort)
+			        currentVarLong <- var
 
         			if (currentVar == varx) { # same variable same timepoint
         				    # add current var to pheno list
-        				    thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
+        				    thisPhenoToTest <- append(thisPhenoToTest, as.character(currentVarLong))
         			} else if (currentVarShort == varxShort) { # save var, diff timepoint
                     ## different time point of this var so skip in testing but add here because some are fixed to cat mult
-                    thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
+                    thisPhenoToTest <- append(thisPhenoToTest, as.character(currentVarLong))
               } else {
                     ## new variable so run test for previous (we have collected all the columns now)
                     if (first==FALSE) {
                         if (phenoIdx>=partStart && phenoIdx<=partEnd) { # only start new variable processing if last column of it is within the idx range for this part
-                       		phenosToTest = append(phenosToTest, thisPhenoToTest)
+                       		phenosToTest <- append(phenosToTest, thisPhenoToTest)
                         }
                     }
-                    first=FALSE;
+                    first <- FALSE
 
                     ## new variable so set values
-                    currentVar = varx
-                    currentVarShort = varxShort
-                    thisPhenoToTest = c(as.character(currentVarLong))
+                    currentVar <- varx
+                    currentVarShort <- varxShort
+                    thisPhenoToTest <- c(as.character(currentVarLong))
               }
-              phenoIdx = phenoIdx + 1
+              phenoIdx <- phenoIdx + 1
       }
 
       # last variable so test association
       if (phenoIdx>=partStart && phenoIdx<=partEnd) {
-            #phenosToTest = append(phenosToTest, as.character(currentVarLong))
-			      phenosToTest = append(phenosToTest, as.character(thisPhenoToTest))
+			      phenosToTest <- append(phenosToTest, as.character(thisPhenoToTest))
       }
 
       ## read in the right table columns - a subset of the data file
-      data = fread(opt$phenofile, select=phenosToTest, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
+      data <- fread(opt$phenofile, select=phenosToTest, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
 
   } else {
           # reading all data at once
-          data = fread(opt$phenofile, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
+          data <- fread(opt$phenofile, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
   }
 
 	## this is type conversion as used in the read.table function (that we used to use ((this was changed because read.table cannot read column subsets))
-	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+	data <- data.frame(lapply(data,function(x) type.convert(as.character(x))))
 	colnames(data)[1] <- "userID"
 	return(data)
 
@@ -161,3 +156,24 @@ validatePhenotypeInputHeader <- function(opt) {
   	print("Phenotype file validated")
 }
 
+.fixOddFieldsToCatMul <- function(vl, data) {
+    # examples are variables: 40006, 40011, 40012, 40013
+    # get all variables that need their instances changing to arrays
+    dataPheno <- vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),]
+    for (i in 1:nrow(dataPheno)) {
+        varID <- dataPheno[i,]$FieldID		
+        varidString <- paste("x",varID,"_", sep="")			
+        
+        # get all columns in data dataframe for this variable	
+        colIdxs <- which(grepl(varidString,names(data)))
+        
+        # change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
+        count <- 0
+        for (j in colIdxs) {	
+            colnames(data)[j] <- paste(varidString, "0_", count, sep="")
+            count <- count + 1
+        }				
+    }
+    return(data)
+}
+
diff --git a/PHESANT/R/run.r b/PHESANT/R/run.r
index cda7a06..b4cf1c0 100644
--- a/PHESANT/R/run.r
+++ b/PHESANT/R/run.r
@@ -53,7 +53,7 @@ run <- function(opt) {
     sink()
     
     # save counters of each path in variable flow
-    saveCounts(opt)
+    .saveCounts(opt)
     if (opt$save == TRUE) {
         write.table(pkg.env$derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
         write.table(pkg.env$derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index 8309b13..951e704 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -30,7 +30,7 @@ testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phe
 		# check if variable info is found for this field
 		if (length(idx)==0) {
 			cat(paste(currentVar, " || Variable could not be found in pheno info file. \n", sep=""))			
-		  incrementCounter("notinphenofile")
+		  .incrementCounter("notinphenofile")
 		}
 		else {
 
@@ -47,12 +47,12 @@ testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phe
 			
 			if (excluded!="") {
 				cat(paste("Excluded integer: ", excluded, " || ", sep=""))
-			  incrementCounter("excluded.int")
+			  .incrementCounter("excluded.int")
 			}
 			else {
-			  incrementCounter("start.int")
+			  .incrementCounter("start.int")
 				if (isExposure==TRUE) {
-				  incrementCounter("start.exposure.int")
+				  .incrementCounter("start.exposure.int")
 				}
 
 			    	testInteger(opt, vl, currentVarShort, "INTEGER", thisdata, phenoStartIdx);
@@ -66,12 +66,12 @@ testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phe
 
 		    	if (excluded!="") {
 				cat(paste("Excluded continuous: ", excluded, " || ", sep=""))
-		    	  incrementCounter("excluded.cont")
+		    	  .incrementCounter("excluded.cont")
 		    	}
 			else {
-			  incrementCounter("start.cont")
+			  .incrementCounter("start.cont")
 				if (isExposure==TRUE) {
-				  incrementCounter("start.exposure.cont")
+				  .incrementCounter("start.exposure.cont")
                                 }
 				testContinuous(opt, vl, currentVarShort, "CONTINUOUS", thisdata, phenoStartIdx);
 	        	}
@@ -84,12 +84,12 @@ testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phe
 	
 	    		if (excluded!="") {
 				cat(paste("Excluded cat-single: ", excluded, " || ", sep=""))
-	    		  incrementCounter("excluded.catSin")
+	    		  .incrementCounter("excluded.catSin")
 			}
 			else {
-			  incrementCounter("start.catSin")
+			  .incrementCounter("start.catSin")
 				if (isExposure==TRUE) {
-				  incrementCounter("start.exposure.catSin")
+				  .incrementCounter("start.exposure.catSin")
                                 }
 			    testCategoricalSingle(opt, vl, currentVarShort, "CAT-SIN", thisdata, phenoStartIdx);
 			}
@@ -102,24 +102,24 @@ testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phe
 
 			if (excluded!="") {
 				cat(paste("Excluded cat-multiple: ", excluded, " || ", sep=""))
-			  incrementCounter("excluded.catMul")
+			  .incrementCounter("excluded.catMul")
 			}
 			else {
 
 				if (catSinToMult!="") {
 					cat("cat-single to cat-multiple || ", sep="")
-				  incrementCounter("catSinToCatMul")
+				  .incrementCounter("catSinToCatMul")
 				}
 
-			  incrementCounter("start.catMul")	
+			  .incrementCounter("start.catMul")	
 				if (isExposure==TRUE) {
-				  incrementCounter("start.exposure.catMul")
+				  .incrementCounter("start.exposure.catMul")
                                 }
 				else {
 					# get number of cat mult values denoting trait of interest
 	                                numVals = getNumValuesCatMultExposure(vl, currentVarShort)
 					if (numVals>0) {
-		                                addToCounts("start.exposure.catMulvalues", numVals)
+		                                .addToCounts("start.exposure.catMulvalues", numVals)
 					}
 				}
 		        	testCategoricalMultiple(opt, vl, currentVarShort, "CAT-MUL", thisdata, phenoStartIdx);
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index 508c4e0..c2b5ffe 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -54,7 +54,7 @@ testCategoricalMultiple <- function(opt, vl, varName, varType, thisdata, phenoSt
 		idxsTrue = idxForVar[,"row"]
 
 		cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="");
-		incrementCounter("catMul.binary")
+		.incrementCounter("catMul.binary")
 		
 		# make zero vector and set 1s for those with this variable value
 		varBinary = rep.int(0,numRows);
@@ -77,12 +77,12 @@ testCategoricalMultiple <- function(opt, vl, varName, varType, thisdata, phenoSt
                 
 	        if (idxTrue<10 || idxFalse<10) {
 	                cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="");
-			incrementCounter("catMul.10")
+			.incrementCounter("catMul.10")
 	        }
 		else {
 			isExposure = getIsCatMultExposure(vl, varName, variableVal)
 
-			incrementCounter("catMul.over10")
+			.incrementCounter("catMul.over10")
 		     	# binary - so logistic regression
 			binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure, phenoStartIdx)
 		}
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index d840a22..7cfb006 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -61,18 +61,18 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
         pheno = .replaceMissingCodes(pheno)
 
 	## remove categories if < 10 examples
-	pheno = testNumExamples(pheno)
+	pheno = .testNumExamples(pheno)
 
 	uniqVar = unique(na.omit(pheno))
 	uniqVar = sort(uniqVar)
 
 	if (length(uniqVar)<=1) {
 		cat("SKIP (only one value) || ");
-		incrementCounter("catSin.onevalue")
+		.incrementCounter("catSin.onevalue")
 	}
 	else if (length(uniqVar)==2) {		
 		cat("CAT-SINGLE-BINARY || ");
-	  incrementCounter("catSin.case3")
+	  .incrementCounter("catSin.case3")
 		# binary so logistic regression
 
 		phenoFactor = factor(pheno)
@@ -91,7 +91,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
 		if (ordered == 0) {
 			
 			cat("CAT-SINGLE-UNORDERED || ")
-		  incrementCounter("catSin.case2")
+		  .incrementCounter("catSin.case2")
 
 			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
 			testCategoricalUnordered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
@@ -101,7 +101,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
 		
 			## ordered
 			cat("ordered || ");
-		  incrementCounter("catSin.case1")
+		  .incrementCounter("catSin.case1")
 
 			## reorder variable values into increasing order
 			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
@@ -110,7 +110,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
 		}
 		else if (ordered == -2) {
 			cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
-		  incrementCounter( "catSin.binaryorexcluded")
+		  .incrementCounter( "catSin.binaryorexcluded")
 		}
 		else {
 			print(paste("ERROR", varName, varType, dataCode));
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index 78c8ea2..efbce8b 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -25,7 +25,7 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 	geno = thisdata[,"geno"]
 
 	cat("CAT-ORD || ");
-	incrementCounter("ordCat")
+	.incrementCounter("ordCat")
 
 	doCatOrdAssertions(pheno)
 
@@ -39,7 +39,7 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 	numNotNA = length(which(!is.na(pheno)))
 	if (numNotNA<500) {
 		cat("CATORD-SKIP-500 (", numNotNA, ") || ",sep="");
-	  incrementCounter("ordCat.500")
+	  .incrementCounter("ordCat.500")
 	}
 	else {
 		# test this cat ordered variable with ordered logistic regression	
@@ -52,7 +52,7 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 			# add pheno to dataframe
 			.storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
 			cat("SUCCESS results-ordered-logistic");
-			incrementCounter("success.ordCat")
+			.incrementCounter("success.ordCat")
                 }
                 else {
 
@@ -93,11 +93,11 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 
 		write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE");
 		cat("SUCCESS results-ordered-logistic");
-		incrementCounter("success.ordCat")
+		.incrementCounter("success.ordCat")
 
 		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
-                  incrementCounter("success.exposure.ordCat")
+                  .incrementCounter("success.exposure.ordCat")
                 }
 
 		### END TRYCATCH
@@ -105,7 +105,7 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 			sink()
                         sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        incrementCounter("ordCat.error")
+                        .incrementCounter("ordCat.error")
                 })
 		}
 
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index 320f522..177020c 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -27,7 +27,7 @@ testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoS
 	numNotNA = length(which(!is.na(pheno)))
 	if (numNotNA<500) {
 		cat("CATUNORD-SKIP-500 (", numNotNA, ") || ",sep="");
-	  incrementCounter("unordCat.500")
+	  .incrementCounter("unordCat.500")
 	}
 	else {
 
@@ -38,7 +38,7 @@ testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoS
 		numWeights=(numUnique-1)*(((phenoStartIdx -1)-2)+1+1)
 		if (numWeights>1000) {
 			cat("Too many weights in model: ", numWeights, " > 1000, (num outcomes values: ", numUnique, ") || SKIP ", sep="")
-		  incrementCounter("unordCat.cats")
+		  .incrementCounter("unordCat.cats")
 			return(NULL)
 		}
 
@@ -48,7 +48,7 @@ testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoS
 			# add pheno to dataframe
 			.storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catUnord')
 			cat("SUCCESS results-notordered-logistic ");
-			incrementCounter("success.unordCat")
+			.incrementCounter("success.unordCat")
 		}
                 else {
 		
@@ -127,11 +127,11 @@ testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoS
 		}
 
 		cat("SUCCESS results-notordered-logistic ");
-		incrementCounter("success.unordCat")
+		.incrementCounter("success.unordCat")
 
 		isExposure = getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
-                  incrementCounter("success.exposure.unordCat")
+                  .incrementCounter("success.exposure.unordCat")
                 }
 		
 		## END TRYCATCH
@@ -139,7 +139,7 @@ testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoS
                         sink()
                         sink(pkg.env$resLogFile, append=TRUE)
                         cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        incrementCounter("unordCat.error")
+                        .incrementCounter("unordCat.error")
 		})
 		}
 	}
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 883ea75..6d5e687 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -78,7 +78,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 
 			## remove categories if < 10 examples to see if this should be binary or not, but if ordered categorical
 			## then we include all values when generating this
-	    		phenoAvgMoreThan10 = testNumExamples(phenoAvg)
+	    		phenoAvgMoreThan10 = .testNumExamples(phenoAvg)
 
 			## binary if 2 distinct values, else ordered categorical
         		phenoFactor = factor(phenoAvgMoreThan10)
@@ -86,11 +86,11 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 
         		if (numLevels<=1) {
        				cat("SKIP (number of levels: ",numLevels,")",sep="")
-        		  incrementCounter("cont.onevalue")
+        		  .incrementCounter("cont.onevalue")
         		}
         		else if (numLevels==2) {
 	        		# binary
-        		  incrementCounter("cont.binary")
+        		  .incrementCounter("cont.binary")
         			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);			
         			binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx);
         		}
@@ -98,7 +98,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 		else {
 			## try to treat as ordered categorical
 
-		  incrementCounter("cont.ordcattry")
+		  .incrementCounter("cont.ordcattry")
 			## equal sized bins
 			phenoBinned = .equalSizedBins(phenoAvg)
 
@@ -111,7 +111,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 
 				# successful binning. >=10 examples in each of the 3 bins
 
-			  incrementCounter("cont.ordcattry.ordcat")
+			  .incrementCounter("cont.ordcattry.ordcat")
 			        thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned);
 				testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
 			}
@@ -122,12 +122,12 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 					## skip - not possible to create binary variable because first and third bins are too small
 					## ie. could merge bin1 with bin 2 but then bin3 still too small etc
 					cat("SKIP 2 bins are too small || ")
-				  incrementCounter("cont.ordcattry.smallbins")
+				  .incrementCounter("cont.ordcattry.smallbins")
 				} 
 				else if ((bin0Num<10 | bin1Num<10) & (bin0Num+bin1Num)>=10) {
 
 					# combine first and second bin to create binary variable
-				  incrementCounter("cont.ordcattry.binsbinary")
+				  .incrementCounter("cont.ordcattry.binsbinary")
 					cat("Combine first two bins and treat as binary || ")
 					phenoBinned[which(phenoBinned==0)] = 1	
 
@@ -138,7 +138,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 				else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
 
 					# combine second and last bin to create binary variable
-				  incrementCounter("cont.ordcattry.binsbinary")
+				  .incrementCounter("cont.ordcattry.binsbinary")
 					cat("Combine last two bins and treat as binary || ")
                                         phenoBinned[which(phenoBinned==2)] = 1
 					
@@ -151,7 +151,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 				else {
 					## skip - not possible to create binary variable because combining bins would still be too small
 					cat("SKIP 2 bins are too small(2) || ")
-				  incrementCounter("cont.ordcattry.smallbins2")
+				  .incrementCounter("cont.ordcattry.smallbins2")
 				}
 
 			}
@@ -160,13 +160,13 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 	}
 	else {		
 		cat("IRNT || ");
-	  incrementCounter("cont.main")
+	  .incrementCounter("cont.main")
 
 		# check there are at least 500 examples
 		numNotNA = length(which(!is.na(phenoAvg)))
 		if (numNotNA<500) {
 			cat("CONTINUOUS-SKIP-500 (", numNotNA, ") || ",sep="");
-		  incrementCounter("cont.main.500")
+		  .incrementCounter("cont.main.500")
 		}
 		else {
 			## inverse rank normal transformation
@@ -176,7 +176,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 				# add pheno to dataframe
 				.storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
 				cat("SUCCESS results-linear");
-				incrementCounter("success.continuous")
+				.incrementCounter("success.continuous")
                         }
                         else {
 		
@@ -223,9 +223,9 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
 			write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt", sep=""), append="TRUE");
 			cat("SUCCESS results-linear");
 
-			incrementCounter("success.continuous")
+			.incrementCounter("success.continuous")
                 	if (isExposure == TRUE) {
-                	  incrementCounter("success.exposure.continuous")
+                	  .incrementCounter("success.exposure.continuous")
                 	}
 
 			## END TRYCATCH
@@ -233,7 +233,7 @@ testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx)
                 	        sink()
                 	        sink(pkg.env$resLogFile, append=TRUE)
                 	        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                	        incrementCounter("continuous.error")
+                	        .incrementCounter("continuous.error")
                 	})
 			}			
 		}
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 8591435..0759155 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -53,29 +53,29 @@ testInteger <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 		
 		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoAvg);
 		testContinuous2(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
-		incrementCounter( "int.continuous")
+		.incrementCounter( "int.continuous")
 	}
 	else {
 		
 		## remove categories if < 10 examples
-	    	phenoAvg = testNumExamples(phenoAvg)
+	  phenoAvg = .testNumExamples(phenoAvg)
 	    
 		## binary if 2 distinct values, else ordered categorical
 		phenoFactor = factor(phenoAvg)
 		numLevels = length(levels(phenoFactor))
 		if (numLevels<=1) {
 			cat("SKIP (number of levels: ",numLevels,")",sep="");
-		  incrementCounter("int.onevalue")
+		  .incrementCounter("int.onevalue")
 		}
 		else if (numLevels==2) {
-		  incrementCounter("int.binary")
+		  .incrementCounter("int.binary")
 
 			# binary
 			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
 			binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure,phenoStartIdx);
 		}
 		else {
-		  incrementCounter("int.catord")
+		  .incrementCounter("int.catord")
 			cat("3-20 values || ")
 
 			# we don't use equal sized bins just the original integers (that have >=10 examples) as categories
diff --git a/PHESANT/R/testNumExamples.r b/PHESANT/R/testNumExamples.r
deleted file mode 100644
index 37cc992..0000000
--- a/PHESANT/R/testNumExamples.r
+++ /dev/null
@@ -1,37 +0,0 @@
-# The MIT License (MIT)
-# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-# Remove variable values if less than 10 examples have this value
-testNumExamples <- function(pheno) {
-	
-	## loop through values and remove if has < 10 examples
-        uniqVar = unique(na.omit(pheno));
-        for (u in uniqVar) {
-                withValIdx = which(pheno==u)
-                numWithVal = length(withValIdx);
-                if (numWithVal<10) {
-                        pheno[withValIdx]=NA
-			cat(paste("Removed ",u ,": ", numWithVal, "<10 examples || ", sep=""));
-                }
-		else {
-			cat(paste("Inc(>=10): ", u, "(", numWithVal, ") || ", sep=""));
-		}
-        }
-	return(pheno);
-}
diff --git a/PHESANT/R/traitOfInterest.r b/PHESANT/R/traitOfInterest.r
index 8d6ea10..ec700e0 100644
--- a/PHESANT/R/traitOfInterest.r
+++ b/PHESANT/R/traitOfInterest.r
@@ -24,8 +24,8 @@ loadTraitOfInterest <- function(opt, phenotypes) {
     if (opt$save==TRUE) { 
 	      # saving not running tests so we don't have a trait of interest
         # add pretend trait of interest so other code doesn't break
-        numRows = nrow(phenotypes)
-	      data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
+        numRows <- nrow(phenotypes)
+	      data <- cbind.data.frame(phenotypes$userID, rep(-1, numRows))
     } else {
 	      # load the trait of interest specified by the user
         if (is.null(opt$traitofinterestfile)) {
@@ -35,12 +35,11 @@ loadTraitOfInterest <- function(opt, phenotypes) {
               print("Loading trait of interest file ...")
               data = fread(opt$traitofinterestfile, select=c(opt$userId, opt$traitofinterest), sep=',', header=TRUE, data.table=FALSE)
         }
-	    data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+	    data <- data.frame(lapply(data,function(x) type.convert(as.character(x))))
     }
 
     colnames(data)[1] <- "userID"
     colnames(data)[2] <- "geno"
-
     return(data)
 }
 
diff --git a/PHESANT/man/initData.Rd b/PHESANT/man/initData.Rd
new file mode 100644
index 0000000..359918d
--- /dev/null
+++ b/PHESANT/man/initData.Rd
@@ -0,0 +1,29 @@
+\name{initData}
+\alias{initData}
+\title{
+Validates, loads input data  and inilizes working environment to run PHESANT 
+}
+\description{
+Validates, loads input data  and inilizes working environment to run PHESANT. 
+}
+\usage{
+initData(opt)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+}
+
+\value{
+Return a list objerct containing all the data used to run PHESANT.
+  \enumerate{
+      \item data: data frame for all phenotypes
+      \item vl: variable list for all phenotypes
+      \item confounders: data frame for all confounders
+      \item phenoStartIdx: the column index for first phenotype column in data data frame
+      \item phenoVars: a list of all phenotypes, excluding user ID
+  }
+}
+
+\examples{
+# input <- initData(opt)
+}
diff --git a/PHESANT/man/loadConfounders.Rd b/PHESANT/man/loadConfounders.Rd
new file mode 100644
index 0000000..1e51a5d
--- /dev/null
+++ b/PHESANT/man/loadConfounders.Rd
@@ -0,0 +1,32 @@
+\name{loadConfounders}
+\alias{loadConfounders}
+\title{
+Loads confounder variables
+}
+\description{
+Loads confounder variables from phenotype file or confounder file based on user options. If user provides a confunder file, all confounder variables will be extracted from it. Otherwise, confounder variables will be read from phenotype file and a set of predefined confounders will be extracted.
+}
+\usage{
+loadConfounders(opt, phenotypes)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{phenotypes}{The phenotype data from loadPhenotypes function.}
+}
+\details{
+If user provides a confunder file, all confounder variables will be extracted from it. Otherwise, confounder variables will be read from phenotype file and a set of predefined confounders will be extracted. When reading from phenotype file for confounders, the following variables will be extracted:
+  \enumerate{
+      \item age: x21022_0_0
+      \item sex: x31_0_0
+      \item genetic: x22000_0_0, with genetic batch used to create genetic chip variable, optional, present only if opt$genetic is set.
+      \item genetic principal components and assessment centre, optional, present only if opt$genetic and opt$sensitivity are set: x22009_0_1 through x22009_0_10, and x54_0_0.
+      \item assessment centre only if opt$genetic is false and opt$sensitivity is true: x54_0_0.
+  }
+}
+
+\value{
+Returns a dataframe for all confounder variables, with first column being UserID.}
+
+\examples{
+#conf <- loadConfounders(opt, phenotype)
+}
diff --git a/PHESANT/man/loadData.Rd b/PHESANT/man/loadData.Rd
new file mode 100644
index 0000000..ca14664
--- /dev/null
+++ b/PHESANT/man/loadData.Rd
@@ -0,0 +1,37 @@
+\name{loadData}
+\alias{loadData}
+\title{
+Loads phenotype, trait of interest data files
+}
+\description{
+Validates and loads phenotype, trait of interest, and relted fields from data files. 
+}
+\usage{
+loadData(opt, vl)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{vl}{The list that holds input phenotype variable list and input data coding list}
+}
+\details{
+Taking user options as input, this function validates and loads phenotype, trait of interest, and relted fields from data files. 
+  \enumerate{
+      \item loads phenotype and trait of interest data files
+      \item creates phenotype / trait of interest data frame
+      \item creates confounder data frame
+      \item parses and creates indicator data frame
+      \item returns an object holding these three data frames
+  }
+}
+\value{
+  Returns a list of dataframes:
+  \enumerate{
+      \item phenotype: data frame for all phenotypes
+      \item confounders: data frame for all confounders
+      \item inds: data frame for all related indicators
+  }
+}
+
+\examples{
+#d <- loadData(opt, vl)
+}
diff --git a/PHESANT/man/loadPhenotypes.Rd b/PHESANT/man/loadPhenotypes.Rd
new file mode 100644
index 0000000..1fecd9f
--- /dev/null
+++ b/PHESANT/man/loadPhenotypes.Rd
@@ -0,0 +1,25 @@
+\name{loadPhenotypes}
+\alias{loadPhenotypes}
+\title{
+Load phenotypes from phenotype file
+}
+\description{
+Load all or part of phenotypes from phenotype file based on user option.
+}
+\usage{
+loadPhenotypes(opt)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+}
+\details{
+Load all or part of phenotypes from phenotype file based on user option.
+}
+\value{
+Return a data frame object that conbtains all the required phenotypes. 
+}
+
+\examples{
+#phenotype = loadPhenotypes(opt)
+}
+
diff --git a/PHESANT/man/loadTraitOfInterest.Rd b/PHESANT/man/loadTraitOfInterest.Rd
new file mode 100644
index 0000000..ef60560
--- /dev/null
+++ b/PHESANT/man/loadTraitOfInterest.Rd
@@ -0,0 +1,25 @@
+\name{loadTraitOfInterest}
+\alias{loadTraitOfInterest}
+\title{
+Load trait of interest
+}
+\description{
+Load trait of interest, either from separate trait of interest file, or from phenotype file.
+}
+\usage{
+loadTraitOfInterest(opt, phenotypes)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{phenotypes}{The phenotype data from loadPhenotypes function.}
+}
+\details{
+Load trait of interest, either from separate trait of interest file, or from phenotype file.
+}
+\value{
+Returns a data frame of two columns for User ID and trait of interest.
+}
+
+\examples{
+#toi <- loadTraitOfInterest(opt, phenotype)
+}

From 36d191bd18db99f0b5d63cb393dbe043bdcb4826 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 14:29:17 -0400
Subject: [PATCH 20/27] Code refactor

---
 PHESANT/R/testCatMultiple.r | 248 +++++++++++++++++-------------------
 1 file changed, 117 insertions(+), 131 deletions(-)

diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index c2b5ffe..ab90ef2 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -24,143 +24,129 @@
 # 3) Checking derived variable has at least 10 cases in each group
 # 4) Calling binaryLogisticRegression function for this derived binary variable
 testCategoricalMultiple <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
-	cat("CAT-MULTIPLE || ");
-
-	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
-	pheno = reassignValue(vl, pheno, varName)
-
-	## get unique values from all columns of this variable
-	uniqueValues = unique(na.omit(pheno[,1]));
-	numCols = ncol(pheno);
-	numRows = nrow(pheno);
-	if (numCols>1) {
-		for (num in 2:numCols) {
-			u = unique(na.omit(pheno[,num]))
-			uniqueValues = union(uniqueValues,u);
-		}
-	}
-
-	## for each value create a binary variable and test this
-	for (variableVal in uniqueValues) {
-
-		## numeric negative values we assume are missing - check this
-		if(is.numeric(variableVal) & variableVal<0) {
-			cat("SKIP_val:", variableVal," < 0", sep="");
-			next;
-		}
-	
-		# make variable for this value
-		idxForVar = which(pheno == variableVal, arr.ind=TRUE)
-		idxsTrue = idxForVar[,"row"]
-
-		cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="");
-		.incrementCounter("catMul.binary")
-		
-		# make zero vector and set 1s for those with this variable value
-		varBinary = rep.int(0,numRows);
-		varBinary[idxsTrue] = 1;
-		varBinaryFactor = factor(varBinary)
-
-		## data for this new binary variable
-		newthisdata = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], varBinaryFactor)
-
-		## one of 3 ways to decide which examples are negative
-        	idxsToRemove = restrictSample(vl, varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
-
-		if (!is.null(idxsToRemove) & length(idxsToRemove) > 0) {
-			newthisdata = newthisdata[-idxsToRemove,]
-		}
-
-		facLevels = levels(newthisdata[,phenoStartIdx])		
-		idxTrue = length(which(newthisdata[,phenoStartIdx]==facLevels[1]))
-	        idxFalse = length(which(newthisdata[,phenoStartIdx]==facLevels[2]))
-                
-	        if (idxTrue<10 || idxFalse<10) {
-	                cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="");
-			.incrementCounter("catMul.10")
-	        }
-		else {
-			isExposure = getIsCatMultExposure(vl, varName, variableVal)
-
-			.incrementCounter("catMul.over10")
-		     	# binary - so logistic regression
-			binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure, phenoStartIdx)
-		}
-	}
+  	cat("CAT-MULTIPLE || ")
+  
+  	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
+  	pheno = reassignValue(vl, pheno, varName)
+  
+  	## get unique values from all columns of this variable
+  	uniqueValues = unique(na.omit(pheno[,1]));
+  	numCols = ncol(pheno);
+  	numRows = nrow(pheno);
+  	if (numCols>1) {
+  		for (num in 2:numCols) {
+  			u = unique(na.omit(pheno[,num]))
+  			uniqueValues = union(uniqueValues,u);
+  		}
+  	}
+  
+  	## for each value create a binary variable and test this
+  	for (variableVal in uniqueValues) {
+  
+  		## numeric negative values we assume are missing - check this
+  		if(is.numeric(variableVal) & variableVal<0) {
+  			cat("SKIP_val:", variableVal," < 0", sep="");
+  			next;
+  		}
+  	
+  		# make variable for this value
+  		idxForVar = which(pheno == variableVal, arr.ind=TRUE)
+  		idxsTrue = idxForVar[,"row"]
+  
+  		cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="");
+  		.incrementCounter("catMul.binary")
+  		
+  		# make zero vector and set 1s for those with this variable value
+  		varBinary = rep.int(0,numRows);
+  		varBinary[idxsTrue] = 1;
+  		varBinaryFactor = factor(varBinary)
+  
+  		## data for this new binary variable
+  		newthisdata = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], varBinaryFactor)
+  
+  		## one of 3 ways to decide which examples are negative
+          	idxsToRemove = .restrictSample(vl, varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
+  
+  		if (!is.null(idxsToRemove) & length(idxsToRemove) > 0) {
+  			newthisdata = newthisdata[-idxsToRemove,]
+  		}
+  
+  		facLevels = levels(newthisdata[,phenoStartIdx])		
+  		idxTrue = length(which(newthisdata[,phenoStartIdx]==facLevels[1]))
+  	        idxFalse = length(which(newthisdata[,phenoStartIdx]==facLevels[2]))
+                  
+  	        if (idxTrue<10 || idxFalse<10) {
+  	                cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="");
+  			.incrementCounter("catMul.10")
+  	        }
+  		else {
+  			isExposure = getIsCatMultExposure(vl, varName, variableVal)
+  
+  			.incrementCounter("catMul.over10")
+  		     	# binary - so logistic regression
+  			binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure, phenoStartIdx)
+  		}
+  	}
 }
 
 # restricts sample based on value in CAT_MULT_INDICATOR_FIELDS column of variable info file,
 # either NO_NAN, ALL or a field ID
 # returns idx's that should be removed from the sample
-restrictSample <- function(vl, varName,pheno,variableVal, userID) {
-
-	# get definition for sample for this variable either NO_NAN, ALL or a variable ID
-	varIndicator = vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS[which(vl$phenoInfo$FieldID==varName)]
-
-	return(restrictSample2(vl, varName,pheno,varIndicator,variableVal, userID))
+.restrictSample <- function(vl, varName,pheno,variableVal, userID) {
+  	# get definition for sample for this variable either NO_NAN, ALL or a variable ID
+  	varIndicator = vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS[which(vl$phenoInfo$FieldID==varName)]
+  	return(.restrictSample2(vl, varName,pheno,varIndicator,variableVal, userID))
 }
 
-
-restrictSample2 <- function(vl, varName,pheno, varIndicator,variableVal, userID) {
-	
-	if (varIndicator=="NO_NAN") { # remove NAs
-		## remove all people with no value for this variable
-
-		# row indexes with NA in all columns of this cat mult field		
-		ind <- apply(pheno, 1, function(x) all(is.na(x)))
-		naIdxs = which(ind==TRUE)
-		cat("NO_NAN Remove NA participants ", length(naIdxs), " || ", sep="");
-	}
-	else if (varIndicator=="ALL") {
-
-		# use all people (no missing assumed) so return empty vector
-		# e.g. hospital data and death registry
-		naIdxs = cbind()
-		cat("ALL || ")
-	}
-	else if (varIndicator!="") {
-		# remove people who have no value for indicator variable
-		indName = paste("x",varIndicator,"_0_0",sep="");
-		cat("Indicator name ", indName, " || ", sep="");
-		indvarx = merge(userID, vl$indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)		
-		indicatorVar = indvarx[,indName]
-
-		# remove participants with NA value in this related field
-		indicatorVar = .replaceNaN(indicatorVar)
-		naIdxs = which(is.na(indicatorVar))
-
-		cat("Remove indicator var NAs: ", length(naIdxs), " || ", sep="");
-
-		if (is.numeric(as.matrix(indicatorVar))) {
-			# remove participants with value <0 in this related field - assumed missing indicators
-			lessZero = which(indicatorVar<0)
-			naIdxs = union(naIdxs, lessZero)
-			cat("Remove indicator var <0: ", length(lessZero), " || ", sep="")
-		}
-	}
-	else {
-		stop("Categorical multiples variables need a value for CAT_MULT_INDICATOR_FIELDS", call.=FALSE)
-	}
-
-	## remove people with pheno<0 if they aren't a positive example for this variable indicator
-	## because we can't know if they are a negative example or not
-	if (is.numeric(as.matrix(pheno))) {
-		idxForVar = which(pheno == variableVal, arr.ind=TRUE)
-		idxMissing = which(pheno < 0, arr.ind=TRUE)
-
-		# all people with <0 value and not variableVal
-		naMissing = setdiff(idxMissing,idxForVar)
-		
-		# add these people with unknowns to set to remove from sample
-		naIdxs = union(naIdxs, naMissing)
-		
-		cat(paste("Removed ", length(naMissing) ," examples != ", variableVal, " but with missing value (<0) || ", sep=""));
-	}
-	else {
-		cat("Not numeric || ")
-	}
-	
-	return(naIdxs);
-
+.restrictSample2 <- function(vl, varName,pheno, varIndicator,variableVal, userID) {
+  	if (varIndicator=="NO_NAN") { # remove NAs
+    		## remove all people with no value for this variable
+    		# row indexes with NA in all columns of this cat mult field		
+    		ind <- apply(pheno, 1, function(x) all(is.na(x)))
+    		naIdxs <- which(ind==TRUE)
+    		cat("NO_NAN Remove NA participants ", length(naIdxs), " || ", sep="")
+  	} else if (varIndicator=="ALL") {
+    		# use all people (no missing assumed) so return empty vector
+    		# e.g. hospital data and death registry
+    		naIdxs = cbind()
+    		cat("ALL || ")
+  	} else if (varIndicator!="") {
+    		# remove people who have no value for indicator variable
+    		indName <- paste("x",varIndicator,"_0_0",sep="");
+    		cat("Indicator name ", indName, " || ", sep="");
+    		indvarx <- merge(userID, vl$indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)		
+    		indicatorVar <- indvarx[,indName]
+    
+    		# remove participants with NA value in this related field
+    		indicatorVar <- .replaceNaN(indicatorVar)
+    		naIdxs <- which(is.na(indicatorVar))
+    		cat("Remove indicator var NAs: ", length(naIdxs), " || ", sep="");
+    
+    		if (is.numeric(as.matrix(indicatorVar))) {
+      			# remove participants with value <0 in this related field - assumed missing indicators
+      			lessZero <- which(indicatorVar<0)
+      			naIdxs <- union(naIdxs, lessZero)
+      			cat("Remove indicator var <0: ", length(lessZero), " || ", sep="")
+    		}
+  	} else {
+  		  stop("Categorical multiples variables need a value for CAT_MULT_INDICATOR_FIELDS", call.=FALSE)
+  	}
+  
+  	## remove people with pheno<0 if they aren't a positive example for this variable indicator
+  	## because we can't know if they are a negative example or not
+  	if (is.numeric(as.matrix(pheno))) {
+    		idxForVar <- which(pheno == variableVal, arr.ind=TRUE)
+    		idxMissing <- which(pheno < 0, arr.ind=TRUE)
+    
+    		# all people with <0 value and not variableVal
+    		naMissing <- setdiff(idxMissing,idxForVar)
+    		
+    		# add these people with unknowns to set to remove from sample
+    		naIdxs <- union(naIdxs, naMissing)
+    		cat(paste("Removed ", length(naMissing) ," examples != ", variableVal, " but with missing value (<0) || ", sep=""))
+  	} else {
+  		  cat("Not numeric || ")
+  	}
+	  return(naIdxs)
 }
 

From e2b121336675a8d74bd0a93a700862d96396cb35 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 14:38:14 -0400
Subject: [PATCH 21/27] Hide testContinuous2

---
 PHESANT/R/testContinuous.r | 22 +++++++---------------
 PHESANT/R/testInteger.r    |  2 +-
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index 6d5e687..c19214c 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -16,28 +16,20 @@
 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-
 # Main function called for continuous fields
 testContinuous <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
-
-	cat("CONTINUOUS MAIN || ");	
-
-	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-
-	# reassign values
-        pheno = reassignValue(vl, pheno, varName)
-
-	thisdata[,phenoStartIdx:ncol(thisdata)] = pheno
-
-	testContinuous2(opt, vl, varName, varType, thisdata, phenoStartIdx)
-	
+  	cat("CONTINUOUS MAIN || ")	
+  	pheno <- thisdata[,phenoStartIdx:ncol(thisdata)]
+  	# reassign values
+    pheno <- reassignValue(vl, pheno, varName)
+  	thisdata[,phenoStartIdx:ncol(thisdata)] <- pheno
+  	.testContinuous2(opt, vl, varName, varType, thisdata, phenoStartIdx)
 }
 
 # Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
 # This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
-testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
+.testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 	cat("CONTINUOUS || ");
-
 	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
 	isExposure = getIsExposure(vl, varName)
 
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 0759155..863b5bf 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -52,7 +52,7 @@ testInteger <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 	if (length(uniqVar)>=20) {
 		
 		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoAvg);
-		testContinuous2(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
+		.testContinuous2(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
 		.incrementCounter( "int.continuous")
 	}
 	else {

From 08327b89d1cef7740ecaf60fea32ff085e18fdb6 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 16:42:09 -0400
Subject: [PATCH 22/27] Add more documents

---
 PHESANT/R/testCatSingle.r              | 297 ++++++++++++-------------
 PHESANT/R/testCategoricalUnordered.r   |  31 +--
 PHESANT/man/chooseReferenceCategory.Rd |  18 ++
 PHESANT/man/option_list.Rd             |   9 +
 PHESANT/man/pkg.env.Rd                 |  11 +
 5 files changed, 187 insertions(+), 179 deletions(-)
 create mode 100644 PHESANT/man/chooseReferenceCategory.Rd
 create mode 100644 PHESANT/man/option_list.Rd
 create mode 100644 PHESANT/man/pkg.env.Rd

diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index 7cfb006..7b0b509 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -24,175 +24,158 @@
 # 4) Remove values with <10 cases
 # 5) Deterimine correct test to perform, either binary, ordered or unordered.
 testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
-	cat("CAT-SINGLE || ");
-
-	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-	isExposure = getIsExposure(vl, varName)
-
-	# assert variable has only one column
-	if (!is.null(dim(pheno))) stop("More than one column for categorical single")
-
-	pheno = reassignValue(vl, pheno, varName)
-
-	# get data code info - whether this data code is ordinal or not and any reordering
-        dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
-        dataCode = dataPheno$DATA_CODING;
-
-	# get data coding information	
-       	dataCodeRow = which(vl$dataCodeInfo$dataCode==dataCode);
-	if (length(dataCodeRow)==0) {
-                cat("ERROR: No row in data coding info file || ");
-		return(NULL);
-        }
-	dataDataCode = vl$dataCodeInfo[dataCodeRow,];
-        ordered = dataDataCode$ordinal;
-        order = as.character(dataDataCode$ordering);
-
-	## reorder variable values into increasing order (we do this now as this may convert variable to binary rather than ordered)
-        pheno = reorderOrderedCategory(pheno,order);
-
-	## if data code has a default_value then recode NA's to this value for participants with value in default_related_field
-	## this is used where there is no zero option e.g. field 100200
-	defaultValue = dataDataCode$default_value
-	defaultRelatedID = dataDataCode$default_related_field
-	pheno = setDefaultValue(vl, pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
-
-        ## all categories coded as <0 we assume are `missing' values
-        pheno = .replaceMissingCodes(pheno)
-
-	## remove categories if < 10 examples
-	pheno = .testNumExamples(pheno)
-
-	uniqVar = unique(na.omit(pheno))
-	uniqVar = sort(uniqVar)
-
-	if (length(uniqVar)<=1) {
-		cat("SKIP (only one value) || ");
-		.incrementCounter("catSin.onevalue")
-	}
-	else if (length(uniqVar)==2) {		
-		cat("CAT-SINGLE-BINARY || ");
-	  .incrementCounter("catSin.case3")
-		# binary so logistic regression
-
-		phenoFactor = factor(pheno)
-		# binary - so logistic regression
-		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
-		binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx)	
-	}
-	else {
-		# > 2 categories
-		if (is.na(ordered)) {
-			cat(" ERROR: 'ordered' not found in data code info file")	
-		}
-		else {
-
-		## unordered
-		if (ordered == 0) {
-			
-			cat("CAT-SINGLE-UNORDERED || ")
-		  .incrementCounter("catSin.case2")
-
-			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
-			testCategoricalUnordered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
-			
-		}
-		else if (ordered == 1) {
-		
-			## ordered
-			cat("ordered || ");
-		  .incrementCounter("catSin.case1")
-
-			## reorder variable values into increasing order
-			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
-			testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx, order)
-		
-		}
-		else if (ordered == -2) {
-			cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
-		  .incrementCounter( "catSin.binaryorexcluded")
-		}
-		else {
-			print(paste("ERROR", varName, varType, dataCode));
-		}
-		}
-	}
+  	cat("CAT-SINGLE || ")
+  	pheno <- thisdata[,phenoStartIdx:ncol(thisdata)]
+  	isExposure <- getIsExposure(vl, varName)
+  
+  	# assert variable has only one column
+  	if (!is.null(dim(pheno))) stop("More than one column for categorical single")
+  
+  	pheno = reassignValue(vl, pheno, varName)
+  
+  	# get data code info - whether this data code is ordinal or not and any reordering
+          dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
+          dataCode = dataPheno$DATA_CODING;
+  
+  	# get data coding information	
+         	dataCodeRow = which(vl$dataCodeInfo$dataCode==dataCode);
+  	if (length(dataCodeRow)==0) {
+                  cat("ERROR: No row in data coding info file || ");
+  		return(NULL);
+          }
+  	dataDataCode = vl$dataCodeInfo[dataCodeRow,];
+          ordered = dataDataCode$ordinal;
+          order = as.character(dataDataCode$ordering);
+  
+  	## reorder variable values into increasing order (we do this now as this may convert variable to binary rather than ordered)
+    pheno = .reorderOrderedCategory(pheno,order);
+  
+  	## if data code has a default_value then recode NA's to this value for participants with value in default_related_field
+  	## this is used where there is no zero option e.g. field 100200
+  	defaultValue = dataDataCode$default_value
+  	defaultRelatedID = dataDataCode$default_related_field
+  	pheno = .setDefaultValue(vl, pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
+  
+          ## all categories coded as <0 we assume are `missing' values
+          pheno = .replaceMissingCodes(pheno)
+  
+  	## remove categories if < 10 examples
+  	pheno = .testNumExamples(pheno)
+  
+  	uniqVar = unique(na.omit(pheno))
+  	uniqVar = sort(uniqVar)
+  
+  	if (length(uniqVar)<=1) {
+  		cat("SKIP (only one value) || ");
+  		.incrementCounter("catSin.onevalue")
+  	}
+  	else if (length(uniqVar)==2) {		
+  		cat("CAT-SINGLE-BINARY || ");
+  	  .incrementCounter("catSin.case3")
+  		# binary so logistic regression
+  
+  		phenoFactor = factor(pheno)
+  		# binary - so logistic regression
+  		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
+  		binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx)	
+  	}
+  	else {
+  		# > 2 categories
+  		if (is.na(ordered)) {
+  			cat(" ERROR: 'ordered' not found in data code info file")	
+  		}
+  		else {
+  
+  		## unordered
+  		if (ordered == 0) {
+  			
+  			cat("CAT-SINGLE-UNORDERED || ")
+  		  .incrementCounter("catSin.case2")
+  
+  			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
+  			testCategoricalUnordered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
+  			
+  		}
+  		else if (ordered == 1) {
+  		
+  			## ordered
+  			cat("ordered || ");
+  		  .incrementCounter("catSin.case1")
+  
+  			## reorder variable values into increasing order
+  			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
+  			testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx, order)
+  		
+  		}
+  		else if (ordered == -2) {
+  			cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
+  		  .incrementCounter( "catSin.binaryorexcluded")
+  		}
+  		else {
+  			print(paste("ERROR", varName, varType, dataCode));
+  		}
+  		}
+  	}
 }
 
 ## values are reordered and assigned values 1:N for N categories
-reorderOrderedCategory <- function(pheno,order) {
-
-	## new pheno of NAs (all values not in order are assumed to be NA)
-
-	if (!is.na(order) && nchar(order)>0) {
-
-		# make empty pheno		
-		pheno2 = rep(NA,length(pheno));
-			
-		## get ordering
-		orderParts = unlist(strsplit(order,"\\|"));
-		
-		# go through values in correct order and set value
-		# from 1 to the number of values
-		count=1;
-		for(i in orderParts) {
-			idx = which(pheno==i);
-			pheno2[idx] = count;
-		
-			count=count+1;
-		}
-		
-		cat("reorder ",order," || ",sep="");
-		
-		return(pheno2)
-	}	
-	else {
-		return(pheno);
-	}
-	
+.reorderOrderedCategory <- function(pheno,order) {
+  	## new pheno of NAs (all values not in order are assumed to be NA)
+  	if (!is.na(order) && nchar(order)>0) {
+    		# make empty pheno		
+    		pheno2 <- rep(NA,length(pheno))
+    		## get ordering
+    		orderParts <- unlist(strsplit(order,"\\|"))
+    		
+    		# go through values in correct order and set value
+    		# from 1 to the number of values
+    		count <- 1
+    		for (i in orderParts) {
+      			idx <- which(pheno==i)
+      			pheno2[idx] <- count
+      			count <- count + 1
+    		}
+    		cat("reorder ",order," || ",sep="");
+    		return(pheno2)
+  	}	else {
+  		  return(pheno)
+  	}
 }
 
 ## sets default value for people with no value in pheno, but with a value in the
 ## field specified in the default_value_related_field column in the data coding info file.
 ## the default value is specified in the default_value column in the data coding info file.
-setDefaultValue <- function(vl, pheno, defaultValue, defaultRelatedID, userID) {
-
-
-	if (!is.na(defaultValue) && nchar(defaultValue)>0) {
-
-		# remove people who have no value for indicator variable
-	       	indName = paste("x",defaultRelatedID,"_0_0",sep="");
-
-	     	cat("Default related field: ", indName, " || ", sep="");
-		indicatorVar = vl$indicatorFields[,indName]
-		indvarx = merge(userID, vl$indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
-                indicatorVar = indvarx[,indName]
-
+.setDefaultValue <- function(vl, pheno, defaultValue, defaultRelatedID, userID) {
+	  if (!is.na(defaultValue) && nchar(defaultValue)>0) {
+		    # remove people who have no value for indicator variable
+	      indName <- paste("x",defaultRelatedID,"_0_0",sep="")
+
+	     	cat("Default related field: ", indName, " || ", sep="")
+    		indicatorVar <- vl$indicatorFields[,indName]
+    		indvarx <- merge(userID, vl$indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
+        indicatorVar <- indvarx[,indName]
+    
 	    	# remove participants with NA value in this related field
-	    	indicatorVar = .replaceNaN(indicatorVar)
-
-		# check if there are already examples with default value and if so display warning
-		numWithDefault = length(which(pheno==defaultValue))
-		if (numWithDefault>0) {
-			cat("(WARNING: already ", numWithDefault, " values with default value) ", sep="")
-		}
-		
-		# set default value in people who have no value in the pheno but do have a value in the default_value_related_field
-	    	defaultIdxs = which(!is.na(indicatorVar) & is.na(pheno))
-		pheno[defaultIdxs] = defaultValue
-
-	       	cat("default value ", defaultValue, " set, N= ", length(defaultIdxs), " || ", sep="");
-
-	}
-
-	return(pheno)
-
+	    	indicatorVar <- .replaceNaN(indicatorVar)
+    
+    		# check if there are already examples with default value and if so display warning
+    		numWithDefault <- length(which(pheno==defaultValue))
+    		if (numWithDefault>0) {
+    			  cat("(WARNING: already ", numWithDefault, " values with default value) ", sep="")
+    		}
+    		
+    		# set default value in people who have no value in the pheno but do have a value in the default_value_related_field
+    	  defaultIdxs <- which(!is.na(indicatorVar) & is.na(pheno))
+    		pheno[defaultIdxs] <- defaultValue
+    	  cat("default value ", defaultValue, " set, N= ", length(defaultIdxs), " || ", sep="");
+	  }
+	  return(pheno)
 }
 
 
 # Replace negative values with NA as these are assumed to be missing
 .replaceMissingCodes <- function(pheno) {
-  phenoReplaced <- pheno
-  phenoReplaced[phenoReplaced <0 ] <- NA
-  return(phenoReplaced)
+    phenoReplaced <- pheno
+    phenoReplaced[phenoReplaced <0 ] <- NA
+    return(phenoReplaced)
 }
diff --git a/PHESANT/R/testCategoricalUnordered.r b/PHESANT/R/testCategoricalUnordered.r
index 177020c..639ecd0 100644
--- a/PHESANT/R/testCategoricalUnordered.r
+++ b/PHESANT/R/testCategoricalUnordered.r
@@ -147,29 +147,16 @@ testCategoricalUnordered <- function(opt, vl, varName, varType, thisdata, phenoS
 
 # find reference category - category with most number of examples
 chooseReferenceCategory <- function(pheno) {
-
-	uniqVar = unique(na.omit(pheno));
-	phenoFactor = factor(pheno)
-
-	maxFreq=0;
-	maxFreqVar = "";
-	for (u in uniqVar) {
-		withValIdx = which(pheno==u)
-		numWithVal = length(withValIdx);
-		if (numWithVal>maxFreq) {
-			maxFreq = numWithVal;
-			maxFreqVar = u;
-		}
-	}
-
-	cat("reference: ", maxFreqVar,"=",maxFreq, " || ", sep="");
-		
-	## choose reference (category with largest frequency)
-	phenoFactor <- relevel(phenoFactor, ref = paste("",maxFreqVar,sep=""))
-	
-	return(phenoFactor);
+    freq  <- summary(factor(na.omit(pheno)))
+    maxFreq <- max(freq)
+    maxFreqVar <- names(which(freq == max(freq)))[1]
+    cat("reference: ", maxFreqVar,"=", maxFreq, " || ", sep="")
+    
+    ## choose reference (category with largest frequency)
+  	phenoFactor <- factor(pheno)
+  	phenoFactor <- relevel(phenoFactor, ref = maxFreqVar)
+  	return(phenoFactor)
 }
 
 
-
diff --git a/PHESANT/man/chooseReferenceCategory.Rd b/PHESANT/man/chooseReferenceCategory.Rd
new file mode 100644
index 0000000..210767a
--- /dev/null
+++ b/PHESANT/man/chooseReferenceCategory.Rd
@@ -0,0 +1,18 @@
+\name{chooseReferenceCategory}
+\alias{chooseReferenceCategory}
+\title{
+Choose Reference Category 
+}
+\description{
+Convert an integer vector with possible NA's to a factor and use the category/factor with most number of examples as reference.
+}
+\usage{
+chooseReferenceCategory(pheno)
+}
+\arguments{
+  \item{pheno}{ An integer vector with possible NA's. }
+}
+
+\value{
+  A factor with the most number of examples as reference.
+}
diff --git a/PHESANT/man/option_list.Rd b/PHESANT/man/option_list.Rd
new file mode 100644
index 0000000..21a696c
--- /dev/null
+++ b/PHESANT/man/option_list.Rd
@@ -0,0 +1,9 @@
+\name{option_list}
+\alias{option_list}
+\docType{data}
+\title{
+User Input Option List
+}
+
+
+
diff --git a/PHESANT/man/pkg.env.Rd b/PHESANT/man/pkg.env.Rd
new file mode 100644
index 0000000..0a02f01
--- /dev/null
+++ b/PHESANT/man/pkg.env.Rd
@@ -0,0 +1,11 @@
+\name{pkg.env}
+\alias{pkg.env}
+\docType{data}
+\title{
+package level envrionment
+}
+\description{
+A package level envrionment used to hold package level variables. 
+}
+
+\keyword{datasets}

From 390b2bfbaad079db64e8e95ee8288924dfce66c6 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 17:11:12 -0400
Subject: [PATCH 23/27] Code refactor and fix the buggy "end" word in function
 setOrderString

---
 PHESANT/R/cleanData.R              |  2 +-
 PHESANT/R/testCatMultiple.r        |  2 +-
 PHESANT/R/testCatSingle.r          |  2 +-
 PHESANT/R/testCategoricalOrdered.r | 60 +++++++++++++-----------------
 PHESANT/R/testContinuous.r         |  2 +-
 PHESANT/R/testInteger.r            |  2 +-
 6 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/PHESANT/R/cleanData.R b/PHESANT/R/cleanData.R
index adf98ae..fa78a52 100644
--- a/PHESANT/R/cleanData.R
+++ b/PHESANT/R/cleanData.R
@@ -17,7 +17,7 @@
 # DEALINGS IN THE SOFTWARE.
 
 # Reassigns values as specified in data coding info file
-reassignValue <- function(vl, pheno, varName) {
+.reassignValue <- function(vl, pheno, varName) {
   	# get data code info - whether this data code is ordinal or not and any reordering and resassignments
     dataPheno <- vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),]
     dataCode <- dataPheno$DATA_CODING
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index ab90ef2..b9f7573 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -27,7 +27,7 @@ testCategoricalMultiple <- function(opt, vl, varName, varType, thisdata, phenoSt
   	cat("CAT-MULTIPLE || ")
   
   	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
-  	pheno = reassignValue(vl, pheno, varName)
+  	pheno = .reassignValue(vl, pheno, varName)
   
   	## get unique values from all columns of this variable
   	uniqueValues = unique(na.omit(pheno[,1]));
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index 7b0b509..85d988b 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -31,7 +31,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
   	# assert variable has only one column
   	if (!is.null(dim(pheno))) stop("More than one column for categorical single")
   
-  	pheno = reassignValue(vl, pheno, varName)
+  	pheno = .reassignValue(vl, pheno, varName)
   
   	# get data code info - whether this data code is ordinal or not and any reordering
           dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index efbce8b..7089ff7 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -26,13 +26,12 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 
 	cat("CAT-ORD || ");
 	.incrementCounter("ordCat")
-
-	doCatOrdAssertions(pheno)
+	.doCatOrdAssertions(pheno)
 
 	uniqVar = unique(na.omit(pheno));
 
 	# log the ordering of categories used
-	orderStr = setOrderString(orderStr, uniqVar);
+	orderStr = .setOrderString(orderStr, uniqVar);
 	cat("order: ", orderStr, " || ",  sep="");
 
 	# check sample size
@@ -115,46 +114,39 @@ testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoSta
 # check that the phenotype is valid - that there are more than two categories
 # and that these all have at least 10 cases
 # something has gone wrong if this is the case
-doCatOrdAssertions <- function(pheno) {
-
-	# assert variable has only one column    
-    	if (!is.null(dim(pheno))) stop("More than one column for categorical ordered")
-
-		uniqVar = unique(na.omit(pheno));
-	
+.doCatOrdAssertions <- function(pheno) {
+  	# assert variable has only one column    
+    if (!is.null(dim(pheno))) stop("More than one column for categorical ordered")
+		uniqVar <- unique(na.omit(pheno))
 		# assert more than 2 categories
 		if (length(uniqVar)<=1) stop("1 or zero values")
 		if (length(uniqVar)==2) stop("this variable is binary")
 
 		# assert each value has >= 10 examples
 		for (u in uniqVar) {
-     			withValIdx = which(pheno==u)
-        		numWithVal = length(withValIdx);
-		
-			if (numWithVal<10) stop("value with <10 examples")
-		}
+        withValIdx <- which(pheno==u)
+        numWithVal <- length(withValIdx)
+			  if (numWithVal<10) stop("value with <10 examples")
+    }
 }
 
 # If data coding file does not specify an order then we use the default order as in coding defined by Biobank
 # and this function just generates a string with this order for logging purposes
-setOrderString <- function(orderStr, uniqVar) {
-
-	if (is.na(orderStr) || nchar(orderStr)==0) {
-
-		orderStr="";
-
-		# create order str by appending each value
-                uniqVarSorted = sort(uniqVar);
-                first=1;
-                for (i in uniqVarSorted) {
-                        if (first==0) {
-                                orderStr = paste(orderStr, "|",	sep="");
-                        }
-			if (i>=0) # ignore missing values
-                        	orderStr = paste(orderStr, i, sep="");
-				first=0;
-			end
-                }
+.setOrderString <- function(orderStr, uniqVar) {
+  	if (is.na(orderStr) || nchar(orderStr)==0) {
+    		orderStr <- ""
+    		# create order str by appending each value
+        uniqVarSorted <- sort(uniqVar)
+        first <- TRUE
+        for (i in uniqVarSorted) {
+            if (!first) {
+                orderStr <- paste(orderStr, "|",	sep="")
+            }
+            if (i >= 0)  {# ignore missing values
+                	orderStr <- paste(orderStr, i, sep="")
+            }
+            first <- FALSE
         }
-	return(orderStr);
+    }
+  	return(orderStr)
 }
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index c19214c..aeef1a6 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -21,7 +21,7 @@ testContinuous <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
   	cat("CONTINUOUS MAIN || ")	
   	pheno <- thisdata[,phenoStartIdx:ncol(thisdata)]
   	# reassign values
-    pheno <- reassignValue(vl, pheno, varName)
+    pheno <- .reassignValue(vl, pheno, varName)
   	thisdata[,phenoStartIdx:ncol(thisdata)] <- pheno
   	.testContinuous2(opt, vl, varName, varType, thisdata, phenoStartIdx)
 }
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 863b5bf..98735c9 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -33,7 +33,7 @@ testInteger <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 		return(NULL)
 	}
 
-	pheno = reassignValue(vl, pheno, varName)
+	pheno = .reassignValue(vl, pheno, varName)
 
 	## average if multiple columns
 	if (!is.null(dim(pheno))) {

From c55bbea85e6d9a8df61ab45e0e4367705f9f5fe0 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 17:25:05 -0400
Subject: [PATCH 24/27] Code clean for exposure related functions

---
 PHESANT/R/exposure.r | 113 +++++++++++++++++--------------------------
 1 file changed, 45 insertions(+), 68 deletions(-)

diff --git a/PHESANT/R/exposure.r b/PHESANT/R/exposure.r
index a05d561..7a05247 100644
--- a/PHESANT/R/exposure.r
+++ b/PHESANT/R/exposure.r
@@ -21,13 +21,9 @@
 # in the variable information file
 # to determine if values of cat mult fields (not the whole field) are exposure values, use getIsCatMultExposure function instead.
 getIsExposure <- function(vl, varName) {
-
-	idx=which(vl$phenoInfo$FieldID==varName)
-        isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
-        if (!is.na(isExposure) & isExposure=="YES") {
-		return(TRUE)
-    	}
-	return(FALSE)
+    idx <- which(vl$phenoInfo$FieldID==varName)
+    isExposure <- vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+    return(!is.na(isExposure) && isExposure=="YES")
 }
 
 # looks up categorical multiple field in the variable info file, return
@@ -36,74 +32,55 @@ getIsExposure <- function(vl, varName) {
 # as a trait of interest in the TRAIT_OF_INTEREST column (multiple values are
 # separated by "|" in this field
 getIsCatMultExposure <- function(vl, varName, varValue) {
-  
-  # get row index of field in variable information file
-  idx=which(vl$phenoInfo$FieldID==varName)
-  
-  # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
-  # cat mult values denoting exposure variable
-  isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
-  
-  if (!is.na(isExposure) & isExposure!="") {
-    
-    isExposure = as.character(isExposure)
-    
-    ## first check if value is YES, then all values are exposure traits
-    if (isExposure == "YES") {
-      cat("IS_CM_ALL_EXPOSURE || ")
-      return(TRUE)
+    # get row index of field in variable information file
+    idx <- which(vl$phenoInfo$FieldID==varName)
+    # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
+    # cat mult values denoting exposure variable
+    isExposure <- vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+    if (!is.na(isExposure) && isExposure!="") {
+        isExposure <- as.character(isExposure)
+        ## first check if value is YES, then all values are exposure traits
+        if (isExposure == "YES") {
+            cat("IS_CM_ALL_EXPOSURE || ")
+            return(TRUE)
+        }
+      
+        ## try to split by |, to set particular values as exposure
+        # split into variable Values
+        exposureValues = unlist(strsplit(isExposure,"\\|"))
+        # for each value stated, check whether it is varValue
+        for (thisVal in exposureValues) {
+            if (thisVal == varValue) {
+                cat("IS_CM_EXPOSURE || ")
+                return(TRUE)
+            }
+        }
     }
     
-    ## try to split by |, to set particular values as exposure
-    
-    # split into variable Values
-    exposureValues = unlist(strsplit(isExposure,"\\|"))
-    
-    # for each value stated, check whether it is varValue
-    for (thisVal in exposureValues) {
-      if (thisVal == varValue) {
-        cat("IS_CM_EXPOSURE || ")
-        return(TRUE)
-      }
-    }
-  }
-  
-  # varValue is not in list of exposure values
-  return(FALSE)
-  
+    # varValue is not in list of exposure values
+    return(FALSE)
 }
 
 # looks up categorical multiple field in the variable info file, return
 # number of values denoted as trait of interest.
 # returns zero if whole field is denoted trait of interest, not particular values.
 getNumValuesCatMultExposure <- function(vl, varName) {
-  
-  # get row index of field in variable information file
-  idx=which(vl$phenoInfo$FieldID==varName)
-  
-  # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
-  # cat mult values denoting exposure variable
-  isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
-  
-  if (!is.na(isExposure) & isExposure!="") {
-    
-    isExposure = as.character(isExposure)
-    
-    ## first check if value is YES, then no partic values are traits of interest
-    if (isExposure == "YES") {
-      return(0)
+    # get row index of field in variable information file
+    idx <- which(vl$phenoInfo$FieldID==varName)
+    # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
+    # cat mult values denoting exposure variable
+    isExposure <- vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+    if (!is.na(isExposure) & isExposure!="") {
+        isExposure <- as.character(isExposure)
+        ## first check if value is YES, then no partic values are traits of interest
+        if (isExposure == "YES") {
+            return(0)
+        }
+        ## try to split by |, to set particular values as exposure
+        # split into variable Values
+        exposureValues <- unlist(strsplit(isExposure,"\\|"))
+        return(length(exposureValues))
     }
-    
-    ## try to split by |, to set particular values as exposure
-    
-    # split into variable Values
-    exposureValues = unlist(strsplit(isExposure,"\\|"))
-    
-    return(length(exposureValues))
-    
-  }
-  
-  # varValue is not in list of exposure values
-  return(0)
-  
+    # varValue is not in list of exposure values
+    return(0)
 }

From 0871bb48c3dcde9f894027189cc2adebc9ef42a0 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Fri, 15 Mar 2019 17:40:31 -0400
Subject: [PATCH 25/27] Code refactor

---
 PHESANT/R/run.r   |  4 ++--
 PHESANT/R/utils.r | 47 +++++++++++++++++++++--------------------------
 2 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/PHESANT/R/run.r b/PHESANT/R/run.r
index b4cf1c0..8d43b88 100644
--- a/PHESANT/R/run.r
+++ b/PHESANT/R/run.r
@@ -30,7 +30,7 @@ run <- function(opt) {
         } else {
           ## new variable so run test for previous (we have collected all the columns now)
           if (first==FALSE) {
-            thisdata <- makeTestDataFrame(input$data, input$confounders, currentVarValues)
+            thisdata <- .makeTestDataFrame(input$data, input$confounders, currentVarValues)
             testAssociations(opt, input$vl, currentVar, currentVarShort, thisdata, input$phenoStartIdx)
           }
           first <- FALSE
@@ -47,7 +47,7 @@ run <- function(opt) {
     
     if (phenoIdx>0){
         # last variable so test association
-        thisdata = makeTestDataFrame(input$data, input$confounders, currentVarValues)
+        thisdata = .makeTestDataFrame(input$data, input$confounders, currentVarValues)
         testAssociations(opt, input$vl, currentVar, currentVarShort, thisdata, input$phenoStartIdx)
     }
     sink()
diff --git a/PHESANT/R/utils.r b/PHESANT/R/utils.r
index 2e560fb..563d949 100644
--- a/PHESANT/R/utils.r
+++ b/PHESANT/R/utils.r
@@ -15,7 +15,7 @@
 }
 
 ## makes a smaller data frame containing the data for a particular test
-makeTestDataFrame <- function(datax, confounders, currentVarValues) {
+.makeTestDataFrame <- function(datax, confounders, currentVarValues) {
 			thisdata <- datax[,c("geno", "userID")]
 			thisdata <- merge(thisdata, confounders, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
 			currentVarValues <- cbind.data.frame(datax$userID, currentVarValues)
@@ -43,7 +43,7 @@ processArgs <- function(opt, opt_parser) {
       		opt$sensitivity <- FALSE;
       		opt$genetic <- TRUE;
       	}
-    	  opt <- processParts(opt, opt_parser,opt$partIdx, opt$numParts)
+    	  opt <- .processParts(opt, opt_parser,opt$partIdx, opt$numParts)
     }
     else {
     	## check arguments are supplied correctly
@@ -88,7 +88,7 @@ processArgs <- function(opt, opt_parser) {
     	else if (!file.exists(opt$resDir)) {
     		  stop(paste("results directory resDir=", opt$resDir, " does not exist", sep=""), call.=FALSE)
     	}
-    	opt <-processParts(opt, opt_parser,opt$partIdx, opt$numParts);
+    	opt <-.processParts(opt, opt_parser,opt$partIdx, opt$numParts);
     }
     if (opt$save==TRUE) {
     	  print("Saving phenotypes to file. Tests of association will not run!")
@@ -97,27 +97,22 @@ processArgs <- function(opt, opt_parser) {
 }
 
 # Parse the 'part' arguments and check they are valid
-processParts <- function(opt, opt_parser, pIdx, nParts) {
-
-	if (is.null(pIdx) && is.null(nParts)) {
-                opt$varTypeArg <- "all";
-		print(paste("Running with all traits in phenotype file:", opt$phenofile));
-        }
-	else if (is.null(pIdx)) {
-                print_help(opt_parser)
-                stop("pIdx argument must be supplied when nParts argument is supplied", call.=FALSE)
-        }
-	else if (is.null(nParts)) {
-                print_help(opt_parser)
-                stop("nParts argument must be supplied when pIdx argument is supplied", call.=FALSE)
-        }
-	else if (pIdx<1 || pIdx>nParts) {
-		print_help(opt_parser)
-                stop("pIdx arguments must be between 1 and nParts inclusive", call.=FALSE)
-	}
-	else {
-                opt$varTypeArg <- paste(pIdx, "-", nParts, sep="");
-        	print(paste("Running with part",pIdx,"of",nParts," in phenotype file:", opt$phenofile));
-	}
-  return(opt)
+.processParts <- function(opt, opt_parser, pIdx, nParts) {
+  	if (is.null(pIdx) && is.null(nParts)) {
+        opt$varTypeArg <- "all";
+  		  print(paste("Running with all traits in phenotype file:", opt$phenofile));
+    } else if (is.null(pIdx)) {
+        print_help(opt_parser)
+        stop("pIdx argument must be supplied when nParts argument is supplied", call.=FALSE)
+    } else if (is.null(nParts)) {
+        print_help(opt_parser)
+        stop("nParts argument must be supplied when pIdx argument is supplied", call.=FALSE)
+    } else if (pIdx<1 || pIdx>nParts) {
+  		  print_help(opt_parser)
+        stop("pIdx arguments must be between 1 and nParts inclusive", call.=FALSE)
+  	} else {
+        opt$varTypeArg <- paste(pIdx, "-", nParts, sep="");
+        print(paste("Running with part",pIdx,"of",nParts," in phenotype file:", opt$phenofile));
+  	}
+    return(opt)
 }

From 0bc16c247cdd085cd1a095673275a8e4987c9ca1 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Sat, 16 Mar 2019 13:12:05 -0400
Subject: [PATCH 26/27] Code refactor and documentation: continued

---
 PHESANT/R/exposure.r                   |   4 +-
 PHESANT/R/testAssociations.r           | 201 ++++++--------
 PHESANT/R/testCatMultiple.r            |  88 +++---
 PHESANT/R/testCatSingle.r              | 133 ++++-----
 PHESANT/R/testContinuous.r             | 361 +++++++++++--------------
 PHESANT/R/testInteger.r                | 109 ++++----
 PHESANT/R/utils.r                      |  10 +-
 PHESANT/man/getIsExposure.Rd           |  21 ++
 PHESANT/man/option_list.Rd             |   4 +-
 PHESANT/man/parseOptions.Rd            |  15 +
 PHESANT/man/run.Rd                     |  23 ++
 PHESANT/man/testAssociations.Rd        |  27 ++
 PHESANT/man/testCategoricalMultiple.Rd |  34 +++
 PHESANT/man/testCategoricalSingle.Rd   |  32 +++
 PHESANT/man/testContinuous.Rd          |  34 +++
 PHESANT/man/testInteger.Rd             |  32 +++
 PHESANT/phenomeScan.r                  |   4 +-
 17 files changed, 630 insertions(+), 502 deletions(-)
 create mode 100644 PHESANT/man/getIsExposure.Rd
 create mode 100644 PHESANT/man/parseOptions.Rd
 create mode 100644 PHESANT/man/run.Rd
 create mode 100644 PHESANT/man/testAssociations.Rd
 create mode 100644 PHESANT/man/testCategoricalMultiple.Rd
 create mode 100644 PHESANT/man/testCategoricalSingle.Rd
 create mode 100644 PHESANT/man/testContinuous.Rd
 create mode 100644 PHESANT/man/testInteger.Rd

diff --git a/PHESANT/R/exposure.r b/PHESANT/R/exposure.r
index 7a05247..fcbfcdf 100644
--- a/PHESANT/R/exposure.r
+++ b/PHESANT/R/exposure.r
@@ -31,7 +31,7 @@ getIsExposure <- function(vl, varName) {
 # this field denote the exposure), or whether varName has varValue stated 
 # as a trait of interest in the TRAIT_OF_INTEREST column (multiple values are
 # separated by "|" in this field
-getIsCatMultExposure <- function(vl, varName, varValue) {
+.getIsCatMultExposure <- function(vl, varName, varValue) {
     # get row index of field in variable information file
     idx <- which(vl$phenoInfo$FieldID==varName)
     # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
@@ -64,7 +64,7 @@ getIsCatMultExposure <- function(vl, varName, varValue) {
 # looks up categorical multiple field in the variable info file, return
 # number of values denoted as trait of interest.
 # returns zero if whole field is denoted trait of interest, not particular values.
-getNumValuesCatMultExposure <- function(vl, varName) {
+.getNumValuesCatMultExposure <- function(vl, varName) {
     # get row index of field in variable information file
     idx <- which(vl$phenoInfo$FieldID==varName)
     # may be empty of may contain VALUE1|VALUE2 etc .. to denote those
diff --git a/PHESANT/R/testAssociations.r b/PHESANT/R/testAssociations.r
index 951e704..016e1fc 100644
--- a/PHESANT/R/testAssociations.r
+++ b/PHESANT/R/testAssociations.r
@@ -16,124 +16,93 @@
 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-
 # Tests the association of a field, determined by its field type
 testAssociations <- function(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx) {
-
-	## call file for variable type
-
-	tryCatch({
-
-		# retrieve whether phenotype is excluded etc
-		idx=which(vl$phenoInfo$FieldID==currentVarShort);
-
-		# check if variable info is found for this field
-		if (length(idx)==0) {
-			cat(paste(currentVar, " || Variable could not be found in pheno info file. \n", sep=""))			
-		  .incrementCounter("notinphenofile")
-		}
-		else {
-
-		# get info from variable info file
-		excluded = vl$phenoInfo$EXCLUDED[idx]
-		catSinToMult = vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT[idx]
-		fieldType = vl$phenoInfo$ValueType[idx]
-		isExposure = getIsExposure(vl, currentVarShort) #vl$phenoInfo$EXPOSURE_PHENOTYPE[idx]
-
-		if (fieldType=="Integer") {		
-
-			#### INTEGER
-			cat(currentVar, "|| ", sep="")
-			
-			if (excluded!="") {
-				cat(paste("Excluded integer: ", excluded, " || ", sep=""))
-			  .incrementCounter("excluded.int")
-			}
-			else {
-			  .incrementCounter("start.int")
-				if (isExposure==TRUE) {
-				  .incrementCounter("start.exposure.int")
-				}
-
-			    	testInteger(opt, vl, currentVarShort, "INTEGER", thisdata, phenoStartIdx);
-			}
-			cat("\n");
-	    	}
-		else if (fieldType=="Continuous") {
-
-			#### CONTINUOUS
-			cat(currentVar, "|| ", sep="")
-
-		    	if (excluded!="") {
-				cat(paste("Excluded continuous: ", excluded, " || ", sep=""))
-		    	  .incrementCounter("excluded.cont")
-		    	}
-			else {
-			  .incrementCounter("start.cont")
-				if (isExposure==TRUE) {
-				  .incrementCounter("start.exposure.cont")
-                                }
-				testContinuous(opt, vl, currentVarShort, "CONTINUOUS", thisdata, phenoStartIdx);
-	        	}
-	        	cat("\n");
-		}
-	    	else if (fieldType=="Categorical single" && catSinToMult=="") {
-
-			#### CAT SINGLE
-			cat(currentVar, "|| ", sep="")
-	
-	    		if (excluded!="") {
-				cat(paste("Excluded cat-single: ", excluded, " || ", sep=""))
-	    		  .incrementCounter("excluded.catSin")
-			}
-			else {
-			  .incrementCounter("start.catSin")
-				if (isExposure==TRUE) {
-				  .incrementCounter("start.exposure.catSin")
-                                }
-			    testCategoricalSingle(opt, vl, currentVarShort, "CAT-SIN", thisdata, phenoStartIdx);
-			}
-			cat("\n");
-	  	}
-		else if (fieldType=="Categorical multiple" || catSinToMult!="") {
-		
-			#### CAT MULTIPLE
-			cat(currentVar, "|| ", sep="")
-
-			if (excluded!="") {
-				cat(paste("Excluded cat-multiple: ", excluded, " || ", sep=""))
-			  .incrementCounter("excluded.catMul")
-			}
-			else {
-
-				if (catSinToMult!="") {
-					cat("cat-single to cat-multiple || ", sep="")
-				  .incrementCounter("catSinToCatMul")
-				}
-
-			  .incrementCounter("start.catMul")	
-				if (isExposure==TRUE) {
-				  .incrementCounter("start.exposure.catMul")
-                                }
-				else {
-					# get number of cat mult values denoting trait of interest
-	                                numVals = getNumValuesCatMultExposure(vl, currentVarShort)
-					if (numVals>0) {
-		                                .addToCounts("start.exposure.catMulvalues", numVals)
-					}
-				}
-		        	testCategoricalMultiple(opt, vl, currentVarShort, "CAT-MUL", thisdata, phenoStartIdx);
-			}
-			cat("\n");
-		}
-		else {
-	        	#cat("VAR MISSING ", currentVarShort, "\n", sep="");
-	    	}
-		}
-
-	}, error = function(e) {
-		print(paste("ERROR:", currentVar,e))
-	})
+  	## call coresponding test function for variable type
+  	tryCatch({
+    		# retrieve whether phenotype is excluded etc
+    		idx=which(vl$phenoInfo$FieldID==currentVarShort);
+    		# check if variable info is found for this field
+    		if (length(idx)==0) {
+    			  cat(paste(currentVar, " || Variable could not be found in pheno info file. \n", sep=""))			
+    		    .incrementCounter("notinphenofile")
+    		} else {
+        		# get info from variable info file
+        		excluded = vl$phenoInfo$EXCLUDED[idx]
+        		catSinToMult = vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT[idx]
+        		fieldType = vl$phenoInfo$ValueType[idx]
+        		isExposure = getIsExposure(vl, currentVarShort) #vl$phenoInfo$EXPOSURE_PHENOTYPE[idx]
+        
+        		if (fieldType=="Integer") {		
+        			  #### INTEGER
+        			  cat(currentVar, "|| ", sep="")
+        			  if (excluded!="") {
+        				    cat(paste("Excluded integer: ", excluded, " || ", sep=""))
+        			      .incrementCounter("excluded.int")
+        			  } else {
+        			      .incrementCounter("start.int")
+        				    if (isExposure==TRUE) {
+        				        .incrementCounter("start.exposure.int")
+        				    }
+        			    	testInteger(opt, vl, currentVarShort, "INTEGER", thisdata, phenoStartIdx)
+        			  }
+        			  cat("\n")
+        	  } else if (fieldType=="Continuous") {
+          			cat(currentVar, "|| ", sep="")
+          		  if (excluded!="") {
+          				  cat(paste("Excluded continuous: ", excluded, " || ", sep=""))
+          		    	.incrementCounter("excluded.cont")
+          		  } else {
+          			    .incrementCounter("start.cont")
+          				  if (isExposure==TRUE) {
+          				      .incrementCounter("start.exposure.cont")
+                    }
+          				  testContinuous(opt, vl, currentVarShort, "CONTINUOUS", thisdata, phenoStartIdx)
+          	    }
+          	    cat("\n")
+        		} else if (fieldType=="Categorical single" && catSinToMult=="") {
+          			cat(currentVar, "|| ", sep="")
+          	    if (excluded!="") {
+          			    cat(paste("Excluded cat-single: ", excluded, " || ", sep=""))
+          	    		.incrementCounter("excluded.catSin")
+          			} else {
+          			    .incrementCounter("start.catSin")
+          				  if (isExposure==TRUE) {
+          				      .incrementCounter("start.exposure.catSin")
+                    }
+          			    testCategoricalSingle(opt, vl, currentVarShort, "CAT-SIN", thisdata, phenoStartIdx)
+          			}
+          			cat("\n")
+        	  } else if (fieldType=="Categorical multiple" || catSinToMult!="") {
+        			  cat(currentVar, "|| ", sep="")
+          			if (excluded!="") {
+          			    cat(paste("Excluded cat-multiple: ", excluded, " || ", sep=""))
+          			    .incrementCounter("excluded.catMul")
+          			} else {
+            				if (catSinToMult!="") {
+            					  cat("cat-single to cat-multiple || ", sep="")
+            				    .incrementCounter("catSinToCatMul")
+            				}
+            			  .incrementCounter("start.catMul")	
+            				if (isExposure==TRUE) {
+            				    .incrementCounter("start.exposure.catMul")
+                    } else {
+            				    # get number of cat mult values denoting trait of interest
+            	          numVals = .getNumValuesCatMultExposure(vl, currentVarShort)
+            					  if (numVals>0) {
+            		            .addToCounts("start.exposure.catMulvalues", numVals)
+            					  }
+            				}
+            		    testCategoricalMultiple(opt, vl, currentVarShort, "CAT-MUL", thisdata, phenoStartIdx)
+            			}
+            			cat("\n")
+        		} else {
+        	      #cat("VAR MISSING ", currentVarShort, "\n", sep="")
+        	  }
+        }
+  	}, error = function(e) {
+  		  print(paste("ERROR:", currentVar,e))
+  	})
 }
 
 
diff --git a/PHESANT/R/testCatMultiple.r b/PHESANT/R/testCatMultiple.r
index b9f7573..b9613af 100644
--- a/PHESANT/R/testCatMultiple.r
+++ b/PHESANT/R/testCatMultiple.r
@@ -25,67 +25,63 @@
 # 4) Calling binaryLogisticRegression function for this derived binary variable
 testCategoricalMultiple <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
   	cat("CAT-MULTIPLE || ")
-  
-  	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
-  	pheno = .reassignValue(vl, pheno, varName)
+  	pheno <- thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
+  	pheno <- .reassignValue(vl, pheno, varName)
   
   	## get unique values from all columns of this variable
-  	uniqueValues = unique(na.omit(pheno[,1]));
-  	numCols = ncol(pheno);
-  	numRows = nrow(pheno);
+  	uniqueValues <- unique(na.omit(pheno[,1]))
+  	numCols <- ncol(pheno)
+  	numRows <- nrow(pheno)
   	if (numCols>1) {
-  		for (num in 2:numCols) {
-  			u = unique(na.omit(pheno[,num]))
-  			uniqueValues = union(uniqueValues,u);
-  		}
+  		  for (num in 2:numCols) {
+  			    u <- unique(na.omit(pheno[,num]))
+  			    uniqueValues <- union(uniqueValues,u)
+  		  }
   	}
   
   	## for each value create a binary variable and test this
   	for (variableVal in uniqueValues) {
-  
-  		## numeric negative values we assume are missing - check this
-  		if(is.numeric(variableVal) & variableVal<0) {
-  			cat("SKIP_val:", variableVal," < 0", sep="");
-  			next;
-  		}
+  		  ## numeric negative values we assume are missing - check this
+  		  if(is.numeric(variableVal) & variableVal<0) {
+  			    cat("SKIP_val:", variableVal," < 0", sep="")
+  			    next
+  		  }
   	
-  		# make variable for this value
-  		idxForVar = which(pheno == variableVal, arr.ind=TRUE)
-  		idxsTrue = idxForVar[,"row"]
+  		  # make variable for this value
+  		  idxForVar <- which(pheno == variableVal, arr.ind=TRUE)
+  		  idxsTrue <- idxForVar[,"row"]
   
-  		cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="");
-  		.incrementCounter("catMul.binary")
+  		  cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="")
+  		  .incrementCounter("catMul.binary")
   		
-  		# make zero vector and set 1s for those with this variable value
-  		varBinary = rep.int(0,numRows);
-  		varBinary[idxsTrue] = 1;
-  		varBinaryFactor = factor(varBinary)
+  		  # make zero vector and set 1s for those with this variable value
+  		  varBinary <- rep.int(0,numRows)
+  		  varBinary[idxsTrue] <- 1
+  		  varBinaryFactor <- factor(varBinary)
   
-  		## data for this new binary variable
-  		newthisdata = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], varBinaryFactor)
+  		  ## data for this new binary variable
+  		  newthisdata <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], varBinaryFactor)
   
-  		## one of 3 ways to decide which examples are negative
-          	idxsToRemove = .restrictSample(vl, varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
+  		  ## one of 3 ways to decide which examples are negative
+        idxsToRemove <- .restrictSample(vl, varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
   
-  		if (!is.null(idxsToRemove) & length(idxsToRemove) > 0) {
-  			newthisdata = newthisdata[-idxsToRemove,]
-  		}
+  		  if (!is.null(idxsToRemove) & length(idxsToRemove) > 0) {
+  			    newthisdata <- newthisdata[-idxsToRemove,]
+  		  }
   
-  		facLevels = levels(newthisdata[,phenoStartIdx])		
-  		idxTrue = length(which(newthisdata[,phenoStartIdx]==facLevels[1]))
-  	        idxFalse = length(which(newthisdata[,phenoStartIdx]==facLevels[2]))
+  		  facLevels <- levels(newthisdata[,phenoStartIdx])		
+  		  idxTrue <- length(which(newthisdata[,phenoStartIdx]==facLevels[1]))
+  	    idxFalse <- length(which(newthisdata[,phenoStartIdx]==facLevels[2]))
                   
-  	        if (idxTrue<10 || idxFalse<10) {
-  	                cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="");
-  			.incrementCounter("catMul.10")
-  	        }
-  		else {
-  			isExposure = getIsCatMultExposure(vl, varName, variableVal)
-  
-  			.incrementCounter("catMul.over10")
+  	    if (idxTrue<10 || idxFalse<10) {
+  	        cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="")
+  			    .incrementCounter("catMul.10")
+  	    } else {
+  			    isExposure <- .getIsCatMultExposure(vl, varName, variableVal)
+  			    .incrementCounter("catMul.over10")
   		     	# binary - so logistic regression
-  			binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure, phenoStartIdx)
-  		}
+  			    binaryLogisticRegression(opt, paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure, phenoStartIdx)
+  		  }
   	}
 }
 
@@ -94,7 +90,7 @@ testCategoricalMultiple <- function(opt, vl, varName, varType, thisdata, phenoSt
 # returns idx's that should be removed from the sample
 .restrictSample <- function(vl, varName,pheno,variableVal, userID) {
   	# get definition for sample for this variable either NO_NAN, ALL or a variable ID
-  	varIndicator = vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS[which(vl$phenoInfo$FieldID==varName)]
+  	varIndicator <- vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS[which(vl$phenoInfo$FieldID==varName)]
   	return(.restrictSample2(vl, varName,pheno,varIndicator,variableVal, userID))
 }
 
diff --git a/PHESANT/R/testCatSingle.r b/PHESANT/R/testCatSingle.r
index 85d988b..bc812e1 100644
--- a/PHESANT/R/testCatSingle.r
+++ b/PHESANT/R/testCatSingle.r
@@ -30,91 +30,78 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
   
   	# assert variable has only one column
   	if (!is.null(dim(pheno))) stop("More than one column for categorical single")
-  
-  	pheno = .reassignValue(vl, pheno, varName)
-  
+  	pheno <- .reassignValue(vl, pheno, varName)
+  	
   	# get data code info - whether this data code is ordinal or not and any reordering
-          dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
-          dataCode = dataPheno$DATA_CODING;
+    dataPheno <- vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),]
+    dataCode <- dataPheno$DATA_CODING
   
   	# get data coding information	
-         	dataCodeRow = which(vl$dataCodeInfo$dataCode==dataCode);
+    dataCodeRow <- which(vl$dataCodeInfo$dataCode==dataCode)
   	if (length(dataCodeRow)==0) {
-                  cat("ERROR: No row in data coding info file || ");
-  		return(NULL);
-          }
-  	dataDataCode = vl$dataCodeInfo[dataCodeRow,];
-          ordered = dataDataCode$ordinal;
-          order = as.character(dataDataCode$ordering);
+        cat("ERROR: No row in data coding info file || ")
+  		  return
+    }
+  	dataDataCode <- vl$dataCodeInfo[dataCodeRow,]
+    ordered <- dataDataCode$ordinal
+    order <- as.character(dataDataCode$ordering)
   
   	## reorder variable values into increasing order (we do this now as this may convert variable to binary rather than ordered)
-    pheno = .reorderOrderedCategory(pheno,order);
+    pheno <- .reorderOrderedCategory(pheno,order)
   
   	## if data code has a default_value then recode NA's to this value for participants with value in default_related_field
   	## this is used where there is no zero option e.g. field 100200
-  	defaultValue = dataDataCode$default_value
-  	defaultRelatedID = dataDataCode$default_related_field
-  	pheno = .setDefaultValue(vl, pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
+  	defaultValue <- dataDataCode$default_value
+  	defaultRelatedID <- dataDataCode$default_related_field
+  	pheno <- .setDefaultValue(vl, pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
   
-          ## all categories coded as <0 we assume are `missing' values
-          pheno = .replaceMissingCodes(pheno)
+    ## all categories coded as <0 we assume are `missing' values
+    pheno <- .replaceMissingCodes(pheno)
   
   	## remove categories if < 10 examples
-  	pheno = .testNumExamples(pheno)
-  
-  	uniqVar = unique(na.omit(pheno))
-  	uniqVar = sort(uniqVar)
-  
-  	if (length(uniqVar)<=1) {
-  		cat("SKIP (only one value) || ");
-  		.incrementCounter("catSin.onevalue")
-  	}
-  	else if (length(uniqVar)==2) {		
-  		cat("CAT-SINGLE-BINARY || ");
-  	  .incrementCounter("catSin.case3")
-  		# binary so logistic regression
+  	pheno <- .testNumExamples(pheno)
   
-  		phenoFactor = factor(pheno)
-  		# binary - so logistic regression
-  		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
-  		binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx)	
-  	}
-  	else {
-  		# > 2 categories
-  		if (is.na(ordered)) {
-  			cat(" ERROR: 'ordered' not found in data code info file")	
-  		}
-  		else {
-  
-  		## unordered
-  		if (ordered == 0) {
-  			
-  			cat("CAT-SINGLE-UNORDERED || ")
-  		  .incrementCounter("catSin.case2")
-  
-  			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
-  			testCategoricalUnordered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
-  			
-  		}
-  		else if (ordered == 1) {
-  		
-  			## ordered
-  			cat("ordered || ");
-  		  .incrementCounter("catSin.case1")
+  	uniqVar <- unique(na.omit(pheno))
+  	uniqVar <- sort(uniqVar)
   
-  			## reorder variable values into increasing order
-  			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno);
-  			testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx, order)
-  		
-  		}
-  		else if (ordered == -2) {
-  			cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
-  		  .incrementCounter( "catSin.binaryorexcluded")
-  		}
-  		else {
-  			print(paste("ERROR", varName, varType, dataCode));
-  		}
-  		}
+  	if (length(uniqVar)<=1) { 
+  		  cat("SKIP (only one value) || ")
+  		  .incrementCounter("catSin.onevalue")
+  	} else if (length(uniqVar)==2) {		
+  		  cat("CAT-SINGLE-BINARY || ")
+  	    .incrementCounter("catSin.case3")
+  		  # binary so logistic regression
+  		  phenoFactor <- factor(pheno)
+  		  # binary - so logistic regression
+  		  thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor)
+  		  binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx)	
+  	} else {
+  		  # > 2 categories
+  		  if (is.na(ordered)) {
+  			    cat(" ERROR: 'ordered' not found in data code info file")	
+  		  }  else {
+      		  ## unordered
+      		  if (ordered == 0) {
+      			     cat("CAT-SINGLE-UNORDERED || ")
+      		       .incrementCounter("catSin.case2")
+      			     thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno)
+      			     testCategoricalUnordered(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
+      		  } else if (ordered == 1) {
+    			       ## ordered
+    			       cat("ordered || ")
+    		         .incrementCounter("catSin.case1")
+    
+    			       ## reorder variable values into increasing order
+    			       thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], pheno)
+    			       testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx, order)
+    		
+    		    } else if (ordered == -2) {
+    			       cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
+    		         .incrementCounter( "catSin.binaryorexcluded")
+    		    } else {
+    			       print(paste("ERROR", varName, varType, dataCode));
+    		    }
+  		  }
   	}
 }
 
@@ -135,7 +122,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
       			pheno2[idx] <- count
       			count <- count + 1
     		}
-    		cat("reorder ",order," || ",sep="");
+    		cat("reorder ",order," || ",sep="")
     		return(pheno2)
   	}	else {
   		  return(pheno)
@@ -167,7 +154,7 @@ testCategoricalSingle <- function(opt, vl, varName, varType, thisdata, phenoStar
     		# set default value in people who have no value in the pheno but do have a value in the default_value_related_field
     	  defaultIdxs <- which(!is.na(indicatorVar) & is.na(pheno))
     		pheno[defaultIdxs] <- defaultValue
-    	  cat("default value ", defaultValue, " set, N= ", length(defaultIdxs), " || ", sep="");
+    	  cat("default value ", defaultValue, " set, N= ", length(defaultIdxs), " || ", sep="")
 	  }
 	  return(pheno)
 }
diff --git a/PHESANT/R/testContinuous.r b/PHESANT/R/testContinuous.r
index aeef1a6..b60c0c6 100644
--- a/PHESANT/R/testContinuous.r
+++ b/PHESANT/R/testContinuous.r
@@ -29,207 +29,172 @@ testContinuous <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
 # Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
 # This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
 .testContinuous2 <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
-	cat("CONTINUOUS || ");
-	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-	isExposure = getIsExposure(vl, varName)
-
-	if (!is.null(dim(pheno))) {
-		phenoAvg = rowMeans(pheno, na.rm=TRUE)
-	}
-	else {
-		phenoAvg = pheno
-	}
-
-	## recode NaN to NA, which is generated if all cols of pheno are NA for a given person
-	idxNan = which(is.nan(phenoAvg))
-	phenoAvg[idxNan] = NA;
-	numNotNA=length(na.omit(phenoAvg));
-
-	## check whether >20% examples with same value
-	uniqVar = unique(na.omit(phenoAvg))
-	valid = TRUE
-	for (uniq in uniqVar) {
-		numWithValue = length(which(phenoAvg==uniq))
-		if (numWithValue/numNotNA >=0.2) {
-			valid = FALSE;
-			break
-		}
-	}
-
-	if (valid == FALSE) {
-
-		## treat as ordinal categorical
-		cat(">20% IN ONE CATEGORY || ");
-		
-		# if >2 unique values then treat as ordered categorical
-		numUniqueValues = length(uniqVar)
-
-		# straight forward case that there are two (or one) values		
-		if (numUniqueValues<=2) {
-			## treat as binary or skip (binary requires>=10 per category)
-
-			## remove categories if < 10 examples to see if this should be binary or not, but if ordered categorical
-			## then we include all values when generating this
-	    		phenoAvgMoreThan10 = .testNumExamples(phenoAvg)
-
-			## binary if 2 distinct values, else ordered categorical
-        		phenoFactor = factor(phenoAvgMoreThan10)
-        		numLevels = length(unique(na.omit(phenoAvgMoreThan10))) #length(levels(phenoFactor))
+	  cat("CONTINUOUS || ")
+	  pheno <- thisdata[,phenoStartIdx:ncol(thisdata)]
+	  isExposure <- getIsExposure(vl, varName)
+
+  	if (!is.null(dim(pheno))) {
+  		  phenoAvg <- rowMeans(pheno, na.rm=TRUE)
+  	} else {
+  		  phenoAvg <- pheno
+  	}
+
+  	## recode NaN to NA, which is generated if all cols of pheno are NA for a given person
+  	idxNan <- which(is.nan(phenoAvg))
+  	phenoAvg[idxNan] <- NA
+  	numNotNA <- length(na.omit(phenoAvg))
+  
+  	## check whether >20% examples with same value
+  	uniqVar <- unique(na.omit(phenoAvg))
+  	valid <- TRUE
+  	for (uniq in uniqVar) {
+  		  numWithValue <- length(which(phenoAvg==uniq))
+  		  if (numWithValue/numNotNA >=0.2) {
+  			    valid <- FALSE
+  			    break
+  		  }
+  	}
+
+	  if (!valid) {
+    		## treat as ordinal categorical
+    		cat(">20% IN ONE CATEGORY || ")
+    		# if >2 unique values then treat as ordered categorical
+    		numUniqueValues <- length(uniqVar)
+    		# straight forward case that there are two (or one) values		
+    		if (numUniqueValues<=2) {
+    			  ## treat as binary or skip (binary requires>=10 per category)
+    			  ## remove categories if < 10 examples to see if this should be binary or not, but if ordered categorical
+    			  ## then we include all values when generating this
+    	    	phenoAvgMoreThan10 <- .testNumExamples(phenoAvg)
+    
+    			  ## binary if 2 distinct values, else ordered categorical
+        		phenoFactor <- factor(phenoAvgMoreThan10)
+        		numLevels <- length(unique(na.omit(phenoAvgMoreThan10))) #length(levels(phenoFactor))
 
         		if (numLevels<=1) {
-       				cat("SKIP (number of levels: ",numLevels,")",sep="")
-        		  .incrementCounter("cont.onevalue")
-        		}
-        		else if (numLevels==2) {
-	        		# binary
-        		  .incrementCounter("cont.binary")
-        			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);			
-        			binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx);
+       			    cat("SKIP (number of levels: ",numLevels,")",sep="")
+        		    .incrementCounter("cont.onevalue")
+        		} else if (numLevels==2) {
+	        		  # binary
+        		    .incrementCounter("cont.binary")
+        			  thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor)			
+        			  binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx)
         		}
-        	}
-		else {
-			## try to treat as ordered categorical
-
-		  .incrementCounter("cont.ordcattry")
-			## equal sized bins
-			phenoBinned = .equalSizedBins(phenoAvg)
-
-			# check number of people in each bin
-			bin0Num = length(which(phenoBinned==0))
-			bin1Num = length(which(phenoBinned==1))
-			bin2Num = length(which(phenoBinned==2))
-
-			if (bin0Num>=10 & bin1Num>=10 & bin2Num>=10) {
-
-				# successful binning. >=10 examples in each of the 3 bins
-
-			  .incrementCounter("cont.ordcattry.ordcat")
-			        thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned);
-				testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
-			}
-			else {
-				# try to treat as binary because not enough examples in each bin
-								
-				if (bin0Num<10 & bin2Num<10) {
-					## skip - not possible to create binary variable because first and third bins are too small
-					## ie. could merge bin1 with bin 2 but then bin3 still too small etc
-					cat("SKIP 2 bins are too small || ")
-				  .incrementCounter("cont.ordcattry.smallbins")
-				} 
-				else if ((bin0Num<10 | bin1Num<10) & (bin0Num+bin1Num)>=10) {
-
-					# combine first and second bin to create binary variable
-				  .incrementCounter("cont.ordcattry.binsbinary")
-					cat("Combine first two bins and treat as binary || ")
-					phenoBinned[which(phenoBinned==0)] = 1	
-
-					# test binary
-					thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
-	                                binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx);
-				}
-				else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
-
-					# combine second and last bin to create binary variable
-				  .incrementCounter("cont.ordcattry.binsbinary")
-					cat("Combine last two bins and treat as binary || ")
-                                        phenoBinned[which(phenoBinned==2)] = 1
-					
-                                        # test binary
-                                        thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
-                                        binaryLogisticRegression(opt, varName,varType, thisdatanew, isExposure, phenoStartIdx)
-                                }
-				
-
-				else {
-					## skip - not possible to create binary variable because combining bins would still be too small
-					cat("SKIP 2 bins are too small(2) || ")
-				  .incrementCounter("cont.ordcattry.smallbins2")
-				}
-
-			}
-	
-		}
-	}
-	else {		
-		cat("IRNT || ");
-	  .incrementCounter("cont.main")
-
-		# check there are at least 500 examples
-		numNotNA = length(which(!is.na(phenoAvg)))
-		if (numNotNA<500) {
-			cat("CONTINUOUS-SKIP-500 (", numNotNA, ") || ",sep="");
-		  .incrementCounter("cont.main.500")
-		}
-		else {
-			## inverse rank normal transformation
-			phenoIRNT = irnt(phenoAvg)
-
-			if (opt$save == TRUE) {
-				# add pheno to dataframe
-				.storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
-				cat("SUCCESS results-linear");
-				.incrementCounter("success.continuous")
-                        }
-                        else {
-		
-			## do regression (use standardised geno values)
-			if (opt$standardise==TRUE) {
-				geno = scale(thisdata[,"geno"])
-			}
-			else {
-                	        geno = thisdata[,"geno"] 
-                	}
-			confounders=thisdata[,3:(phenoStartIdx -1), drop = FALSE]
-
-			sink()
-			sink(pkg.env$modelFitLogFile, append=TRUE)
-			print("--------------")			
-			print(varName)
-
-			###### BEGIN TRYCATCH
-	                tryCatch({
-
-			fit <- lm(phenoIRNT ~ geno + ., data=confounders)
-			
-			sink()
-			sink(pkg.env$resLogFile, append=TRUE)
-			
-			sumx = summary(fit)
-
-			pvalue = sumx$coefficients['geno','Pr(>|t|)']
-			beta = sumx$coefficients["geno","Estimate"]
-
-			if (opt$confidenceintervals == TRUE) {
-				cis = confint(fit, level=0.95)
-				lower = cis["geno", "2.5 %"]
-	                        upper = cis["geno", "97.5 %"]
-                        }
-                        else {
-                                lower = NA
-                                upper = NA
-                        }
-
-			numNotNA = length(which(!is.na(phenoIRNT)))
-
-			## save result to file
-			write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt", sep=""), append="TRUE");
-			cat("SUCCESS results-linear");
-
-			.incrementCounter("success.continuous")
-                	if (isExposure == TRUE) {
-                	  .incrementCounter("success.exposure.continuous")
-                	}
-
-			## END TRYCATCH
-                	}, error = function(e) {
-                	        sink()
-                	        sink(pkg.env$resLogFile, append=TRUE)
-                	        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                	        .incrementCounter("continuous.error")
-                	})
-			}			
-		}
-	}
+        } else {
+    			  ## try to treat as ordered categorical
+    		    .incrementCounter("cont.ordcattry")
+    			  ## equal sized bins
+    			  phenoBinned <- .equalSizedBins(phenoAvg)
+    
+    			  # check number of people in each bin
+    			  bin0Num <- length(which(phenoBinned==0))
+    			  bin1Num <- length(which(phenoBinned==1))
+    			  bin2Num <- length(which(phenoBinned==2))
+    
+      			if (bin0Num>=10 & bin1Num>=10 & bin2Num>=10) {
+      				  # successful binning. >=10 examples in each of the 3 bins
+      			    .incrementCounter("cont.ordcattry.ordcat")
+      			    thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
+      			    testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
+      			} else {
+      				  # try to treat as binary because not enough examples in each bin
+      				  if (bin0Num<10 & bin2Num<10) {
+      					    ## skip - not possible to create binary variable because first and third bins are too small
+      					    ## ie. could merge bin1 with bin 2 but then bin3 still too small etc
+      					    cat("SKIP 2 bins are too small || ")
+      				      .incrementCounter("cont.ordcattry.smallbins")
+      				  } else if ((bin0Num<10 | bin1Num<10) & (bin0Num+bin1Num)>=10) {
+      					    # combine first and second bin to create binary variable
+      				      .incrementCounter("cont.ordcattry.binsbinary")
+      					    cat("Combine first two bins and treat as binary || ")
+      					    phenoBinned[which(phenoBinned==0)] <-1	
+      
+      					    # test binary
+      					    thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
+      	            binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure, phenoStartIdx)
+      				  } else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
+      					    # combine second and last bin to create binary variable
+      				      .incrementCounter("cont.ordcattry.binsbinary")
+      					    cat("Combine last two bins and treat as binary || ")
+                    phenoBinned[which(phenoBinned==2)] <- 1
+      					
+                    # test binary
+                    thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoBinned)
+                    binaryLogisticRegression(opt, varName,varType, thisdatanew, isExposure, phenoStartIdx)
+                } else {
+      					    ## skip - not possible to create binary variable because combining bins would still be too small
+      					    cat("SKIP 2 bins are too small(2) || ")
+      				      .incrementCounter("cont.ordcattry.smallbins2")
+      				  }
+      		  }
+    	  }
+	  } else {		
+    		cat("IRNT || ")
+    	  .incrementCounter("cont.main")
+    		# check there are at least 500 examples
+    		numNotNA <- length(which(!is.na(phenoAvg)))
+    		if (numNotNA<500) {
+    			  cat("CONTINUOUS-SKIP-500 (", numNotNA, ") || ",sep="")
+    		    .incrementCounter("cont.main.500")
+    		} else {
+    			  ## inverse rank normal transformation
+    			  phenoIRNT <- irnt(phenoAvg)
+    			  if (opt$save == TRUE) {
+    				    # add pheno to dataframe
+    				    .storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
+    				    cat("SUCCESS results-linear")
+    				    .incrementCounter("success.continuous")
+            } else {
+    			      ## do regression (use standardised geno values)
+    			      if (opt$standardise==TRUE) {
+    				        geno <- scale(thisdata[,"geno"])
+    			      } else {
+                    geno <- thisdata[,"geno"] 
+                }
+    			      confounders <- thisdata[,3:(phenoStartIdx -1), drop = FALSE]
+    
+    			      sink()
+    			      sink(pkg.env$modelFitLogFile, append=TRUE)
+    			      print("--------------")			
+    			      print(varName)
+    
+    			      ###### BEGIN TRYCATCH
+    	          tryCatch({
+      			        fit <- lm(phenoIRNT ~ geno + ., data=confounders)
+      			        sink()
+      			        sink(pkg.env$resLogFile, append=TRUE)
+      			        sumx <- summary(fit)
+      			        pvalue <- sumx$coefficients['geno','Pr(>|t|)']
+      			        beta <- sumx$coefficients["geno","Estimate"]
+      
+      			        if (opt$confidenceintervals == TRUE) {
+      				          cis <- confint(fit, level=0.95)
+      				          lower <- cis["geno", "2.5 %"]
+      	                upper <- cis["geno", "97.5 %"]
+                    } else {
+                        lower <- NA
+                        upper <- NA
+                    }
+      
+      			        numNotNA <- length(which(!is.na(phenoIRNT)))
+      			        ## save result to file
+      			        write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt", sep=""), append="TRUE")
+      			        cat("SUCCESS results-linear")
+      			        .incrementCounter("success.continuous")
+                    if (isExposure == TRUE) {
+                        .incrementCounter("success.exposure.continuous")
+                    }
+    
+    			      ## END TRYCATCH
+                }, error = function(e) {
+                    sink()
+                    sink(pkg.env$resLogFile, append=TRUE)
+                    cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
+                    .incrementCounter("continuous.error")
+                })
+    			  }			
+    		}
+	  }
 }
 
 irnt <- function(pheno, seed = 1234) {
diff --git a/PHESANT/R/testInteger.r b/PHESANT/R/testInteger.r
index 98735c9..de76887 100644
--- a/PHESANT/R/testInteger.r
+++ b/PHESANT/R/testInteger.r
@@ -16,73 +16,58 @@
 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-
 # Processing integer fields, namely:
 # 1) Reassigning values as specified in the data code information file
 # 2) Generate a single value if there are several values (arrays) by taking the mean
 # 3) Treating this field as continuous if at least 20 distinct values.
 # Otherwise treat as binary or ordered categorical if 2 or more than two values. 
 testInteger <- function(opt, vl, varName, varType, thisdata, phenoStartIdx) {
-	cat("INTEGER || ");
-
-	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-	isExposure = getIsExposure(vl, varName)
-
-	if (!is.numeric(as.matrix(pheno))) {
-		cat("SKIP Integer type but not numeric",sep="");
-		return(NULL)
-	}
-
-	pheno = .reassignValue(vl, pheno, varName)
-
-	## average if multiple columns
-	if (!is.null(dim(pheno))) {
-                phenoAvg = rowMeans(pheno, na.rm=TRUE)
-		
-		# if participant only has NA values then NaN is generated so we convert back to NA
-		phenoAvg = .replaceNaN(phenoAvg)
-        }
-	else {
-                phenoAvg = pheno
+  	cat("INTEGER || ")
+  	pheno <- thisdata[,phenoStartIdx:ncol(thisdata)]
+  	isExposure <- getIsExposure(vl, varName)
+  	if (is.numeric(as.matrix(pheno))) {
+  	    pheno <- .reassignValue(vl, pheno, varName)
+  	    
+      	## average if multiple columns
+      	if (!is.null(dim(pheno))) {
+            phenoAvg <- rowMeans(pheno, na.rm=TRUE)
+      		  # if participant only has NA values then NaN is generated so we convert back to NA
+      		  phenoAvg <- .replaceNaN(phenoAvg)
+        } else {
+              phenoAvg <- pheno
         }
-
-	uniqVar = unique(na.omit(phenoAvg))
-
-	# if >=20 separate values then treat as continuous
-	if (length(uniqVar)>=20) {
-		
-		thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoAvg);
-		.testContinuous2(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
-		.incrementCounter( "int.continuous")
-	}
-	else {
-		
-		## remove categories if < 10 examples
-	  phenoAvg = .testNumExamples(phenoAvg)
-	    
-		## binary if 2 distinct values, else ordered categorical
-		phenoFactor = factor(phenoAvg)
-		numLevels = length(levels(phenoFactor))
-		if (numLevels<=1) {
-			cat("SKIP (number of levels: ",numLevels,")",sep="");
-		  .incrementCounter("int.onevalue")
-		}
-		else if (numLevels==2) {
-		  .incrementCounter("int.binary")
-
-			# binary
-			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
-			binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure,phenoStartIdx);
-		}
-		else {
-		  .incrementCounter("int.catord")
-			cat("3-20 values || ")
-
-			# we don't use equal sized bins just the original integers (that have >=10 examples) as categories
-			thisdatanew = cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
-
-			# treat as ordinal categorical
-			testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx);
-		}
-	}
+  
+      	uniqVar <- unique(na.omit(phenoAvg))
+      	# if >=20 separate values then treat as continuous
+      	if (length(uniqVar)>=20) {
+      		  thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoAvg)
+      		  .testContinuous2(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
+      		  .incrementCounter( "int.continuous")
+      	} else {
+      		  ## remove categories if < 10 examples
+      	    phenoAvg <- .testNumExamples(phenoAvg)
+      		  ## binary if 2 distinct values, else ordered categorical
+      		  phenoFactor <- factor(phenoAvg)
+      		  numLevels <- length(levels(phenoFactor))
+      		  if (numLevels<=1) {
+      			    cat("SKIP (number of levels: ",numLevels,")",sep="")
+      		      .incrementCounter("int.onevalue")
+      		  } else if (numLevels==2) {
+      		      .incrementCounter("int.binary")
+      			    # binary
+      			    thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor);
+      			    binaryLogisticRegression(opt, varName, varType, thisdatanew, isExposure,phenoStartIdx)
+      		  } else {
+      		      .incrementCounter("int.catord")
+      			    cat("3-20 values || ")
+      			    # we don't use equal sized bins just the original integers (that have >=10 examples) as categories
+      			    thisdatanew <- cbind.data.frame(thisdata[,1:(phenoStartIdx -1)], phenoFactor)
+      
+      			    # treat as ordinal categorical
+      			    testCategoricalOrdered(opt, vl, varName, varType, thisdatanew, phenoStartIdx)
+      		  }
+      	}
+  	} else {
+  	    cat("SKIP Integer type but not numeric",sep="")
+  	}
 }
diff --git a/PHESANT/R/utils.r b/PHESANT/R/utils.r
index 563d949..b64c99d 100644
--- a/PHESANT/R/utils.r
+++ b/PHESANT/R/utils.r
@@ -24,9 +24,17 @@
 			return(thisdata)
 }
 
+parseOptions <- function() {
+    opt_parser <- OptionParser(option_list=option_list)
+    opt <- parse_args(opt_parser)
+    opt <- .processArgs(opt, opt_parser)
+    return(opt)
+}
+
+    
 # Parse the arguments input by the user
 # if argument 'test' is used then run test phenome scan
-processArgs <- function(opt, opt_parser) {
+.processArgs <- function(opt, opt_parser) {
     if (opt$test==TRUE) {
       	# set up the test phenome scan settings
       	datadir='../testWAS/data/';
diff --git a/PHESANT/man/getIsExposure.Rd b/PHESANT/man/getIsExposure.Rd
new file mode 100644
index 0000000..b21ca2a
--- /dev/null
+++ b/PHESANT/man/getIsExposure.Rd
@@ -0,0 +1,21 @@
+\name{getIsExposure}
+\alias{getIsExposure}
+\title{
+Check field exposure 
+}
+\description{
+Check whether a field denotes the trait of interest, as specified.
+}
+\usage{
+getIsExposure(vl, varName)
+}
+\arguments{
+  \item{vl}{variable list object
+}
+  \item{varName}{ the field ID to be checked
+}
+}
+
+\value{
+Return true if the field has a "YES" value in TRAIT_OF_INTEST column.
+}
diff --git a/PHESANT/man/option_list.Rd b/PHESANT/man/option_list.Rd
index 21a696c..83712e6 100644
--- a/PHESANT/man/option_list.Rd
+++ b/PHESANT/man/option_list.Rd
@@ -4,6 +4,8 @@
 \title{
 User Input Option List
 }
-
+\description{
+This data object defines the options that the program accepts.
+}
 
 
diff --git a/PHESANT/man/parseOptions.Rd b/PHESANT/man/parseOptions.Rd
new file mode 100644
index 0000000..9b59ae2
--- /dev/null
+++ b/PHESANT/man/parseOptions.Rd
@@ -0,0 +1,15 @@
+\name{parseOptions}
+\alias{parseOptions}
+\title{
+Validate and parse user options
+}
+\description{
+Validate and parse user options
+}
+\usage{
+parseOptions()
+}
+
+\examples{
+# opt <- parseOptions()
+}
diff --git a/PHESANT/man/run.Rd b/PHESANT/man/run.Rd
new file mode 100644
index 0000000..ebd1497
--- /dev/null
+++ b/PHESANT/man/run.Rd
@@ -0,0 +1,23 @@
+\name{run}
+\alias{run}
+\title{
+Run an PHESANT analysis with user specificed options
+}
+\description{
+Run an PHESANT analysis with user specificed options
+}
+\usage{
+run(opt)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+}
+
+\value{
+No return value. All output will be writen to the user specificed oiutput folder.
+}
+
+\examples{
+# run(opt)
+}
+
diff --git a/PHESANT/man/testAssociations.Rd b/PHESANT/man/testAssociations.Rd
new file mode 100644
index 0000000..b16c7fa
--- /dev/null
+++ b/PHESANT/man/testAssociations.Rd
@@ -0,0 +1,27 @@
+\name{testAssociations}
+\alias{testAssociations}
+
+\title{
+Association Test Dispacher 
+}
+\description{
+The main function to identify the right kind of association test to perfrom based on phenotype variable information.
+}
+\usage{
+testAssociations(opt, vl, currentVar, currentVarShort, thisdata, phenoStartIdx)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{vl}{The list that holds input phenotype variable list and input data coding list.}
+  \item{currentVar}{The phenotype/variable to be tested, in the form of FIELDID_INSTANCE. For example 21022_0.}
+  \item{currentVarShort}{The phenotype/variable to be tested, in the form of FIELDID. For example 21022.}
+  \item{thisdata}{The data frame object holds all variables including phenotypes, confounders, depeinding variables and trait of interest.}
+  \item{phenoStartIdx}{The column index of the first phenotype.}
+}
+\details{
+For a given variable identified by its FIELDID, it reads from variable list to decide if to run association test and chooses the approriate assocition test based on variable value type. Currently, recognized variable types are "Integer", "Continuous", "Categorical single", and "Categorical multiple" and all other types are ignored.
+}
+\value{
+Return nothing and all output and lof information are written to output directory specified by user.
+}
diff --git a/PHESANT/man/testCategoricalMultiple.Rd b/PHESANT/man/testCategoricalMultiple.Rd
new file mode 100644
index 0000000..acb10f0
--- /dev/null
+++ b/PHESANT/man/testCategoricalMultiple.Rd
@@ -0,0 +1,34 @@
+\name{testCategoricalMultiple}
+\alias{testCategoricalMultiple}
+\title{
+Association Test dispacther for "Categorical multiple" value type
+}
+\description{
+The function takes the declared "Categorical multiple" value type of a field and validates/decides on the actual type of association test and perform the test accordingly.
+}
+\usage{
+testCategoricalMultiple(opt, vl, varName, varType, thisdata, phenoStartIdx)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{vl}{The list that holds input phenotype variable list and input data coding list.}
+  \item{varName}{The phenotype/variable to be tested, in the form of FIELDID. For example 21022.}
+  \item{varType}{The value type of the variable. Should always be "Categorical multiple" here and used for logging purpose only.}
+  \item{thisdata}{The data frame object holds all variables including phenotypes, confounders, depeinding variables and trait of interest.}
+  \item{phenoStartIdx}{The column index of the first phenotype.}
+}
+\details{
+Performs variable processing for categorical (multiple) fields, namely
+  \enumerate{
+      \item Reassign values as specified in data coding file
+      \item Generate binary variable for each category in field, restricting to correct set of participants as specified
+      \item Replace missing codes - we assume values < 0 are missing for categorical (single) variables
+      \item Check derived variable has at least 10 cases in each group
+      \item Call binaryLogisticRegression function for this derived binary variable
+  }
+}
+
+\value{
+Return nothing and all output and lof information are written to output directory specified by user.
+}
+
diff --git a/PHESANT/man/testCategoricalSingle.Rd b/PHESANT/man/testCategoricalSingle.Rd
new file mode 100644
index 0000000..1148619
--- /dev/null
+++ b/PHESANT/man/testCategoricalSingle.Rd
@@ -0,0 +1,32 @@
+\name{testCategoricalSingle}
+\alias{testCategoricalSingle}
+\title{
+Association Test dispacther for "Categorical single" value type
+}
+\description{
+The function takes the declared "Categorical single" value type of a field and validates/decides on the actual type of association test and perform the test accordingly. 
+}
+\usage{
+testCategoricalSingle(opt, vl, varName, varType, thisdata, phenoStartIdx)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{vl}{The list that holds input phenotype variable list and input data coding list.}
+  \item{varName}{The phenotype/variable to be tested, in the form of FIELDID. For example 21022.}
+  \item{varType}{The value type of the variable. Should always be "Categorical single" here and used for logging purpose only.}
+  \item{thisdata}{The data frame object holds all variables including phenotypes, confounders, depeinding variables and trait of interest.}
+  \item{phenoStartIdx}{The column index of the first phenotype.}
+}
+\details{
+Performs variable processing for categorical (single) fields, namely
+  \enumerate{
+      \item Reassign values as specified in data coding information file
+      \item Reorder categories for ordered fields
+      \item Replace missing codes - we assume values < 0 are missing for categorical (single) variables
+      \item Remove values with <10 cases
+      \item Deterimine correct test to perform, either binary, ordered or unordered
+  }
+}
+\value{
+Return nothing and all output and lof information are written to output directory specified by user.
+}
\ No newline at end of file
diff --git a/PHESANT/man/testContinuous.Rd b/PHESANT/man/testContinuous.Rd
new file mode 100644
index 0000000..679d7ba
--- /dev/null
+++ b/PHESANT/man/testContinuous.Rd
@@ -0,0 +1,34 @@
+\name{testContinuous}
+\alias{testContinuous}
+\title{
+Association Test dispacther for Continuous value type
+}
+\description{
+The function takes the declared "Continuous" value type of a field and validates/decides on the actual type of association test and perform the test accordingly. 
+}
+\usage{
+testContinuous(opt, vl, varName, varType, thisdata, phenoStartIdx)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{vl}{The list that holds input phenotype variable list and input data coding list.}
+  \item{varName}{The phenotype/variable to be tested, in the form of FIELDID. For example 21022.}
+  \item{varType}{The value type of the variable. Should always be "Continuous" here and used for logging purpose only.}
+  \item{thisdata}{The data frame object holds all variables including phenotypes, confounders, depeinding variables and trait of interest.}
+  \item{phenoStartIdx}{The column index of the first phenotype.}
+}
+\details{
+  Processing integer fields, namely:
+  \enumerate{
+      \item Reassigning values as specified in the data code information file
+      \item Generate a single value if there are several values (arrays) by taking the mean
+      \item Treating this field as ordinal categorical if >20\% examples with same value
+      \item Otherwise Treat as continuous (with Rank-based Inverse Normal Transformation) if 500 or more samples have values
+      \item In the case of 3), treat it as binary if only two types of values, or ordinal categorical if its values can be binned approximately into three equl bins.
+      \item In the case of 5), if more than 3 type of values and can not be binned reasonably into 3 euqal bins, merge them into two bins ans treat it as binary, or give up.
+  }
+}
+\value{
+Return nothing and all output and lof information are written to output directory specified by user.
+}
+
diff --git a/PHESANT/man/testInteger.Rd b/PHESANT/man/testInteger.Rd
new file mode 100644
index 0000000..c2c85ee
--- /dev/null
+++ b/PHESANT/man/testInteger.Rd
@@ -0,0 +1,32 @@
+\name{testInteger}
+\alias{testInteger}
+\title{
+Association Test dispacther for Integer value type
+}
+\description{
+The function takes the declared "Integer" value type of a field and validates/decides on the actual type of association test and perform the test accordingly. 
+}
+\usage{
+testInteger(opt, vl, varName, varType, thisdata, phenoStartIdx)
+}
+\arguments{
+  \item{opt}{The list of input options provided by user.}
+  \item{vl}{The list that holds input phenotype variable list and input data coding list.}
+  \item{varName}{The phenotype/variable to be tested, in the form of FIELDID. For example 21022.}
+  \item{varType}{The value type of the variable. Should always be "INTEGER" here and used for logging purpose only.}
+  \item{thisdata}{The data frame object holds all variables including phenotypes, confounders, depeinding variables and trait of interest.}
+  \item{phenoStartIdx}{The column index of the first phenotype.}
+}
+\details{
+Processing integer fields, namely:
+  \enumerate{
+      \item Reassigning values as specified in the data code information file
+      \item Generate a single value if there are several values (arrays) by taking the mean
+      \item Treating this field as continuous if at least 20 distinct values
+      \item Otherwise treat as binary or ordered categorical if 2 or more than two values
+  }
+}
+\value{
+Return nothing and all output and lof information are written to output directory specified by user.
+}
+
diff --git a/PHESANT/phenomeScan.r b/PHESANT/phenomeScan.r
index 0803a90..be5f7c6 100644
--- a/PHESANT/phenomeScan.r
+++ b/PHESANT/phenomeScan.r
@@ -27,9 +27,7 @@ args <- commandArgs(T)
 if (length(args) == 0) {
     load(file = "opt_only.RData")
 } else {
-    opt_parser <- OptionParser(option_list=option_list)
-    opt <- parse_args(opt_parser)
-    opt <- processArgs(opt, opt_parser)
+    opt <- parseOptions()
 }
 
 run(opt)

From 66b719af092d1e4d18e5fa6a6ca92d89645e86c0 Mon Sep 17 00:00:00 2001
From: Quanli Wang <quanliwang20@hotmail.com>
Date: Sat, 16 Mar 2019 13:35:30 -0400
Subject: [PATCH 27/27] Restore original code structure

---
 PHESANT/R/testCategoricalOrdered.r            | 156 +++++------
 WAS/addToCounts.r                             |  33 +++
 WAS/binaryLogisticRegression.r                | 121 +++++++++
 WAS/equalSizedBins.r                          | 118 +++++++++
 WAS/fixOddFieldsToCatMul.r                    |  54 ++++
 WAS/getIsCatMultExposure.r                    |  61 +++++
 WAS/getIsExposure.r                           |  31 +++
 WAS/getNumValuesCatMultExposure.r             |  53 ++++
 WAS/incrementCounter.r                        |  34 +++
 WAS/initFunctions.r                           |  99 +++++++
 WAS/loadConfounders.r                         | 136 ++++++++++
 WAS/loadData.r                                |  85 ++++++
 WAS/loadIndicatorFields.r                     | 162 ++++++++++++
 WAS/loadPhenotypes.r                          | 127 +++++++++
 WAS/loadTraitOfInterest.r                     |  54 ++++
 WAS/makeTestDataFrame.r                       |  30 +++
 WAS/opt.RData                                 | Bin 0 -> 4853 bytes
 WAS/phenomeScan.r                             | 161 +++++++++++
 WAS/processArgs.r                             | 132 +++++++++
 WAS/reassignValue.r                           |  73 +++++
 WAS/replaceMissingCodes.r                     |  37 +++
 WAS/replaceNaN.r                              |  47 ++++
 WAS/saveCounts.r                              |  31 +++
 WAS/storeNewVar.r                             |  21 ++
 WAS/testAssociations.r                        | 141 ++++++++++
 WAS/testCatMultiple.r                         | 166 ++++++++++++
 WAS/testCatSingle.r                           | 193 ++++++++++++++
 WAS/testCategoricalOrdered.r                  | 163 ++++++++++++
 WAS/testCategoricalUnordered.r                | 177 +++++++++++++
 WAS/testContinuous.r                          | 250 ++++++++++++++++++
 WAS/testInteger.r                             |  88 ++++++
 WAS/testNumExamples.r                         |  37 +++
 {unittests => WAS/unittests}/run-tests.sh     |   0
 .../unittests}/test_equalSizedBins.r          |   0
 .../unittests}/test_reassignValue.r           |   0
 .../unittests}/test_testCatMultiple.r         |   0
 .../unittests}/test_testCatSingle.r           |   0
 WAS/validatePhenotypeInput.r                  |  85 ++++++
 WAS/validateTraitInput.r                      |  69 +++++
 39 files changed, 3139 insertions(+), 86 deletions(-)
 create mode 100644 WAS/addToCounts.r
 create mode 100644 WAS/binaryLogisticRegression.r
 create mode 100644 WAS/equalSizedBins.r
 create mode 100644 WAS/fixOddFieldsToCatMul.r
 create mode 100644 WAS/getIsCatMultExposure.r
 create mode 100644 WAS/getIsExposure.r
 create mode 100644 WAS/getNumValuesCatMultExposure.r
 create mode 100644 WAS/incrementCounter.r
 create mode 100644 WAS/initFunctions.r
 create mode 100644 WAS/loadConfounders.r
 create mode 100644 WAS/loadData.r
 create mode 100644 WAS/loadIndicatorFields.r
 create mode 100644 WAS/loadPhenotypes.r
 create mode 100644 WAS/loadTraitOfInterest.r
 create mode 100644 WAS/makeTestDataFrame.r
 create mode 100644 WAS/opt.RData
 create mode 100644 WAS/phenomeScan.r
 create mode 100644 WAS/processArgs.r
 create mode 100644 WAS/reassignValue.r
 create mode 100644 WAS/replaceMissingCodes.r
 create mode 100644 WAS/replaceNaN.r
 create mode 100644 WAS/saveCounts.r
 create mode 100644 WAS/storeNewVar.r
 create mode 100644 WAS/testAssociations.r
 create mode 100644 WAS/testCatMultiple.r
 create mode 100644 WAS/testCatSingle.r
 create mode 100644 WAS/testCategoricalOrdered.r
 create mode 100644 WAS/testCategoricalUnordered.r
 create mode 100644 WAS/testContinuous.r
 create mode 100644 WAS/testInteger.r
 create mode 100644 WAS/testNumExamples.r
 rename {unittests => WAS/unittests}/run-tests.sh (100%)
 rename {unittests => WAS/unittests}/test_equalSizedBins.r (100%)
 rename {unittests => WAS/unittests}/test_reassignValue.r (100%)
 rename {unittests => WAS/unittests}/test_testCatMultiple.r (100%)
 rename {unittests => WAS/unittests}/test_testCatSingle.r (100%)
 create mode 100644 WAS/validatePhenotypeInput.r
 create mode 100644 WAS/validateTraitInput.r

diff --git a/PHESANT/R/testCategoricalOrdered.r b/PHESANT/R/testCategoricalOrdered.r
index 7089ff7..5cffe59 100644
--- a/PHESANT/R/testCategoricalOrdered.r
+++ b/PHESANT/R/testCategoricalOrdered.r
@@ -19,96 +19,80 @@
 
 # Performs ordered logistic regression test and saves results in ordered logistic results file
 testCategoricalOrdered <- function(opt, vl, varName, varType, thisdata, phenoStartIdx, orderStr="") {
-
-	
-	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
-	geno = thisdata[,"geno"]
-
-	cat("CAT-ORD || ");
-	.incrementCounter("ordCat")
-	.doCatOrdAssertions(pheno)
-
-	uniqVar = unique(na.omit(pheno));
-
-	# log the ordering of categories used
-	orderStr = .setOrderString(orderStr, uniqVar);
-	cat("order: ", orderStr, " || ",  sep="");
-
-	# check sample size
-	numNotNA = length(which(!is.na(pheno)))
-	if (numNotNA<500) {
-		cat("CATORD-SKIP-500 (", numNotNA, ") || ",sep="");
-	  .incrementCounter("ordCat.500")
-	}
-	else {
-		# test this cat ordered variable with ordered logistic regression	
-
-	        phenoFactor = factor(pheno)
-
-		cat("num categories: ", length(unique(na.omit(phenoFactor))), " || ", sep="");
-
-		if (opt$save == TRUE) {
-			# add pheno to dataframe
-			.storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
-			cat("SUCCESS results-ordered-logistic");
-			.incrementCounter("success.ordCat")
-                }
-                else {
-
-		# ordinal logistic regression
-		sink()
-		sink(pkg.env$modelFitLogFile, append=TRUE)
-		print("--------------")
-		print(varName)
-		
-		### BEGIN TRYCATCH
-		tryCatch({
-		confounders=thisdata[,3:(phenoStartIdx -1), drop = FALSE]
-
-
-		if (opt$standardise==TRUE) {
-			geno = scale(geno)
-                }
-
-		fit <- polr(phenoFactor ~ geno + ., data=confounders, Hess=TRUE)
-
-		ctable <- coef(summary(fit))
-		sink()
-		sink(pkg.env$resLogFile, append=TRUE)
-
-		ct = coeftest(fit)
-		pvalue = ct["geno","Pr(>|t|)"]
-		beta = ctable["geno", "Value"];
-
-		if (opt$confidenceintervals == TRUE) {
-			se = ctable["geno", "Std. Error"]
-			lower = beta - 1.96*se;
-	                upper = beta + 1.96*se;
+	  pheno <- thisdata[,phenoStartIdx:ncol(thisdata)]
+	  geno <- thisdata[,"geno"]
+
+	  cat("CAT-ORD || ")
+	  .incrementCounter("ordCat")
+	  .doCatOrdAssertions(pheno)
+	  uniqVar <- unique(na.omit(pheno))
+
+	  # log the ordering of categories used
+	  orderStr <-.setOrderString(orderStr, uniqVar)
+	  cat("order: ", orderStr, " || ",  sep="")
+
+	  # check sample size
+	  numNotNA <- length(which(!is.na(pheno)))
+	  if (numNotNA<500) {
+		    cat("CATORD-SKIP-500 (", numNotNA, ") || ",sep="")
+	      .incrementCounter("ordCat.500")
+	  } else {
+		    # test this cat ordered variable with ordered logistic regression	
+	      phenoFactor <- factor(pheno)
+    		cat("num categories: ", length(unique(na.omit(phenoFactor))), " || ", sep="");
+    		if (opt$save == TRUE) {
+    			  # add pheno to dataframe
+    			  .storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
+    			  cat("SUCCESS results-ordered-logistic")
+    			  .incrementCounter("success.ordCat")
+        } else {
+        		# ordinal logistic regression
+        		sink()
+        		sink(pkg.env$modelFitLogFile, append=TRUE)
+        		print("--------------")
+        		print(varName)
+        		
+        		### BEGIN TRYCATCH
+        		tryCatch({
+        		    confounders=thisdata[,3:(phenoStartIdx -1), drop = FALSE]
+        		    if (opt$standardise==TRUE) {
+        			      geno = scale(geno)
                 }
-                else {
-                        lower = NA
-                        upper = NA
+        		    fit <- polr(phenoFactor ~ geno + ., data=confounders, Hess=TRUE)
+        		    ctable <- coef(summary(fit))
+        		    sink()
+        		    sink(pkg.env$resLogFile, append=TRUE)
+        
+        		    ct <- coeftest(fit)
+        		    pvalue <- ct["geno","Pr(>|t|)"]
+        		    beta <- ctable["geno", "Value"]
+        
+        		    if (opt$confidenceintervals == TRUE) {
+        			      se <- ctable["geno", "Std. Error"]
+        			      lower <- beta - 1.96*se
+        	          upper <- beta + 1.96*se
+                } else {
+                    lower <- NA
+                    upper <- NA
                 }
-
-		write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE");
-		cat("SUCCESS results-ordered-logistic");
-		.incrementCounter("success.ordCat")
-
-		isExposure = getIsExposure(vl, varName)
+        
+        		    write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE")
+        		    cat("SUCCESS results-ordered-logistic")
+        		    .incrementCounter("success.ordCat")
+        
+        		    isExposure <- getIsExposure(vl, varName)
                 if (isExposure == TRUE) {
-                  .incrementCounter("success.exposure.ordCat")
+                    .incrementCounter("success.exposure.ordCat")
                 }
-
-		### END TRYCATCH
-		}, error = function(e) {
-			sink()
-                        sink(pkg.env$resLogFile, append=TRUE)
-                        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
-                        .incrementCounter("ordCat.error")
-                })
-		}
-
-	}
+        		### END TRYCATCH
+        		}, error = function(e) {
+        		    sink()
+                sink(pkg.env$resLogFile, append=TRUE)
+                cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
+                .incrementCounter("ordCat.error")
+            })
+        }
+	  }
 }
 
 # check that the phenotype is valid - that there are more than two categories
diff --git a/WAS/addToCounts.r b/WAS/addToCounts.r
new file mode 100644
index 0000000..e41420c
--- /dev/null
+++ b/WAS/addToCounts.r
@@ -0,0 +1,33 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# adds given value to counter, that are used to count how many variables go down each route in the data flow
+addToCounts <- function(countName, num) {
+		
+	idx = which(counters$name==countName)
+
+	if (length(idx)==0) {
+		# counter does not exist so add with countValue 1
+		counters <<- rbind(counters, data.frame(name=countName, countValue=num))
+	}
+	else {
+		# add to counter that already exists
+		counters$countValue[idx] <<- counters$countValue[idx]+num
+	}
+
+}
diff --git a/WAS/binaryLogisticRegression.r b/WAS/binaryLogisticRegression.r
new file mode 100644
index 0000000..6fbcefd
--- /dev/null
+++ b/WAS/binaryLogisticRegression.r
@@ -0,0 +1,121 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Perform binary logistic regression
+#
+# Performs binary logistic regression on the phenotype stored in thisdata 
+# and stores result in 'results-logistic-binary' results file.
+binaryLogisticRegression <- function(varName, varType, thisdata, isExposure) {
+
+        phenoFactor = factor(thisdata[,phenoStartIdx])
+
+        facLevels = levels(phenoFactor)
+
+	# assert variable has exactly two distinct values
+	if (length(facLevels)!=2) {
+		#stop(paste("Not 2 levels: ", length(facLevels), " || ", sep=""))
+		cat("BINARY-NOT2LEVELS- (", length(facLevels), ") || ",sep="");
+                incrementCounter("binary.nottwolevels")
+	}
+
+	idxTrue = length(which(phenoFactor==facLevels[1]))
+	idxFalse = length(which(phenoFactor==facLevels[2]))
+	numNotNA = length(which(!is.na(phenoFactor)))
+  
+        if (idxTrue<10 || idxFalse<10) {
+		cat("BINARY-LOGISTIC-SKIP-10 (", idxTrue, "/", idxFalse, ") || ", sep="")
+		incrementCounter("binary.10")
+	}
+	else if (numNotNA<500) {	
+		cat("BINARY-LOGISTIC-SKIP-500 (", numNotNA, ") || ",sep="");
+		incrementCounter("binary.500")
+       	}
+	else {
+
+              	cat("sample ", idxTrue, "/", idxFalse, "(", numNotNA, ") || ", sep="");
+
+		if (opt$save == TRUE) {
+			# add pheno to dataframe
+			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'bin')
+			cat("SUCCESS results-logistic-binary ");			
+			incrementCounter("success.binary")
+		}
+		else {
+
+		# use standardised geno values
+		if (opt$standardise==TRUE) {
+                	geno = scale(thisdata[,"geno"])
+		}
+		else {
+			geno = thisdata[,"geno"]
+		}
+                confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+		
+		sink()
+		sink(modelFitLogFile, append=TRUE)
+		print("--------------")
+		print(varName)
+
+		###### BEGIN TRYCATCH
+                tryCatch({
+
+		mylogit <- glm(phenoFactor ~ geno + ., data=confounders, family="binomial")
+
+		sink()
+             	sink(resLogFile, append=TRUE)
+		
+                sumx = summary(mylogit)
+
+                pvalue = sumx$coefficients['geno','Pr(>|z|)']
+                beta = sumx$coefficients["geno","Estimate"]
+
+
+		if (opt$confidenceintervals == TRUE) {
+			cis = confint(mylogit, "geno", level=0.95)
+	                lower = cis["2.5 %"]
+	                upper = cis["97.5 %"]
+		}
+		else {
+			lower = NA
+			upper = NA
+		}
+
+                numNotNA = length(na.omit(phenoFactor))
+
+                ## save result to file
+                write(paste(varName,varType,paste(idxTrue,"/",idxFalse,"(",numNotNA,")",sep=""), beta,lower,upper,pvalue, sep=","), file=paste(opt$resDir,"results-logistic-binary-",opt$varTypeArg,".txt",sep=""), append="TRUE");
+                cat("SUCCESS results-logistic-binary ");
+                
+		incrementCounter("success.binary")
+
+		if (isExposure==TRUE) {
+	            	incrementCounter("success.exposure.binary")
+	        }	
+
+		## END TRYCATCH
+                }, error = function(e) {
+                        sink()
+                        sink(resLogFile, append=TRUE)
+                        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
+                        incrementCounter("binary.error")
+                })
+		}	
+        }
+}
+
diff --git a/WAS/equalSizedBins.r b/WAS/equalSizedBins.r
new file mode 100644
index 0000000..15570cd
--- /dev/null
+++ b/WAS/equalSizedBins.r
@@ -0,0 +1,118 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# splits the pheno into 3 bins with the cut points between values rather at the exact value for the quantile
+equalSizedBins <- function(phenoAvg) {
+
+	## equal sized bins 
+        q = quantile(phenoAvg, probs=c(1/3,2/3), na.rm=TRUE)
+
+	minX = min(phenoAvg, na.rm=TRUE)
+	maxX = max(phenoAvg, na.rm=TRUE)
+
+	phenoBinned = phenoAvg;
+	if (q[1]==minX) {
+		# edge case - quantile value is lowest value
+
+		# assign min value as cat1
+		idx1 = which(phenoAvg==q[1]);
+		phenoBinned[idx1] = 0;
+
+		# divide remaining values into cat2 and cat3
+		phenoAvgRemaining = phenoAvg[which(phenoAvg!=q[1])];
+		qx = quantile(phenoAvgRemaining, probs=c(0.5), na.rm=TRUE)
+		minXX = min(phenoAvgRemaining, na.rm=TRUE)
+		maxXX =	max(phenoAvgRemaining, na.rm=TRUE)
+
+		if (qx[1]==minXX) {
+			# edge case again - quantile value is lowest value
+			idx2 = which(phenoAvg==qx[1]);
+			idx3 = which(phenoAvg>qx[1]);
+		}
+		else if (qx[1]==maxXX) {
+			# edge case again - quantile value is max value
+			idx2 = which(phenoAvg<qx[1] & phenoAvg>q[1]);
+			idx3 = which(phenoAvg==qx[1]);
+		}
+		else {
+			idx2 = which(phenoAvg<qx[1] & phenoAvg>q[1]);
+			idx3 = which(phenoAvg>=qx[1]);
+		}
+		phenoBinned[idx2] = 1;
+                phenoBinned[idx3] = 2;
+	}
+	else if (q[2]==maxX) {
+		# edge case - quantile value is highest value
+
+		# assign max value as cat3
+		idx3 = which(phenoAvg==q[2]);
+		phenoBinned[idx3] = 2;
+
+		# divide remaining values into cat1 and cat2
+		phenoAvgRemaining = phenoAvg[which(phenoAvg!=q[2])];
+                qx = quantile(phenoAvgRemaining, probs=c(0.5), na.rm=TRUE)
+		minXX = min(phenoAvgRemaining, na.rm=TRUE)
+                maxXX = max(phenoAvgRemaining, na.rm=TRUE)
+
+		if (qx[1]==minXX) {
+			# edge case again - quantile value is lowest value
+                        idx1 = which(phenoAvg==qx[1]);
+                        idx2 = which(phenoAvg>qx[1] & phenoAvg<q[2]);
+                }
+                else if	(qx[1]==maxXX) {
+			# edge case again - quantile value is max value
+                        idx1 = which(phenoAvg<qx[1]);
+                        idx2 = which(phenoAvg==qx[1]);
+                }
+                else {
+	                idx1 = which(phenoAvg<qx[1]);  
+			idx2 = which(phenoAvg>=qx[1] & phenoAvg<q[2]);
+		}
+
+                phenoBinned[idx1] = 0;
+                phenoBinned[idx2] = 1;
+	}
+       else if (q[1] == q[2]) {
+		# both quantiles correspond to the same value so set 
+		# cat1 as < this value, cat2 as exactly this value and
+		# cat3 as > this value
+              	phenoBinned = phenoAvg;
+        	idx1 = which(phenoAvg<q[1]);
+                idx2 = which(phenoAvg==q[2]);
+                idx3 = which(phenoAvg>q[2]);
+                phenoBinned[idx1] = 0;
+                phenoBinned[idx2] = 1;
+                phenoBinned[idx3] = 2;
+       	}
+        else {
+		# standard case - split the data into three roughly equal parts where
+		# cat1<q1, cat2 between q1 and q2, and cat3>=q2
+         	phenoBinned = phenoAvg;
+                idx1 = which(phenoAvg<q[1]);
+                idx2 = which(phenoAvg>=q[1] & phenoAvg<q[2]);
+               	idx3 = which(phenoAvg>=q[2]);
+                phenoBinned[idx1] = 0;
+                phenoBinned[idx2] = 1;
+                phenoBinned[idx3] = 2;
+	}
+
+	cat("cat N: ", length(idx1),", ",length(idx2),", ",length(idx3), " || ", sep="");
+	
+	return(phenoBinned);
+}
+
diff --git a/WAS/fixOddFieldsToCatMul.r b/WAS/fixOddFieldsToCatMul.r
new file mode 100644
index 0000000..84eff53
--- /dev/null
+++ b/WAS/fixOddFieldsToCatMul.r
@@ -0,0 +1,54 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Changes variable name from instances to arrays
+# 
+# This function changes the format of variable names from instance format to array format (i.e. we treat
+# the instances as arrays), for a small subset of Biobank variables.
+#
+# Some variables are stored in Biobank as categorical (single) fields with the data stored as set of instances,
+# but we want to treat these instead as categorical (multiple) with a set of arrays.
+# These are indicated by the value "YES-INSTANCES" in the CAT_SINGLE_TO_CAT_MULT column of the variable info file.
+# This function changes the format of these variable names from VARID_0_0, VARID_1_0, VARID_2_0 etc (which
+# is instance format), to VARID_0_0, VARID_0_1, VARID_0_2 etc. (array format) so they can be treated as categorical (multiple) fields. 
+fixOddFieldsToCatMul <- function(data) {
+
+	# examples are variables: 40006, 40011, 40012, 40013
+
+	# get all variables that need their instances changing to arrays
+	dataPheno = vl$phenoInfo[which(vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT=="YES-INSTANCES"),];
+
+	for (i in 1:nrow(dataPheno)) {
+		varID = dataPheno[i,]$FieldID;		
+		varidString = paste("x",varID,"_", sep="");			
+			
+		# get all columns in data dataframe for this variable	
+		colIdxs = which(grepl(varidString,names(data)));
+		
+		# change format from xvarid_0_0, xvarid_1_0, xvarid_2_0, to xvarid_0_0, xvarid_0_1, xvarid_0_2
+		count = 0;
+		for (j in colIdxs) {	
+			colnames(data)[j] <- paste(varidString, "0_", count, sep="")
+			count = count + 1;
+		}				
+	}
+
+	return(data)
+
+}
diff --git a/WAS/getIsCatMultExposure.r b/WAS/getIsCatMultExposure.r
new file mode 100644
index 0000000..560263f
--- /dev/null
+++ b/WAS/getIsCatMultExposure.r
@@ -0,0 +1,61 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# looks up categorical multiple field in the variable info file, return
+# whether field has YES in TRAIT_OF_INTEREST column (i.e. all values in 
+# this field denote the exposure), or whether varName has varValue stated 
+# as a trait of interest in the TRAIT_OF_INTEREST column (multiple values are
+# separated by "|" in this field
+getIsCatMultExposure <- function(varName, varValue) {
+	
+	# get row index of field in variable information file
+	idx=which(vl$phenoInfo$FieldID==varName)
+
+	# may be empty of may contain VALUE1|VALUE2 etc .. to denote those
+	# cat mult values denoting exposure variable
+	isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+        
+        if (!is.na(isExposure) & isExposure!="") {
+		
+		isExposure = as.character(isExposure)
+		
+		## first check if value is YES, then all values are exposure traits
+		if (isExposure == "YES") {
+			cat("IS_CM_ALL_EXPOSURE || ")
+			return(TRUE)
+		}
+
+		## try to split by |, to set particular values as exposure
+
+		# split into variable Values
+		exposureValues = unlist(strsplit(isExposure,"\\|"))
+
+		# for each value stated, check whether it is varValue
+		for (thisVal in exposureValues) {
+			if (thisVal == varValue) {
+				cat("IS_CM_EXPOSURE || ")
+				return(TRUE)
+			}
+		}
+	}
+
+	# varValue is not in list of exposure values
+	return(FALSE)
+
+}
diff --git a/WAS/getIsExposure.r b/WAS/getIsExposure.r
new file mode 100644
index 0000000..c8df69e
--- /dev/null
+++ b/WAS/getIsExposure.r
@@ -0,0 +1,31 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# returns boolean var - whether this field denotes the trait of interest, as specified
+# in the variable information file
+# to determine if values of cat mult fields (not the whole field) are exposure values, use getIsCatMultExposure function instead.
+getIsExposure <- function(varName) {
+
+	idx=which(vl$phenoInfo$FieldID==varName)
+        isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+        if (!is.na(isExposure) & isExposure=="YES") {
+		return(TRUE)
+    	}
+	return(FALSE)
+}
diff --git a/WAS/getNumValuesCatMultExposure.r b/WAS/getNumValuesCatMultExposure.r
new file mode 100644
index 0000000..4cae9a9
--- /dev/null
+++ b/WAS/getNumValuesCatMultExposure.r
@@ -0,0 +1,53 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# looks up categorical multiple field in the variable info file, return
+# number of values denoted as trait of interest.
+# returns zero if whole field is denoted trait of interest, not particular values.
+getNumValuesCatMultExposure <- function(varName) {
+	
+	# get row index of field in variable information file
+	idx=which(vl$phenoInfo$FieldID==varName)
+
+	# may be empty of may contain VALUE1|VALUE2 etc .. to denote those
+	# cat mult values denoting exposure variable
+	isExposure = vl$phenoInfo$TRAIT_OF_INTEREST[idx]
+        
+        if (!is.na(isExposure) & isExposure!="") {
+		
+		isExposure = as.character(isExposure)
+		
+		## first check if value is YES, then no partic values are traits of interest
+		if (isExposure == "YES") {
+			return(0)
+		}
+
+		## try to split by |, to set particular values as exposure
+
+		# split into variable Values
+		exposureValues = unlist(strsplit(isExposure,"\\|"))
+
+		return(length(exposureValues))
+
+	}
+
+	# varValue is not in list of exposure values
+	return(0)
+
+}
diff --git a/WAS/incrementCounter.r b/WAS/incrementCounter.r
new file mode 100644
index 0000000..c430a24
--- /dev/null
+++ b/WAS/incrementCounter.r
@@ -0,0 +1,34 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# increments counters used to count how many variables go down each route in the data flow
+incrementCounter <- function(countName) {
+		
+	idx = which(counters$name==countName)
+
+	if (length(idx)==0) {
+		# counter does not exist so add with countValue 1
+		counters <<- rbind(counters, data.frame(name=countName, countValue=1))
+	}
+	else {
+		# increment counter that already exists
+		counters$countValue[idx] <<- counters$countValue[idx]+1
+	}
+
+}
diff --git a/WAS/initFunctions.r b/WAS/initFunctions.r
new file mode 100644
index 0000000..e412c6e
--- /dev/null
+++ b/WAS/initFunctions.r
@@ -0,0 +1,99 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# load the required r files
+loadSource <- function() {
+	source("loadData.r")
+	source("reassignValue.r")
+	source("validatePhenotypeInput.r")
+	source("validateTraitInput.r")
+	source("testNumExamples.r")
+	source("binaryLogisticRegression.r")
+	source("equalSizedBins.r")
+	source("fixOddFieldsToCatMul.r")
+	source("replaceMissingCodes.r")
+	source("replaceNaN.r")
+	source("testAssociations.r")
+	source("testCatMultiple.r")
+	source("testCatSingle.r")
+	source("testContinuous.r")
+	source("testInteger.r")
+	source("testCategoricalOrdered.r")
+	source("testCategoricalUnordered.r")
+	source("saveCounts.r")
+	source("incrementCounter.r")
+	source("getIsCatMultExposure.r")
+	source("getIsExposure.r")
+	source("addToCounts.r")
+	source("getNumValuesCatMultExposure.r")	
+	source("storeNewVar.r")
+	source("loadPhenotypes.r")
+	source("loadTraitOfInterest.r")	
+	source("loadConfounders.r")
+	source("makeTestDataFrame.r")
+	source("loadIndicatorFields.r")
+}
+
+# init the counters used to determine how many variables took each path in the variable processing flow.
+initCounters <- function() {
+	counters = data.frame(name=character(),countValue=integer(), stringsAsFactors=FALSE)
+	return(counters);
+}
+
+# create new results files and headers
+initResultsFiles <- function() {
+
+	## only linear and continuous fields can create linear results
+	file.create(paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt",sep=""));
+	write("varName,varType,n,beta,lower,upper,pvalue", file=paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt",sep=""), append="TRUE");
+
+	## all field types can create binary results	
+	file.create(paste(opt$resDir,"results-logistic-binary-",opt$varTypeArg,".txt",sep=""));
+	write("varName,varType,n,beta,lower,upper,pvalue", file=paste(opt$resDir,"results-logistic-binary-",opt$varTypeArg,".txt",sep=""), append="TRUE");
+
+	## only categorical multiple cannot generate order categorical results
+	file.create(paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""));
+	write("varName,varType,n,beta,lower,upper,pvalue", file=paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE");
+	
+	## only categorical single fields can generate unordered categorical results
+	file.create(paste(opt$resDir,"results-multinomial-logistic-",opt$varTypeArg,".txt",sep=""));
+	write("varName,varType,n,beta,lower,upper,pvalue", file=paste(opt$resDir,"results-multinomial-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE");
+	
+}
+
+# load the variable information and data code information files
+initVariableLists <- function() {
+
+	phenoInfo=read.table(opt$variablelistfile,sep="\t",header=1,comment.char="",quote="");
+
+	dataCodeInfo=read.table(opt$datacodingfile,sep=",", header=1);
+
+	vars=list(phenoInfo=phenoInfo, dataCodeInfo=dataCodeInfo);
+	return(vars);
+}
+
+
+
+
+
+
+
+
+
+
+
diff --git a/WAS/loadConfounders.r b/WAS/loadConfounders.r
new file mode 100644
index 0000000..357dbab
--- /dev/null
+++ b/WAS/loadConfounders.r
@@ -0,0 +1,136 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
+# documentation files (the "Software"), to deal in the Software without restriction, including without 
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions 
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+# DEALINGS IN THE SOFTWARE.
+
+##
+## loads confounder variables from phenotype file
+
+loadConfounders <- function(phenotypes) {
+
+if (opt$save==TRUE) { 
+
+	# saving not running tests so we add a fake confounder
+	numRows = nrow(phenotypes)
+        data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
+        colnames(data)[1] <- "userID"
+        colnames(data)[2] <- "conf1"
+        return(data)
+
+} else {
+
+	if (!is.null(opt$confounderfile)) {
+		print("Loading confounders from confounder file ...")
+	
+		confs = fread(opt$confounderfile, sep=',', header=TRUE, data.table=FALSE)
+	        confs = lapply(confs,function(x) type.convert(as.character(x)))
+	        confs = as.data.frame(confs)
+
+		## find userID column and change name to userID
+		idx = which(colnames(confs) == opt$userId)
+		confs = confs[,c(opt$userID,setdiff(colnames(confs),opt$userID))]
+		colnames(confs)[1] <- "userID"
+
+	} else {
+	print("Loading confounders from phenotypes file ...")
+        confNames = getConfounderNames()
+
+        #####
+	##### extract confounders from data file
+        confs = fread(opt$phenofile, select=confNames, sep=',', header=TRUE, data.table=FALSE)
+	confs = lapply(confs,function(x) type.convert(as.character(x)))
+	confs = as.data.frame(confs)
+
+        #####
+	##### process genetic batch to create genetic chip variable
+        if (opt$genetic == TRUE) {
+                genoBatch = confs[,"x22000_0_0"]
+
+                # chip comes from batch field 22000
+                genoChip = rep.int(NA,nrow(confs));
+                idxForVar = which(genoBatch<0);
+                genoChip[idxForVar] = 0;
+                idxForVar = which(genoBatch>=0 & genoBatch<2000);
+                genoChip[idxForVar] = 1;
+
+		# remove geno batch from and add geno chip to confounders
+                confs = confs[,-which(names(confs) == "x22000_0_0")]
+                confs = cbind.data.frame(confs, genoChip)
+        }
+
+
+	#####
+	##### Convert assessment centre to an indicator variable
+	if (opt$sensitivity==TRUE) {
+		confs$x54_0_0 = as.factor(confs$x54_0_0)
+		assCentre = model.matrix(~confs$x54_0_0)
+		assCentre = assCentre[,2:ncol(assCentre)]
+		confs = cbind(confs, assCentre)
+		confs$x54_0_0 = NULL
+	}
+
+	colnames(confs)[1] <- "userID"
+
+	}
+
+	# remove any rows with no values
+	print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))
+	confsComp = complete.cases(confs)
+	print(paste("Number of INCOMPLETE rows removed from confounder data: ", length(which(confsComp==FALSE)),sep=""))
+	confs = confs[confsComp==TRUE,]
+	print(paste("Number of rows in confounder data: ", nrow(confs),sep=""))	
+
+	print("Confounder columns:")
+	print(names(confs))
+
+	return(confs)
+}
+}
+
+getConfounderNames <- function() {
+
+        #####
+	##### first get vector of confounder names
+
+        # age and sex
+        confNames = c(opt$userId, "x21022_0_0", "x31_0_0")
+
+        # if genetic trait of interest then adjust for genotype chip
+        # and also let user choose sensitivity analysis that also adjusts for top 10 genetic principal components and assessment centre
+        if (opt$genetic == TRUE) {
+
+                confNames = append(confNames, "x22000_0_0")
+
+                if (opt$sensitivity==TRUE) {
+                        confNames = append(confNames, c("x22009_0_1", "x22009_0_2", "x22009_0_3", "x22009_0_4", "x22009_0_5", "x22009_0_6", "x22009_0_7", "x22009_0_8", "x22009_0_9", "x22009_0_10", "x54_0_0"))
+                        print("Adjusting for age, sex, genotype chip, top 10 genetic principal components and assessment centre")
+                } else {
+                        print("Adjusting for age, sex and genotype chip")
+                }
+        } else {
+                # non genetic trait of interest, then sensitivity adjusts for assessment center
+                if (opt$sensitivity==TRUE) {
+                        confNames = append(confNames, "x54_0_0")
+                        print("Adjusting for age, sex and assessment centre")
+                } else {
+                        print("Adjusting for age and sex")
+                }
+        }
+
+	return(confNames)
+}
+
+
diff --git a/WAS/loadData.r b/WAS/loadData.r
new file mode 100644
index 0000000..127ea39
--- /dev/null
+++ b/WAS/loadData.r
@@ -0,0 +1,85 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# loads phenotype and trait of interest data files
+# creates phenotype / trait of interest data frame
+# creates confounder data frame
+# returns an object holding these two data frames
+loadData <- function() {
+
+	library(data.table)
+
+	#####
+	##### validating data
+
+	## check phenotype file headers
+	validatePhenotypeInput()
+
+	## check trait of interest file headers
+	validateTraitInput()
+
+	#####
+	##### load data
+
+	## load phenotype
+	print("Loading phenotypes ...")
+	phenotype = loadPhenotypes()
+
+	## load trait of interest
+        toi <- loadTraitOfInterest(phenotype)
+
+        ## load confounders
+        conf <- loadConfounders(phenotype)
+
+	## add trait of interest to phenotype data frame and remove rows with no trait of interest
+	## merge in toi with phenotype - keep id list from phenotypes file
+	phenotype = merge(toi, phenotype, by="userID", all.y=TRUE, all.x=FALSE)
+	
+        ## remove any rows with no trait of interest
+        idxNotEmpty = which(!is.na(phenotype[,"geno"]))
+
+	if (opt$save == TRUE) {
+	        print(paste("Phenotype file has ", nrow(phenotype), " rows.", sep=""))
+	} else {
+		print(paste("Phenotype file has ", nrow(phenotype), " rows with ", length(idxNotEmpty), " not NA for trait of interest (",opt$traitofinterest,").", sep=""))
+	}
+
+        phenotype = phenotype[idxNotEmpty,]
+
+	# match ids from not empty phenotypes list
+	confsIdx = which(conf$userID %in% phenotype$userID)
+        conf = conf[confsIdx,]
+
+	if (nrow(phenotype)==0) {
+	        stop("No examples with row in both trait of interest and phenotype files", call.=FALSE)
+	} else {
+	        print(paste("Phenotype and trait of interest data files merged, with", nrow(phenotype),"examples"))
+	}
+
+	# some fields are fixed that have a field type as cat single but we want to treat them like cat mult
+	phenotype = fixOddFieldsToCatMul(phenotype)
+
+	indFields = loadIndicatorFields(colnames(phenotype))
+	
+	d = list(datax=phenotype, confounders=conf, inds=indFields)
+	return(d)
+
+}
+
+
diff --git a/WAS/loadIndicatorFields.r b/WAS/loadIndicatorFields.r
new file mode 100644
index 0000000..31c94e0
--- /dev/null
+++ b/WAS/loadIndicatorFields.r
@@ -0,0 +1,162 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
+# documentation files (the "Software"), to deal in the Software without restriction, including without 
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions 
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+# DEALINGS IN THE SOFTWARE.
+
+##
+## load data used for data code default value related field, and categorical multiple indicator field
+loadIndicatorFields <- function(phenosToTest) {
+
+	print("Loading indicator fields from phenotypes file ...")
+
+	# read pheno file column names
+        phenoVarsAll = colnames(read.table(opt$phenofile, header=1, nrows=1, sep=','))
+        phenoVarsAll = phenoVarsAll[which(phenoVarsAll!=opt$userId)]
+
+	indVars = c(opt$userId)
+
+	## add indicator variables to pheno data
+	indVars = addIndicatorVariables(indVars, phenosToTest, phenoVarsAll)
+
+	if (length(indVars)>1) {
+		# not just user id column
+		print("Loading required related variable(s):")
+		print(indVars[2:length(indVars)])
+	}
+	else {
+		print("No required related variables.")
+	}
+
+        ## read in the right table columns
+        data = fread(opt$phenofile, select=indVars, sep=',', header=TRUE, data.table=FALSE)
+	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+	colnames(data)[1] <- "userID"
+	return(data)
+}
+
+
+addIndicatorVariables <- function(indVars, phenosToTest, phenoVarsAll) {
+
+	#####
+	##### default value related fields for data codes
+	# get list of all indicator variables from outcome info file
+
+	# get datacodes with an indicator variable
+	dataCodeIdx = which(!is.na(vl$dataCodeInfo$default_related_field) & vl$dataCodeInfo$default_related_field!="")
+
+	# whether there are any related fields that are not in the phenotype data file when they should be
+        hasIssue=FALSE
+
+	if (length(dataCodeIdx)>0) {
+	dataCodeWithRF = vl$dataCodeInfo[dataCodeIdx,]
+
+	defaultFields = c()
+
+	# check there is a field in the phenotypes data for each data code and if so then add this data codes related field to the phenosToTest list
+	if (nrow(dataCodeWithRF)>0) {
+	for (i in 1:nrow(dataCodeWithRF)) {
+		dc = dataCodeWithRF$dataCode[i]
+
+		# get all fields with this datacode
+		fieldsIdx = which(vl$phenoInfo$DATA_CODING == dc)
+		fieldIDs = vl$phenoInfo$FieldID[fieldsIdx]
+		fieldIDs = paste("x",fieldIDs,"_0_0", sep="")
+
+		# datacode related field
+		rf = dataCodeWithRF$default_related_field[i]
+		rf = paste("x",rf,"_0_0", sep="")
+
+		# if one of these field IDs are in phenotypeColumns then data code related field is needed
+		if (length(intersect(fieldIDs, phenosToTest))>0) {
+			defaultFields = append(defaultFields, rf)
+		}
+	}
+	}
+
+	defaultFields = unique(defaultFields)
+	indVars = append(indVars, defaultFields)
+
+	#####
+	##### check these required variables exist in phenotype file
+        if(length(defaultFields)>0) {
+                for (i in 1:length(defaultFields)) {
+                        if (!(defaultFields[i] %in% phenoVarsAll)) {
+                                print(paste("Required variable: Field ",defaultFields[i],"is a data code related field (default_related_field column in data code information file) but was not found in phenotype data"))
+                        	hasIssue=TRUE
+			}
+                }
+        }
+	}
+
+	#####
+	##### categorical multiple indicator fields
+
+	# get field info, for fields with cat mult indicator fields
+        fieldsIdx = which(!is.na(vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS))
+
+	if (length(fieldsIdx)>0) {
+	        fieldsWithCMIF = vl$phenoInfo[fieldsIdx,]
+	        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "NO_NAN"),]
+	        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == "ALL"),]
+	        fieldsWithCMIF = fieldsWithCMIF[-which(fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS == ""),]
+
+		if (nrow(fieldsWithCMIF)>0) {
+
+		# turn into variable format not field ID
+		fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS = paste("x",fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS,"_0_0", sep="")
+		fieldsWithCMIF$FieldID = paste("x",fieldsWithCMIF$FieldID,"_", sep="")
+		phenosToTestIds = sub("_.*", "_", phenosToTest)
+	
+		# remove rows where the field isn't in the phenotypes list
+		idxIn = which(fieldsWithCMIF$FieldID %in% phenosToTestIds)
+		fieldsWithCMIF = fieldsWithCMIF[idxIn,]
+
+		defaultFields = fieldsWithCMIF$CAT_MULT_INDICATOR_FIELDS
+		defaultFields = unique(defaultFields)
+
+	        #####
+		##### remove those that already exist in phenotypes 'part'
+		idxExists = which(defaultFields %in% indVars)
+		if (length(idxExists>0)) {
+		        defaultFields = defaultFields[-idxExists]
+		}
+	
+		indVars = unique(append(indVars, defaultFields))
+
+		#####
+		##### check these required variables exist in phenotype file
+		if(length(defaultFields)>0) {
+			for (i in 1:length(defaultFields)) {
+				if (!(defaultFields[i] %in% phenoVarsAll)) {
+					print(paste("Required variable: Field ",defaultFields[i],"is a categorical multiple indicator field (CAT_MULT_INDICATOR_FIELDS column in variable information file) but was not found in phenotype data")) 
+					hasIssue=TRUE
+				}
+			}
+		}
+
+		}
+	}
+
+	# stop script if there are missing variables
+	if (hasIssue==TRUE) {
+		print("!!! PHESANT has stopped - add required variables to phenotype file or remove relevant phenotypes (so that required variables are not needed).")
+		quit()
+	}
+	
+	return(indVars)
+
+}
+
diff --git a/WAS/loadPhenotypes.r b/WAS/loadPhenotypes.r
new file mode 100644
index 0000000..c689ebe
--- /dev/null
+++ b/WAS/loadPhenotypes.r
@@ -0,0 +1,127 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
+# documentation files (the "Software"), to deal in the Software without restriction, including without 
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions 
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+# DEALINGS IN THE SOFTWARE.
+
+##
+## load phenotypes from phenotype file
+
+loadPhenotypes <- function() {
+
+
+        ## is not running 'all' then we determine the start and end idxs of phenotypes that we test, so that we can parallelise into multiple jobs
+        if (opt$varTypeArg!="all") {
+
+		# read pheno file column names
+	        phenoVars = read.table(opt$phenofile, header=0, nrows=1, sep=',')
+	        phenoVars = phenoVars[which(phenoVars!=opt$userId)]
+
+
+		#####
+		##### calculate part start and end
+
+                partSize = ceiling(length(phenoVars)/opt$numParts);
+                partStart = (opt$partIdx-1)*partSize + 1;
+
+                if (opt$partIdx == opt$numParts) {
+                        partEnd = length(phenoVars);
+                } else {
+                        partEnd = partStart + partSize - 1;
+                }
+
+                print(paste(partStart, '-', partEnd));
+
+
+                #####
+                ##### find range of columns to read in
+
+		## This is more complicated than just reading in a column range, because we need to determine cut points
+		## such that all columns of a particular field are loaded.
+		## A field is included in a 'part' if its last column is within the part range.
+		## e.g. for part 2 of 5 parts and for 100 columns, then fields having their last column at position 21 - 40 (i.e. its column index) are included in this part.
+
+                ## user ID always included
+                phenosToTest = c(opt$userId)
+
+		currentVar=""
+		currentVarLong=""
+		currentVarShort=""
+		first=TRUE
+		phenoIdx=0
+
+		# all columns for a particular field
+		thisPhenoToTest = c()
+
+                for (var in phenoVars) {
+
+                        varx = gsub("^x", "", var);
+                        varx = gsub("_[0-9]+$", "", varx);
+                        varxShort = gsub("^x", "", var);
+                        varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
+			currentVarLong = var
+
+			if (currentVar == varx) { # same variable same timepoint
+				# add current var to pheno list
+				thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
+			}
+                        else if (currentVarShort == varxShort) { # save var, diff timepoint
+                                ## different time point of this var so skip in testing but add here because some are fixed to cat mult
+                                thisPhenoToTest = append(thisPhenoToTest, as.character(currentVarLong))
+                        } else {
+                              	## new variable so run test for previous (we have collected all the columns now)
+
+                                if (first==FALSE) {
+                                        if (phenoIdx>=partStart && phenoIdx<=partEnd) { # only start new variable processing if last column of it is within the idx range for this part
+                                       		phenosToTest = append(phenosToTest, thisPhenoToTest)
+					}
+                                }
+
+                                first=FALSE;
+
+                                ## new variable so set values
+                                currentVar = varx
+                                currentVarShort = varxShort
+				thisPhenoToTest = c(as.character(currentVarLong))
+                        }
+
+
+                        phenoIdx = phenoIdx + 1
+                }
+
+                # last variable so test association
+                if (phenoIdx>=partStart && phenoIdx<=partEnd) {
+                        #phenosToTest = append(phenosToTest, as.character(currentVarLong))
+			phenosToTest = append(phenosToTest, as.character(thisPhenoToTest))
+                }
+
+                ## read in the right table columns - a subset of the data file
+                data = fread(opt$phenofile, select=phenosToTest, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
+
+        } else {
+                # reading all data at once
+                data = fread(opt$phenofile, sep=',', header=TRUE, data.table=FALSE, na.strings=c("", "NA"))
+        }
+
+	## this is type conversion as used in the read.table function (that we used to use ((this was changed because read.table cannot read column subsets))
+	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+
+	colnames(data)[1] <- "userID"
+
+	return(data)
+
+}
+
+
diff --git a/WAS/loadTraitOfInterest.r b/WAS/loadTraitOfInterest.r
new file mode 100644
index 0000000..712362c
--- /dev/null
+++ b/WAS/loadTraitOfInterest.r
@@ -0,0 +1,54 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
+# documentation files (the "Software"), to deal in the Software without restriction, including without 
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions 
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+# DEALINGS IN THE SOFTWARE.
+
+
+##
+## load trait of interest, either from separate trait of interest file, or from phenotype file
+
+loadTraitOfInterest <- function(phenotypes) {
+
+
+if (opt$save==TRUE) { 
+	# saving not running tests so we don't have a trait of interest
+
+        # add pretend trait of interest so other code doesn't break
+        numRows = nrow(phenotypes)
+	data = cbind.data.frame(phenotypes$userID, rep(-1, numRows))
+
+} else {
+
+	# load the trait of interest specified by the user
+
+        if (is.null(opt$traitofinterestfile)) {
+                print("Extracting trait of interest from pheno file ...")
+		data = fread(opt$phenofile, select=c(opt$userId, opt$traitofinterest), sep=',', header=TRUE, data.table=FALSE)
+
+        } else {
+                print("Loading trait of interest file ...")
+                data = fread(opt$traitofinterestfile, select=c(opt$userId, opt$traitofinterest), sep=',', header=TRUE, data.table=FALSE)
+        }
+
+	data = data.frame(lapply(data,function(x) type.convert(as.character(x))))
+}
+
+colnames(data)[1] <- "userID"
+colnames(data)[2] <- "geno"
+
+return(data)
+
+}
diff --git a/WAS/makeTestDataFrame.r b/WAS/makeTestDataFrame.r
new file mode 100644
index 0000000..39963b2
--- /dev/null
+++ b/WAS/makeTestDataFrame.r
@@ -0,0 +1,30 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+## makes a smaller data frame containing the data for a particular test
+makeTestDataFrame <- function(datax, confounders, currentVarValues) {
+
+			thisdata = datax[,c("geno", "userID")]
+			thisdata = merge(thisdata, confounders, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
+			currentVarValues = cbind.data.frame(datax$userID, currentVarValues)
+                        colnames(currentVarValues)[1] = "userID"
+			thisdata = merge(thisdata, currentVarValues, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
+			return(thisdata)
+
+}
diff --git a/WAS/opt.RData b/WAS/opt.RData
new file mode 100644
index 0000000000000000000000000000000000000000..0f7822f72517a5db2b74bad89bb1f6abd3ac33e9
GIT binary patch
literal 4853
zcmV<R5(@1fiwFP!000001MNNAaui3F*}5YE5@5iXi%Jp*A!tc}z!)QAY%u0x86z-Y
zz;da(B(-UE_jFYY;oC5Gk7wtyp8L$kzV7ZnY)s5I?8ff%KJLRlY=mbXCML$3%&Jpe
zr%q>PSGCkaifBa~Wq0Mtb3Z4us<NwYUc7sv^KK_&tcf+Z^8d~JV@nhNmtZYy6K^~7
z1!uZo7HzA@63u%^Z)?dm=PdrA2OT_cTO2%V6y~j5e!-ebnYodoai0^)<a72zLZTgx
z^N%|?Y?E;5wCD3hXL>ek&Jk8k$ytKCDVxvD$xyqJNmroROx7Y}+RT&?fedK1oqUm^
zwm44;RNqahd9%n#a0?OSv}HRKStUe!+L|>>SqICNn7n=cdV*4&x3UF7b?*&9Rmf(f
zO~<q?#}F)xoVifJXTfsJMY9-HiH?++&CZyqN8As~q;HGgH*FT@d=JtHYE5NLn|C&0
zcH7@}y?(<DEblXwv%u+A23H3U7dIUF=dWET$4kp{MRd%0J*~87XENy&&5Xkxk>e_J
zHw7MPqsTZA+G!`XTL(qhp=7jaIWTD#9X4=F3^VexhCfi{+0E5N+Jg9OT@HJxdpW9O
zdMKNCo>)|PVtX>lgE3>yWUXwbj&A6YbJ6->oQ?@hZw&F8RLZ7}8Ow0Y8N;@?DNM&o
zAK6GwNh_SvlT3<WOXbs<++5vUvQ@@I?~w2axjRz%g#~kiI>b^qWLrhOs*6XurA)Lg
zxkQE--gIt&KkR~)%FI47oQi7Z=hlX+WJQIKI+97Q*u_k-F0N_Aw6z|&=3y!vk<JvY
z6c^-)@pzs)!r;h!(P6u(WE(*ixx%J5l3z+{zmzzqE9v_91=HT7?XB?Cyw|Jnl;||B
zT;%mETUyBR#E~SCnM-@iY<KDB`S3n0BqxkR@af@3a-*YgV;e7l>blqII&s=5mQ<c>
z&*tYce7&mhV~Z22cdXOxE303bWj9OQA;Jf6nq%1fJjckxK`~MBNy)OjuXbr>HlIyf
z#Ul~i!`6{EY=t+rB$Kw4volU+G2=X`ms7ebctrBenD7l68g|jfa)7CDz$Sh=YZjd=
z>7`oATDzzqbIPLD9wM!sZH{t{9bS;q){>}9K`|QQ^($>Sd4tokvU~+?sa9fPZtKG8
zGYYGBCX>0+f>7D6gV{U6w6%6mPL>vC_yiHMxiF+1!}5gTx-k8R>VTiKa+Z@x)ns{n
zp=l414i%Y%!T~ocPtY^A0d$TT*2vt5VH#7DH;jDIKWr2;_9I?}xSTPj>9BC?!bu;g
zCu!SURGluae3A}`N16!kPH}wK`0lA-kSVrE57Rq;<XwCvY&i3#H+S=sOT)D7d@5tQ
z7bZ8@gppFXhM&`?^10c3iI?hP{m$uSJhX-Ex!21hHs+PBjKbhMJ%eY`RxV|^HDb}s
z);4YEvtoymO{7A0>CU|6W<Ws3DAPe_gj|4BDeLA@^xCDCZC&{8k!E@8@RKXEn9kTW
zWlGhREhQhxSCi672hnop{drOK7A!M2G3K5j+D>}ncnqgi3b!ttHLsp@yjvo5nsfr<
zktUtqsrme4KAmvKh>_e8wzwbWdcs=DIL~<QaPb0Hr!6~G%oHT|zZgO7unHM&sS>>-
zqY`p67P40H3dMYi+wX#Vd#Od-o%|c1?&Odbsmj?N1-Cp0uDK%Og@eG~&f%oUR`}k=
z;h#Z_b`HCQ7(xd)Cg^+sbgl|DJRZ!1V?JIt!P^GBFy3yEX+QAkhhw6BUW5t<9`9@r
z^uhG@a@Z%~dw}2>9Qq`90|f67@WFW72@XF`V8a9l+w>TRB=cfZ*mM+ZI>zBkqVEaN
z^Q1!G<AnDq4yPr0oFRB;IlN5pb`YFbIGnGd>uVfdM>ry0ehz;0eRm0TxeV=_Ap090
zev|Oo$6%X(A-1{-{<{V?679bO-RnSa66n2!c)ktxdIx+aY<3gyZh)Ns3Aj^0=Qi};
z0^7U`JnwM$4@6hs-jmS1i|G3LSM$3VbDzWifejvT_+K*aA^7wf$n`$(zYpWz<M0=v
z%LgFSH1hL3z!f_2Q}aYS(J3v_Ddp*u=!mJ)EJx`Bo#y!G{bU@Ug;)mZ^dZPM3*&hy
zON<bmvLMqV7_$IlEYOLU-o!bgQ$eCr9(0;OIte+y!H-=ga`KYv)&)QQc>PQ~VA12R
zB;oG>f1Bv{82T4Mza`-LgyR;8ULQ;7euU^^y*}Z`J|%j63S)e|KI2_}?TMn-QwjfX
z0{?#_dVL%EzXf`I2Y7y$<Nhnr>-!SA-$Qic#NZD&${)c7pYzXuAme@r@(3H;^YC4{
zzThbT6~h+)Ad&e;AoG6{U;LOK`yV35Pnb71`o8!n@A`8L{{JZ9|1*z&(`Mp}pF{tb
zp!+X?=Px;K7xBffC3Js<=tiyaDu4NFir;_?zvXm?fZp#ww|@dUzlZUHXE%Hn>kqLG
z5$hPng5^%)V^2u8<4@2jLu|^-63m1Ob7F#EHmR5&M2NVfnJ>~5U2xaL{rj5WpmUju
ze#HN|Wg=fQQ^<!E!5!`5W=mS!8x3{$Ae5ewkx?P)oeNW=0((><li=N=;N5)gTv9In
zZY)0kd!Bp`&QkmvxKd+bVRs;5;r6Ub-qx@%`!MsGg!J!MRuxi>`6uALnsVV?b5e%w
zOzybWf@wQenCf221?#mnxJEdVvKIlf90#^lkPPzwzD6yUu49$WA=%7S!I#Q7jH1dP
zo)qKl(R|5C<rl1ECO4ZOaa^TkT5MWnEh_f^RMp|MS_eTZ=@Coji@Y(jpwE)5KnZ<1
zS6LtVwJxbry4om2>k5molh(-1;r?B;e@l%jzy)@#%uLd+tLIx++sx7pl~OZyhglee
zq_z-jww4NcyHvD-GSIgy!NcZ|$<VXP>R0a3s$fe+?C@1lh`4Q)MGW7zmx`wgCLz-H
zRTe3@ho%(==Ob;gcsrPUy4eiZtKg<;Xv}l9ieA1wE)`{Gjbzci->NF>(t8)vm#W<g
zQ?+O8kz6U8CFsw`Q>WKY$+%+gQHa%9DDupjhCTL(Q({@dK~Q=@L8--d@`OexD+kJJ
z8pABkl@_d=V=R<x$K8EbDipFAvHvc#{Ad;X?MHx?EQMt9?PYog?Ni9_UIoF3`xS^?
zLWdD+NzB7!$N{!;Z&ICfUhNI{kJ5gFX(xn`e-0Evzp3zUPL_Q}&#L-Xps7lNZX=zy
zY=aA7xYlLSW+9F<=HuvsL}g&1PCwHJ%>a|fLet<N!8{noJgKPUyK1cPhg62FHp3hW
z^Epm691Qm-Y5(DPat`W3TPEL775R>Y`%lpR;X2BvOTnW|pMu9&^b{Pc-0>QuqGTCJ
zlQOvi8$9Er#A=K%eKL+Rb&84pF-`xoOu^$!AL1vNJ_S$4lQTH+q?=UA(y3a^b-aqy
zr<gu5Psfvbe6<quMW#>8Gc0;y9t<pqGBRvdnv5^4K<{;wjAxlX8DD1lpgkAQ4E?n%
zaMAG*LQlDjonY$l5Pv-kT}!I?VO*6JY$R)AX<n&f<?~FRgs(Dv5?){{_X3q$mwc~j
ztSkCoXVH`IaQUZG!u(>pqU!?c6PkY)SD^AbT%lfK`UrW0>GSY%JaY`JRUW>{qUYfd
z`90)nXW^A9HonUAS$K`<v+z1wxeut^x-5K4V_VTb$)abWH}84I_YyGd44Qs#uRzyz
zn0_~yJ~ZA5_ph5Z=S`+B3sdprIJ}aOtff5MV$sXP5&GNQ)u<0Lp>8vMFucq3M!&;i
z8GZj+)#AG`jBUWrZdbbo->YKV_n5v0-)H(7{D7_8_Ec_N4SuMxspx;7MX$ko{9i8z
zziRfpK#TA63Y1)D#TThDTx0P(DpN1U_+xQSNrm}8VEPy~+1gABakrw(g!|F10&$ao
zsbgbe%3Kudg76QH-0S4VEnbOjaZL;T$jEq{$!6V9DwcA65v-d4?Y{yc`1d`_pefe&
zE7bZzVnJmaV-j#@RE%ksj8W`Q5$MYOGY3QaXF|3db`}c5tTO+aFw&x#E%_H}BFKWw
z<kCViosN?G{QDVMB3EHYEfs8%5yl~#b*LaOlJ5s}f>dt5Nx+C0s>bJF_;Dfj+lV`l
zVxxg@y1)CLd3bKz;5cgcaGPEHrvV=Nv5WWtK{z(8r2;pyEq;V|Mf|{6Vdcs#I}`)B
z_4(4Y#q&fjwv(?7vPy4EyKCuD+2ifo5!A?TqiWjq>!P5xT|)_=8p9@akb+vW_T59m
zs$}<5HLDKj^jY~nrUY<^Vc|MUBy2=>R8_OlK`N1Ko=yN>3{4}^74mNJjvb;iCxiaY
z&~`!F&Ei$PZdSjlNB1WQep}<&e~{Lp?y%hlW7=IykIIiXC4I#z_DNNf{je^YWgDIn
zFeZjRv3UqR$Y!c)dJL&z)&B$8hhoYeiLQ`$dpvoMM9kSEG3AX)ci4|Du?5S|#dcQt
z`#;9YD`PiTzY6+Y-U&Dl^7FI(sW%4mM1QezHLhGc!*U&~mt2C^ZfKu}c9%zY#JB4n
z7OBQ1F#c-QxCC4e5OiN)l{F*cmZ=1sVkh8ohak7V2DcmtyQR0DZrKOzUTAxfygeZA
zZs64qvhD|W^n=WzZDqkOs6phm4xnqWU?ES<TYbk<TnqNCkp-zHl4~;*1z7iCk$Rq1
z$`SJ~w&Mq&JqYa}w1=TR1g+Tb6W9F1EM)${dIs<iwEfT?W%UW*V@w^uwd@y)Yt!T!
z*_7(GfkJu|+A(NPKzq`YZ``lR$DtKFX5zZxGz+<5yq=*w4($lEFS7cC_8F!Q?Wj9$
z;@a`0HL@esX9I=v%g~;K_AIm$5}D)ej1g2`W$~SMfrXsbU(et_5A7?^zQ*bk{I4^0
z@W<Hk64#U$*T|Gqr41DSZ$Nt)+Dp*BDUmtOjub&d>=23Tvg<75vVnR=|5a$OK>HS}
zPxMbRb@WHsX%g3lZ?BOJsV*BR?B9X*CbUz~-hlR&MD|!aE(DQxp}hm`ZD{XGh)3CB
zAjXP2{&C~s0Sm>${(9!rd(hs4_93fJPQB06Ii=k>5Z8FqYh*mC#0E+x6WR|vc~1GK
z05cMqSA6pv6m2^Cv}^zunh*%zwX)!*tPkw~n-KW72D$*YBIrWU4d5%Z13?dhoiX7B
zfc*$wKyUz{7oZ<OFM<Jpg8+vS97G^&GX!uH!4Lx9rpFM7^P^FKF$BkB!Z^T-2*wed
z0eA`E9D<h+2ze#|UPUm0K<Ih_;35M5*cgE~s(>FyR}lENx{5#?zlPu~4cv$TKAl?%
zco*Ofg1Zp_<6mRd{eB<80|frKhX};+_W}Gd?;)60fC=E^%^*lcfEfbSbecslhrk~<
zk3by%5WpW}A$X*K1ppr}haevTas<ACz?ZX#KpeLb=;F_b06v{X1uOwPLGV!o!1#jx
zCkTAKK1Cpoe}+J(*Eb`8Pv=_-_zu8#5qvKKVEoTm^%(pCg3l57<9>+XbA<Z^0-Y^>
z6al^<@M8qN96v!Ij{g*aj{nahfKTU31^fcwmk53p0WiLxAEn02{3Yg+-vIm;!S6MI
z@Zv101g=<bHi@-GHyLyKv(bzBlzWa~k6z9=^QD>58*g5kx-fYwyxC8>amuGM9+e8&
zh=6Il7J)|f4Lz9RWPFNMHn3po_@+KSel!ACv>y-;4ffIDJMgm4i3rCj4(8NsX0zn^
zxvk=1>NenY!$I^gEzJ{kAp9zxGzEL^$g`Ui&!zzrz@yK?27#w!I00VhxiRFqE&ek#
z#Pf+jYT4tKd_w2})-ck2q6a+VgDZ85NBxE3T#!v=n{|1wW17x-XH?$eUH4q-S;cdy
zE8jS}$`9M8?-JEMXTSO@LN~_a_8YJCTrZwjRCuECO3%hCJsYp|^c`X;9MX8D=lbzc
zQQ@P;D?OjBUrK7fG+ybsaop%A+}L=f=d<vJt?)+Um7Z(l08`<B#w$JRY4sU})f=z$
zth?zyR0n+Hm7eRxNgt^v>BcKPpRH?B3fDAV>DhRtr~C}Q@k-CeD?O_Sx3({w9A0KE
z{_7jX_n`h4yhq#rl^k!&`46Z6D^O}f4`^va50Pm@PZw(o9xnCrGK?d~&n*X%`F)t~
zKJ?jt;7s|)s^w{q-a&GEu^qgdahS;|M7aD^BRpi7pX21jRP{LH%o}{XcXhxP*9pQ`
z1kS2O?8WL4HFriZ6E=naoU8=F8I?(YUN};{H&tAl${C(h11u5g<;|<Y|6^ng5JiP^
ztEK`KPp(8*(G{r>)U8iGksNhj3g|mYbc!8^iSz%@7$RLU=#}uGY5^b1M5jW^+hq&Q
zSk7aM+vd1ioxqNnTq;|#GyeLnnqi`5;RF%Xz@bb9P9y^>2qF4t!IB74HB+~RGF6<S
bX(VXM&mu?A-W@B?ZYlg7WRMq4+LHhPhZ&T^

literal 0
HcmV?d00001

diff --git a/WAS/phenomeScan.r b/WAS/phenomeScan.r
new file mode 100644
index 0000000..9aa9c64
--- /dev/null
+++ b/WAS/phenomeScan.r
@@ -0,0 +1,161 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
+# documentation files (the "Software"), to deal in the Software without restriction, including without 
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions 
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+# DEALINGS IN THE SOFTWARE.
+
+##
+## main phenome scan file
+
+library("optparse")
+
+option_list = list(
+  make_option(c("-f", "--phenofile"), type="character", default=NULL, help="Phenotype dataset file name", metavar="character"),
+  make_option(c("-g", "--traitofinterestfile"), type="character", default=NULL, help="Trait of interest dataset file name", metavar="character"),
+  make_option(c("-v", "--variablelistfile"), type="character", default=NULL, help="variablelistfile file name (should be tab separated)", metavar="character"),
+  make_option(c("-d", "--datacodingfile"), type="character", default=NULL, help="datacodingfile file name (should be comma separated)", metavar="character"),
+  make_option(c("-e", "--traitofinterest"), type="character", default=NULL, help="traitofinterest option should specify trait of interest variable name", metavar="character"),
+  make_option(c("-r", "--resDir"), type="character", default=NULL, help="resDir option should specify directory where results files should be stored", metavar="character"),
+  make_option(c("-u", "--userId"), type="character", default="userId", help="userId option should specify user ID column in trait of interest and phenotype files [default= %default]", metavar="character"),
+  make_option(c("-t", "--test"), action="store_true", default=FALSE, help="Run test phenome scan on test data (see test subfolder) [default= %default]"),
+  make_option(c("-s", "--sensitivity"), action="store_true", default=FALSE, help="Run sensitivity phenome scan [default= %default]"),
+  make_option(c("-a", "--partIdx"), type="integer", default=NULL, help="Part index of phenotype (used to parellise)"),
+  make_option(c("-b", "--numParts"), type="integer", default=NULL, help="Number of phenotype parts (used to parellise)"),
+  make_option(c("-j", "--genetic"), action="store", default=TRUE, help="Trait of interest is genetic, e.g. a SNP or genetic risk score [default= %default]"),
+  make_option(c("-z", "--save"), action="store_true", default=FALSE, help="Save generated phenotypes to a file rather than testing associations [default= %default]"),
+  make_option(c("-c", "--confounderfile"), type="character", default=NULL, help="Confounder file name", metavar="character"),
+  make_option(c("-i", "--confidenceintervals"), type="logical", default=TRUE, help="Whether confidence intervals should be calculated [default= %default]"),
+  make_option(c("-k", "--standardise"), action="store", default=TRUE, help="Trait of interest is standardised to have mean=0 and std=1 [default= %default]")
+);
+opt_parser = OptionParser(option_list=option_list);
+opt = parse_args(opt_parser);
+
+source("processArgs.r")
+processArgs();
+
+save.image(file = "opt.RData")
+source("initFunctions.r")
+loadSource();
+
+## load the files we write to and use
+counters=initCounters();
+if (opt$save==FALSE) {
+	initResultsFiles();
+}
+vl=initVariableLists();
+
+## load data
+d <- loadData()
+data=d$datax
+confounders=d$confounders
+indicatorFields=d$inds
+
+numPreceedingCols = ncol(confounders)-1+2; # confounders,minus id column, plus trait of interest and user ID
+phenoStartIdx = numPreceedingCols+1;
+
+print("LOADING DONE")
+
+phenoVars=colnames(data);
+# remove user id and age and sex columns
+phenoVars = phenoVars[-c(1,2)]; # first and second columns are the id and snpScore, respectively, as determined in loadData.r
+
+currentVar="";
+currentVarShort="";
+first=TRUE;
+
+if (opt$save == TRUE) {
+
+	derivedBinary <- data.frame(userID=data$userID)
+        derivedCont <- data.frame(userID=data$userID)
+        derivedCatOrd <- data.frame(userID=data$userID)
+        derivedCatUnord <- data.frame(userID=data$userID)
+
+	resLogFile = paste(opt$resDir,"data-log-",opt$varTypeArg,".txt",sep="")
+        sink(resLogFile)
+} else {
+	modelFitLogFile = paste(opt$resDir,"modelfit-log-",opt$varTypeArg,".txt",sep="")
+	sink(modelFitLogFile)
+	sink()
+
+	resLogFile = paste(opt$resDir,"results-log-",opt$varTypeArg,".txt",sep="")
+	sink(resLogFile)
+}
+
+
+phenoIdx=0; # zero because then the idx is the position of the previous variable, i.e. the var in currentVar
+for (var in phenoVars) { 
+
+
+	sink()
+#	print(var)
+	sink(resLogFile, append=TRUE)
+
+	varx = gsub("^x", "", var);
+        varx = gsub("_[0-9]+$", "", varx);
+	varxShort = gsub("^x", "", var);
+        varxShort = gsub("_[0-9]+_[0-9]+$", "", varxShort);
+
+	## test this variable
+	if (currentVar == varx) {
+		thisCol = data[,eval(var)]
+		thisCol = replaceNaN(thisCol)
+		currentVarValues = cbind.data.frame(currentVarValues, thisCol);
+	}
+	else if (currentVarShort == varxShort) {
+		## different time point of this var so skip
+	}
+	else {
+		## new variable so run test for previous (we have collected all the columns now)
+		if (first==FALSE) {
+
+			thisdata = makeTestDataFrame(data, confounders, currentVarValues)
+			testAssociations(currentVar, currentVarShort, thisdata)
+		}
+		
+		first=FALSE;
+		
+		## new variable so set values
+		currentVar = varx;
+		currentVarShort = varxShort;
+
+		currentVarValues = data[,eval(var)]
+		currentVarValues = replaceNaN(currentVarValues)
+	}
+
+	phenoIdx = phenoIdx + 1;
+}
+
+if (phenoIdx>0){
+	# last variable so test association
+	thisdata = makeTestDataFrame(data, confounders, currentVarValues)
+	testAssociations(currentVar, currentVarShort, thisdata)
+}
+
+sink()
+
+# save counters of each path in variable flow
+saveCounts()
+
+if (opt$save == TRUE) {
+	write.table(derivedBinary, file=paste(opt$resDir,"data-binary-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+	write.table(derivedCont, file=paste(opt$resDir,"data-cont-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+	write.table(derivedCatOrd, file=paste(opt$resDir,"data-catord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+	write.table(derivedCatUnord, file=paste(opt$resDir,"data-catunord-",opt$varTypeArg,".txt", sep=""), append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=TRUE);
+}
+
+warnings()
+
+
+
diff --git a/WAS/processArgs.r b/WAS/processArgs.r
new file mode 100644
index 0000000..2b9a438
--- /dev/null
+++ b/WAS/processArgs.r
@@ -0,0 +1,132 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Parse the arguments input by the user
+# if argument 'test' is used then run test phenome scan
+processArgs <- function() {
+
+if (opt$test==TRUE) {
+
+	# set up the test phenome scan settings
+	datadir='../testWAS/data/';
+	opt$resDir <<- '../testWAS/results/';
+        opt$userId <<- 'userId';
+	opt$phenofile <<-  paste(datadir,'phenotypes.csv', sep="");
+        opt$variablelistfile <<- '../testWAS/variable-lists/outcome-info.tsv';
+        opt$datacodingfile <<- '../testWAS/variable-lists/data-coding-ordinal-info.txt';
+	opt$confidenceintervals <<- TRUE	
+
+	if(opt$save == FALSE) {
+		opt$traitofinterestfile <<- paste(datadir,'exposure.csv', sep="");
+		opt$traitofinterest <<- 'exposure';
+		opt$sensitivity <<- FALSE;
+		opt$genetic <<- TRUE;
+	}
+
+	processParts(opt$partIdx, opt$numParts);	
+}
+else {
+
+	## check arguments are supplied correctly
+
+	if (is.null(opt$phenofile)){
+	  print_help(opt_parser)
+	  stop("phenofile argument must be supplied", call.=FALSE)
+	}
+	else if (!file.exists(opt$phenofile)) {
+                stop(paste("phenotype data file phenofile=", opt$phenofile, " does not exist", sep=""), call.=FALSE)
+        }
+
+#	if (is.null(opt$traitofinterestfile)){
+#	  print_help(opt_parser)
+#	  stop("traitofinterestfile argument must be supplied", call.=FALSE)
+#	}
+	if (opt$save==FALSE && !is.null(opt$traitofinterestfile) && !file.exists(opt$traitofinterestfile)) {
+               stop(paste("trait of interest data file traitofinterestfile=", opt$traitofinterestfile, " does not exist", sep=""), call.=FALSE)
+        }
+
+	if (opt$save==FALSE && !is.null(opt$confounderfile) && !file.exists(opt$confounderfile)) {
+               stop(paste("confounder data file confounderfile=", opt$confounderfile, " does not exist", sep=""), call.=FALSE)
+        }
+
+	if (is.null(opt$variablelistfile)){
+	  print_help(opt_parser)
+	  stop("variablelistfile argument must be supplied", call.=FALSE)
+	}
+	else if (!file.exists(opt$variablelistfile)) {
+                stop(paste("variable listing file variablelistfile=", opt$variablelistfile, " does not exist", sep=""), call.=FALSE)
+        }
+
+	if (is.null(opt$datacodingfile)){
+	  print_help(opt_parser)
+	  stop("datacodingfile argument must be supplied", call.=FALSE)
+	}
+	else if (!file.exists(opt$datacodingfile)) {
+                stop(paste("data coding file datacodingfile=", opt$datacodingfile, " does not exist", sep=""), call.=FALSE)
+        }
+
+	if (opt$save==FALSE && is.null(opt$traitofinterest)){
+	  print_help(opt_parser)
+	  stop("traitofinterest argument must be supplied", call.=FALSE)
+	}
+
+	if (is.null(opt$resDir)){
+	  print_help(opt_parser)
+	  stop("resDir argument must be supplied", call.=FALSE)
+	}
+	else if (!file.exists(opt$resDir)) {
+		stop(paste("results directory resDir=", opt$resDir, " does not exist", sep=""), call.=FALSE)
+	}
+
+
+	processParts(opt$partIdx, opt$numParts);
+}
+	
+
+if (opt$save==TRUE) {
+	print("Saving phenotypes to file. Tests of association will not run!")
+}
+
+}
+
+# Parse the 'part' arguments and check they are valid
+processParts <- function(pIdx, nParts) {
+
+	if (is.null(pIdx) && is.null(nParts)) {
+                opt$varTypeArg <<- "all";
+		print(paste("Running with all traits in phenotype file:", opt$phenofile));
+        }
+	else if (is.null(pIdx)) {
+                print_help(opt_parser)
+                stop("pIdx argument must be supplied when nParts argument is supplied", call.=FALSE)
+        }
+	else if (is.null(nParts)) {
+                print_help(opt_parser)
+                stop("nParts argument must be supplied when pIdx argument is supplied", call.=FALSE)
+        }
+	else if (pIdx<1 || pIdx>nParts) {
+		print_help(opt_parser)
+                stop("pIdx arguments must be between 1 and nParts inclusive", call.=FALSE)
+	}
+	else {
+                opt$varTypeArg <<- paste(pIdx, "-", nParts, sep="");
+        	print(paste("Running with part",pIdx,"of",nParts," in phenotype file:", opt$phenofile));
+	}
+
+}
diff --git a/WAS/reassignValue.r b/WAS/reassignValue.r
new file mode 100644
index 0000000..ed9701c
--- /dev/null
+++ b/WAS/reassignValue.r
@@ -0,0 +1,73 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Reassigns values as specified in data coding info file
+reassignValue <- function(pheno, varName) {
+
+	# get data code info - whether this data code is ordinal or not and any reordering and resassignments
+        dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
+        dataCode = dataPheno$DATA_CODING;
+
+	# not all variables will have a data code info row
+        dataCodeRow = which(vl$dataCodeInfo$dataCode==dataCode);
+
+        if (length(dataCodeRow)==0) {
+                return(pheno);
+        }
+	else if (length(dataCodeRow)>1) {
+                cat("WARNING: >1 ROWS IN DATA CODE INFO FILE || ")
+		return(pheno);
+        }
+
+	dataDataCode = vl$dataCodeInfo[dataCodeRow,];
+        reassignments = as.character(dataDataCode$reassignments);
+	
+	return(reassignValue2(pheno, reassignments))
+	
+}
+
+# Reassigns values in pheno, as specified in resassignments argument
+reassignValue2 <- function(pheno, reassignments) {
+
+	# can be NA if row not included in data coding info file
+	if (!is.na(reassignments) && nchar(reassignments)>0) {
+
+		reassignParts = unlist(strsplit(reassignments,"\\|"));
+		cat(paste("reassignments: ", reassignments, " || ", sep=""));
+
+		# do each reassignment
+		for(i in reassignParts) {
+			reassignParts = unlist(strsplit(i,"="));
+
+			# matrix version
+			idx = which(pheno==reassignParts[1],arr.ind=TRUE)
+			pheno[idx]=strtoi(reassignParts[2]);
+		}
+		
+		## see if type has changed (this happens for field 216 (X changed to -1))
+		## as.numeric will set non numeric to NA so we know if it's ok to do this by seeing if there are extra NA's after the conversion
+		pNum = as.numeric(unlist(pheno))
+		isNum = length(which(is.na(pheno), arr.ind=TRUE))==length(which(is.na(pNum), arr.ind=TRUE))
+		if (isNum) {
+			pheno = pNum
+		}
+	}
+
+	return(pheno)
+}
diff --git a/WAS/replaceMissingCodes.r b/WAS/replaceMissingCodes.r
new file mode 100644
index 0000000..f792374
--- /dev/null
+++ b/WAS/replaceMissingCodes.r
@@ -0,0 +1,37 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Replace negative values with NA as these are assumed to be missing
+replaceMissingCodes <- function(pheno) {
+
+	phenoReplaced = pheno;
+
+	uniqVar = unique(na.omit(phenoReplaced))
+
+	# variable values <0 are `missing' codes
+        for (u in uniqVar) {
+		if (u<0) {
+		    	idxU = which(phenoReplaced==u)
+                       	phenoReplaced[idxU]=NA
+                }
+        }
+
+	return(phenoReplaced)
+
+}
diff --git a/WAS/replaceNaN.r b/WAS/replaceNaN.r
new file mode 100644
index 0000000..dc3dfd4
--- /dev/null
+++ b/WAS/replaceNaN.r
@@ -0,0 +1,47 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Replace NaN and empty values with NA in pheno
+replaceNaN <- function(pheno) {
+
+	if (is.factor(pheno)) {
+		
+		phenoReplaced = pheno
+
+		nanStr  = which(phenoReplaced=="NaN")
+        	phenoReplaced[nanStr]=NA 
+		
+		emptyx  = which(phenoReplaced=="")
+       		phenoReplaced[emptyx]=NA
+
+	}
+	else {
+
+		phenoReplaced = pheno
+		nanx  = which(is.nan(phenoReplaced))
+		phenoReplaced[nanx] = NA;
+
+		emptyStr  = which(phenoReplaced=="")	
+		phenoReplaced[emptyStr] = NA;
+	    
+	}
+
+	return(phenoReplaced)
+
+}
diff --git a/WAS/saveCounts.r b/WAS/saveCounts.r
new file mode 100644
index 0000000..5a72916
--- /dev/null
+++ b/WAS/saveCounts.r
@@ -0,0 +1,31 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Saves the counters stored in count variables, to a file in results directory
+saveCounts <- function() {
+
+	countFile = paste(opt$resDir,"variable-flow-counts-",opt$varTypeArg,".txt",sep="")
+
+	# sort on counter name
+	sortIdx = order(as.character(counters[,"name"]))
+	counters <<- counters[sortIdx,]
+
+	write.table(counters, file=countFile, sep=",", quote=FALSE, row.names=FALSE)
+}
+
diff --git a/WAS/storeNewVar.r b/WAS/storeNewVar.r
new file mode 100644
index 0000000..20c4d5a
--- /dev/null
+++ b/WAS/storeNewVar.r
@@ -0,0 +1,21 @@
+
+
+storeNewVar <- function(userIDData, phenoData, varName, type) {
+
+        # add pheno to dataframe
+	newdata = data.frame(userID=userIDData, newvar=phenoData)
+        names(newdata)[names(newdata)=="newvar"] = varName
+
+	if (type == "bin") {
+	        derivedBinary <<- merge(derivedBinary, newdata, by="userID", all=TRUE);
+	} else if (type == "cont") {
+		derivedCont <<- merge(derivedCont, newdata, by="userID", all=TRUE);
+	} else if (type == "catOrd") {
+		derivedCatOrd <<- merge(derivedCatOrd, newdata, by="userID", all=TRUE);
+	} else if (type == "catUnord") {
+		derivedCatUnord <<- merge(derivedCatUnord, newdata, by="userID", all=TRUE);
+	}
+
+	#write.table(phenoFactor, file=paste(opt$resDir, "data-binary-", varName, ".csv", sep=""), row.names=FALSE, col.names=FALSE, na="", quote=FALSE);
+
+}
diff --git a/WAS/testAssociations.r b/WAS/testAssociations.r
new file mode 100644
index 0000000..71592ab
--- /dev/null
+++ b/WAS/testAssociations.r
@@ -0,0 +1,141 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Tests the association of a field, determined by its field type
+testAssociations <- function(currentVar, currentVarShort, thisdata) {
+
+	## call file for variable type
+
+	tryCatch({
+
+		# retrieve whether phenotype is excluded etc
+		idx=which(vl$phenoInfo$FieldID==currentVarShort);
+
+		# check if variable info is found for this field
+		if (length(idx)==0) {
+			cat(paste(currentVar, " || Variable could not be found in pheno info file. \n", sep=""))			
+			incrementCounter("notinphenofile")
+		}
+		else {
+
+		# get info from variable info file
+		excluded = vl$phenoInfo$EXCLUDED[idx]
+		catSinToMult = vl$phenoInfo$CAT_SINGLE_TO_CAT_MULT[idx]
+		fieldType = vl$phenoInfo$ValueType[idx]
+		isExposure = getIsExposure(currentVarShort) #vl$phenoInfo$EXPOSURE_PHENOTYPE[idx]
+
+		if (fieldType=="Integer") {		
+
+			#### INTEGER
+			cat(currentVar, "|| ", sep="")
+			
+			if (excluded!="") {
+				cat(paste("Excluded integer: ", excluded, " || ", sep=""))
+				incrementCounter("excluded.int")
+			}
+			else {
+				incrementCounter("start.int")
+				if (isExposure==TRUE) {
+					incrementCounter("start.exposure.int")
+				}
+
+			    	testInteger(currentVarShort, "INTEGER", thisdata);
+			}
+			cat("\n");
+	    	}
+		else if (fieldType=="Continuous") {
+
+			#### CONTINUOUS
+			cat(currentVar, "|| ", sep="")
+
+		    	if (excluded!="") {
+				cat(paste("Excluded continuous: ", excluded, " || ", sep=""))
+				incrementCounter("excluded.cont")
+		    	}
+			else {
+				incrementCounter("start.cont")
+				if (isExposure==TRUE) {
+                                        incrementCounter("start.exposure.cont")
+                                }
+				testContinuous(currentVarShort, "CONTINUOUS", thisdata);
+	        	}
+	        	cat("\n");
+		}
+	    	else if (fieldType=="Categorical single" && catSinToMult=="") {
+
+			#### CAT SINGLE
+			cat(currentVar, "|| ", sep="")
+	
+	    		if (excluded!="") {
+				cat(paste("Excluded cat-single: ", excluded, " || ", sep=""))
+				incrementCounter("excluded.catSin")
+			}
+			else {
+				incrementCounter("start.catSin")
+				if (isExposure==TRUE) {
+                                        incrementCounter("start.exposure.catSin")
+                                }
+			    	testCategoricalSingle(currentVarShort, "CAT-SIN", thisdata);
+			}
+			cat("\n");
+	  	}
+		else if (fieldType=="Categorical multiple" || catSinToMult!="") {
+		
+			#### CAT MULTIPLE
+			cat(currentVar, "|| ", sep="")
+
+			if (excluded!="") {
+				cat(paste("Excluded cat-multiple: ", excluded, " || ", sep=""))
+				incrementCounter("excluded.catMul")
+			}
+			else {
+
+				if (catSinToMult!="") {
+					cat("cat-single to cat-multiple || ", sep="")
+					incrementCounter("catSinToCatMul")
+				}
+
+				incrementCounter("start.catMul")	
+				if (isExposure==TRUE) {
+                                        incrementCounter("start.exposure.catMul")
+                                }
+				else {
+					# get number of cat mult values denoting trait of interest
+	                                numVals = getNumValuesCatMultExposure(currentVarShort)
+					if (numVals>0) {
+		                                addToCounts("start.exposure.catMulvalues", numVals)
+					}
+				}
+		        	testCategoricalMultiple(currentVarShort, "CAT-MUL", thisdata);
+			}
+			cat("\n");
+		}
+		else {
+	        	#cat("VAR MISSING ", currentVarShort, "\n", sep="");
+	    	}
+		}
+
+	}, error = function(e) {
+		print(paste("ERROR:", currentVar,e))
+	})
+
+}
+
+
+
diff --git a/WAS/testCatMultiple.r b/WAS/testCatMultiple.r
new file mode 100644
index 0000000..8aaf974
--- /dev/null
+++ b/WAS/testCatMultiple.r
@@ -0,0 +1,166 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Performs preprocessing of categorical (multiple) fields, namely:
+# 1) Reassigning values as specified in data coding file
+# 2) Generating binary variable for each category in field, restricting to correct set of participants as specified
+# in CAT_MULT_INDICATOR_FIELDS field of variable info file (either NO_NAN, ALL or a field ID)
+# 3) Checking derived variable has at least 10 cases in each group
+# 4) Calling binaryLogisticRegression function for this derived binary variable
+testCategoricalMultiple <- function(varName, varType, thisdata) {
+	cat("CAT-MULTIPLE || ");
+
+	pheno = thisdata[,phenoStartIdx:ncol(thisdata), drop=FALSE]
+	pheno = reassignValue(pheno, varName)
+
+	## get unique values from all columns of this variable
+	uniqueValues = unique(na.omit(pheno[,1]));
+	numCols = ncol(pheno);
+	numRows = nrow(pheno);
+	if (numCols>1) {
+		for (num in 2:numCols) {
+			u = unique(na.omit(pheno[,num]))
+			uniqueValues = union(uniqueValues,u);
+		}
+	}
+
+	## for each value create a binary variable and test this
+	for (variableVal in uniqueValues) {
+
+		## numeric negative values we assume are missing - check this
+		if(is.numeric(variableVal) & variableVal<0) {
+			cat("SKIP_val:", variableVal," < 0", sep="");
+			next;
+		}
+	
+		# make variable for this value
+		idxForVar = which(pheno == variableVal, arr.ind=TRUE)
+		idxsTrue = idxForVar[,"row"]
+
+		cat(" CAT-MUL-BINARY-VAR ", variableVal, " || ", sep="");
+		incrementCounter("catMul.binary")
+		
+		# make zero vector and set 1s for those with this variable value
+		varBinary = rep.int(0,numRows);
+		varBinary[idxsTrue] = 1;
+		varBinaryFactor = factor(varBinary)
+
+		## data for this new binary variable
+		newthisdata = cbind.data.frame(thisdata[,1:numPreceedingCols], varBinaryFactor)
+
+		## one of 3 ways to decide which examples are negative
+        	idxsToRemove = restrictSample(varName, pheno, variableVal, thisdata[,"userID", drop=FALSE])
+
+		if (!is.null(idxsToRemove) & length(idxsToRemove) > 0) {
+			newthisdata = newthisdata[-idxsToRemove,]
+		}
+
+		facLevels = levels(newthisdata[,phenoStartIdx])		
+		idxTrue = length(which(newthisdata[,phenoStartIdx]==facLevels[1]))
+	        idxFalse = length(which(newthisdata[,phenoStartIdx]==facLevels[2]))
+                
+	        if (idxTrue<10 || idxFalse<10) {
+	                cat("CAT-MULT-SKIP-10 (", idxTrue, " vs ", idxFalse, ") || ", sep="");
+			incrementCounter("catMul.10")
+	        }
+		else {
+			isExposure = getIsCatMultExposure(varName, variableVal)
+
+			incrementCounter("catMul.over10")
+		     	# binary - so logistic regression
+			binaryLogisticRegression(paste(varName, variableVal,sep="#"), varType, newthisdata, isExposure)
+		}
+	}
+}
+
+# restricts sample based on value in CAT_MULT_INDICATOR_FIELDS column of variable info file,
+# either NO_NAN, ALL or a field ID
+# returns idx's that should be removed from the sample
+restrictSample <- function(varName,pheno,variableVal, userID) {
+
+	# get definition for sample for this variable either NO_NAN, ALL or a variable ID
+	varIndicator = vl$phenoInfo$CAT_MULT_INDICATOR_FIELDS[which(vl$phenoInfo$FieldID==varName)]
+
+	return(restrictSample2(varName,pheno,varIndicator,variableVal, userID))
+}
+
+
+restrictSample2 <- function(varName,pheno, varIndicator,variableVal, userID) {
+	
+	if (varIndicator=="NO_NAN") { # remove NAs
+		## remove all people with no value for this variable
+
+		# row indexes with NA in all columns of this cat mult field		
+		ind <- apply(pheno, 1, function(x) all(is.na(x)))
+		naIdxs = which(ind==TRUE)
+		cat("NO_NAN Remove NA participants ", length(naIdxs), " || ", sep="");
+	}
+	else if (varIndicator=="ALL") {
+
+		# use all people (no missing assumed) so return empty vector
+		# e.g. hospital data and death registry
+		naIdxs = cbind()
+		cat("ALL || ")
+	}
+	else if (varIndicator!="") {
+		# remove people who have no value for indicator variable
+		indName = paste("x",varIndicator,"_0_0",sep="");
+		cat("Indicator name ", indName, " || ", sep="");
+		indvarx = merge(userID, indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)		
+		indicatorVar = indvarx[,indName]
+
+		# remove participants with NA value in this related field
+		indicatorVar = replaceNaN(indicatorVar)
+		naIdxs = which(is.na(indicatorVar))
+
+		cat("Remove indicator var NAs: ", length(naIdxs), " || ", sep="");
+
+		if (is.numeric(as.matrix(indicatorVar))) {
+			# remove participants with value <0 in this related field - assumed missing indicators
+			lessZero = which(indicatorVar<0)
+			naIdxs = union(naIdxs, lessZero)
+			cat("Remove indicator var <0: ", length(lessZero), " || ", sep="")
+		}
+	}
+	else {
+		stop("Categorical multiples variables need a value for CAT_MULT_INDICATOR_FIELDS", call.=FALSE)
+	}
+
+	## remove people with pheno<0 if they aren't a positive example for this variable indicator
+	## because we can't know if they are a negative example or not
+	if (is.numeric(as.matrix(pheno))) {
+		idxForVar = which(pheno == variableVal, arr.ind=TRUE)
+		idxMissing = which(pheno < 0, arr.ind=TRUE)
+
+		# all people with <0 value and not variableVal
+		naMissing = setdiff(idxMissing,idxForVar)
+		
+		# add these people with unknowns to set to remove from sample
+		naIdxs = union(naIdxs, naMissing)
+		
+		cat(paste("Removed ", length(naMissing) ," examples != ", variableVal, " but with missing value (<0) || ", sep=""));
+	}
+	else {
+		cat("Not numeric || ")
+	}
+	
+	return(naIdxs);
+
+}
+
diff --git a/WAS/testCatSingle.r b/WAS/testCatSingle.r
new file mode 100644
index 0000000..109f88b
--- /dev/null
+++ b/WAS/testCatSingle.r
@@ -0,0 +1,193 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Performs variable processing for categorical (single) fields, namely:
+# 1) Reassigning values as specified in data coding information file
+# 2) Reordering categories for ordered fields
+# 3) Replacing missing codes - we assume values < 0 are missing for categorical (single) variables
+# 4) Remove values with <10 cases
+# 5) Deterimine correct test to perform, either binary, ordered or unordered.
+testCategoricalSingle <- function(varName, varType, thisdata) {
+	cat("CAT-SINGLE || ");
+
+	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
+	isExposure = getIsExposure(varName)
+
+	# assert variable has only one column
+	if (!is.null(dim(pheno))) stop("More than one column for categorical single")
+
+	pheno = reassignValue(pheno, varName)
+
+	# get data code info - whether this data code is ordinal or not and any reordering
+        dataPheno = vl$phenoInfo[which(vl$phenoInfo$FieldID==varName),];
+        dataCode = dataPheno$DATA_CODING;
+
+	# get data coding information	
+       	dataCodeRow = which(vl$dataCodeInfo$dataCode==dataCode);
+	if (length(dataCodeRow)==0) {
+                cat("ERROR: No row in data coding info file || ");
+		return(NULL);
+        }
+	dataDataCode = vl$dataCodeInfo[dataCodeRow,];
+        ordered = dataDataCode$ordinal;
+        order = as.character(dataDataCode$ordering);
+
+	## reorder variable values into increasing order (we do this now as this may convert variable to binary rather than ordered)
+        pheno = reorderOrderedCategory(pheno,order);
+
+	## if data code has a default_value then recode NA's to this value for participants with value in default_related_field
+	## this is used where there is no zero option e.g. field 100200
+	defaultValue = dataDataCode$default_value
+	defaultRelatedID = dataDataCode$default_related_field
+	pheno = setDefaultValue(pheno, defaultValue, defaultRelatedID, thisdata[,"userID", drop=FALSE])
+
+        ## all categories coded as <0 we assume are `missing' values
+        pheno = replaceMissingCodes(pheno)
+
+	## remove categories if < 10 examples
+	pheno = testNumExamples(pheno)
+
+	uniqVar = unique(na.omit(pheno))
+	uniqVar = sort(uniqVar)
+
+	if (length(uniqVar)<=1) {
+		cat("SKIP (only one value) || ");
+		incrementCounter("catSin.onevalue")
+	}
+	else if (length(uniqVar)==2) {		
+		cat("CAT-SINGLE-BINARY || ");
+		incrementCounter("catSin.case3")
+		# binary so logistic regression
+
+		phenoFactor = factor(pheno)
+		# binary - so logistic regression
+		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
+	        binaryLogisticRegression(varName, varType, thisdatanew, isExposure)	
+	}
+	else {
+		# > 2 categories
+		if (is.na(ordered)) {
+			cat(" ERROR: 'ordered' not found in data code info file")	
+		}
+		else {
+
+		## unordered
+		if (ordered == 0) {
+			
+			cat("CAT-SINGLE-UNORDERED || ")
+			incrementCounter("catSin.case2")
+
+			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
+			testCategoricalUnordered(varName, varType, thisdatanew);
+			
+		}
+		else if (ordered == 1) {
+		
+			## ordered
+			cat("ordered || ");
+			incrementCounter("catSin.case1")
+
+			## reorder variable values into increasing order
+			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], pheno);
+			testCategoricalOrdered(varName, varType, thisdatanew, order)
+		
+		}
+		else if (ordered == -2) {
+			cat(" EXCLUDED or BINARY variable: Should not get here in code. ")
+			incrementCounter("catSin.binaryorexcluded")
+		}
+		else {
+			print(paste("ERROR", varName, varType, dataCode));
+		}
+		}
+	}
+
+}
+
+## values are reordered and assigned values 1:N for N categories
+reorderOrderedCategory <- function(pheno,order) {
+
+	## new pheno of NAs (all values not in order are assumed to be NA)
+
+	if (!is.na(order) && nchar(order)>0) {
+
+		# make empty pheno		
+		pheno2 = rep(NA,length(pheno));
+			
+		## get ordering
+		orderParts = unlist(strsplit(order,"\\|"));
+		
+		# go through values in correct order and set value
+		# from 1 to the number of values
+		count=1;
+		for(i in orderParts) {
+			idx = which(pheno==i);
+			pheno2[idx] = count;
+		
+			count=count+1;
+		}
+		
+		cat("reorder ",order," || ",sep="");
+		
+		return(pheno2)
+	}	
+	else {
+		return(pheno);
+	}
+	
+}
+
+## sets default value for people with no value in pheno, but with a value in the
+## field specified in the default_value_related_field column in the data coding info file.
+## the default value is specified in the default_value column in the data coding info file.
+setDefaultValue <- function(pheno, defaultValue, defaultRelatedID, userID) {
+
+
+	if (!is.na(defaultValue) && nchar(defaultValue)>0) {
+
+		# remove people who have no value for indicator variable
+	       	indName = paste("x",defaultRelatedID,"_0_0",sep="");
+
+	     	cat("Default related field: ", indName, " || ", sep="");
+		indicatorVar = indicatorFields[,indName]
+		indvarx = merge(userID, indicatorFields, by="userID", all.x=TRUE, all.y=FALSE, sort=FALSE)
+                indicatorVar = indvarx[,indName]
+
+	    	# remove participants with NA value in this related field
+	    	indicatorVar = replaceNaN(indicatorVar)
+
+		# check if there are already examples with default value and if so display warning
+		numWithDefault = length(which(pheno==defaultValue))
+		if (numWithDefault>0) {
+			cat("(WARNING: already ", numWithDefault, " values with default value) ", sep="")
+		}
+		
+		# set default value in people who have no value in the pheno but do have a value in the default_value_related_field
+	    	defaultIdxs = which(!is.na(indicatorVar) & is.na(pheno))
+		pheno[defaultIdxs] = defaultValue
+
+	       	cat("default value ", defaultValue, " set, N= ", length(defaultIdxs), " || ", sep="");
+
+	}
+
+	return(pheno)
+
+}
+
+
diff --git a/WAS/testCategoricalOrdered.r b/WAS/testCategoricalOrdered.r
new file mode 100644
index 0000000..78debf6
--- /dev/null
+++ b/WAS/testCategoricalOrdered.r
@@ -0,0 +1,163 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Performs ordered logistic regression test and saves results in ordered logistic results file
+testCategoricalOrdered <- function(varName, varType, thisdata, orderStr="") {
+
+	
+	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
+	geno = thisdata[,"geno"]
+
+	cat("CAT-ORD || ");
+	incrementCounter("ordCat")
+
+	doCatOrdAssertions(pheno)
+
+	uniqVar = unique(na.omit(pheno));
+
+	# log the ordering of categories used
+	orderStr = setOrderString(orderStr, uniqVar);
+	cat("order: ", orderStr, " || ",  sep="");
+
+	# check sample size
+	numNotNA = length(which(!is.na(pheno)))
+	if (numNotNA<500) {
+		cat("CATORD-SKIP-500 (", numNotNA, ") || ",sep="");
+		incrementCounter("ordCat.500")
+	}
+	else {
+		# test this cat ordered variable with ordered logistic regression	
+
+	        phenoFactor = factor(pheno)
+
+		cat("num categories: ", length(unique(na.omit(phenoFactor))), " || ", sep="");
+
+		if (opt$save == TRUE) {
+			# add pheno to dataframe
+			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catOrd')
+			cat("SUCCESS results-ordered-logistic");
+			incrementCounter("success.ordCat")
+                }
+                else {
+
+		# ordinal logistic regression
+		sink()
+		sink(modelFitLogFile, append=TRUE)
+		print("--------------")
+		print(varName)
+
+		require(MASS)
+		require(lmtest)
+
+		### BEGIN TRYCATCH
+		tryCatch({
+		confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+
+
+		if (opt$standardise==TRUE) {
+			geno = scale(geno)
+                }
+
+		fit <- polr(phenoFactor ~ geno + ., data=confounders, Hess=TRUE)
+
+		ctable <- coef(summary(fit))
+		sink()
+		sink(resLogFile, append=TRUE)
+
+		ct = coeftest(fit)
+		pvalue = ct["geno","Pr(>|t|)"]
+		beta = ctable["geno", "Value"];
+
+		if (opt$confidenceintervals == TRUE) {
+			se = ctable["geno", "Std. Error"]
+			lower = beta - 1.96*se;
+	                upper = beta + 1.96*se;
+                }
+                else {
+                        lower = NA
+                        upper = NA
+                }
+
+		write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-ordered-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE");
+		cat("SUCCESS results-ordered-logistic");
+		incrementCounter("success.ordCat")
+
+		isExposure = getIsExposure(varName)
+                if (isExposure == TRUE) {
+                        incrementCounter("success.exposure.ordCat")
+                }
+
+		### END TRYCATCH
+		}, error = function(e) {
+			sink()
+                        sink(resLogFile, append=TRUE)
+                        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
+                        incrementCounter("ordCat.error")
+                })
+		}
+
+	}
+}
+
+# check that the phenotype is valid - that there are more than two categories
+# and that these all have at least 10 cases
+# something has gone wrong if this is the case
+doCatOrdAssertions <- function(pheno) {
+
+	# assert variable has only one column    
+    	if (!is.null(dim(pheno))) stop("More than one column for categorical ordered")
+
+		uniqVar = unique(na.omit(pheno));
+	
+		# assert more than 2 categories
+		if (length(uniqVar)<=1) stop("1 or zero values")
+		if (length(uniqVar)==2) stop("this variable is binary")
+
+		# assert each value has >= 10 examples
+		for (u in uniqVar) {
+     			withValIdx = which(pheno==u)
+        		numWithVal = length(withValIdx);
+		
+			if (numWithVal<10) stop("value with <10 examples")
+		}
+}
+
+# If data coding file does not specify an order then we use the default order as in coding defined by Biobank
+# and this function just generates a string with this order for logging purposes
+setOrderString <- function(orderStr, uniqVar) {
+
+	if (is.na(orderStr) || nchar(orderStr)==0) {
+
+		orderStr="";
+
+		# create order str by appending each value
+                uniqVarSorted = sort(uniqVar);
+                first=1;
+                for (i in uniqVarSorted) {
+                        if (first==0) {
+                                orderStr = paste(orderStr, "|",	sep="");
+                        }
+			if (i>=0) # ignore missing values
+                        	orderStr = paste(orderStr, i, sep="");
+				first=0;
+			end
+                }
+        }
+	return(orderStr);
+}
diff --git a/WAS/testCategoricalUnordered.r b/WAS/testCategoricalUnordered.r
new file mode 100644
index 0000000..eae9c73
--- /dev/null
+++ b/WAS/testCategoricalUnordered.r
@@ -0,0 +1,177 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Tests an unordered categorical phenotype with multinomial regression
+# and saves this result in the multinomial logistic results file
+testCategoricalUnordered <- function(varName, varType, thisdata) {
+
+	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
+	#geno = thisdata[,"geno"]
+
+	numNotNA = length(which(!is.na(pheno)))
+	if (numNotNA<500) {
+		cat("CATUNORD-SKIP-500 (", numNotNA, ") || ",sep="");
+		incrementCounter("unordCat.500")
+	}
+	else {
+
+		# check there are not too many levels and skip if there are
+		numUnique = length(unique(na.omit(pheno)))
+
+		# num outcome values * (num confounders and trait of interest and bias term)
+		numWeights=(numUnique-1)*((numPreceedingCols-2)+1+1)
+		if (numWeights>1000) {
+			cat("Too many weights in model: ", numWeights, " > 1000, (num outcomes values: ", numUnique, ") || SKIP ", sep="")
+			incrementCounter("unordCat.cats")
+			return(NULL)
+		}
+
+		phenoFactor = chooseReferenceCategory(pheno);
+
+		if (opt$save == TRUE) {
+			# add pheno to dataframe
+			storeNewVar(thisdata[,"userID"], phenoFactor, varName, 'catUnord')
+			cat("SUCCESS results-notordered-logistic ");
+	                incrementCounter("success.unordCat")
+		}
+                else {
+		
+
+		reference = levels(phenoFactor)[1];
+
+		sink()
+		sink(modelFitLogFile, append=TRUE) # hide output of model fitting
+		print("--------------")
+                print(varName)
+
+		require(nnet)
+		if (opt$standardise==TRUE) {
+                	geno = scale(thisdata[,"geno"])
+                }
+		else {
+                        geno = thisdata[,"geno"] 
+                }
+		#cat("genoMean=", mean(geno), " genoSD=", sd(geno), " || ", sep="")
+		
+		confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+
+		###### BEGIN TRYCATCH
+		tryCatch({
+
+		fit <- multinom(phenoFactor ~ geno + ., data=confounders, maxit=1000)
+
+		## baseline model with only confounders, to which we compare the model above
+		fitB <- multinom(phenoFactor ~ ., data=confounders, maxit=1000)
+
+		## compare model to baseline model
+		require(lmtest)
+		lres = lrtest(fit, fitB)
+		modelP = lres[2,"Pr(>Chisq)"];
+		
+		## save result to file
+		maxFreq = length(which(phenoFactor==reference));
+		numNotNA = length(which(!is.na(pheno)))
+	    	write(paste(paste(varName,"-",reference,sep=""), varType, paste(maxFreq,"/",numNotNA,sep=""), -999, -999, -999, modelP, sep=","), file=paste(opt$resDir,"results-multinomial-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE")
+
+		sink()
+		sink(resLogFile, append=TRUE)	
+		
+		sumx <- summary(fit)
+		
+		z <- sumx$coefficients/sumx$standard.errors
+		p = (1 - pnorm(abs(z), 0, 1))*2			
+
+		ci <- confint(fit, "geno", level=0.95)
+		ci = data.frame(ci)
+
+		## get result for each variable category
+		uniqVar = unique(na.omit(pheno))
+		for (u in uniqVar) {
+		
+			## no coef for baseline value, and values <0 are assumed to be missing
+			if (u == reference || u<0) {
+				next
+			}
+
+			pvalue = p[paste(eval(u),sep=""),"geno"]				
+			beta = sumx$coefficients[paste(eval(u),sep=""),"geno"]
+							
+			if (opt$confidenceintervals == TRUE) {
+				lower = ci[1, paste("X2.5...", u, sep="")]
+				upper =	ci[1, paste("X97.5...", u, sep="")]
+                	}
+                	else {
+                	      	lower = NA
+                	        upper = NA
+                	}
+
+			numThisValue = length(which(phenoFactor==u));
+
+			## save result to file
+			write(paste(paste(varName,"-",reference,"#",u,sep=""), varType, paste(maxFreq,"#",numThisValue,sep=""), beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-multinomial-logistic-",opt$varTypeArg,".txt",sep=""), append="TRUE")
+			
+		}
+
+		cat("SUCCESS results-notordered-logistic ");
+		incrementCounter("success.unordCat")
+
+		isExposure = getIsExposure(varName)
+                if (isExposure == TRUE) {
+                        incrementCounter("success.exposure.unordCat")
+                }
+		
+		## END TRYCATCH
+		}, error = function(e) {
+                        sink()
+                        sink(resLogFile, append=TRUE)
+                        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
+                	incrementCounter("unordCat.error")
+		})
+		}
+	}
+}
+
+# find reference category - category with most number of examples
+chooseReferenceCategory <- function(pheno) {
+
+	uniqVar = unique(na.omit(pheno));
+	phenoFactor = factor(pheno)
+
+	maxFreq=0;
+	maxFreqVar = "";
+	for (u in uniqVar) {
+		withValIdx = which(pheno==u)
+		numWithVal = length(withValIdx);
+		if (numWithVal>maxFreq) {
+			maxFreq = numWithVal;
+			maxFreqVar = u;
+		}
+	}
+
+	cat("reference: ", maxFreqVar,"=",maxFreq, " || ", sep="");
+		
+	## choose reference (category with largest frequency)
+	phenoFactor <- relevel(phenoFactor, ref = paste("",maxFreqVar,sep=""))
+	
+	return(phenoFactor);
+}
+
+
+
+
diff --git a/WAS/testContinuous.r b/WAS/testContinuous.r
new file mode 100644
index 0000000..1532eba
--- /dev/null
+++ b/WAS/testContinuous.r
@@ -0,0 +1,250 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Main function called for continuous fields
+testContinuous <- function(varName, varType, thisdata) {
+
+	cat("CONTINUOUS MAIN || ");	
+
+	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
+
+	# reassign values
+        pheno = reassignValue(pheno, varName)
+
+	thisdata[,phenoStartIdx:ncol(thisdata)] = pheno
+
+	testContinuous2(varName, varType, thisdata)
+	
+}
+
+# Main code used to process continuous fields, or integer fields that have been reassigned as continuous because they have >20 distinct values.
+# This is needed because we have already reassigned values for integer fields, so do this in the function above for continuous fields.
+testContinuous2 <- function(varName, varType, thisdata) {
+	cat("CONTINUOUS || ");
+
+	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
+	isExposure = getIsExposure(varName)
+
+	if (!is.null(dim(pheno))) {
+		phenoAvg = rowMeans(pheno, na.rm=TRUE)
+	}
+	else {
+		phenoAvg = pheno
+	}
+
+	## recode NaN to NA, which is generated if all cols of pheno are NA for a given person
+	idxNan = which(is.nan(phenoAvg))
+	phenoAvg[idxNan] = NA;
+	numNotNA=length(na.omit(phenoAvg));
+
+	## check whether >20% examples with same value
+	uniqVar = unique(na.omit(phenoAvg))
+	valid = TRUE
+	for (uniq in uniqVar) {
+		numWithValue = length(which(phenoAvg==uniq))
+		if (numWithValue/numNotNA >=0.2) {
+			valid = FALSE;
+			break
+		}
+	}
+
+	if (valid == FALSE) {
+
+		## treat as ordinal categorical
+		cat(">20% IN ONE CATEGORY || ");
+		
+		# if >2 unique values then treat as ordered categorical
+		numUniqueValues = length(uniqVar)
+
+		# straight forward case that there are two (or one) values		
+		if (numUniqueValues<=2) {
+			## treat as binary or skip (binary requires>=10 per category)
+
+			## remove categories if < 10 examples to see if this should be binary or not, but if ordered categorical
+			## then we include all values when generating this
+	    		phenoAvgMoreThan10 = testNumExamples(phenoAvg)
+
+			## binary if 2 distinct values, else ordered categorical
+        		phenoFactor = factor(phenoAvgMoreThan10)
+        		numLevels = length(unique(na.omit(phenoAvgMoreThan10))) #length(levels(phenoFactor))
+
+        		if (numLevels<=1) {
+       				cat("SKIP (number of levels: ",numLevels,")",sep="")
+				incrementCounter("cont.onevalue")
+        		}
+        		else if (numLevels==2) {
+	        		# binary
+				incrementCounter("cont.binary")
+        			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);			
+        			binaryLogisticRegression(varName, varType, thisdatanew, isExposure);
+        		}
+        	}
+		else {
+			## try to treat as ordered categorical
+
+			incrementCounter("cont.ordcattry")
+			## equal sized bins
+			phenoBinned = equalSizedBins(phenoAvg);
+
+			# check number of people in each bin
+			bin0Num = length(which(phenoBinned==0))
+			bin1Num = length(which(phenoBinned==1))
+			bin2Num = length(which(phenoBinned==2))
+
+			if (bin0Num>=10 & bin1Num>=10 & bin2Num>=10) {
+
+				# successful binning. >=10 examples in each of the 3 bins
+
+				incrementCounter("cont.ordcattry.ordcat")
+			        thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned);
+				testCategoricalOrdered(varName, varType, thisdatanew);
+			}
+			else {
+				# try to treat as binary because not enough examples in each bin
+								
+				if (bin0Num<10 & bin2Num<10) {
+					## skip - not possible to create binary variable because first and third bins are too small
+					## ie. could merge bin1 with bin 2 but then bin3 still too small etc
+					cat("SKIP 2 bins are too small || ")
+	                                incrementCounter("cont.ordcattry.smallbins")
+				} 
+				else if ((bin0Num<10 | bin1Num<10) & (bin0Num+bin1Num)>=10) {
+
+					# combine first and second bin to create binary variable
+					incrementCounter("cont.ordcattry.binsbinary")
+					cat("Combine first two bins and treat as binary || ")
+					phenoBinned[which(phenoBinned==0)] = 1	
+
+					# test binary
+					thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
+	                                binaryLogisticRegression(varName, varType, thisdatanew, isExposure);
+				}
+				else if ((bin2Num<10 | bin1Num<10) & (bin2Num+bin1Num)>=10) {
+
+					# combine second and last bin to create binary variable
+					incrementCounter("cont.ordcattry.binsbinary")
+					cat("Combine last two bins and treat as binary || ")
+                                        phenoBinned[which(phenoBinned==2)] = 1
+					
+                                        # test binary
+                                        thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoBinned)
+                                        binaryLogisticRegression(varName, varType, thisdatanew, isExposure)
+                                }
+				
+
+				else {
+					## skip - not possible to create binary variable because combining bins would still be too small
+					cat("SKIP 2 bins are too small(2) || ")
+                                        incrementCounter("cont.ordcattry.smallbins2")
+				}
+
+			}
+	
+		}
+	}
+	else {		
+		cat("IRNT || ");
+		incrementCounter("cont.main")
+
+		# check there are at least 500 examples
+		numNotNA = length(which(!is.na(phenoAvg)))
+		if (numNotNA<500) {
+			cat("CONTINUOUS-SKIP-500 (", numNotNA, ") || ",sep="");
+			incrementCounter("cont.main.500")
+		}
+		else {
+			## inverse rank normal transformation
+			phenoIRNT = irnt(phenoAvg)
+
+			if (opt$save == TRUE) {
+				# add pheno to dataframe
+				storeNewVar(thisdata[,"userID"], phenoIRNT, varName, 'cont')
+				cat("SUCCESS results-linear");
+	                        incrementCounter("success.continuous")
+                        }
+                        else {
+		
+			## do regression (use standardised geno values)
+			if (opt$standardise==TRUE) {
+				geno = scale(thisdata[,"geno"])
+			}
+			else {
+                	        geno = thisdata[,"geno"] 
+                	}
+			confounders=thisdata[,3:numPreceedingCols, drop = FALSE]
+
+			sink()
+			sink(modelFitLogFile, append=TRUE)
+			print("--------------")			
+			print(varName)
+
+			###### BEGIN TRYCATCH
+	                tryCatch({
+
+			fit <- lm(phenoIRNT ~ geno + ., data=confounders)
+			
+			sink()
+			sink(resLogFile, append=TRUE)
+			
+			sumx = summary(fit)
+
+			pvalue = sumx$coefficients['geno','Pr(>|t|)']
+			beta = sumx$coefficients["geno","Estimate"]
+
+			if (opt$confidenceintervals == TRUE) {
+				cis = confint(fit, level=0.95)
+				lower = cis["geno", "2.5 %"]
+	                        upper = cis["geno", "97.5 %"]
+                        }
+                        else {
+                                lower = NA
+                                upper = NA
+                        }
+
+			numNotNA = length(which(!is.na(phenoIRNT)))
+
+			## save result to file
+			write(paste(varName, varType, numNotNA, beta, lower, upper, pvalue, sep=","), file=paste(opt$resDir,"results-linear-",opt$varTypeArg,".txt", sep=""), append="TRUE");
+			cat("SUCCESS results-linear");
+
+			incrementCounter("success.continuous")
+                	if (isExposure == TRUE) {
+                	        incrementCounter("success.exposure.continuous")
+                	}
+
+			## END TRYCATCH
+                	}, error = function(e) {
+                	        sink()
+                	        sink(resLogFile, append=TRUE)
+                	        cat(paste("ERROR:", varName,gsub("[\r\n]", "", e), sep=" "))
+                	        incrementCounter("continuous.error")
+                	})
+			}			
+		}
+	}
+}
+
+irnt <- function(pheno) {
+	set.seed(1234)
+	numPhenos = length(which(!is.na(pheno)))
+	quantilePheno = (rank(pheno, na.last="keep", ties.method="random")-0.5)/numPhenos
+	phenoIRNT = qnorm(quantilePheno)	
+	return(phenoIRNT);
+}
+
diff --git a/WAS/testInteger.r b/WAS/testInteger.r
new file mode 100644
index 0000000..9f87511
--- /dev/null
+++ b/WAS/testInteger.r
@@ -0,0 +1,88 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Processing integer fields, namely:
+# 1) Reassigning values as specified in the data code information file
+# 2) Generate a single value if there are several values (arrays) by taking the mean
+# 3) Treating this field as continuous if at least 20 distinct values.
+# Otherwise treat as binary or ordered categorical if 2 or more than two values. 
+testInteger <- function(varName, varType, thisdata) {
+	cat("INTEGER || ");
+
+	pheno = thisdata[,phenoStartIdx:ncol(thisdata)]
+	isExposure = getIsExposure(varName)
+
+	if (!is.numeric(as.matrix(pheno))) {
+		cat("SKIP Integer type but not numeric",sep="");
+		return(NULL)
+	}
+
+	pheno = reassignValue(pheno, varName)
+
+	## average if multiple columns
+	if (!is.null(dim(pheno))) {
+                phenoAvg = rowMeans(pheno, na.rm=TRUE)
+		
+		# if participant only has NA values then NaN is generated so we convert back to NA
+		phenoAvg = replaceNaN(phenoAvg)
+        }
+	else {
+                phenoAvg = pheno
+        }
+
+	uniqVar = unique(na.omit(phenoAvg))
+
+	# if >=20 separate values then treat as continuous
+	if (length(uniqVar)>=20) {
+		
+		thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoAvg);
+		testContinuous2(varName, varType, thisdatanew)
+		incrementCounter("int.continuous")
+	}
+	else {
+		
+		## remove categories if < 10 examples
+	    	phenoAvg = testNumExamples(phenoAvg)
+	    
+		## binary if 2 distinct values, else ordered categorical
+		phenoFactor = factor(phenoAvg)
+		numLevels = length(levels(phenoFactor))
+		if (numLevels<=1) {
+			cat("SKIP (number of levels: ",numLevels,")",sep="");
+			incrementCounter("int.onevalue")
+		}
+		else if (numLevels==2) {
+			incrementCounter("int.binary")
+
+			# binary
+			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
+			binaryLogisticRegression(varName, varType, thisdatanew, isExposure);
+		}
+		else {
+			incrementCounter("int.catord")
+			cat("3-20 values || ")
+
+			# we don't use equal sized bins just the original integers (that have >=10 examples) as categories
+			thisdatanew = cbind.data.frame(thisdata[,1:numPreceedingCols], phenoFactor);
+
+			# treat as ordinal categorical
+			testCategoricalOrdered(varName, varType, thisdatanew);
+		}
+	}
+}
diff --git a/WAS/testNumExamples.r b/WAS/testNumExamples.r
new file mode 100644
index 0000000..37cc992
--- /dev/null
+++ b/WAS/testNumExamples.r
@@ -0,0 +1,37 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Remove variable values if less than 10 examples have this value
+testNumExamples <- function(pheno) {
+	
+	## loop through values and remove if has < 10 examples
+        uniqVar = unique(na.omit(pheno));
+        for (u in uniqVar) {
+                withValIdx = which(pheno==u)
+                numWithVal = length(withValIdx);
+                if (numWithVal<10) {
+                        pheno[withValIdx]=NA
+			cat(paste("Removed ",u ,": ", numWithVal, "<10 examples || ", sep=""));
+                }
+		else {
+			cat(paste("Inc(>=10): ", u, "(", numWithVal, ") || ", sep=""));
+		}
+        }
+	return(pheno);
+}
diff --git a/unittests/run-tests.sh b/WAS/unittests/run-tests.sh
similarity index 100%
rename from unittests/run-tests.sh
rename to WAS/unittests/run-tests.sh
diff --git a/unittests/test_equalSizedBins.r b/WAS/unittests/test_equalSizedBins.r
similarity index 100%
rename from unittests/test_equalSizedBins.r
rename to WAS/unittests/test_equalSizedBins.r
diff --git a/unittests/test_reassignValue.r b/WAS/unittests/test_reassignValue.r
similarity index 100%
rename from unittests/test_reassignValue.r
rename to WAS/unittests/test_reassignValue.r
diff --git a/unittests/test_testCatMultiple.r b/WAS/unittests/test_testCatMultiple.r
similarity index 100%
rename from unittests/test_testCatMultiple.r
rename to WAS/unittests/test_testCatMultiple.r
diff --git a/unittests/test_testCatSingle.r b/WAS/unittests/test_testCatSingle.r
similarity index 100%
rename from unittests/test_testCatSingle.r
rename to WAS/unittests/test_testCatSingle.r
diff --git a/WAS/validatePhenotypeInput.r b/WAS/validatePhenotypeInput.r
new file mode 100644
index 0000000..9b95f71
--- /dev/null
+++ b/WAS/validatePhenotypeInput.r
@@ -0,0 +1,85 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Validate the contents of the phenotype file
+validatePhenotypeInput <- function() {
+
+	print("Validating phenotype data ...")
+
+	## get just first row so we can check the column names
+	phenoIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
+	
+	###
+	### pheno file validation
+
+	print(paste("Number of columns in phenotype file: ", ncol(phenoIn),sep=""))
+
+	## check user id exists in pheno file
+	idx1 = which(names(phenoIn) == opt$userId);
+	if (length(idx1)==0) {
+                stop(paste("phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+        }
+
+	# we only need the confounders if we are actually running the tests
+	if (opt$save==FALSE & is.null(opt$confounderfile)) {
+
+	## confounder variables exist in pheno file
+	idx = which(names(phenoIn) == "x21022_0_0");
+	if (length(idx)==0) {
+                stop("phenotype file doesn't contain required age colunn: x21022_0_0", call.=FALSE)
+        }
+
+	idx = which(names(phenoIn) == "x31_0_0");
+        if (length(idx)==0) {
+                stop("phenotype file doesn't contain required sex colunn: x31_0_0", call.=FALSE)
+        }
+
+	
+	if (opt$genetic ==TRUE) {
+		idx = which(names(phenoIn) == "x22000_0_0");
+        	if (length(idx)==0) {
+        	        stop("phenotype file doesn't contain required genetic batch colunn: x22000_0_0", call.=FALSE)
+        	}	
+	}
+
+	## if running with sensitivity option then check extra columns exist in pheno file (genetic PCs and assessment centre)
+	if (opt$sensitivity==TRUE) {
+
+		if (opt$genetic ==TRUE) {
+			## check first 10 genetic PCs exist
+			for (i in 1:10) {
+				idx = which(names(phenoIn) == paste("x22009_0_", i, sep=""));
+				if (length(idx)==0) {
+         			       stop(paste("phenotype file doesn't contain required genetic principal component colunn: x22009_0_", i, sep=""), call.=FALSE)
+        			}
+			}
+		}
+
+		## assessment centre field
+		idx = which(names(phenoIn) == "x54_0_0");
+        	if (length(idx)==0) {
+        	        stop("phenotype file doesn't contain required assessment centre colunn: x54_0_0", call.=FALSE)
+        	}
+	}
+
+	}
+
+	print("Phenotype file validated")
+
+}
diff --git a/WAS/validateTraitInput.r b/WAS/validateTraitInput.r
new file mode 100644
index 0000000..16a35f4
--- /dev/null
+++ b/WAS/validateTraitInput.r
@@ -0,0 +1,69 @@
+# The MIT License (MIT)
+# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without
+# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+# Validate the contents of the trait of interest file
+validateTraitInput <- function(snpIn) {
+
+if (opt$save!=TRUE) {
+
+	print("Validating trait of interest data ...")
+
+	###
+	### get header of trait of interest file or pheno file (if no trait of interest file is specified
+
+	if (is.null(opt$traitofinterestfile)) {
+		snpIn = read.table(opt$phenofile, header=1, nrows=1, sep=',')
+	}
+	else {
+		snpIn = read.table(opt$traitofinterestfile, header=1, nrows=1, sep=',')	
+	}
+
+	###
+	### trait of interest file validation
+
+	print(paste("Number of columns in trait of interest file:", ncol(snpIn),sep=""))
+
+	## check user id exists in snp file
+	idx1 = which(names(snpIn) == opt$userId);
+	if (length(idx1)==0) {
+		if (is.null(opt$traitofinterestfile)) {
+			stop(paste("Phenotype file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+		} else {
+			stop(paste("Trait of interest file doesn't contain userID colunn:", opt$userId), call.=FALSE)
+		}
+
+	}
+	
+	## check trait of interest exists in trait of interest file
+	idx2 = which(names(snpIn) == opt$traitofinterest);
+	if (length(idx2)==0) {
+
+		if (is.null(opt$traitofinterestfile)) {
+			stop(paste("No trait of interest file specified, and phenotypes file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
+		}
+		else {
+			stop(paste("Trait of interest file doesn't contain trait of interest variable column:", opt$traitofinterest), call.=FALSE)
+		}
+	}
+
+	print("Trait of interest file validated")
+
+}
+
+}