-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathairport_data_load.R
More file actions
119 lines (105 loc) · 5.47 KB
/
airport_data_load.R
File metadata and controls
119 lines (105 loc) · 5.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Load packages: dataRetrieval for USGS NWIS data access, USGSwsBase for
# supporting water-science utilities.
library(dataRetrieval)
library(USGSwsBase)
# Site and retrieval parameters for the airport outfall gage.
# NOTE(review): these four values are not referenced later in this script --
# presumably consumed by the companion scripts listed below; confirm.
siteNumber <- '040871475'
# NWIS parameter code; 00060 is typically discharge -- TODO confirm intent.
ParameterCd <- '00060'
StartDate <- '1997-10-01'
EndDate <- '2012-10-01'
# Run hourly_daily_checks.R
# Run disch_iceaffect_omit.R or disch_iceaffect_corr.R
# Run data_merge_daily.R
# DTMaxCols <- na.omit(data_sub)
# DTMaxRows <- data_sub[,colSums(is.na(data_sub)) <= nrow(data_sub)*0.5]
# DTMaxRows <- na.omit(DTMaxRows)
#
# #List columns removed to maximize rows:
# names(data_sub)[!(names(data_sub) %in% names(DTMaxCols))]
# names(data_sub)[!(names(data_sub) %in% names(DTMaxRows))]
# setdiff(data_sub$num,DTMaxRows$num)
# setdiff(data_sub$num,DTMaxCols$num)
# #Choose which to use: DTMaxCols or DTMaxRows:
# DT <- data_sub[,c("TPLoad",names(DTMaxCols))]
# data_sub <- na.omit(DT)
#
# DT <- data_sub[,c("TPLoad",names(DTMaxRows))]
# data_sub <- na.omit(DT)
# set necessary site information and inputs to step-wise regression
library(GSqwsr)
#BODload
# Keep only rows with both a non-missing BOD load and glycol-application value.
data_sub <- data_sub[which(!is.na(data_sub$BODload)),]
data_sub <- data_sub[which(!is.na(data_sub$kgGlycol)),]
# Build the censored-data object for GSqwsr from the chosen predictor columns
# and the "BODload" response. The trailing arguments appear to be the remark
# column, detection limit, units, pcode, and analysis label -- confirm against
# GSqwsr::importQW's signature.
data_sub_cens <- importQW(data_sub,c("Qmax","Eduration","mean_temp","max_temp","min_temp","prcp_sum","kgGlycol"),"BODload","remark","",0.005,"User","tons","Unk","","00310","BODLoading")
siteName <- "Outfall"
siteNo <- '040871475'
# Retrieve NWIS site metadata (used later for plot annotation).
siteINFO <- getSiteFileData(siteNo, interactive=FALSE)
investigateResponse <- "BODLoading"
#CODload
# NOTE(review): this and the following response sections each further subset
# data_sub, so running the script top-to-bottom cumulatively filters the data;
# presumably only one response section is run per analysis -- confirm workflow.
data_sub <- data_sub[which(!is.na(data_sub$CODload)),]
data_sub <- data_sub[which(!is.na(data_sub$kgGlycol)),]
# NOTE(review): the "00310" code is repeated from the BOD section above; for a
# COD response this looks like a copy-paste leftover -- verify intended value.
data_sub_cens <- importQW(data_sub,c("Qmax","Eduration","mean_temp","max_temp","min_temp","prcp_sum","dwpt_mean","snowdp_sum","mmprcp_deice","kgGlycol"),"CODload","remark","",0.005,"User","tons","Unk","","00310","CODLoading")
siteName <- "Outfall_mmprcp_deice"
siteNo <- '040871475'
siteINFO <- getSiteFileData(siteNo, interactive=FALSE)
investigateResponse <- "CODLoading"
#PGload
# Propylene-glycol load: same predictor set and filtering pattern as the COD
# section; only the response column and analysis label differ.
data_sub <- data_sub[which(!is.na(data_sub$PGload)),]
data_sub <- data_sub[which(!is.na(data_sub$kgGlycol)),]
# NOTE(review): "00310" repeated from the BOD section -- verify intended value
# for a PG response.
data_sub_cens <- importQW(data_sub,c("Qmax","Eduration","mean_temp","max_temp","min_temp","prcp_sum","dwpt_mean","snowdp_sum","mmprcp_deice","kgGlycol"),"PGload","remark","",0.005,"User","tons","Unk","","00310","PGLoading")
siteName <- "Outfall_mmprcp_deice"
siteNo <- '040871475'
siteINFO <- getSiteFileData(siteNo, interactive=FALSE)
investigateResponse <- "PGLoading"
#EGload
# Ethylene-glycol load: same predictor set and filtering pattern as above;
# only the response column and analysis label differ.
data_sub <- data_sub[which(!is.na(data_sub$EGload)),]
data_sub <- data_sub[which(!is.na(data_sub$kgGlycol)),]
# NOTE(review): "00310" repeated from the BOD section -- verify intended value
# for an EG response.
data_sub_cens <- importQW(data_sub,c("Qmax","Eduration","mean_temp","max_temp","min_temp","prcp_sum","dwpt_mean","snowdp_sum","mmprcp_deice","kgGlycol"),"EGload","remark","",0.005,"User","tons","Unk","","00310","EGLoading")
siteName <- "Outfall_mmprcp_deice"
siteNo <- '040871475'
siteINFO <- getSiteFileData(siteNo, interactive=FALSE)
investigateResponse <- "EGLoading"
#EGPGload
# Combined EG+PG load: same predictor set and filtering pattern as above.
# Being the last section, these assignments (data_sub_cens, siteName,
# investigateResponse) are the ones in effect when the plotting/regression
# code below runs sequentially.
data_sub <- data_sub[which(!is.na(data_sub$EGPGload)),]
data_sub <- data_sub[which(!is.na(data_sub$kgGlycol)),]
# NOTE(review): "00310" repeated from the BOD section -- verify intended value
# for an EG+PG response.
data_sub_cens <- importQW(data_sub,c("Qmax","Eduration","mean_temp","max_temp","min_temp","prcp_sum","dwpt_mean","snowdp_sum","mmprcp_deice","kgGlycol"),"EGPGload","remark","",0.005,"User","tons","Unk","","00310","EGPGLoading")
siteName <- "Outfall_mmprcp_deice"
siteNo <- '040871475'
siteINFO <- getSiteFileData(siteNo, interactive=FALSE)
investigateResponse <- "EGPGLoading"
# Choose 'normal' or 'lognormal' distribution for the response variable.
transformResponse <- "lognormal"
# Output directory for this site/scenario; plots and tables below are written
# here. paste0() replaces paste(..., sep = "") for clarity.
# NOTE(review): hard-coded user home path -- consider making this configurable.
pathToSave <- paste0("/Users/jlthomps/Documents/R/GMIA_hourly/", siteName)
##########################################################
# Preliminary assessment plots: QQ plots of candidate response transformations
# and scatter plots of each predictor against the response, written to one PDF
# under pathToSave. (paste0 replaces paste(..., sep = ""); a stale commented
# duplicate of the pdf() call was removed.)
pdf(paste0(pathToSave, "/", investigateResponse, "_InitialQQGraphs.pdf"))
plotQQTransforms(data_sub_cens, investigateResponse)
predictVariableScatterPlots(data_sub_cens, investigateResponse)
dev.off()
##########################################################
#################################################################################################
#Kitchen sink:
# Candidate predictors: every column except the response, datetime, and decYear.
# NOTE(review): predictVariables is computed here but never passed to
# createFullFormula() or prelimModelDev() below -- appears unused; confirm
# before removing.
predictVariables <- names(data_sub_cens)[-which(names(data_sub_cens) %in% investigateResponse)]
predictVariables <- predictVariables[which(predictVariables != "datetime")]
predictVariables <- predictVariables[which(predictVariables != "decYear")]
# Full ("kitchen sink") formula over all candidate predictors, then step-wise
# model development against it.
kitchenSink <- createFullFormula(data_sub_cens,investigateResponse)
returnPrelim <- prelimModelDev(data_sub_cens,investigateResponse,kitchenSink,
"BIC", #Other option is "AIC"
transformResponse)
# Unpack the step history, model summary table, and fitted model object.
steps <- returnPrelim$steps
modelResult <- returnPrelim$modelStuff
modelReturn <- returnPrelim$DT.mod
# Label the step-history columns for the plotting/saving code below.
colnames(steps) <- c("step","BIC","Deviance","Resid.Dev","Resid.Df","Correlation","Slope","RMSE","PRESS","scope","response")
#Save plotSteps to file:
# NOTE(review): hard-coded absolute paths to local clones of GLRIBMPs -- these
# will break on any other machine; consider sourcing relative to the project
# root instead.
source("/Users/jlthomps/Desktop/git/GLRIBMPs/plotStepsGLRI.R")
source("/Users/jlthomps/Desktop/git/GLRIBMPs/analyzeStepsGLRI.R")
# Plot each step of the step-wise selection to a PDF in pathToSave.
pdf(paste(pathToSave,"/",investigateResponse,"_plotSteps.pdf",sep=""))
plotStepsGLRI(steps,data_sub_cens,transformResponse)
dev.off()
# Summary/analysis plots of the step history, annotated with site metadata.
pdf(paste(pathToSave,"/",investigateResponse,"_analyzeSteps.pdf",sep=""))
analyzeStepsGLRI(steps, investigateResponse,siteINFO, xCorner = 0.01)
dev.off()
#################################################################################################
##########################################################
# Persist the step-wise selection history as CSV alongside the plots.
# paste0() replaces paste(..., sep = "") for clarity; output is unchanged.
fileToSave <- paste0(pathToSave, "/", investigateResponse, "_steps.csv")
write.table(steps, fileToSave, row.names = FALSE, sep = ",")
##########################################################