-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcomplete.R
More file actions
73 lines (59 loc) · 2.25 KB
/
complete.R
File metadata and controls
73 lines (59 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Date validation
IsDate <- function(mydate, date.format = "%Y-%m-%d") {
  # Report whether a value parses as a date under an explicit format.
  # The format is always handed to as.Date so that an unparseable string
  # comes back as NA rather than raising the "not in a standard unambiguous
  # format" error that as.Date gives for character input with no format.
  # Args
  #   mydate: suspected date (may be a vector)
  #   date.format: optional date format string, "%Y-%m-%d" by default
  # Returns
  #   TRUE for each element that as.Date converts to a non-NA date, FALSE
  #   for each element it converts to NA; a single FALSE if the conversion
  #   (or evaluating mydate itself) raises an error
  tryCatch(
    {
      parsedDate <- as.Date(mydate, date.format)
      !is.na(parsedDate)
    },
    error = function(err) FALSE
  )
}
# the number of completely observed cases in each data file
complete <- function(directory, id = 1:332) {
  # Count the completely observed cases in each readings file.
  # Args
  #   directory: name of directory containing readings files
  #   id: ids of the files to process. Files are named id.csv, with id zero
  #       filled to 3 positions, i.e. 001.csv, 020.csv, 134.csv
  # Readings files are assumed to be csvs with columns:
  #   Date: "YYYY-MM-DD"
  #   sulfate: empty (NA) or float
  #   nitrate: empty (NA) or float
  #   ID: id of this sensor. 1-332 in test data set
  # Returns
  #   A data frame with columns `id` and `nobs` (integer), one row per
  #   processed id in the order given. A row of a file is counted when its
  #   Date passes IsDate, nitrate and sulfate are both non-missing, and its
  #   ID equals the id being processed.
  #   Processing stops at the first id outside 1-332 or whose file does not
  #   exist (a diagnostic is printed); rows gathered before that point are
  #   returned. The result has zero rows when nothing was processed.

  # One preallocated slot per requested id; avoids growing a data frame
  # with rbind() on every iteration.
  nobs <- integer(length(id))
  processed <- 0L

  for (currentId in id) {
    if (currentId < 1 || currentId > 332) {
      print(paste("Invalid id:", currentId))
      break
    }

    # Zero fill the id to 3 positions and build the path to the file
    fileName <- file.path(
      directory,
      paste0(formatC(currentId, width = 3, flag = "0"), ".csv")
    )
    if (!file.exists(fileName)) {
      print(paste("File does not exist: ", fileName))
      break
    }

    frame <- read.csv(fileName)

    # Explicit column lookups with [[ ]] (exact name match) instead of
    # subset()'s non-standard evaluation.
    isComplete <- IsDate(frame[["Date"]]) &
      !is.na(frame[["nitrate"]]) &
      !is.na(frame[["sulfate"]]) &
      frame[["ID"]] == currentId

    processed <- processed + 1L
    # A missing ID leaves NA in the mask; na.rm treats it as "not complete"
    nobs[processed] <- sum(isComplete, na.rm = TRUE)
  }

  # The loop only ever stops early, so the processed ids are exactly the
  # leading `processed` entries of `id`.
  done <- seq_len(processed)
  data.frame(id = unname(id)[done], nobs = nobs[done])
}