-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpredict.R
More file actions
92 lines (74 loc) · 2.45 KB
/
predict.R
File metadata and controls
92 lines (74 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# The score command in general will allow the user to score their own
# data. For some models, this might mean they can apply a colourising
# model to their own black and white photos, for example.
#
# For this mode we will interactively request some data and report the
# prediction of rain tomorrow.
#
# TODO An extension to this command will allow a CSV file containing
# observations to be scored, returning the prediction of rain for each
# observation.
# Overall: Load model, interactively request data, predict.
suppressMessages(
{
library(mlhub)
library(rpart)
library(randomForest) # Model: randomForest() na.roughfix() for missing data.
library(magrittr)
library(dplyr)
library(tidyr)
library(rattle)
})
# ----------------------------------------------------------------------
# Prepare the dataset in the same shape the rest of this script expects.
# ----------------------------------------------------------------------

# Fetch the dataset by name and record how many observations it has.
dsname <- "weatherAUS"
ds     <- get(dsname)
nobs   <- nrow(ds)

# Remember the original variable names, then switch the dataset over to
# normalised names. vnames ends up mapping normalised -> original name.
vnames        <- names(ds)
names(ds)     <- normVarNames(names(ds))
names(vnames) <- names(ds)

# Candidate variables: every column, de-duplicated with the target
# listed first and then reversed, so the target ends up last.
target <- "rain_tomorrow"
vars   <- rev(unique(c(target, names(ds))))

# Normalise the level labels of every factor column as well.
for (v in which(vapply(ds, is.factor, logical(1))))
{
  levels(ds[[v]]) <- normVarNames(levels(ds[[v]]))
}

# Drop the risk variable and the identifiers from the modelling set.
risk   <- "risk_mm"
id     <- c("date", "location")
ignore <- c(risk, id)
vars   <- setdiff(vars, ignore)
inputs <- setdiff(vars, target)

# Formula derived from the data frame with the columns in rev(vars)
# order, i.e. with the target column placed first.
form <- formula(ds[rev(vars)])

# Impute missing values in the modelling variables with na.roughfix().
ds[vars] <- na.roughfix(ds[vars])
# ----------------------------------------------------------------------
# Load the saved model, interactively ask for a value of every variable
# the tree actually splits on, and report the predicted chance of rain.
#
# Each answer is read as text from stdin and converted according to the
# type of the matching column in ds: numeric columns via as.numeric(),
# factor columns by matching against the column's levels. Invalid
# answers are re-prompted; end of input aborts with exit status 1.
# ----------------------------------------------------------------------

mfile <- "rain_dt_model.RData"

if (! file.exists(mfile))
{
  cat("The model was not found. Be sure to run the demo first.\n")
  quit(save="no", status=1, runLast=FALSE)
} else {

  # NOTE(review): load() is expected to define `model`, presumably the
  # rpart tree saved by the demo - the code below relies on model$frame.
  load(mfile)

  mlcat("Provide values for the following variables", end="")

  # The following code based on rpart::printcp()
  # Copyright (c) Brian Ripley, GPLv2 License.

  frame  <- model$frame
  leaves <- frame$var == "<leaf>"
  used   <- unique(as.character(frame$var[!leaves]))
  unused <- setdiff(names(ds), used)

  # Use the first observation as a correctly typed template row and
  # blank out every variable that the tree does not split on.
  newdata <- ds[1,]
  newdata[1, match(unused, names(ds))] <- NA

  # Open stdin once so that successive reads continue where the previous
  # one stopped, whether the input is typed or piped in.
  con <- file("stdin")
  open(con, "r")

  for (v in used)
  {
    column <- ds[[v]]

    # Describe the expected input: the observed range for numerics, the
    # permitted levels for factors, otherwise just the class name.
    if (is.numeric(column))
    {
      hint <- sprintf("numeric %4.1f - %4.1f",
                      min(column, na.rm=TRUE), max(column, na.rm=TRUE))
    } else if (is.factor(column)) {
      hint <- paste(levels(column), collapse="/")
    } else {
      hint <- class(column)[1]
    }

    repeat
    {
      cat(sprintf("%-15s [%s]: ", v, hint))
      entry <- readLines(con, n=1L)

      # readLines() returns character(0) at end of input.
      if (length(entry) == 0L)
      {
        cat("\nNo more input was available so no prediction was made.\n")
        close(con)
        quit(save="no", status=1, runLast=FALSE)
      }

      entry <- trimws(entry)

      # Convert the text explicitly by column type. The input is never
      # evaluated as R code.
      if (is.numeric(column))
      {
        value <- suppressWarnings(as.numeric(entry))
        ok    <- !is.na(value)
      } else if (is.factor(column)) {
        value <- factor(entry, levels=levels(column),
                        ordered=is.ordered(column))
        ok    <- !is.na(value)
      } else {
        value <- entry
        ok    <- nzchar(entry)
      }

      if (ok) break
      cat("That value was not recognised. Please try again.\n")
    }

    # Assign per column so each value keeps its own type rather than
    # being coerced into one shared atomic vector.
    newdata[[v]] <- value
  }

  close(con)

  pr <- predict(model, newdata=newdata)[,"yes"]

  cat(sprintf("\nI predict the chance of rain tomorrow to be %2.0f%%.\n\n", 100*pr))
}