forked from chrisclark/PythonForDataScience
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmakeSubmission.py
More file actions
20 lines (16 loc) · 799 Bytes
/
makeSubmission.py
File metadata and controls
20 lines (16 loc) · 799 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from sklearn.ensemble import RandomForestClassifier
from numpy import genfromtxt, savetxt
def main():
    """Train a random forest on the training CSV and write test-set predictions.

    Reads 'Data/train.csv' (first column is the target, remaining columns
    are the features) and 'Data/test.csv' (feature columns only), fits a
    100-tree RandomForestClassifier, and writes one value per test row to
    'Data/submission.csv': the second column of ``predict_proba``.
    """
    # Create the training & test sets. The header line is parsed like any
    # other row, so it is dropped afterwards with [1:].
    # The files are opened in `with` blocks so the handles are closed as soon
    # as parsing finishes instead of being left open.
    with open('Data/train.csv', 'r') as train_file:
        dataset = genfromtxt(train_file, delimiter=',', dtype='f8')[1:]
    with open('Data/test.csv', 'r') as test_file:
        test = genfromtxt(test_file, delimiter=',', dtype='f8')[1:]

    # Split labels from features by column slicing rather than per-row loops.
    target = dataset[:, 0]
    train = dataset[:, 1:]

    # Create and train the random forest.
    # Multi-core CPUs can use:
    #   rf = RandomForestClassifier(n_estimators=100, n_jobs=2)
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(train, target)

    # Column 1 of predict_proba is the probability of the second class in
    # rf.classes_ (presumably label 1 -- confirm against the training data).
    predicted_probs = rf.predict_proba(test)[:, 1]
    savetxt('Data/submission.csv', predicted_probs, delimiter=',', fmt='%f')
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()