-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathadvanced.py
More file actions
213 lines (150 loc) · 6.86 KB
/
Copy pathadvanced.py
File metadata and controls
213 lines (150 loc) · 6.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import numpy
import keras
from matplotlib import pyplot as plt
import csv
from sklearn.preprocessing import MinMaxScaler
import os
import time
'''
The three variables below hold the file paths to important folders.
basicSRC holds the complete file path for the basic folder.
advancedSRC holds the complete file path for the advanced folder.
staticSRC holds the complete file path to the images folder.
Defining them once keeps the rest of the code easy to read, without long file paths everywhere.
'''
# These are plain module-level assignments; a "global" statement has no effect at module scope,
# so none is used here.
basicSRC = "/var/www/html/StockPredictor/basic/"
advancedSRC = "/var/www/html/StockPredictor/advanced/"
staticSRC = "/var/www/html/StockPredictor/static/img/"
def validateCSVData(processedData, minDataTrue, minData, predictionType=None):
    '''
    This function checks that the values read from a CSV are valid.

    processedData  -- the sequence of values to check
    minDataTrue    -- when True, the "no data" / "not enough data" checks are also run
    minData        -- minimum number of values required (converted with int())
    predictionType -- "basic" selects the "Stock doesn't exist" message for empty data

    Returns a (code, message) tuple:
    returns 0 for valid data
    returns 1 for values that cannot be converted to int
    returns 2 for no data
    returns 3 for not enough data
    The length checks run last, so code 3 overrides code 1 when both apply.
    '''
    valid = 0
    error = "No error, data is valid"
    for item in processedData:
        try:  # try converting to int
            int(item)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures are treated as bad data; anything else
            # (e.g. KeyboardInterrupt) is allowed to propagate.
            print(item)  # report the offending value
            valid = 1
            error = "Data contains non integers"
    if minDataTrue == True:  # kept as an equality test so only True (or 1) enables the checks
        if len(processedData) == 0:
            valid = 2
            if predictionType == "basic":
                error = "Stock doesn't exist"
            else:
                error = "No data"
        elif len(processedData) < int(minData):
            valid = 3
            error = "Not enough data"
    return valid, error
def split(sequence, inputSize, outputSize):
    '''
    This function cuts a sequence into overlapping (input, output) samples.

    Sample k uses sequence[k:k+inputSize] as its input and the following
    outputSize values as its output.

    Returns two numpy arrays (X, Y) with one row per sample. When the sequence
    is too short for even one sample, the arrays are empty but still
    two-dimensional, with shapes (0, inputSize) and (0, outputSize), so callers
    can read shape[1] safely.
    '''
    iterations = len(sequence) - (inputSize + outputSize) + 1  # number of possible samples
    if iterations <= 0:
        # Not enough data for a single sample: return correctly-shaped empty arrays
        empty = numpy.array([])
        return empty.reshape((0, inputSize)), empty.reshape((0, outputSize))
    X = [sequence[k:k + inputSize] for k in range(iterations)]  # the input windows
    Y = [sequence[k + inputSize:k + inputSize + outputSize] for k in range(iterations)]  # the output windows
    return numpy.array(X), numpy.array(Y)
def loadCSV(location, column):
    '''
    This function loads one column of a CSV file into a list of floats.

    location -- path of the CSV file
    column   -- zero-based index of the column to read

    Blank lines are skipped. Thousands separators inside a value are removed,
    so "10,000" is read as 10000.0.
    Raises ValueError if a value is not numeric and IndexError if a non-blank
    row has no such column.
    '''
    rawData = []
    # newline="" lets the csv module handle line endings itself
    with open(location, newline="") as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:  # the reader yields an empty list for a blank line
                continue
            # data sets often use commas to make numbers more presentable; this undoes that
            rawData.append(float(row[column].replace(",", "")))
    return rawData
sc = MinMaxScaler(feature_range = (0, 1)) #defines a new scaler that maps values into the range 0 to 1
'''
The code below loads each line of the parameters file into a list.
It then assigns the individual lines to variables.
The string values keep their trailing newline exactly as read from the file,
so any code using them must strip it first.
'''
# The file is only read, so it is opened read-only; "with" closes it even if reading fails
with open(advancedSRC + "Parameters.txt", 'r') as f:
    parameters = f.readlines()
inputSize = int(parameters[1])
activationFunction = str(parameters[2])
outputSize = int(parameters[3])
lossFunction = parameters[4]
epochs = int(parameters[5])
stackedLayers = parameters[6] #NOTE(review): this is the raw text line, not a boolean, so it is always truthy
'''
The code below loads the data.
'''
#NOTE(review): the file names look swapped relative to the variable names
#(trainingData <- PredictionData.csv, predictionData <- TrainingData.csv).
#Left unchanged; confirm against the code that writes these files.
trainingData = loadCSV(advancedSRC+"PredictionData.csv",0)
predictionData = loadCSV(advancedSRC+"TrainingData.csv",0)
'''
Draw the graph of past stock data.
Every existing file whose name starts with "advancedPast" is deleted and a new png is written.
The new file name ends with a timestamp, so each image is unique
and browsers cannot serve a cached copy of an older graph.
'''
fig = plt.figure()
recentWindow = trainingData[-inputSize:]
plt.plot(recentWindow, "-x", color='red') #previous stock values, drawn in red
plt.xlabel("Day") #label for the X axis
plt.ylabel("Value") #label for the Y axis
pastNewName = f"advancedPast{time.time()}.png"
for filename in os.listdir(str(staticSRC)):
    if not filename.startswith('advancedPast'): #leave every other image alone
        continue
    os.remove(str(staticSRC) + filename)
plt.savefig(staticSRC + pastNewName) #write the newly generated graph
plt.close(fig)
#Scale the training data, cut it into samples and train the LSTM model.
scaledArray = numpy.array(trainingData).reshape(-1, 1) #one column, one row per value
scaledArray = sc.fit_transform(scaledArray) #scales data
#Keep every scaled value except NaN. The previous condition was always true and
#called int() on each value, which raises ValueError on NaN, so NaN crashed the script.
newScaled = [float(value) for value in scaledArray[:, 0] if not numpy.isnan(value)]
X, Y = split(newScaled, inputSize, outputSize) #splits the data into samples
X = X.reshape((X.shape[0], X.shape[1], 1)) #adds the single-feature axis the LSTM input needs
#The names read from the parameters file may carry a trailing newline; strip()
#removes it without cutting a real character when there is none.
activationName = activationFunction.strip()
lossName = lossFunction.strip()
#NOTE(review): return_sequences used to receive stackedLayers, which is a non-empty
#string and therefore always truthy. The next LSTM layer needs sequence input, so
#True is passed explicitly; stackedLayers currently has no effect on the model.
model = keras.models.Sequential()
model.add(keras.layers.LSTM(256, activation=activationName, return_sequences=True, input_shape=(inputSize, 1)))
model.add(keras.layers.LSTM(128, activation=activationName, return_sequences=True))
model.add(keras.layers.LSTM(64, activation=activationName))
model.add(keras.layers.Dense(outputSize)) #one output per predicted value
model.summary()
model.compile(optimizer='adam', loss=lossName)
data = model.fit(X, Y, epochs=epochs, verbose=1)
#Scale the prediction data, feed the most recent values to the model and unscale the result.
scaledArray = numpy.array(predictionData).reshape(-1, 1) #one column, one row per value
#NOTE(review): this re-fits the scaler on the prediction data, so the values are scaled
#with a different min/max than the training data. Left unchanged; confirm this is intended.
scaledArray = sc.fit_transform(scaledArray) #scales data
#Keep every scaled value except NaN. The previous condition was always true and
#called int() on each value, which raises ValueError on NaN, so NaN crashed the script.
newScaled = [float(value) for value in scaledArray[:, 0] if not numpy.isnan(value)]
xNew = numpy.array(newScaled[-inputSize:]) #the most recent inputSize values
xNew = xNew.reshape((1, inputSize, 1)) #a single sample with one feature per step
yNew = model.predict(xNew, verbose=1)
unscaledY = sc.inverse_transform(yNew) #this unscales the data
yNew = yNew[0] #converts the 2d array back to 1d
unscaledY = unscaledY[0] #converts the 2d array back to 1d
#Draw the prediction graph: up to four recent values in red, the predicted values in blue.
recentValues = trainingData[-4:] #up to four most recent values (fewer if the data is shorter)
lastIndex = len(recentValues) - 1 #x position of the most recent value
link = trainingData[-1:] #starts with the most recent value so the blue line connects to the red one
xLink = [lastIndex] if link else []
for position, predicted in enumerate(unscaledY, start=lastIndex + 1):
    xLink.append(position)
    link.append(predicted) #adds each predicted value after the connecting point
fig = plt.figure()
#x positions are derived from the data length, not fixed at [0,1,2,3],
#so fewer than four values no longer causes a length mismatch
plt.plot(list(range(len(recentValues))), recentValues, "-x", color='red') #this plots the previous stock values in red
plt.plot(xLink, link, "-x", color='blue') #this plots the predicted stock values in blue
plt.xlabel("Day") #provides the label for the X axis
plt.ylabel("Value") #provides the label for the Y axis
pastNewName = "advancedPrediction" + str(time.time()) + ".png" #the timestamp keeps browsers from caching the image
for filename in os.listdir(str(staticSRC)):
    if filename.startswith('advancedPrediction'): #not to remove other images
        os.remove(str(staticSRC) + filename)
plt.savefig(staticSRC + pastNewName) #this saves the generated graph
plt.close(fig)
#Write the predicted values first and the progress marker last, so that anything
#waiting for "Complete" never reads a missing or half-written data.csv.
with open(str(staticSRC + 'data.csv'), "w") as f: #the data csv holds the future predicted values, one per line
    f.writelines(str(row) + "\n" for row in unscaledY)
with open(advancedSRC + "progress.txt", "w") as f:
    f.write("Complete") #tells the main program that training and predictions have finished