-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
45 lines (36 loc) · 1.12 KB
/
data.py
File metadata and controls
45 lines (36 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import csv, math, random
import numpy as np
# Seed the stdlib RNG at import time so the random.sample() draws made in
# split() are repeatable from run to run. This does not seed NumPy's RNG.
random.seed(42)
def load(dataset):
    '''Load the feature array for a dataset and, for 'train', its targets.

    Reads ``data/<dataset>.npy`` with ``np.load``. When ``dataset`` is
    ``'train'`` it additionally reads ``data/train.txt`` as CSV and uses the
    first column of every row as a float target.

    Args:
        dataset: Base name of the files under ``data/`` (e.g. ``'train'``).

    Returns:
        ``(X, y)`` when ``dataset == 'train'``, where ``y`` is a 1-D float32
        array with one entry per CSV row; otherwise just ``X``.
    '''
    X = np.load('data/' + dataset + '.npy')
    if dataset != 'train':
        return X
    # open() replaces the Python-2-only file() builtin, and the with-block
    # guarantees the handle is closed even if a row fails to parse.
    with open('data/' + dataset + '.txt') as f:
        y = np.array([float(row[0]) for row in csv.reader(f)], np.float32)
    return X, y
def split(X, y, n_windows=10, window=None):
    '''Set aside contiguous windows of the data for validation.

    ``n_windows`` non-overlapping, non-adjacent windows of ``window``
    consecutive samples are drawn at random (via the stdlib ``random``
    module) and used as the validation set; everything else is the training
    set. Holding out contiguous regions rather than scattered samples
    reduces validation overfitting.

    Args:
        X: Array of samples, indexed along its first axis.
        y: Array of targets aligned with ``X``.
        n_windows: Number of validation windows to draw (default 10).
        window: Length of each window. Defaults to
            ``len(X) // (5 * n_windows)`` so the windows together cover
            about 20% of the data (408 samples each for 20400 samples).

    Returns:
        ``(X_train, X_val, y_train, y_val)``; validation samples are in
        ascending index order.

    Raises:
        ValueError: If ``n_windows`` or the window length is smaller than 1,
            or if the requested windows cannot fit into ``len(X)`` samples
            without touching each other.
    '''
    n = len(X)
    if n_windows < 1:
        raise ValueError('n_windows must be >= 1, got %r' % (n_windows,))
    if window is None:
        window = n // (5 * n_windows)
    if window < 1:
        raise ValueError(
            'window length must be >= 1 (len(X)=%d, n_windows=%d)'
            % (n, n_windows))
    # Each window needs `window` samples plus one separating sample; without
    # this check the rejection loop below would never terminate.
    if n_windows * (window + 1) > n:
        raise ValueError(
            'cannot fit %d separated windows of length %d into %d samples'
            % (n_windows, window, n))

    # Rejection sampling. The accept/reject criteria are deliberately the
    # same as the historical hard-coded version (408 / 20400) so that seeded
    # runs on a 20400-sample dataset pick the same windows as before.
    while True:
        starts = sorted(random.sample(range(n), n_windows))
        if starts[-1] + window >= n:
            continue  # last window would run up to / past the end
        if all(a + window < b for a, b in zip(starts, starts[1:])):
            break  # no two windows overlap or touch

    # A boolean mask gives both index sets in O(n) instead of testing every
    # index against a list of validation indices.
    held_out = np.zeros(n, dtype=bool)
    for start in starts:
        held_out[start:start + window] = True
    val_inds = np.flatnonzero(held_out)
    train_inds = np.flatnonzero(~held_out)
    return X[train_inds], X[val_inds], y[train_inds], y[val_inds]
def stack(X, y=None):
    '''Pair every sample with its predecessor along axis 3.

    Sample ``i`` (for ``i >= 1``) is concatenated with sample ``i - 1`` on
    axis 3, e.g. turning (samples, length, width, 3) into
    (samples-1, length, width, 6). When ``y`` is given, its first element is
    dropped so that it stays aligned with the stacked samples.

    Returns ``X_new`` alone, or ``(X_new, y_new)`` when ``y`` is not None.
    '''
    current = X[1:]
    previous = X[:-1]
    X_new = np.concatenate((current, previous), axis=3)
    if y is None:
        return X_new
    # The first sample has no predecessor, so its target goes too.
    return X_new, y[1:]