-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess.py
More file actions
64 lines (55 loc) · 1.62 KB
/
preprocess.py
File metadata and controls
64 lines (55 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas, scipy, numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Binarizer
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.preprocessing import add_dummy_feature
# Min-max scaling demo on the red-wine quality CSV (';'-separated).
# The URL literal must not carry surrounding whitespace: it previously ended
# with a stray space, which is not a valid URL character and can make the
# download fail.
df = pandas.read_csv(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv',
    sep=';',
)
array = df.values
# Separating data into input and output components
# NOTE(review): columns 0-7 are taken as inputs and column 8 as the output;
# confirm this matches the CSV layout (the target may be the last column).
x = array[:, 0:8]
y = array[:, 8]
print("Min-max scaling")
print("Before min-max scaling")
print(x[0:5, :])
# Rescale the selected input columns into the requested (0, 1) range.
scaler = MinMaxScaler(feature_range=(0, 1))
rescaledX = scaler.fit_transform(x)
numpy.set_printoptions(precision=3)  # Setting precision for the output
print("After min-max scaling")
print(rescaledX[0:5, :])
print(" ")
# Binarizer demo: fit a default-configured Binarizer on a small 3x3 sample
# and print the sample before and after the transform.
X = [
    [1., -1., 2.],
    [2., 0., 0.],
    [0., 1., -1.],
]
print("Binarizing", "Original data", sep="\n")
print(X)
transformer = Binarizer().fit(X)
binarized = transformer.transform(X)
print("After Binarizing")
print(binarized)
print(" ")
# StandardScaler demo: fit on a tiny 4x2 sample, then print the fitted
# mean_ attribute and the transformed sample.
data = [[0, 0], [0, 0], [1, 1], [1, 1]]
print("Standardizing data", "original data", sep="\n")
print(data)
scaler = StandardScaler()
scaler.fit(data)
feature_means = scaler.mean_
standardized = scaler.transform(data)
print("Mean of the data")
print(feature_means)
print("Standardized data")
print(standardized)
print(" ")
# LabelEncoder demo: fit on a list of city names, then print the integer
# codes produced for a second list of those names.
labels = ["paris", "paris", "tokyo", "amsterdam"]
queries = ["tokyo", "amsterdam", "paris"]
le = preprocessing.LabelEncoder()
print("Labels:")
print(labels)
le.fit(labels)
encoded = le.transform(queries)
print("Encodings for \n tokyo,amsterdam,paris::")
print(encoded)
print("")
# add_dummy_feature demo: print a 2x2 sample, then rebind X to the result of
# add_dummy_feature called with value=5.0 and print it.
X = [[0, 1], [1, 1]]
print("Adding dummy feature", "Data :", sep="\n")
print(X)
print("adding dummy feature with value 5")
dummy_value = 5.0
X = add_dummy_feature(X, value=dummy_value)
print(X)