DeepSPADE/trainautoencode.py at master · tanmayb123/DeepSPADE · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
import data_helpers
from w2v import train_word2vec
from sklearn.utils import class_weight
from keras.models import Model
from keras.layers import *
import keras

np.random.seed(2)

embedding_dim = 100
batch_size = 32
num_epochs = 1
val_split = 0.1

min_word_count = 1
context = 12

print("Loading data...")

x, y, vocabulary, vocabulary_inv = data_helpers.load_data()
embedding_weights = train_word2vec(x, vocabulary_inv, embedding_dim, min_word_count, context)

shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices].argmax(axis=1)

print("Vocabulary Size: {:d}".format(len(vocabulary)))

layerinput = Input(shape=[267], dtype='int32')

encoded = Embedding(len(vocabulary), embedding_dim, input_length=267, weights=embedding_weights)(layerinput)
encoded = LSTM(100, return_sequences=True)(encoded)
#decoded = RepeatVector(267)(encoded)
decoded = LSTM(100, return_sequences=True)(encoded)

model = Model(inputs=[layerinput], outputs=[decoded])

model.compile(loss='binary_crossentropy', optimizer='adam')

model.summary()

model.fit(x_shuffled, x_shuffled, batch_size=batch_size, nb_epoch=num_epochs, validation_split=val_split, verbose=1)

model.save('autoencode.h5')