generate.py
forked from IllgamhoDuck/ko_novel_generator
import os
import argparse
import numpy as np
import pickle
from model import gru
from data_loader import gru_dataset
from opt import opt
import torch
import torch.nn as nn
from torch.autograd import Variable

def softmax(output):
    theta = 1
    # Exponentiate the scores (subtracting the max for numerical stability),
    # then normalize so the weights sum to 1.
    output = np.exp((output - np.max(output)) * theta)
    output /= np.sum(output)
    return output
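
# Quick sanity check (illustrative only, not part of the original script): for a toy
# score vector the weights returned by softmax() are positive and sum to 1.
#
#   >>> w = softmax(np.array([1.0, 2.0, 3.0]))
#   >>> np.allclose(w.sum(), 1.0)
#   True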

def highest_epoch(opt):
    # Checkpoints are saved as "gru_{epoch}.pkl", so strip the prefix and the
    # extension to recover the epoch numbers and return the largest one.
    epoch_list = os.listdir(opt.save_dir)
    epoch_list = [int(epoch[4:-4]) for epoch in epoch_list]
    highest_epoch = sorted(epoch_list)[-1]
    return highest_epoch

def word_discriminator(output):
    # Greedy decoding: take the index with the highest score at the last time step.
    return output[-1, :, :].squeeze().max(dim=0)[1].item()


def word_weight_discriminator(output):
    # Weighted sampling: draw the next index from the softmax distribution
    # over the scores at the last time step.
    weight = softmax(output[-1, :, :].squeeze().data.cpu().numpy())
    t = np.cumsum(weight)
    s = np.sum(weight)
    return int(np.searchsorted(t, np.random.rand(1) * s))

def sample_model(opt, model, hidden=None, prime_text=" ", length=1000, text=True):
    # Map the prime text to vocabulary indices (unknown characters fall back to index 0)
    prime_ids = [opt.vocab_ctoi.get(c, 0) for c in prime_text]
    prime_input = torch.LongTensor(prime_ids).view(-1, 1)
    prime_input = Variable(prime_input)

    # List that stores every generated character (seeded with the prime text)
    gen_list = []
    gen_list.extend([char for char in prime_text])

    # Initialize the hidden state for generation
    if hidden is None:
        h = model.init_hidden()
    else:
        h = hidden

    # Move the input to the GPU if CUDA is enabled
    if opt.cuda:
        prime_input = prime_input.cuda()

    # Feed the prime text through the model
    output, h = model(prime_input, h)

    # File to write the generated text to
    f = open(os.path.join(opt.gen_dir, "result.txt"), 'w')

    # Text to return
    gen_result = prime_text

    # Sample character by character
    for i in range(length):
        # Find the next character and store it
        next_word = word_discriminator(output)
        # next_word = word_weight_discriminator(output)
        gen_list.append(opt.vocab_itoc[next_word])
        gen_result += opt.vocab_itoc[next_word]

        # Periodically write the generated text to the file
        if len(gen_list) % 100 == 0:
            f.write(''.join(gen_list))
            f.flush()
            gen_list = []

        # Make the next input tensor from the last character generated
        char_input = Variable(torch.LongTensor([next_word]).view(-1, 1))
        if opt.cuda:
            char_input = char_input.cuda()
        output, h = model(char_input, h)

        # Detach the hidden state from the graph; without this the computation
        # graph keeps growing and memory usage blows up.
        h = h.detach()

    # After generation finishes, write the remaining text to the file
    f.write(''.join(gen_list))
    f.flush()
    f.close()

    return gen_result, h
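
# Programmatic usage (a hypothetical sketch, assuming `opt` and a trained `model` have
# already been built the same way as in the __main__ block below):
#
#   text, hidden = sample_model(opt, model, prime_text="옛날 옛적에 ", length=200)
#   print(text)
#   # `hidden` can be passed back in as `hidden=hidden` to continue the same story.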

if __name__ == "__main__":
    # Get the generation settings from argparse
    parser = argparse.ArgumentParser(description="Add information about making samples")
    parser.add_argument('--run', action='store_true',
                        help="run the generation script")
    parser.add_argument('--epoch', type=int, default=None,
                        help="the epoch of the saved model to load")
    parser.add_argument('--prime', type=str, default=" ",
                        help="the text to start the generation with")
    parser.add_argument('--len', type=int, default=1000,
                        help="how many characters to generate")
    parser.add_argument('--resume', action='store_true',
                        help="resume from the previously saved hidden state")
    args = parser.parse_args()

    if args.run:
        assert args.epoch is not None, "The epoch must be entered! --epoch [int]"

        # Directories
        # - where the training data lives
        # - where the model parameters are saved
        # - where the generated text is saved
        data_dir = "data/"
        save_dir = "save/"
        gen_dir = "generate/"

        # Choose the hyperparameters here!
        ratio = 0.9
        num_layers = 2
        hidden_size = 2048
        embedding_size = 2048
        cuda = True if torch.cuda.is_available() else False
        batch_size = 1    # set to 1 because this is generation, not training
        seq_len = 50
        num_epochs = 100
        save_every = 50
        print_every = 10
        valid_every = 20  # validate when the batch step is a multiple of this
        grad_clip = 5.
        learning_rate = 0.001

        # Store every option in the opt data structure
        opt = opt(data_dir=data_dir,
                  save_dir=save_dir,
                  gen_dir=gen_dir,
                  ratio=ratio,
                  num_layers=num_layers,
                  hidden_size=hidden_size,
                  embedding_size=embedding_size,
                  cuda=cuda,
                  batch_size=batch_size,
                  seq_len=seq_len,
                  num_epochs=num_epochs,
                  save_every=save_every,
                  print_every=print_every,
                  valid_every=valid_every,
                  grad_clip=grad_clip,
                  learning_rate=learning_rate)

        # Load the vocabulary data
        with open('vocab/vocab.pkl', 'rb') as f:
            vocab = pickle.load(f)

        # Load the previous hidden state if --resume is given
        if args.resume:
            with open(os.path.join(opt.gen_dir, "hidden.pkl"), 'rb') as f:
                h = pickle.load(f)
        else:
            h = None

        # Store the vocabulary in the options
        opt.vocab_size = vocab['vocab_size']
        opt.vocab_itoc = vocab['vocab_itoc']
        opt.vocab_ctoi = vocab['vocab_ctoi']

        # Build the model
        model = gru(opt)
        if opt.cuda:
            model = model.cuda()

        # Path of the saved model parameters for the requested epoch
        save_path = os.path.join(opt.save_dir, "gru_{0}.pkl".format(args.epoch))

        # Load the saved model
        model.load_state_dict(torch.load(save_path))

        result, h = sample_model(opt=opt,
                                 model=model,
                                 hidden=h,
                                 prime_text=args.prime,
                                 length=args.len)

        # Store the hidden state so generation can be resumed later
        with open(os.path.join(opt.gen_dir, "hidden.pkl"), 'wb') as f:
            pickle.dump(h, f)

        print(result)
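
# Example invocation (hypothetical epoch and prime text; assumes training has produced
# save/gru_50.pkl and vocab/vocab.pkl, and that the generate/ directory exists):
#
#   python generate.py --run --epoch 50 --prime "옛날 옛적에 " --len 500
#
# Add --resume to reload generate/hidden.pkl and continue from the previous hidden state.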