gpt/generate.py at main · markmusic27/gpt · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import torch
import os
from gpt import GPT, encode, decode, vocab_size, block_size
import torch.nn.functional as F

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

"""
Load the trained model
"""
def load_model():
    """Build a GPT and restore its trained parameters from disk.

    The state dict is read from ``saved_models/model_params.pth`` (a path
    relative to the current working directory), mapped onto the module-level
    ``device``, and loaded into a freshly constructed ``GPT``.

    Returns:
        The model, moved to ``device`` and switched to evaluation mode.

    Raises:
        FileNotFoundError: If the parameter file does not exist.
    """
    checkpoint = os.path.join("saved_models", "model_params.pth")
    net = GPT()

    # Fail early with an actionable message instead of a raw loader error.
    if not os.path.exists(checkpoint):
        raise FileNotFoundError(f"Model file not found at {checkpoint}. Please ensure the model has been trained first.")

    # map_location remaps the stored tensors onto whichever device was selected,
    # so the file does not have to be loaded on the device it was saved from.
    weights = torch.load(checkpoint, map_location=device)
    net.load_state_dict(weights)

    net = net.to(device)
    net.eval()  # evaluation mode for generation
    return net

"""
Generate text using the loaded model
"""
def generate_text(model, prompt: str = "", max_new_tokens: int = 100):
    """Sample text from ``model`` one token at a time, streaming it to stdout.

    Args:
        model: Called as ``model(idx)`` on a ``(1, T)`` tensor of token ids.
            It must return a pair whose first item is the logits; these are
            indexed as ``[:, -1, :]`` to take the prediction for the last
            position (assumes a ``(batch, time, vocab)`` layout).
        prompt: Text to continue. It is echoed to stdout before sampling
            starts. When empty, generation is seeded with a single random
            token id drawn from ``[0, vocab_size)``; that seed token is part
            of the returned text but is not echoed.
        max_new_tokens: Number of tokens to sample.

    Returns:
        The result of ``decode`` applied to the prompt (or seed) token ids
        followed by every sampled token id.
    """
    if prompt:
        context = torch.tensor(encode(prompt), dtype=torch.long, device=device).unsqueeze(0)
        print(prompt, end="", flush=True)  # Echo the prompt before the continuation
    else:
        # No prompt: start from one random token
        context = torch.randint(0, vocab_size, (1, 1), dtype=torch.long, device=device)

    # Keep the complete history as plain ints for the final decode, and cap the
    # tensor fed to the model at block_size. Only the last block_size tokens
    # are ever used as conditioning, so re-concatenating the whole history on
    # every step would just be quadratic copying for long generations.
    token_ids = context[0].detach().cpu().tolist()
    context = context[:, -block_size:]

    # Sampling never backpropagates, so skip building autograd graphs.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Call the module itself (not .forward) so registered hooks run.
            logits, _unused = model(context)
            probs = F.softmax(logits[:, -1, :], dim=-1)  # distribution for the last position

            # replacement=True is kept as-is so seeded runs sample the same tokens.
            new_token = torch.multinomial(probs, num_samples=1, replacement=True)

            # Stream only the newly sampled token
            token_id = new_token.item()
            token_ids.append(token_id)
            print(decode([token_id]), end="", flush=True)

            # Slide the conditioning window forward by one token
            context = torch.cat((context, new_token), dim=1)[:, -block_size:]

    print()  # New line at the end
    return decode(token_ids)

if __name__ == "__main__":
    # Script entry point: restore the trained model, then stream a sample that
    # continues the prompt "HAMLET:" for 1,000,000 new tokens.
    generate_text(load_model(), prompt="HAMLET:", max_new_tokens=1_000_000)