model_lstm.py
import torch
import torch.nn as nn


def BoaLSTM(d_model=256, num_layers=4, vocab_size=256, device="cuda"):
    """Construct a BoaBytePredictor with an LSTM backbone."""

    class BoaBytePredictorLSTM(nn.Module):
        """LSTM model adapted to predict the next byte in a sequence."""

        def __init__(self, d_model=256, num_layers=4, vocab_size=256):
            super().__init__()
            # Embedding for vocab_size possible bytes
            self.embedding = nn.Embedding(vocab_size, d_model)
            self.lstm = nn.LSTM(input_size=d_model, hidden_size=d_model,
                                num_layers=num_layers, batch_first=True)
            self.head = nn.Sequential(
                nn.Linear(d_model, d_model),
                nn.ReLU(),
                # Output logits for each of the vocab_size possible next bytes
                nn.Linear(d_model, vocab_size),
            )
            self.d_model = d_model
            self.num_layers = num_layers

        def forward(self, x):
            h = self.embedding(x)  # [B, L, D]
            output, _ = self.lstm(h)
            return self.head(output)

        @torch.inference_mode()
        def init_stream(self, max_len: int, batch_size: int = 1, device=None, dtype=None):
            # max_len is unused here: an LSTM carries its full history in the
            # (h, c) states, so the cache does not grow with sequence length.
            h_0 = torch.zeros(self.num_layers, batch_size, self.d_model, device=device, dtype=dtype)
            c_0 = torch.zeros(self.num_layers, batch_size, self.d_model, device=device, dtype=dtype)
            return [(h_0, c_0)]

        @torch.inference_mode()
        def step(self, byte_t: torch.LongTensor, caches) -> torch.Tensor:
            # byte_t: [B] -> logits: [B, vocab_size]
            x = self.embedding(byte_t).unsqueeze(1)  # [B, 1, D]
            prev_states = caches[0]
            lstm_out, new_states = self.lstm(x, prev_states)
            caches[0] = new_states
            logits = self.head(lstm_out)
            return logits.squeeze(1)

    model = BoaBytePredictorLSTM(d_model, num_layers, vocab_size)
    return model.to(device)
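

# --- Usage sketch (illustrative, not part of the original file) ---
# A minimal example of driving the streaming interface: warm the (h, c)
# cache on a seed, then generate bytes greedily one step at a time. The
# seed bytes and step count below are hypothetical; in practice the model
# would first be trained with cross-entropy on next-byte targets.
if __name__ == "__main__":
    model = BoaLSTM(device="cpu")
    model.eval()

    seed = torch.tensor([[72, 101, 108, 108, 111]])  # "Hello" as bytes, [B=1, L=5]
    full_logits = model(seed)                        # batch path: [1, 5, 256]

    # Streaming path: feed the seed byte by byte to populate the cache.
    caches = model.init_stream(max_len=64, batch_size=1, device="cpu")
    for t in range(seed.shape[1]):
        logits = model.step(seed[:, t], caches)      # [1, 256]

    # Greedy generation: feed each predicted byte back in as the next input.
    byte_t = logits.argmax(dim=-1)                   # [1]
    for _ in range(8):
        logits = model.step(byte_t, caches)
        byte_t = logits.argmax(dim=-1)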