-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_basil_v0001.py
More file actions
executable file
·36 lines (29 loc) · 901 Bytes
/
create_basil_v0001.py
File metadata and controls
executable file
·36 lines (29 loc) · 901 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# create_basil_v0001.py
import os
from transformers import GPT2LMHeadModel, GPT2Config, GPT2Tokenizer, GenerationConfig
from config import MODELS_DIR
# Build a base-size GPT-2 architecture (12 layers, 12 heads, 768-dim,
# 1024-token context, standard 50257-token vocab) with RANDOM weights —
# no pretrained checkpoint is loaded.
gpt2_config = GPT2Config(
    vocab_size=50257,
    n_positions=1024,
    n_embd=768,
    n_layer=12,
    n_head=12,
    pad_token_id=50256,  # GPT-2 has no dedicated pad token; EOS (50256) is reused
)
model = GPT2LMHeadModel(gpt2_config)

# Set generation config to avoid max_length/max_new_tokens conflict
# Use only max_new_tokens, clear max_length
model.generation_config = GenerationConfig(
    max_new_tokens=30,
    max_length=None,  # Explicitly clear to avoid conflict
    do_sample=True,
    temperature=1.0,
    top_k=50,
    pad_token_id=50256,
    eos_token_id=50256,
)

# Fetch the tokenizer BEFORE writing anything to disk: from_pretrained
# hits the Hugging Face Hub (unless cached), and doing it after
# model.save_pretrained could leave a partially-written output directory
# (model files present, tokenizer files missing) on a network failure.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Write model weights/config and tokenizer files side by side so the
# directory is loadable via from_pretrained(out_dir).
out_dir = os.path.join(MODELS_DIR, "basil_v001")
model.save_pretrained(out_dir)
tokenizer.save_pretrained(out_dir)
print("✅ Basil v001 initialized with random weights.")