-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_basil_v0001.py
More file actions
executable file
·36 lines (29 loc) · 901 Bytes
/
create_basil_v0001.py
File metadata and controls
executable file
·36 lines (29 loc) · 901 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# create_basil_v0001.py
import os
from transformers import GPT2LMHeadModel, GPT2Config, GPT2Tokenizer, GenerationConfig
from config import MODELS_DIR
# Build a base-size GPT-2 architecture (12 layers, 12 heads, 768-dim,
# 1024-token context, standard 50257-token vocab) with RANDOM weights —
# no pretrained checkpoint is loaded.
gpt2_config = GPT2Config(
    vocab_size=50257,
    n_positions=1024,
    n_embd=768,
    n_layer=12,
    n_head=12,
    pad_token_id=50256,  # GPT-2 has no dedicated pad token; EOS (50256) is reused
)
model = GPT2LMHeadModel(gpt2_config)

# Set generation config to avoid max_length/max_new_tokens conflict
# Use only max_new_tokens, clear max_length
model.generation_config = GenerationConfig(
    max_new_tokens=30,
    max_length=None,  # Explicitly clear to avoid conflict
    do_sample=True,
    temperature=1.0,
    top_k=50,
    pad_token_id=50256,
    eos_token_id=50256,
)

# Fetch the tokenizer BEFORE writing anything to disk: from_pretrained
# hits the Hugging Face Hub (unless cached), and doing it after
# model.save_pretrained could leave a partially-written output directory
# (model files present, tokenizer files missing) on a network failure.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Write model weights/config and tokenizer files side by side so the
# directory is loadable via from_pretrained(out_dir).
out_dir = os.path.join(MODELS_DIR, "basil_v001")
model.save_pretrained(out_dir)
tokenizer.save_pretrained(out_dir)
print("✅ Basil v001 initialized with random weights.")