import torch
from utils import create_tokenizer
from dataset import load_saved_datasets
from transformers import AutoModelForCausalLM, DataCollatorForLanguageModeling, TrainingArguments, Trainer
# HYPERPARAMETERS
save_dir = "../data/tokenized_recipes"
NUM_EPOCHS = 5
# Set up device-agnostic code
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print("Using device:", device)
tokenizer = create_tokenizer()
# Load dataset
train_dataset, val_dataset = load_saved_datasets(save_dir)
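# load_saved_datasets is assumed (from the dataset import above) to return the
# pre-tokenized train/val splits previously written to save_dir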
# Set up model
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
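# Grow the embedding matrix to match the tokenizer vocab
# (assumption: create_tokenizer extends the base GPT-2 vocabulary)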
model.resize_token_embeddings(len(tokenizer))
model.to(device)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # causal LM objective, not BERT-style masked LM
)
# Set up training arguments
training_args = TrainingArguments(
    output_dir="../models/recipe-gpt",
    overwrite_output_dir=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    save_total_limit=2,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=4,  # adjust based on available memory
    per_device_eval_batch_size=4,
    warmup_steps=50,
    weight_decay=0.01,
    fp16=(device == 'cuda'),  # fp16 mixed precision requires CUDA; it errors on CPU/MPS
    report_to="none"
)
# Set up trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)
# Train the model
trainer.train()
final_model_path = "../models/recipe-gpt"
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)
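
# Quick usage sketch (an addition, not part of the original pipeline): reload the
# saved artifacts as a text-generation pipeline and sample a recipe. The prompt
# string is purely illustrative; adjust it to match the training data format.
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model=final_model_path,
    tokenizer=tokenizer,
    device=device,
)
print(generator("Recipe for tomato soup:", max_new_tokens=100)[0]["generated_text"])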