# MiniGPT / localscripts / train_custommade.py
# Uploaded by CreatedNull using huggingface_hub (commit b127d35, verified).
import json
import torch.nn as nn
import torch
from model import MiniGPT
from dataset import DataLoader,ChatDataset,SimpleTokenizr
from tqdm import tqdm
# Load the raw training corpus: one JSON object per non-blank line, and only
# its "text" field is kept.
with open("./customchatbot-v1/data/merged_data.jsonl", "r", encoding="utf-8") as f:
    texts = [json.loads(line)["text"] for line in f if line.strip()]

# Fit the tokenizer on the corpus before the dataset (which receives it) is built.
tokenizer = SimpleTokenizr()
tokenizer.train(texts)

# Choose the device first so the model already lives on it when the optimizer
# is constructed (the order recommended by the torch.optim documentation).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): vocab_size is hard-coded to 100; it presumably has to cover the
# vocabulary the tokenizer just learned -- confirm against SimpleTokenizr.
model = MiniGPT(vocab_size=100)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# The dataset reads the same JSONL file and is handed the fitted tokenizer.
dataset = ChatDataset("./customchatbot-v1/data/merged_data.jsonl", tokenizer)
dataloader = DataLoader(dataset, batch_size=100, shuffle=True)
def Train(epochs):
    """Train the module-level ``model`` for ``epochs`` passes over ``dataloader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion``,
    ``dataloader`` and ``device``. Progress and the running mean loss are
    shown on a tqdm bar; nothing is returned.

    Args:
        epochs: Number of full passes to make over ``dataloader``.
    """
    for _ in range(epochs):
        model.train()
        loop = tqdm(enumerate(dataloader), total=len(dataloader), desc="Training")
        tloss = 0.0
        # enumerate() yields (batch_index, batch); the batch itself has to be
        # unpacked, otherwise the model is fed the integer index.
        # NOTE(review): assumes each batch is an (inputs, targets) pair of
        # tensors -- confirm against ChatDataset.
        for step, (inputs, targets) in loop:
            # The model was moved to `device`, so its inputs must be there too.
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            # NOTE(review): if MiniGPT returns per-token logits shaped
            # (batch, seq, vocab), they need flattening to (N, vocab) before
            # CrossEntropyLoss -- confirm against the model's output shape.
            loss = criterion(outputs, targets)
            loss.backward()
            # Apply the gradients; without this step the weights never change.
            optimizer.step()

            tloss += loss.item()
            loop.set_postfix(loss=tloss / (step + 1))
# Run a single training epoch only when executed as a script, so importing
# this module does not start training as a side effect.
if __name__ == "__main__":
    Train(epochs=1)