File size: 924 Bytes
e7020af
 
 
3c4bdb3
 
e7020af
 
 
 
 
 
 
3c4bdb3
e7020af
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from unsloth import FastLanguageModel
from peft import PeftModel
from transformers import AutoTokenizer
import gradio as gr

# Checkpoint identifiers: the base model, and the Hugging Face repository
# holding the LoRA adapter weights that are applied on top of it.
base_model_name = "unsloth/Llama-3.2-3B-Instruct"
lora_model_name = "oskaralf/lora_model"

# Load the base model and its tokenizer with 4-bit weights and a
# 2048-token maximum sequence length.
base_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_name,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

# Wrap the base model with the LoRA adapters, then hand the combined model
# to Unsloth's for_inference() before it is used for generation.
model = PeftModel.from_pretrained(base_model, lora_model_name)
FastLanguageModel.for_inference(model)

def chatbot(input_text):
    """Generate the model's reply to a single user message.

    Args:
        input_text: Raw prompt text entered by the user.

    Returns:
        The decoded text of the newly generated tokens (at most 64), with
        special tokens removed and without the prompt echoed back.
    """
    # Place the inputs on whatever device the model lives on instead of
    # assuming a device literally named "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Forward everything the tokenizer produced (notably the attention mask,
    # not just input_ids) so generate() does not have to guess the mask.
    outputs = model.generate(**inputs, max_new_tokens=64)

    # The returned sequence starts with the prompt tokens; drop them so the
    # user sees only the model's continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

# Build a minimal text-in / text-out Gradio UI around chatbot() and serve it.
iface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Chatbot",
)
iface.launch()