oskaralf committed
Commit a03b322 · 1 Parent(s): 1785e1f
Files changed (1)
app.py  +38 -38
app.py CHANGED
@@ -1,42 +1,42 @@
-import torch
-
-import torch
-torch.cuda.is_available = lambda: False  # Force torch to disable CUDA
-
-from unsloth import FastLanguageModel
-
-
-# Force CPU mode
-device = "cpu"
-
-# Load the base model in CPU mode
-base_model_name = "unsloth/Llama-3.2-3B-Instruct"
-base_model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=base_model_name,
-    max_seq_length=2048,
-    dtype="float32",     # Use float32 for CPU
-    load_in_4bit=False   # Disable 4-bit quantization for CPU
-)
-base_model.to(device)
-
-# Apply LoRA adapters in CPU mode
-from peft import PeftModel
-
-lora_model_name = "oskaralf/lora_model"  # Replace with your LoRA model path
-model = PeftModel.from_pretrained(base_model, lora_model_name)
-model.to(device)
-
-# Prepare for inference in CPU mode
-FastLanguageModel.for_inference(model)
-
-# Gradio interface
 import gradio as gr
-
-def chatbot(input_text):
-    inputs = tokenizer(input_text, return_tensors="pt").to(device)
-    outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=64)
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+MODEL_NAME = "oskaralf/model_merged"
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+def generate_response(prompt, max_length=128, temperature=0.7, top_p=0.9):
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_length=max_length,
+        temperature=temperature,
+        top_p=top_p,
+        pad_token_id=tokenizer.eos_token_id
+    )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
-iface = gr.Interface(fn=chatbot, inputs="text", outputs="text", title="Chatbot")
-iface.launch()
+def interactive_app():
+    with gr.Blocks() as app:
+        gr.Markdown("# Coding Task Generator")
+        gr.Markdown("Generate coding tasks by entering a prompt below.")
+
+        prompt = gr.Textbox(label="Enter your prompt:", placeholder="e.g., Create a Python task involving recursion.")
+        max_length = gr.Slider(label="Max Length", minimum=16, maximum=512, value=128, step=16)
+        temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7, step=0.1)
+        top_p = gr.Slider(label="Top-p Sampling", minimum=0.1, maximum=1.0, value=0.9, step=0.1)
+        generate_button = gr.Button("Generate Task")
+
+        output = gr.Textbox(label="Generated Task", lines=10)
+
+        generate_button.click(
+            generate_response,
+            inputs=[prompt, max_length, temperature, top_p],
+            outputs=output
+        )
+
+    return app
+
+if __name__ == "__main__":
+    interactive_app().launch()
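
For a quick local check of the new inference path, here is a minimal sketch (not part of the commit) that loads the merged model and calls generate directly, without launching the Gradio UI. It assumes the oskaralf/model_merged repo is downloadable and fits in memory; it also uses max_new_tokens instead of max_length so the cap applies only to newly generated tokens, and sets do_sample=True, since temperature and top_p only take effect when sampling is enabled.

# Sketch: exercise the merged model outside the Gradio app (assumes the repo is accessible).
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "oskaralf/model_merged"
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

prompt = "Create a Python task involving recursion."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=128,   # cap only the generated tokens, not prompt + completion
    do_sample=True,       # required for temperature/top_p to have an effect
    temperature=0.7,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))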