import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load model and tokenizer once at startup
print("Loading model...")
tok = AutoTokenizer.from_pretrained("openai/circuit-sparsity", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "openai/circuit-sparsity",
    trust_remote_code=True,
    torch_dtype="auto",
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
print(f"Model loaded on {model.device}")


def generate(prompt, max_new_tokens, temperature, top_p):
    # Tokenize the prompt and move the input ids to the model's device
    inputs = tok(prompt, return_tensors="pt", add_special_tokens=False)["input_ids"].to(
        model.device
    )
    # Sample a continuation; gradients are unnecessary for inference
    with torch.no_grad():
        out = model.generate(
            inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            return_dict_in_generate=False,
        )
    return tok.decode(out[0], skip_special_tokens=True)
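
# Optional smoke test outside the UI; uncomment to verify generation works
# before wiring up Gradio (the values mirror the slider defaults below):
# print(generate("def fib(n):\n", 32, 0.8, 0.95))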


with gr.Blocks() as demo:
    gr.Markdown("# Circuit Sparsity Model Demo")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                lines=8,
                value="def square_sum(xs):\n    return sum(x * x for x in xs)\n\nsquare_sum([1, 2, 3])\n",
            )
            with gr.Row():
                max_tokens = gr.Slider(1, 256, value=64, step=1, label="Max New Tokens")
                temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top P")
            generate_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output = gr.Textbox(label="Output", lines=12)

    # Wire the button to the generation function
    generate_btn.click(generate, inputs=[prompt, max_tokens, temperature, top_p], outputs=output)
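
# Optional: request queuing can help when several users share one GPU.
# gr.Blocks.queue() is standard Gradio API, but max_size=16 below is only an
# illustrative value, not something the original app sets.
# demo.queue(max_size=16)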

if __name__ == "__main__":
    demo.launch()
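
# To run locally (dependency list is an assumption; pin versions as needed):
#   pip install torch transformers gradio
#   python app.py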