"""Minimal Gradio chat UI around a Hugging Face causal language model.

The model is loaded once at import time onto CPU in float32, and a
ChatInterface serves replies generated with the model's default
generation settings (greedy/sampling per the checkpoint config).
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): "sand-ai/MAGI-1" is a video-generation model, not a text
# causal LM — loading it through a causal-LM head will likely fail at
# runtime. Confirm the intended chat checkpoint.
model_name = "sand-ai/MAGI-1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# AutoModelForCausalLM resolves the architecture from the checkpoint's
# config instead of hard-coding MistralForCausalLM, which only works if
# the checkpoint really is a Mistral text model.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
)
model.eval()  # inference-only: disable dropout etc.


def chat_fn(message, history):
    """Generate a reply to *message* for gr.ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns, supplied by ChatInterface. Currently
            ignored — each reply is generated from *message* alone.
            TODO: fold history in via the tokenizer's chat template
            for real multi-turn behavior.

    Returns:
        The newly generated text only (the echoed prompt is stripped).
    """
    inputs = tokenizer(message, return_tensors="pt")
    with torch.no_grad():  # no autograd bookkeeping needed for inference
        outputs = model.generate(**inputs, max_new_tokens=200)
    # generate() returns prompt + continuation in one sequence; slice off
    # the prompt tokens so the chat window shows only the model's answer
    # instead of echoing the user's message back.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


demo = gr.ChatInterface(chat_fn)

# Guard the launch so importing this module (e.g. for testing) does not
# start a web server; running the script directly behaves as before.
if __name__ == "__main__":
    demo.launch()