"""Gradio chat demo that serves a FinGPT LoRA checkpoint through a text-generation pipeline."""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

base = "meta-llama/Llama-2-7b-chat-hf"
adapter = "FinGPT/fingpt-mt_llama2-7b_lora"

tokenizer = AutoTokenizer.from_pretrained(base)
# NOTE(review): the adapter repo id is passed straight to from_pretrained while
# `base` is only used for the tokenizer. Presumably this relies on the
# transformers PEFT integration to resolve the LoRA's base weights -- confirm
# that `peft` is installed and that the adapter was trained on `base`.
model = AutoModelForCausalLM.from_pretrained(adapter, device_map="auto")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def chat(user_input, history):
    """Generate one assistant reply and return it with the updated transcript.

    Args:
        user_input: The text the user just submitted.
        history: Plain-text transcript of earlier turns ("" on the first turn).

    Returns:
        A ``(reply, new_history)`` tuple: the newly generated text and the
        transcript extended with this user turn and the reply.
    """
    turn = f"User: {user_input}"
    transcript = f"{history}\n{turn}" if history else turn
    # End the prompt with the assistant cue so the model continues as the
    # assistant, matching the "Assistant: ..." format stored in the transcript.
    prompt = f"{transcript}\nAssistant:"
    # return_full_text=False: by default the pipeline echoes the prompt in
    # "generated_text", which would repeat the whole transcript in every reply
    # and duplicate it inside the stored history.
    result = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        return_full_text=False,
    )
    reply = result[0]["generated_text"].strip()
    return reply, f"{prompt} {reply}"


def respond(message, chat_history, history):
    """Handle a textbox submit: run one generation and update the UI state.

    Args:
        message: Text submitted in the textbox.
        chat_history: Current value of the chatbot component (list of
            ``(user, assistant)`` pairs, or ``None`` when empty).
        history: Plain-text transcript held in ``gr.State``.

    Returns:
        A ``(chat_history, history)`` tuple with the new turn appended to both.
    """
    # Call chat() exactly once: sampling is enabled, so a second call would
    # produce a different reply than the one shown to the user.
    reply, new_history = chat(message, history)
    return (chat_history or []) + [(message, reply)], new_history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    txt = gr.Textbox(placeholder="Ask a finance question...")
    state = gr.State("")
    # The chatbot component is listed as an input so the handler receives the
    # displayed conversation as a list; the component object itself cannot be
    # concatenated with a list.
    txt.submit(respond, [txt, chatbot, state], [chatbot, state])

demo.launch()