"""Interview-practice chatbot: Llama-3 8B Instruct (4-bit) behind a Gradio UI.

The script loads a text-generation pipeline, prints one sample reply to the
seed conversation, and then launches a Gradio chat interface in which the
model plays an interviewer.
"""

# Notebook setup (run these once in a notebook cell):
#!pip install -U "transformers==4.40.0" --upgrade
#!pip install -i https://pypi.org/simple/ bitsandbytes
#!pip install accelerate
#!pip install gradio

import gradio as gr
import torch
import transformers

model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

SYSTEM_PROMPT = (
    "You are an interviewer testing the user whether he can be a good manager or not. "
    "When the user says hi there!, i want you to begin"
)

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
)


def _seed_messages():
    """Return a fresh seed conversation: the system prompt plus the opening user turn."""
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": """hi there!"""},
    ]


def _generate_reply(conversation):
    """Run the pipeline on ``conversation`` and return only the newly generated text.

    Args:
        conversation: List of ``{"role": ..., "content": ...}`` dicts, passed
            to the tokenizer's chat template.

    Returns:
        The generated text with the prompt prefix sliced off.
    """
    prompt = pipeline.tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # "generated_text" starts with the prompt itself; keep only what follows it.
    return outputs[0]["generated_text"][len(prompt):]


# One-shot check: print the model's reply to the seed conversation.
messages = _seed_messages()
print(_generate_reply(messages))

# Conversation state shared by the Gradio callbacks below. It is a module-level
# global, so it is shared by every caller of add_text/generate.
# NOTE(review): the seed already ends with a user turn ("hi there!") that has
# no assistant reply, so the first message typed in the UI follows another
# user turn. Kept as in the original -- confirm this is intended.
messages = _seed_messages()


def add_text(history, text):
    """Append the submitted user text to the chat display and to ``messages``.

    Args:
        history: Current chatbot value, a list of [user, assistant] pairs.
        text: The text submitted from the textbox.

    Returns:
        A ``(history, "")`` tuple: the extended chat history and an empty
        string that clears the textbox.
    """
    global messages  # the conversation list is defined at module level
    # Use a mutable list pair (not a tuple) because generate() fills in the
    # assistant slot in place.
    history = history + [[text, ""]]
    messages = messages + [{"role": "user", "content": text}]
    return history, ""


def generate(history):
    """Generate the assistant reply and stream it into the last chat entry.

    Args:
        history: Chat history whose last entry is the pending
            [user_text, ""] pair added by ``add_text``.

    Yields:
        The chat history after each character of the reply is appended.
    """
    global messages
    response_msg = _generate_reply(messages)
    # Record the assistant turn so that later prompts contain the model's own
    # earlier replies; without this the model never sees what it said before.
    messages = messages + [{"role": "assistant", "content": response_msg}]
    # The full reply is already available; yielding per character only makes
    # the UI display it incrementally.
    for char in response_msg:
        history[-1][1] += char
        yield history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        )
    # First record the user's message (and clear the textbox), then stream
    # the model's reply into the chatbot.
    txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        generate,
        inputs=[chatbot],
        outputs=chatbot,
    )

demo.queue()
demo.launch(debug=True)

# Disabled alternative implementation that uses the Groq API instead of a
# local model. It is kept as an unused string literal and never executed.
'''
import os
from groq import Groq
import gradio as gr

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. I would appreciate it if you reply with efficient answers. "
}

async def chat_groq(message, history):
    messages = [system_prompt]
    for msg in history:
        messages.append({"role": "user", "content": str(msg[0])})
        messages.append({"role": "assistant", "content": str(msg[1])})
    messages.append({"role": "user", "content": str(message)})

    response_content = ''
    stream = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
        max_tokens=1024,
        temperature=1.3,
        stream=True
    )
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            response_content += chunk.choices[0].delta.content
        yield response_content

with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(
        chat_groq,
        clear_btn=None,
        undo_btn=None,
        retry_btn=None,
    )

demo.queue()
demo.launch()
'''