#!pip install -U "transformers==4.40.0" --upgrade
#!pip install -i https://pypi.org/simple/ bitsandbytes
#!pip install accelerate

import transformers
import torch

# Hub id of the checkpoint to load (the name indicates a bitsandbytes 4-bit build).
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

# Text-generation pipeline shared by the smoke test and the chat UI further down.
# The model is requested in float16 with 4-bit loading and reduced CPU memory use
# while the weights are being read.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
)

# One-shot smoke test: run a single system + user exchange through the model
# and print only the newly generated text.
messages = [
    {
        "role": "system",
        "content": "You are an interviewer testing the user whether he can be a good manager or not. When the user says hi there!, i want you to begin",
    },
    {
        "role": "user",
        "content": "hi there!",
    },
]

# Generation stops on either the tokenizer's EOS token or the "<|eot_id|>" token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

# Render the transcript to a plain prompt string (not token ids) that ends with
# the generation prompt, so the model continues as the assistant.
prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)

# "generated_text" is sliced by the prompt length so only the continuation is shown.
print(outputs[0]["generated_text"][len(prompt):])

#!pip install gradio

import gradio as gr

# Transcript shared by add_text() and generate(). It starts with the
# interviewer system prompt followed by the "hi there!" user turn.
messages = [
    {
        "role": "system",
        "content": "You are an interviewer testing the user whether he can be a good manager or not.  When the user says hi there!, i want you to begin",
    },
    {
        "role": "user",
        "content": "hi there!",
    },
]

def add_text(history, text):
    """Record a submitted user message and show it in the chat window.

    Args:
        history: Current chatbot rows, each a ``[user_text, bot_text]`` pair.
        text: The text the user submitted from the textbox.

    Returns:
        A 2-tuple ``(history, '')``: a new history list with a
        ``[text, '']`` row appended, and an empty string that clears
        the textbox.
    """
    global messages  # module-level transcript; rebound below
    # The row is a list, not a tuple: generate() fills in the bot half with
    # ``history[-1][1] += char``, which needs a mutable row.
    history = history + [[text, '']]
    messages = messages + [{"role": 'user', 'content': text}]
    return history, ''

def generate(history):
    """Generate the assistant's reply and stream it into the last chat row.

    The module-level ``messages`` transcript is rendered with the tokenizer's
    chat template and passed to the module-level ``pipeline``. The reply is
    appended to ``messages`` as an assistant turn, so later prompts contain
    the whole conversation. It is then revealed one character at a time in
    the bot half of ``history[-1]``.

    Args:
        history: Chatbot rows, each a ``[user_text, bot_text]`` pair. The
            last row is the one whose bot half gets filled in.

    Yields:
        ``history`` after each character of the reply has been appended.
    """
    global messages  # rebound below when the reply is recorded

    if not history:
        # No row to write the reply into, so there is nothing to answer.
        return

    tokenizer = pipeline.tokenizer
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Slice by the prompt length to keep only the continuation.
    response_msg = outputs[0]["generated_text"][len(prompt):]

    # Keep the assistant turn in the transcript. Without it, the next prompt
    # would hold consecutive user turns and the model would never see its
    # own earlier replies.
    messages = messages + [{"role": "assistant", "content": response_msg}]

    # Normalise the target row to a mutable [user, bot] list so the in-place
    # update works even if the row arrived as a tuple or with a None bot half.
    user_text, bot_text = history[-1]
    history[-1] = [user_text, bot_text or ""]

    for char in response_msg:
        history[-1][1] += char
        yield history

# Chat UI: a chatbot pane above a single-line textbox.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")

    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        )

    # On submit, add_text runs first (unqueued) to add the user's message and
    # clear the textbox. generate then streams the reply into the chatbot.
    txt.submit(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[chatbot, txt],
        queue=False,
    ).then(
        fn=generate,
        inputs=[chatbot],
        outputs=chatbot,
    )

# Enable the queue, then start the app with debug mode on.
demo.queue().launch(debug=True)
# NOTE(review): as seen in this file, the triple-quoted string below is a bare
# expression statement, so none of the code inside it runs. It holds an
# alternative version of the chat app that streams completions from the Groq
# client through gr.ChatInterface instead of the local transformers pipeline.
# If it is ever re-enabled: chat_groq is declared `async def` but iterates the
# stream with a plain `for` loop -- TODO confirm that is intended for the
# client object being used.
'''

import os
from groq import Groq 
import gradio as gr
    
client = Groq(api_key = os.environ.get("GROQ_API_KEY"), )

system_prompt = {
                "role": "system",
                "content":
                "You are a useful assistant. I would appreciate it if you reply with efficient answers. "
                }

async def chat_groq(message, history):
    
    messages = [system_prompt]
    
    for msg in history:
        messages.append({"role": "user", "content": str(msg[0])})
        messages.append({"role": "assistant", "content": str(msg[1])})
        
    messages.append({"role": "user", "content": str (message)})
    
    response_content = ''
    
    stream = client.chat.completions.create(
                                            model="llama3-8b-8192",
                                            messages=messages,
                                            max_tokens=1024,
                                            temperature=1.3,
                                            stream=True
                                        )

    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            response_content += chunk. choices[0].delta.content 
        yield response_content

with gr. Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(chat_groq,
                     clear_btn=None, 
                     undo_btn=None, 
                     retry_btn=None,
                    )

demo.queue()
demo.launch()'''