ppgodzilla committed
Commit fb59d40 · verified · parent: 248f20c

Update app.py

Files changed (1): app.py (+8 -14)
app.py CHANGED
@@ -1,15 +1,7 @@
-from fastapi import FastAPI, Request
-from pydantic import BaseModel
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import os
+import gradio as gr
 
-app = FastAPI()
-
-class UserInput(BaseModel):
-    prompt: str
-
-# Load model and tokenizer
 model_name = "NousResearch/Nous-Hermes-llama2-13b"
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 model = AutoModelForCausalLM.from_pretrained(
@@ -21,14 +13,16 @@ model = AutoModelForCausalLM.from_pretrained(
 
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-@app.post("/chat")
-async def chat(data: UserInput):
+def chat(prompt):
     output = generator(
-        data.prompt,
+        prompt,
         max_new_tokens=512,
         do_sample=True,
         temperature=0.8,
-        top_k=50,
+        top_k=60,
         top_p=0.95
     )
-    return {"response": output[0]["generated_text"]}
+    return output[0]["generated_text"]
+
+demo = gr.Interface(fn=chat, inputs="text", outputs="text")
+demo.launch()
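
For reference, a sketch of app.py as it stands after this commit, assembled from the + and context lines above. The arguments to from_pretrained(...) fall between the two hunks and are not shown in the diff, so they are marked with a placeholder comment here rather than guessed at.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

model_name = "NousResearch/Nous-Hermes-llama2-13b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,  # assumed first argument; the call's arguments (old lines 16-20) are elided by the diff
    # ... remaining loading kwargs not shown in the diff ...
)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

def chat(prompt):
    # Sampled generation: up to 512 new tokens, temperature 0.8,
    # top-k 60 (raised from 50 in this commit), nucleus top-p 0.95
    output = generator(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.8,
        top_k=60,
        top_p=0.95
    )
    return output[0]["generated_text"]

# A plain-text Gradio UI replaces the removed FastAPI /chat endpoint
demo = gr.Interface(fn=chat, inputs="text", outputs="text")
demo.launch()

With default settings, demo.launch() serves the UI on http://127.0.0.1:7860, so unlike the removed FastAPI version no separate ASGI server or JSON client is needed to try the model.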