Danielrahmai1991 committed · verified
Commit 40ab755 · 1 Parent(s): 0ab0668

Update app.py

Files changed (1)
  1. app.py +116 -28
app.py CHANGED
@@ -1,3 +1,4 @@
+
 import gradio as gr
 
 from langchain_community.llms import LlamaCpp
@@ -7,20 +8,16 @@ from langchain_core.callbacks import StreamingStdOutCallbackHandler
 from langchain.retrievers import TFIDFRetriever
 from langchain.chains import RetrievalQA
 from langchain.memory import ConversationBufferMemory
-from langchain_community.chat_models import ChatLlamaCpp
+
 
 callbacks = [StreamingStdOutCallbackHandler()]
 print("creating ll started")
-M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
 llm = LlamaCpp(
-    model_path=M_NAME,
-    n_batch=8,
-    temperature=0.85,
-    max_tokens=256,
-    top_p=0.95,
-    top_k = 10,
+    model_path="taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf",
+    temperature=0.75,
+    max_tokens=100,
+    top_p=4,
     callback_manager=callbacks,
-    n_ctx=2048,
     verbose=True, # Verbose is required to pass to the callback manager
 )
 # print("creating ll ended")
@@ -31,29 +28,120 @@ llm = LlamaCpp(
 
 
 def greet(question, model_type):
-    print("prompt started ")
     print(f"question is {question}")
-    template = """You are the Finiantial expert:
-
-    ### Instruction:
-    {question}
-
-    ### Input:
-
-
-    ### Response:
-    """
-    print("test1")
-    prompt = PromptTemplate(template=template, input_variables=["question"])
-    print("test2")
-    llm_chain_model = LLMChain(prompt=prompt, llm=llm)
-    print("test3")
+    if model_type == "With memory":
+        retriever = TFIDFRetriever.from_texts(
+            ["Finatial AI"])
+
+
+        template = """You are the Finiantial expert:
+        {history}
+        {context}
+        ### Instruction:
+        {question}
+
+        ### Input:
+
+
+        ### Response:
+        """
+
+        prompt1 = PromptTemplate(
+            input_variables=["history", "context", "question"],
+            template=template,
+        )
+
+        llm_chain_model = RetrievalQA.from_chain_type(
+            llm=llm,
+            chain_type='stuff',
+            retriever=retriever,
+            verbose=False,
+            chain_type_kwargs={
+                "verbose": False,
+                "prompt": prompt1,
+                "memory": ConversationBufferMemory(
+                    memory_key="history",
+                    input_key="question"),
+            }
+        )
+        print("creating model created")
+    else:
+        template = """You are the Finiantial expert:
+        ### Instruction:
+        {question}
+        ### Input:
+        ### Response:
+        """
+
+        prompt = PromptTemplate(template=template, input_variables=["question"])
+
+        llm_chain_model = LLMChain(prompt=prompt, llm=llm)
     out_gen = llm_chain_model.run(question)
-    print("test4")
     print(f"out is: {out_gen}")
     return out_gen
 
 demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
-    ["Without memory", "With memory"], label="Memory status", info="With using memory, the output will be slow but strong"
+    ["With memory", "Without memory"], label="Memory status", info="With using memory, the output will be slow but strong"
     ),], outputs="text")
-demo.launch(debug=True, share=True)
+demo.launch(debug=True, share=True)
+
+
+# import gradio as gr
+
+# from langchain_community.llms import LlamaCpp
+# from langchain.prompts import PromptTemplate
+# from langchain.chains import LLMChain
+# from langchain_core.callbacks import StreamingStdOutCallbackHandler
+# from langchain.retrievers import TFIDFRetriever
+# from langchain.chains import RetrievalQA
+# from langchain.memory import ConversationBufferMemory
+# from langchain_community.chat_models import ChatLlamaCpp
+
+# callbacks = [StreamingStdOutCallbackHandler()]
+# print("creating ll started")
+# M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
+# llm = LlamaCpp(
+#     model_path=M_NAME,
+#     n_batch=8,
+#     temperature=0.85,
+#     max_tokens=256,
+#     top_p=0.95,
+#     top_k = 10,
+#     callback_manager=callbacks,
+#     n_ctx=2048,
+#     verbose=True, # Verbose is required to pass to the callback manager
+# )
+# # print("creating ll ended")
+
+
+
+
+
+
+
+# def greet(question, model_type):
+#     print("prompt started ")
+#     print(f"question is {question}")
+#     template = """You are the Finiantial expert:
+
+#     ### Instruction:
+#     {question}
+
+#     ### Input:
+
+
+#     ### Response:
+#     """
+#     print("test1")
+#     prompt = PromptTemplate(template=template, input_variables=["question"])
+#     print("test2")
+#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
+#     print("test3")
+#     out_gen = llm_chain_model.run(question)
+#     print("test4")
+#     print(f"out is: {out_gen}")
+#     return out_gen
+
+# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
+#     ["Without memory", "With memory"], label="Memory status", info="With using memory, the output will be slow but strong"
+#     ),], outputs="text")
+# demo.launch(debug=True, share=True)
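
Note on the model setup: the commit trims the LlamaCpp constructor to a few sampling arguments (temperature, max_tokens, top_p) and drops n_batch, top_k and n_ctx. The annotated sketch below restates that construction with comments on what each argument controls. It is a sketch only: it assumes llama-cpp-python is installed and the GGUF file sits next to app.py, and the comments describe the usual llama.cpp sampling semantics rather than anything stated in the commit.

from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import StreamingStdOutCallbackHandler

callbacks = [StreamingStdOutCallbackHandler()]  # stream tokens to stdout as they are generated

llm = LlamaCpp(
    model_path="taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf",  # local GGUF file
    temperature=0.75,            # lowered from 0.85, slightly less random sampling
    max_tokens=100,              # completion length cap, down from 256
    top_p=4,                     # nucleus-sampling mass; values of 1.0 or more keep every token, i.e. no nucleus filtering
    callback_manager=callbacks,  # passing a plain list still works, though newer LangChain prefers callbacks=[...]
    verbose=True,                # required so the callback handlers receive events
)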
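
Note on the new memory path: the larger change is the "With memory" branch in greet(). Instead of a bare LLMChain, the question goes through a RetrievalQA chain whose stuff prompt exposes {history}, {context} and {question}, with a ConversationBufferMemory feeding {history}. Below is a minimal, self-contained sketch of that pattern. It is an illustration, not the app's code: FakeListLLM stands in for the LlamaCpp model so the snippet runs without the GGUF file, the sample questions are made up, and it assumes langchain, langchain_community and scikit-learn (needed by TFIDFRetriever) are installed.

from langchain_community.llms import FakeListLLM
from langchain.prompts import PromptTemplate
from langchain.retrievers import TFIDFRetriever
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory

# Stand-in model; app.py uses the LlamaCpp instance here.
llm = FakeListLLM(responses=["Stocks carry market risk.", "Diversification spreads that risk."])

# Tiny TF-IDF index; app.py indexes the single string "Finatial AI".
retriever = TFIDFRetriever.from_texts(["Financial AI"])

# The 'stuff' chain fills {context} with retrieved documents and {question}
# with the query; {history} is injected by the memory object below.
prompt = PromptTemplate(
    input_variables=["history", "context", "question"],
    template=(
        "You are the financial expert:\n"
        "{history}\n"
        "{context}\n"
        "### Instruction:\n{question}\n"
        "### Response:\n"
    ),
)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        "prompt": prompt,
        "memory": ConversationBufferMemory(memory_key="history", input_key="question"),
    },
)

# Each call appends the question/answer pair to the buffer, so the second
# call sees the first exchange through {history}.
print(chain.run("Is investing in stocks risky?"))
print(chain.run("How can I reduce that risk?"))

The "Without memory" dropdown option keeps the original LLMChain path, so only users who pick "With memory" pay the extra retrieval and history cost that the dropdown's info string warns about.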