Commit ae692a1 by sravan
Parent(s): bd9af43
first working api's

Files changed:
- callbacks.py +1 -1
- chains.py +33 -11
- data_indexing.py +1 -1
- main.py +29 -9
- prompts.py +15 -4
- test.db +0 -0
callbacks.py
CHANGED
@@ -16,7 +16,7 @@ class LogResponseCallback(BaseCallbackHandler):
         # TODO: The function on_llm_end is going to be called when the LLM stops sending
         # the response. Use the crud.add_message function to capture that response.
         type = 'AI'
-        user_data = crud.get_or_create(db, self.user_request.username)
+        user_data = crud.get_or_create(self.db, self.user_request.username)
         user_id = user_data.user_id
         timestamp = datetime.now()
         message = outputs.generations[0][0].text  # answer from the prompt message
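For context, a minimal sketch of how the lines above could sit inside the full on_llm_end handler. The constructor, the method signature, and the schemas.MessageBase name and fields are assumptions made for illustration; only crud.get_or_create, crud.add_message, the type = 'AI' marker, and the outputs.generations access come from this commit.

from datetime import datetime
from langchain_core.callbacks import BaseCallbackHandler

import crud
import schemas


class LogResponseCallback(BaseCallbackHandler):
    def __init__(self, user_request, db):
        self.user_request = user_request
        self.db = db

    def on_llm_end(self, outputs, **kwargs):
        # Persist the model's final answer once the LLM stops streaming.
        user_data = crud.get_or_create(self.db, self.user_request.username)
        message = schemas.MessageBase(            # hypothetical schema name
            message=outputs.generations[0][0].text,
            type='AI',
            timestamp=datetime.now(),
            user_id=user_data.user_id,
        )
        crud.add_message(self.db, message, username=self.user_request.username)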
chains.py
CHANGED
@@ -1,5 +1,5 @@
 import os
-from langchain_huggingface import HuggingFaceEndpoint
+from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langchain_core.runnables import RunnablePassthrough
 
 import schemas
@@ -17,19 +17,41 @@ from transformers import AutoTokenizer
 
 data_indexer = DataIndexer()
 
-tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3…
-
-llm = HuggingFaceEndpoint(
-    …
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+
+# llm = HuggingFaceEndpoint(
+#     # repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
+#     repo_id="deepseek-ai/DeepSeek-R1-0528",
+#     huggingfacehub_api_token=os.environ['HF_TOKEN'],
+#     max_new_tokens=512,
+#     stop_sequences=["<|eot_id|>"],
+#     streaming=True,
+#     # task="conversational",
+#     task="text-generation",
+#     # provider='novita',
+#     # temperature=0.7,
+# )
+# llm = HuggingFaceEndpoint(
+#     repo_id="deepseek-ai/DeepSeek-R1-0528",
+#     max_new_tokens=512,
+#     temperature=0.5,
+#     huggingfacehub_api_token=os.environ['HF_TOKEN'],
+#     provider="auto",
+# )
+
+llm_endpoint = HuggingFaceEndpoint(
+    repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+    task="text-generation",
+    max_new_tokens=100,
     streaming=True,
-    …
+    do_sample=False,
+    temperature=0.9,
+    repetition_penalty=1.03,
+    provider="auto",
 )
 
+llm = ChatHuggingFace(llm=llm_endpoint)
+
 simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)
 
 # TODO: create formatted_chain by piping raw_prompt_formatted and the LLM endpoint.
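A minimal sketch of the formatted_chain that the TODO above asks for, assuming raw_prompt_formatted is the PromptTemplate built by prompts.format_prompt; only the piping and .with_types pattern is taken from the simple_chain line in this diff, and the sample question is illustrative.

formatted_chain = (raw_prompt_formatted | llm).with_types(input_type=schemas.UserQuestion)

# Hypothetical local check: stream message chunks from the ChatHuggingFace wrapper.
if __name__ == "__main__":
    for chunk in formatted_chain.stream({"question": "What is this Space about?"}):
        print(chunk.content, end="", flush=True)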
data_indexing.py
CHANGED
@@ -80,7 +80,7 @@ class DataIndexer:
             # values = None
 
             # TODO: create a list of unique identifiers for each element in the batch with the uuid package.
-            vector_ids = [uuid.uuid4() for _ in batch]
+            vector_ids = [str(uuid.uuid4()) for _ in batch]
 
             # TODO: create a list of dictionaries representing the metadata. Capture the text data
             # with the "text" key, and make sure to capture the rest of the doc.metadata.
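A sketch of the metadata step described by the TODO above, assuming each element of batch is a LangChain Document with .page_content and .metadata; the variable names besides vector_ids are illustrative.

metadata = [
    {"text": doc.page_content, **doc.metadata}  # raw text plus the document's own metadata
    for doc in batch
]
# The string ids pair with the embeddings and metadata at upsert time,
# e.g. list(zip(vector_ids, values, metadata)) for a Pinecone-style index.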
main.py
CHANGED
@@ -1,6 +1,6 @@
 from langchain_core.runnables import Runnable
 from langchain_core.callbacks import BaseCallbackHandler
-from fastapi import FastAPI, Request, Depends
+from fastapi import FastAPI, Request, Depends, HTTPException
 from sse_starlette.sse import EventSourceResponse
 from langserve.serialization import WellKnownLCSerializer
 from typing import List
@@ -36,13 +36,27 @@ def get_db():
         db.close()
 
 
+# async def generate_stream(input_data: schemas.BaseModel, runnable: Runnable, callbacks: List[BaseCallbackHandler]=[]):
+#     for output in runnable.stream(input_data.dict(), config={"callbacks": callbacks}):
+#         data = WellKnownLCSerializer().dumps(output).decode("utf-8")
+#         yield {'data': data, "event": "data"}
+#     yield {"event": "end"}
+
 async def generate_stream(input_data: schemas.BaseModel, runnable: Runnable, callbacks: List[BaseCallbackHandler]=[]):
-    for …
-    …
+    for chunk in runnable.stream(input_data.dict(), config={"callbacks": callbacks}):
+        # ChatHuggingFace returns message chunks with content attribute
+        if hasattr(chunk, 'content'):
+            content = chunk.content
+        else:
+            content = str(chunk)
+
+        if content:  # Only yield non-empty content
+            yield {'data': content, "event": "data"}
     yield {"event": "end"}
 
 
+
+
 @app.post("/simple/stream")
 async def simple_stream(request: Request):
     data = await request.json()
@@ -56,11 +70,13 @@ async def formatted_stream(request: Request):
     try:
         data = await request.json()
         user_question = schemas.UserQuestion(**data['input'])
-        …
+        output = EventSourceResponse(
             generate_stream(
                 input_data = user_question,
                 runnable = formatted_chain )
         )
+        # print(output.generations[0][0].text)
+        return output
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
     except Exception as e:
@@ -103,7 +119,11 @@ async def history_stream(request: Request, db: Session = Depends(get_db)):
 
     _ = crud.add_message(db, add_message, username = user_request.username)
     # chat history contains: [{ message, type, timestamp}]
-    …
+
+    output = EventSourceResponse(generate_stream(history_input, history_chain))
+    LogResponseCallback.on_llm_end(outputs = output)
+
+    return output
     # raise NotImplemented
 
 
@@ -119,7 +139,7 @@ async def rag_stream(request: Request, db: Session = Depends(get_db)):
     data = await request.json()
     user_request = schemas.UserRequest(**data['input'])
     messages = crud.get_user_chat_history(db, user_request.username)
-    chat_history = …
+    chat_history = messages
 
     history_input = schemas.HistoryInput( chat_history = format_chat_history(chat_history), question=user_request.question)
     ## adding messgae to message database
@@ -149,14 +169,14 @@ async def filtered_rag_stream(request: Request, db: Session = Depends(get_db)):
     # - We create an instance of HistoryInput by using format_chat_history.
     # - We use the history input within the filtered rag chain.
     data = await request.json()
-    user_request = models.UserRequest(…
+    user_request = models.UserRequest(**dat['input'])
 
     messages = db.Query(
         Message.message,
         Message.type,
         Message.timestamp
     ).filter(Message.user_id == user_request.username)
-    chat_history = …
+    chat_history = messages
 
     history_input = schemas.HistoryInput(
         chat_history = format_chat_history(chat_history),
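For quick testing, a hypothetical client for the /simple/stream endpoint above. The host and port are assumptions, the {"input": {...}} request envelope is taken from the formatted_stream handler, and httpx is used only as an example SSE consumer.

import httpx

payload = {"input": {"question": "What does this Space do?"}}
with httpx.stream("POST", "http://localhost:8000/simple/stream", json=payload, timeout=None) as response:
    for line in response.iter_lines():
        # sse_starlette emits "event: ..." / "data: ..." pairs; print only the payloads.
        if line.startswith("data:"):
            print(line[len("data:"):].strip())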
prompts.py
CHANGED
@@ -1,7 +1,7 @@
 from langchain_core.prompts import PromptTemplate
 from typing import List
 import models
-
+from transformers import AutoTokenizer
 
 def format_prompt(prompt) -> PromptTemplate:
     # TODO: format the input prompt by using the model specific instruction template
@@ -12,10 +12,20 @@ def format_prompt(prompt) -> PromptTemplate:
     {prompt}<|eot_id|>
     <|start_header_id|>assistant<|end_header_id|>
     """
+    raw_template = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "{{prompt}}"},
+    ]
+    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+    formatted_template = tokenizer.apply_chat_template(
+        raw_template,
+        tokenize=False,
+        add_generation_prompt=True
+    )
 
-    prompt_template = PromptTemplate(
+    prompt_template = PromptTemplate.from_template(
         # input_variables=["question"], the variables will be auto detected by langchain package
-        …
+        formatted_template
     )
     # TODO: return a langchain PromptTemplate
     return prompt_template
@@ -26,7 +36,7 @@ def format_chat_history(messages: List[models.Message]):
     # the list of Message into a text of chat history.
     chat_history = ""
     for msg in messages:
-        chat_history += msg
+        chat_history += '{}:{}'.format(msg.type, msg.message)
         chat_history += "\n---\n"
     # combined all messages from the list for sending it to the model prompt.
     return chat_history
@@ -48,6 +58,7 @@ def format_context(docs: List[str]):
 
 raw_prompt = "{question}"
 
+
 # TODO: Create the history_prompt prompt that will capture the question and the conversation history.
 # The history_prompt needs a {chat_history} placeholder and a {question} placeholder.
 history_prompt: str = """
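To see what format_prompt now builds, the snippet below prints the chat-template expansion on its own. The rendered markers are approximate and depend on the tokenizer version; note also that the doubled braces in "{{prompt}}" pass through the template verbatim and are later read by PromptTemplate as an escaped literal "{prompt}".

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
rendered = tokenizer.apply_chat_template(
    [{"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "{{prompt}}"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(rendered)
# Roughly:
# <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
# You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
#
# {{prompt}}<|eot_id|><|start_header_id|>assistant<|end_header_id|>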
test.db
CHANGED
Binary files a/test.db and b/test.db differ