Spaces:

anuttamac
/

financialrag

Build error

App Files Files Community

Anuttama Chakraborty committed on Mar 8, 2025

Commit

80a3a2e

1 Parent(s): 4376d5f

gradio integration first commit

Browse files

Files changed (4) hide show

RagWithConfidenceScore.py +397 -0
app.py +51 -0
financial_docs/JPMorgan Chase Bank, N.A. 2024 Annual Consolidated Financial Statements - Final.pdf +0 -0
requirements.txt +13 -0

RagWithConfidenceScore.py ADDED Viewed

	@@ -0,0 +1,397 @@

+import os
+import torch
+import pandas as pd
+from transformers import AutoTokenizer, AutoModel, pipeline
+from sentence_transformers import SentenceTransformer, CrossEncoder
+from sklearn.metrics.pairwise import cosine_similarity
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyPDFLoader, CSVLoader
+from langchain_community.vectorstores import FAISS
+from langchain.prompts import PromptTemplate
+from pathlib import Path
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.document_loaders import DirectoryLoader
+class RagWithScore:
+    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2",
+                cross_encoder_name="cross-encoder/ms-marco-MiniLM-L-6-v2",
+                llm_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                documents_dir="financial_docs"):
+        """
+        Initialize the Financial RAG system
+        Args:
+            model_name: The embedding model name
+            cross_encoder_name: The cross-encoder model for reranking
+            llm_name: Small language model for generation
+            documents_dir: Directory containing financial statements
+        """
+        # Initialize embedding model
+        self.embedding_model = HuggingFaceEmbeddings(model_name=model_name)
+        # Initialize cross-encoder for reranking
+        self.cross_encoder = CrossEncoder(cross_encoder_name)
+        # Initialize small language model
+        self.tokenizer = AutoTokenizer.from_pretrained(llm_name)
+        self.llm = pipeline(
+        "text-generation",
+        model=llm_name,
+        tokenizer=self.tokenizer,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        max_new_tokens=512,
+        do_sample=False,  # Set to False for deterministic outputs
+        temperature=0.2,   # Reduce randomness
+        top_p=1.0          # No nucleus sampling
+)
+        # Store paths
+        self.documents_dir = documents_dir
+        self.vector_store = None
+        # Input guardrail rules - sensitive terms/patterns
+        self.guardrail_patterns = [
+            "insider trading",
+            "stock manipulation",
+            "fraud detection",
+            "embezzlement",
+            "money laundering",
+            "tax evasion",
+            "illegal activities"
+        ]
+        # Confidence score thresholds
+        self.confidence_thresholds = {
+            "high": 0.75,
+            "medium": 0.5,
+            "low": 0.3
+        }
+    import os
+    def load_and_process_documents(self):
+        """Load, split and process financial documents"""
+        print("Processing documents to create FAISS index...")
+        loader = DirectoryLoader('./financial_docs', glob="**/*.pdf")
+        documents = loader.load()
+        # Split documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000, chunk_overlap=200
+        )
+        chunks = text_splitter.split_documents(documents)
+        print(len(chunks))
+        # Create and save FAISS vector store
+        self.vector_store = FAISS.from_documents(chunks, embedding=self.embedding_model)
+        self.vector_store.save_local("faiss_index")
+        return self.vector_store
+    def load_or_create_vector_store(self):
+        try:
+            print("Loading existing FAISS index...")
+            self.vector_store = FAISS.load_local("faiss_index", self.embedding_model)
+            print("FAISS index loaded successfully")
+        except Exception as e:
+            print(f"Error loading FAISS index: {e}")
+            print("Creating new FAISS index...")
+            # Code to create a new vector store
+            documents = self.load_and_process_documents()  # Make sure this method exists
+            print("New FAISS index created and saved")
+    def generate_answer(self, query, context):
+        """Generate answer based on retrieved context"""
+        prompt_template = """
+        You are a financial analyst assistant that helps answer questions about company financial statements.
+        Use the provided financial information to give accurate and helpful answers.
+        Context information from financial statements:
+        {context}
+        Question: {query}
+        If the question requires a numerical calculation, show the step-by-step logic behind the calculation before providing the final answer.
+        Ensure that your approach remains consistent in methodology across different queries.
+        Provide a concise answer based only on the given context. You dont have to provide sources. If you don't have enough information to answer,
+        say so clearly.
+        Answer:
+        """
+        # Format context into a single string
+        context_str = "\n\n".join([doc.page_content for doc in context])
+        # Format prompt
+        prompt = prompt_template.format(context=context_str, query=query)
+        # Generate answer using small language model
+        response = self.llm(prompt)[0]['generated_text']
+        # Extract only the generated answer part (after the prompt)
+        answer = response[len(prompt):].strip()
+        return answer
+    def calculate_confidence_score(self, query, retrieved_docs, answer):
+        """A simpler confidence score calculation focused on consistency and LLM confidence"""
+        # Get LLM confidence
+        llm_confidence = self._get_llm_confidence(query, retrieved_docs, answer)
+        # Get consistency score
+        consistency_score = self._measure_answer_consistency(query, retrieved_docs, answer)
+        # Simple weighted average
+        confidence_score = (0.6 * consistency_score) + (0.4 * llm_confidence)
+        return confidence_score
+    # def calculate_confidence_score(self, query, retrieved_docs, answer):
+    #     """
+    #     Calculate confidence score based on multiple factors:
+    #     1. Retrieval similarity scores
+    #     2. Reranking scores
+    #     3. Answer consistency across documents
+    #     4. LLM-based confidence estimation
+    #     Returns:
+    #         float: Confidence score between 0 and 1
+    #     """
+    #     # 1. Calculate average similarity/relevance score from retrieved documents
+    #     retrieval_scores = []
+    #     for doc in retrieved_docs:
+    #         if hasattr(doc, 'metadata') and 'score' in doc.metadata:
+    #             retrieval_scores.append(doc.metadata['score'])
+    #     avg_retrieval_score = sum(retrieval_scores) / len(retrieval_scores) if retrieval_scores else 0.0
+    #     print(f"avg_retrieval_score : {avg_retrieval_score}")
+    #     # 2. Use cross-encoder scores as a stronger relevance signal
+    #     pairs = [(query, doc.page_content) for doc in retrieved_docs]
+    #     cross_encoder_scores = self.cross_encoder.predict(pairs) if pairs else []
+    #     avg_cross_encoder_score = sum(cross_encoder_scores) / len(cross_encoder_scores) if len(cross_encoder_scores) > 0 else 0.0
+    #     print(f"avg_cross_encoder_score : {avg_cross_encoder_score}")
+    #     # 3. Measure answer consistency across documents
+    #     consistency_score = self._measure_answer_consistency(query, retrieved_docs, answer)
+    #     print(f"consistency_score : {consistency_score}")
+    #     # 4. LLM-based confidence estimation
+    #     llm_confidence = self._get_llm_confidence(query, retrieved_docs, answer)
+    #     print(f"llm_confidence : {llm_confidence}")
+    #     # Combine all factors (adjust weights based on what's most important for your use case)
+    #     weights = {
+    #         'retrieval': 0.2,
+    #         'cross_encoder': 0.3,
+    #         'consistency': 0.3,
+    #         'llm_confidence': 0.2
+    #     }
+    #     confidence_score = (
+    #         weights['retrieval'] * avg_retrieval_score +
+    #         # weights['cross_encoder'] * avg_cross_encoder_score +
+    #         weights['consistency'] * consistency_score +
+    #         weights['llm_confidence'] * llm_confidence
+    #     )
+    #     # Normalize to 0-1 range if needed
+    #     total_weight = weights['retrieval'] + weights['consistency'] + weights['llm_confidence']
+    #     confidence_score = confidence_score / total_weight
+    #     # confidence_score = min(max(confidence_score, 0.0), 1.0)
+    #     return confidence_score
+    def _measure_answer_consistency(self, query, retrieved_docs, final_answer):
+        """
+        Measure consistency of the answer across multiple documents
+        Returns:
+            float: Consistency score between 0 and 1
+        """
+        if len(retrieved_docs) <= 1:
+            return 0.5  # Neutral score if we only have one document
+        # Generate individual answers from each document
+        individual_answers = []
+        for doc in retrieved_docs:
+            prompt = f"""
+            Based only on this specific financial information:
+            {doc.page_content}
+            Question: {query}
+            Provide a very brief answer (1-2 sentences maximum):
+            """
+            response = self.llm(prompt, max_new_tokens=100)[0]['generated_text']
+            answer = response[len(prompt):].strip()
+            # print(f"llm response validation : {answer}")
+            individual_answers.append(answer)
+        # Calculate semantic similarity between individual answers
+        # Using embedding model to calculate similarity
+        answer_embeddings = self.embedding_model.embed_documents(individual_answers + [final_answer])
+        # Calculate similarity of each individual answer to the final answer
+        final_answer_embedding = answer_embeddings[-1]  # Last embedding is for the final answer
+        individual_embeddings = answer_embeddings[:-1]  # All other embeddings
+        similarities = []
+        for emb in individual_embeddings:
+            # Calculate cosine similarity
+            dot_product = sum(a * b for a, b in zip(emb, final_answer_embedding))
+            magnitude_a = sum(a * a for a in emb) ** 0.5
+            magnitude_b = sum(b * b for b in final_answer_embedding) ** 0.5
+            similarity = dot_product / (magnitude_a * magnitude_b) if magnitude_a * magnitude_b > 0 else 0
+            similarities.append(similarity)
+        # Average similarity represents consistency
+        avg_similarity = sum(similarities) / len(similarities) if similarities else 0.5
+        return avg_similarity
+    def _get_llm_confidence(self, query, retrieved_docs, answer):
+        """
+        Ask the LLM to estimate its own confidence in the answer
+        Returns:
+            float: LLM confidence score between 0 and 1
+        """
+        # Concatenate retrieved contexts
+        context = "\n\n".join([doc.page_content for doc in retrieved_docs[:2]])  # Limit to top 2 to avoid token limit
+        # Create confidence estimation prompt
+        prompt = f"""
+        You are evaluating the confidence level of an answer to a financial question.
+        Question: {query}
+        Retrieved Context:
+        {context}
+        Generated Answer: {answer}
+        On a scale of 1 to 10, how confident are you that the answer is correct and supported by the retrieved context?
+        Provide only a number between 1 and 10, with 10 being extremely confident and 1 being not confident at all.
+        """
+        # Get confidence score from LLM
+        response = self.llm(prompt, max_new_tokens=10)[0]['generated_text']
+        # Extract numeric confidence score
+        try:
+            # Try to find a number in the response
+            import re
+            numbers = re.findall(r'\b([1-9]|10)\b', response)
+            if numbers:
+                llm_confidence = float(numbers[0]) / 10.0  # Normalize to 0-1
+            else:
+                llm_confidence = 0.5  # Default neutral value
+        except:
+            llm_confidence = 0.5  # Default neutral value
+        return llm_confidence
+    def get_confidence_level(self, confidence_score):
+        """
+        Convert numerical confidence score to a level (high, medium, low)
+        Args:
+            confidence_score: Float between 0 and 1
+        Returns:
+            str: Confidence level ("high", "medium", or "low")
+        """
+        if confidence_score >= self.confidence_thresholds["high"]:
+            return "high"
+        elif confidence_score >= self.confidence_thresholds["medium"]:
+            return "medium"
+        elif confidence_score >= self.confidence_thresholds["low"]:
+            return "low"
+        else:
+            return "very low"
+    def apply_input_guardrail(self, query):
+        """Check if query violates input guardrails"""
+        query_lower = query.lower()
+        for pattern in self.guardrail_patterns:
+            if pattern in query_lower:
+                return True, f"I cannot process queries about {pattern}. Please reformulate your question."
+        return False, ""
+    def retrieve_with_reranking(self, query, top_k=10, rerank_top_k=5):
+        """Retrieve relevant chunks and rerank them with cross-encoder"""
+        # Initial retrieval using embedding similarity
+        docs_and_scores = self.vector_store.similarity_search_with_score(query, k=top_k)
+         # Sort retrieved documents by FAISS similarity score (deterministic sorting)
+        docs_and_scores.sort(key=lambda x: x[1], reverse=True)
+        # Prepare pairs for cross-encoder
+        pairs = [(query, doc.page_content) for doc, _ in docs_and_scores]
+        # Get scores from cross-encoder
+        scores = self.cross_encoder.predict(pairs)
+        # Sort by cross-encoder scores
+        reranked_results = sorted(zip(docs_and_scores, scores), key=lambda x: x[1], reverse=True)
+        # Return the top reranked results
+        return [doc for (doc, _), _ in reranked_results[:rerank_top_k]]
+    def is_financial_question(self,query):
+        financial_keywords = [
+            "finance", "financial", "revenue", "profit", "loss", "EBITDA", "cash flow",
+            "balance sheet", "income statement", "stock", "bond", "investment", "risk",
+            "interest rate", "inflation", "debt", "equity", "valuation", "dividend",
+            "market", "economy", "GDP", "currency", "exchange rate", "tax", "audit",
+            "compliance", "regulation", "SEC", "earnings", "capital", "asset", "liability"
+        ]
+        query_lower = query.lower()
+        return any(keyword in query_lower for keyword in financial_keywords)
+    def answer_question(self, query):
+        """End-to-end pipeline to answer a question with confidence score"""
+        if not self.is_financial_question(query):
+            return {
+                "answer": "This question is outside the scope of financial data. Please ask a question related to finance.",
+                "source_documents": [],
+                "blocked": True,
+                "confidence_score": 0,
+                "confidence_level": "none"
+            }
+        # Apply input guardrail
+        blocked, message = self.apply_input_guardrail(query)
+        if blocked:
+            return {"answer": message, "source_documents": [], "blocked": True, "confidence_score": 0, "confidence_level": "none"}
+        # Retrieve and rerank relevant contexts
+        reranked_docs = self.retrieve_with_reranking(query)
+        # Generate answer
+        answer = self.generate_answer(query, reranked_docs)
+        # Calculate confidence score
+        confidence_score = self.calculate_confidence_score(query, reranked_docs, answer)
+        confidence_level = self.get_confidence_level(confidence_score)
+        return {
+            "answer": answer,
+            "source_documents": reranked_docs,
+            "blocked": False,
+            "confidence_score": confidence_score,
+            "confidence_level": confidence_level
+        }

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import gradio as gr
+from RagWithConfidenceScore import RagWithScore  #
+# Initialize the RAG system (loads the embedding, reranking and generation models)
+rag_system = RagWithScore()
+# Build the FAISS vector store from the documents (this call re-indexes them on every start)
+rag_system.load_and_process_documents()
+# Define the function to handle user queries
+def answer_financial_query(query):
+    # Use the RAG system to answer the question
+    result = rag_system.answer_question(query)
+    # Format the output
+    answer = result["answer"]
+    confidence_score = result["confidence_score"]
+    confidence_level = result["confidence_level"]
+    sources = "\n\n".join([doc.page_content for doc in result["source_documents"]])
+    return answer, f"{confidence_score:.2f}", confidence_level, sources
+    # Return the results
+    # return {
+    #     "Answer": answer,
+    #     "Confidence Score": f"{confidence_score:.2f}",
+    #     "Confidence Level": confidence_level,
+    #     "Source Documents": sources
+    # }
+# Create a Gradio interface
+interface = gr.Interface(
+    fn=answer_financial_query,  # Function to call
+    inputs=gr.Textbox(lines=2, placeholder="Enter your financial query here..."),  # Input component
+    outputs=[  # Output components
+        gr.Textbox(label="Answer"),
+        gr.Textbox(label="Confidence Score"),
+        gr.Textbox(label="Confidence Level")
+        # gr.Textbox(label="Source Documents", lines=10)
+    ],
+    title="Financial RAG System",
+    description="Ask questions about financial data and get answers powered by Retrieval-Augmented Generation (RAG).",
+    examples=[
+        ["What is the current revenue growth rate?"],
+        ["Explain the concept of EBITDA."],
+        ["What are the key financial risks mentioned in the report?"]
+    ]
+)
+# Launch the interface
+interface.launch()

financial_docs/JPMorgan Chase Bank, N.A. 2024 Annual Consolidated Financial Statements - Final.pdf ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+torch
+pandas
+transformers
+sentence-transformers
+scikit-learn
+langchain
+langchain-community
+faiss-cpu
+accelerate>=0.26.0
+unstructured
+unstructured[pdf]
+langchain_huggingface
+gradio