# app.py
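"""Gradio front end for a RAG-powered code assistant.

Pipeline: embed the user's question, retrieve the top-k most relevant code
chunks from a local FAISS index, then send the chunks plus the question to an
OpenAI-compatible chat-completions endpoint for analysis.
"""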
import gradio as gr
import requests
import os
from retrive_docs import load_faiss_index_and_metadata, retrieve_relevant_chunks, print_results

# --- CONFIGURATION ---
INDEX_PATH = "code_faiss.index"
METADATA_PATH = "code_metadata.json"
CHUNKS_JSON_PATH = "code_chunks.json"
EMBEDDING_MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"
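# NOTE: this must be the same embedding model that was used to build
# code_faiss.index; querying with a different model produces vectors in a
# different space and makes the similarity search meaningless.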
# TOP_K has been removed from here and is now a user input

# --- SYSTEM PROMPT ---
# This prompt is crucial for guiding the LLM's behavior.
SYSTEM_PROMPT = """
You are an expert software developer and technical analyst. Your task is to help a user understand a codebase and debug potential issues.

You have been provided with a user's question and a set of the most relevant code chunks retrieved from the codebase based on their query.

Your mission is to synthesize this information and provide a clear, accurate, and helpful response.

Follow these instructions carefully:

1.  **Analyze the Goal:** First, understand the user's primary goal. Are they reporting a bug, asking for an explanation, or trying to understand how something works?

2.  **Base Your Answer on Provided Context:** Your primary source of truth is the retrieved code chunks. Ground your entire analysis in the code provided. Do not invent functionality or assume the existence of code that is not present in the context.

3.  **Directly Address the Query:** Directly answer the user's question. If the context contains a definitive answer (e.g., a warning message about a known bug), state it clearly and quote the relevant code.

4.  **Synthesize and Hypothesize:** If the answer is not immediately obvious, synthesize information from multiple chunks. Form a hypothesis about the cause of the bug or the functionality in question, explaining your reasoning by referencing specific lines of code.

5.  **Provide Actionable Recommendations:** Conclude with clear, actionable advice. This could be a suggested code change, a command to run, or a recommendation to avoid a specific feature based on the evidence in the code.

6.  **Acknowledge Limitations:** If the provided code chunks are insufficient to fully answer the question, state this clearly. Explain what additional information would be needed.

7.  **Structure Your Response:** Format your response using Markdown for readability. Use code blocks for code snippets and bold text to highlight key findings.

8.  **Cite Your Sources at the End:** To build trust, list where each piece of information came from (e.g., the relevant line or code chunk), but only cite sources that actually appear in the provided context.
"""

# --- LOAD DATA ON STARTUP ---
print("--- Initializing Application ---")
# Check if all required files exist before launching the UI
if not all(os.path.exists(p) for p in [INDEX_PATH, METADATA_PATH, CHUNKS_JSON_PATH]):
    print("ERROR: One or more required data files are missing.")
    print("Please make sure 'code_faiss.index', 'code_metadata.json', and 'code_chunks.json' are in the same directory.")
    # Gradio doesn't have a clean way to exit, so we'll show an error in the UI
    index, metadata, chunks_dict = None, None, None
else:
    index, metadata, chunks_dict = load_faiss_index_and_metadata(
        index_path=INDEX_PATH,
        metadata_path=METADATA_PATH,
        chunks_json_path=CHUNKS_JSON_PATH
    )
print("--- Initialization Complete ---")


def get_expert_analysis(api_key, api_url, llm_model_name, top_k, user_query):
    """Orchestrate the RAG pipeline: retrieve relevant chunks, then generate an answer."""
    if not all([api_key, api_url, llm_model_name, user_query]):
        return "Error: API Key, API URL, Model Name, and Question are all required."

    if index is None:
        return "Error: FAISS index and data could not be loaded. Please check the console for errors and restart."

    # 1. RETRIEVAL: Get relevant code chunks
    print("\n--- Starting Retrieval ---")
    retrieved_results = retrieve_relevant_chunks(
        query=user_query,
        model_name=EMBEDDING_MODEL_NAME,
        index=index,
        metadata=metadata,
        chunks_dict=chunks_dict,
        top_k=top_k # Use the value from the UI
    )
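    # retrieve_relevant_chunks is assumed to return a (possibly empty) list of
    # scored chunk records; an empty result short-circuits before the LLM call.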

    if not retrieved_results:
        return "Could not find any relevant code chunks for your query. Please try rephrasing it."

    context_str = print_results(retrieved_results)
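    # Despite its name, print_results also returns the formatted results as a
    # string, which doubles here as the context block for the LLM prompt.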
    
    print("--- Starting Generation ---")
    final_user_prompt = f"""
{context_str}

--- User's Question ---
{user_query}

--- Analysis and Answer ---
Based on the provided code context, here is the analysis of your question:
"""

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": llm_model_name,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": final_user_prompt}
        ]
    }
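    # The payload follows the OpenAI-compatible chat-completions schema, which
    # the default OpenRouter endpoint accepts. Optional sampling parameters
    # such as "temperature" or "max_tokens" could be added here; they are
    # omitted so the provider's defaults apply.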

    try:
        print(f"Sending request to LLM: {llm_model_name} at {api_url}")
        # json=payload serializes the body and sets the Content-Type header;
        # the timeout keeps a stalled request from hanging the UI indefinitely.
        response = requests.post(api_url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        
        response_json = response.json()
        llm_answer = response_json['choices'][0]['message']['content']
        print("--- Generation Complete ---")
        
        full_response = f"## Expert Analysis\n\n{llm_answer}\n\n---\n\n### Retrieved Context\n\nThis analysis was based on the following retrieved code chunks:\n\n{context_str}"
        return full_response

    except requests.exceptions.RequestException as e:
        print(f"Error calling LLM API: {e}")
        return f"Error: Failed to connect to the LLM API. Please check your API URL, API key, and network connection.\n\nDetails: {e}"
    except (KeyError, IndexError, ValueError) as e:  # ValueError: body was not valid JSON
        print(f"Error parsing LLM response: {e}")
        return f"Error: Received an unexpected response from the LLM API. Please check the model name and try again.\n\nResponse: {response.text}"


# --- GRADIO UI ---
with gr.Blocks(theme=gr.themes.Soft(), title="RAG Code Assistant") as demo:
    gr.Markdown("# RAG-Powered Code Assistant")
    gr.Markdown("This tool uses a local code database (FAISS) and a Large Language Model (LLM) to answer questions about your codebase.")

    with gr.Row():
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                label="API Key",
                type="password",
                placeholder="Enter your API key here"
            )
            api_url_input = gr.Textbox(
                label="API Endpoint URL",
                value="https://openrouter.ai/api/v1/chat/completions",
                placeholder="Enter the chat completions endpoint URL"
            )
            llm_model_input = gr.Textbox(
                label="LLM Model Name",
                value="moonshotai/kimi-k2:free",
                placeholder="e.g., moonshotai/kimi-k2:free"
            )
            # New Dropdown for Top-K selection
            top_k_input = gr.Dropdown(
                label="Number of Chunks to Retrieve (Top K)",
                choices=[5, 6, 7, 8, 9, 10],
                value=10,
            )
            user_query_input = gr.Textbox(
                label="Your Question / Bug Report",
                lines=8,
                placeholder="e.g., 'When I use cache=True, my RAM usage explodes. Why?'"
            )
            submit_button = gr.Button("Get Analysis", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("## Analysis Result")
            output_text = gr.Markdown()

    # Update the inputs list for the click event
    submit_button.click(
        fn=get_expert_analysis,
        inputs=[api_key_input, api_url_input, llm_model_input, top_k_input, user_query_input],
        outputs=output_text
    )



if __name__ == "__main__":
    demo.launch(share=True)
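
# To run locally (assumes the FAISS index, metadata, and chunk files from the
# ingestion step are in this directory, plus the dependencies of retrive_docs,
# e.g., faiss and the embedding library):
#   pip install gradio requests
#   python app.py
# share=True asks Gradio to create a temporary public link in addition to the
# local server; set share=False to keep the app local-only.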