import os
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.runnables import RunnablePassthrough
from langchain.schema.runnable import RunnableLambda

import schemas
import prompts
from prompts import (
    raw_prompt,
    raw_prompt_formatted,
    format_context,
    history_prompt_formatted,
    standalone_prompt_formatted,
    rag_prompt_formatted
)
from data_indexing import DataIndexer 
from transformers import AutoTokenizer

data_indexer = DataIndexer()

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

# llm = HuggingFaceEndpoint(
#     # repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
#     repo_id="deepseek-ai/DeepSeek-R1-0528",
#     huggingfacehub_api_token=os.environ['HF_TOKEN'],
#     max_new_tokens=512,
#     stop_sequences=["<|eot_id|>"],
#     streaming=True,
#     # task="conversational", 
#     task="text-generation",
#     # provider='novita',
#     # temperature=0.7,
# )
# llm = HuggingFaceEndpoint(
#     repo_id="deepseek-ai/DeepSeek-R1-0528",
#     max_new_tokens=512,
#     temperature=0.5,
#     huggingfacehub_api_token=os.environ['HF_TOKEN'],
#     provider="auto",
#     )

llm_endpoint = HuggingFaceEndpoint(
    repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    task="text-generation",
    max_new_tokens=100,
    streaming=True,
    do_sample=False,
    temperature=0.9,
    repetition_penalty=1.03,
    provider="auto", 
)

llm = ChatHuggingFace(llm=llm_endpoint)

def print_and_pass(prompt_output):
    print("=" * 60)
    print("🔍 RAW PROMPT FORMATTED:")
    print("=" * 60)
    print(prompt_output)
    print("=" * 60)
    return prompt_output  # IMPORTANT: Must return the prompt unchanged

simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)


# TODO: create formatted_chain by piping raw_prompt_formatted and the LLM endpoint.
formatted_chain = (raw_prompt_formatted | RunnableLambda(print_and_pass) | llm).with_types(input_type=schemas.UserQuestion)

# TODO: use history_prompt_formatted and HistoryInput to create the history_chain
history_chain = (history_prompt_formatted | RunnableLambda(print_and_pass) | llm).with_types(input_type=schemas.HistoryInput)

# TODO: Let's construct the standalone_chain by piping standalone_prompt_formatted with the LLM
standalone_chain = (standalone_prompt_formatted | llm).with_types(input_type=schemas.HistoryInput)

# summarize_chain = (summarize_propt_formatted | llm) 

import ast

def extract_definitions(source_code):
    """
    Extract top-level function and class definitions from Python code.
    """
    result = []
    try:
        tree = ast.parse(source_code)
        for node in ast.iter_child_nodes(tree):
            if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                snippet = ast.get_source_segment(source_code, node)
                if snippet:
                    result.append(snippet)
    except Exception as e:
        print(f"Failed to parse code: {e}")
    return result

import re

def clean_code_text(code_text):
    """
    Remove comments and excessive blank lines for brevity.
    """
    # Remove multiline docstrings and comments
    code_text = re.sub(r'"""(.*?)"""', '', code_text, flags=re.DOTALL)
    code_text = re.sub(r"'''(.*?)'''", '', code_text, flags=re.DOTALL)
    
    # Remove inline comments
    code_text = re.sub(r'#.*', '', code_text)
    
    # Remove excessive whitespace
    code_text = re.sub(r'\n\s*\n+', '\n\n', code_text)
    
    return code_text.strip()


def safe_format_context(search_results):
    try:
        cleaned_results = []
        for result in search_results:
            if isinstance(result, str):
                # Optionally: extract relevant functions/classes
                code_parts = extract_definitions(result)
                for part in code_parts:
                    cleaned = clean_code_text(part)
                    cleaned_results.append(cleaned)
        return format_context(cleaned_results)
    except Exception as e:
        print(f"Error formatting context: {str(e)}")
    return "No relevant context found."


input_1 = RunnablePassthrough.assign(new_question=standalone_chain)

# input_1_beta = RunnablePassThrough.assign(new_context=summarize_chain)

def extract_question_text(new_question):
    if hasattr(new_question, "content"):
        return new_question.content
    return str(new_question)

# summarize_context = {
#     'context': lambda x: safe_format_context(data_indexer.search(extract_question_text(x['new_question']))),
#     'standalone_question': lambda x: extract_question_text(x['new_question']),
# }

input_2 = {
    'context': lambda x: safe_format_context(data_indexer.search(extract_question_text(x['new_question']))),
    'standalone_question': lambda x: extract_question_text(x['new_question']),
}

input_to_rag_chain = input_1 | input_2

# TODO: use input_to_rag_chain, rag_prompt_formatted, 
# HistoryInput and the LLM to build the rag_chain.
rag_chain = (input_to_rag_chain | RunnableLambda(print_and_pass) | rag_prompt_formatted | RunnableLambda(print_and_pass) | llm).with_types(input_type=schemas.RagInput)

# TODO:  Implement the filtered_rag_chain. It should be the 
# same as the rag_chain but with hybrid_search = True.

input_2_hybrid_search = {
    'context': lambda x: safe_format_context(data_indexer.search(extract_question_text(x['new_question']), hybrid_search=True)),
    'standalone_question': lambda x: x['new_question']
}

filtered_rag_chain = (input_1 | input_2_hybrid_search | rag_prompt_formatted | RunnableLambda(print_and_pass)| llm ).with_types(input_type=schemas.RagInput)