# -*- coding: utf-8 -*-
"""app

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1nQCqeHSZ0ZKPv9Kw2wdR9hrIeUz7TQK1

%%capture
%pip install gradio PyMuPDF python-docx langchain langchain-community langchain-chroma langchain-huggingface sentence-transformers chromadb huggingface_hub langchain-groq langchain-core langchain-text-splitters
"""
import gradio as gr
import os
import uuid
import re
import fitz  # PyMuPDF for PDFs
import docx  # python-docx for Word files

from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document

# Set API Key (ensure it's stored securely in an environment variable)
groq_api_key = os.getenv("GROQ_API_KEY")
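# Optional sanity check (illustrative sketch, not part of the original flow): every
# ChatGroq call below fails at request time if the key is missing, so failing fast
# here can surface the problem earlier.
# if not groq_api_key:
#     raise RuntimeError("GROQ_API_KEY is not set; export it before launching the app.")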
# Initialize Embeddings and ChromaDB
try:
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
except ImportError:
    # Fallback if sentence-transformers is not available
    print("sentence-transformers not available, trying alternative model...")
    embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

vectorstore = Chroma(embedding_function=embedding_model)
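# Illustrative sketch only: the vectorstore above and the `Document` import are not
# used anywhere else in this script. If retrieval over the resume were desired, the
# extracted text could be chunked and indexed roughly like this (the helper name and
# chunk sizes are assumptions, not part of the original app):
from langchain_text_splitters import RecursiveCharacterTextSplitter

def index_resume_text(resume_text):
    """Sketch: split extracted resume text into chunks and add them to the vectorstore."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(resume_text)
    docs = [Document(page_content=chunk) for chunk in chunks]
    vectorstore.add_documents(docs)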
# Short-term memory for the LLM (currently unused)
chat_memory = []
# Enhanced Resume Prompt with Attentive Reasoning Query (ARQ)
resume_prompt_aqr = """
You are a professional resume generator. Your task is to create a customized resume STRICTLY based on the provided resume text and job scope.

JOB SCOPE: {job_scope}

RESUME TEXT: {resume_text}

[ATTENTION: SOURCE_GROUNDING_PHASE]
FIRST, carefully analyze the original resume text and identify ALL available information:
- Extract personal details (name, contact info, location)
- Identify ALL work experiences (companies, positions, dates, responsibilities)
- Extract ALL education details (degrees, institutions, dates, certifications)
- List ALL technical skills, tools, and technologies mentioned
- Note ALL projects, achievements, and quantifiable results
- Identify any gaps or missing information

[ATTENTION: JOB_ALIGNMENT_PHASE]
SECOND, analyze the job scope requirements:
- Map required skills to candidate's actual skills from resume
- Identify experience gaps between job requirements and candidate background
- Note which qualifications directly match and which need creative framing
- DO NOT invent qualifications that don't exist in the resume

[ATTENTION: CONTENT_VALIDATION_PHASE]
THIRD, for each section you plan to include, verify source evidence:
- Personal Info: Must exactly match resume text
- Experience: Each job must be in original resume with correct dates
- Education: Each degree/certification must be in original resume
- Skills: Each skill must be explicitly mentioned in resume
- Achievements: Must be derived from quantifiable results in resume

[ATTENTION: RESUME_CONSTRUCTION_PHASE]
FOURTH, construct the resume following this structure. FOR EACH SECTION, explicitly note your source evidence:

Name and Contact Information
[Source: Personal details from resume lines X-X]

Professional Title
[Source: Most relevant role based on job scope and experience]

Summary
[Source: Synthesized from overall experience, skills, and achievements]

Core Competencies
[Source: Direct skills extraction from resume]

Professional Experience
[For each position: Source from specific resume sections]

Education & Certifications
[Source: Direct extraction from education section]

Technical Skills
[Source: Comprehensive list from skills mentioned]

Notable Achievements
[Source: Quantifiable results from experience section]

Projects & AI Innovations
[Source: Project descriptions from resume]

[ATTENTION: HALLUCINATION_PREVENTION]
CRITICAL RULES:
1. NEVER invent companies, positions, or dates not in resume
2. NEVER add skills, technologies, or tools not mentioned
3. NEVER create fictional projects or achievements
4. If information is missing, acknowledge gaps rather than inventing
5. Use qualifying language ("exposed to", "familiar with") for borderline cases
6. Mark inferences clearly vs direct facts

FINAL OUTPUT: Generate the customized resume below:
"""
# Function to clean the AI response by removing unwanted formatting
def clean_response(response):
    """Removes <think> tags, source markers, asterisks, and markdown formatting."""
    cleaned_text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
    # Remove source markers before the generic bracket strip below; otherwise the
    # brackets are already gone and the marker text would be left behind.
    cleaned_text = re.sub(r"\[Source:.*?\]", "", cleaned_text)
    cleaned_text = re.sub(r"(\*\*|\*|\[|\])", "", cleaned_text)
    cleaned_text = re.sub(r"^##+\s*", "", cleaned_text, flags=re.MULTILINE)
    cleaned_text = re.sub(r"\\", "", cleaned_text)
    cleaned_text = re.sub(r"---", "", cleaned_text)
    return cleaned_text.strip()
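# Illustrative example (hypothetical model output), roughly:
#   clean_response("<think>plan</think>**Jane Doe**\n[Source: resume P1L1]\nSenior Analyst")
# returns approximately:
#   "Jane Doe\n\nSenior Analyst"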
# Enhanced function with ARQ for resume generation
def generate_resume_with_aqr(job_scope, resume_text, temperature):
    # Initialize the chat model with a capped temperature for more factual output
    chat_model = ChatGroq(
        model_name="meta-llama/llama-4-scout-17b-16e-instruct",
        api_key=groq_api_key,
        temperature=min(temperature, 0.8)  # Cap temperature for factual tasks
    )
    prompt = resume_prompt_aqr.format(job_scope=job_scope, resume_text=resume_text)
    response = chat_model.invoke([HumanMessage(content=prompt)])
    cleaned_response = clean_response(response.content)
    return cleaned_response
# Function to extract text from a PDF with page/line numbering for source tracking
def extract_text_from_pdf(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        text_lines = []
        for page_num, page in enumerate(doc):
            page_text = page.get_text("text")
            lines = page_text.split('\n')
            for i, line in enumerate(lines):
                if line.strip():  # Only include non-empty lines
                    text_lines.append(f"[P{page_num+1}L{i+1}] {line.strip()}")
        return "\n".join(text_lines) if text_lines else "No extractable text found."
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
# Function to extract text from Word files with paragraph numbering
def extract_text_from_docx(docx_path):
    try:
        doc = docx.Document(docx_path)
        text_lines = []
        for para_num, paragraph in enumerate(doc.paragraphs):
            if paragraph.text.strip():
                text_lines.append(f"[Para{para_num+1}] {paragraph.text.strip()}")
        return "\n".join(text_lines) if text_lines else "No extractable text found."
    except Exception as e:
        return f"Error extracting text from Word document: {str(e)}"
# Function to process uploaded documents safely
def process_document(file):
    try:
        file_extension = os.path.splitext(file.name)[-1].lower()
        if file_extension == ".pdf":
            content = extract_text_from_pdf(file.name)
        elif file_extension == ".docx":
            content = extract_text_from_docx(file.name)
        else:
            return "Error: Unsupported file type. Please upload a PDF or DOCX file."
        return content
    except Exception as e:
        return f"Error processing document: {str(e)}"
# Function to perform a hallucination check on the generated resume
def check_hallucinations(original_text, generated_resume, job_scope):
    """Use a separate LLM call to verify factual accuracy."""
    verification_prompt = f"""
ORIGINAL RESUME TEXT:
{original_text}

GENERATED RESUME:
{generated_resume}

JOB SCOPE:
{job_scope}

[ATTENTION: FACT_VERIFICATION]
Analyze the generated resume and identify ANY information that cannot be directly verified in the original resume text.
Check for:
1. Personal details not in original (name, contact, etc.)
2. Companies, positions, or employment dates not mentioned
3. Education credentials not listed in original
4. Skills, tools, or technologies not explicitly stated
5. Projects, achievements, or quantifiable results not present
6. Any other invented information

[ATTENTION: VERIFICATION_REPORT]
Provide a concise report:
- Number of potential hallucinations found
- Specific examples of unsupported claims
- Overall accuracy rating (1-10)
- Recommendations for improvement
"""
    verification_model = ChatGroq(
        model_name="meta-llama/llama-4-scout-17b-16e-instruct",
        api_key=groq_api_key,
        temperature=0.1  # Very low temperature for factual verification
    )
    response = verification_model.invoke([HumanMessage(content=verification_prompt)])
    return response.content
# Enhanced function to handle resume customization with ARQ and verification
def customize_resume_with_verification(job_scope, resume_file, temperature, enable_verification=True):
    # Extract and process the resume text
    resume_text = process_document(resume_file)
    if "Error" in resume_text:
        return resume_text, "Verification skipped due to document error."

    # Generate the resume using ARQ
    generated_resume = generate_resume_with_aqr(job_scope, resume_text, temperature)

    # Perform hallucination verification if enabled
    verification_report = ""
    if enable_verification:
        verification_report = check_hallucinations(resume_text, generated_resume, job_scope)

    return generated_resume, verification_report
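# Illustrative sketch of driving the pipeline without the Gradio UI (commented out so the
# module stays side-effect free; the file path is hypothetical). process_document() only
# needs an object exposing a `.name` path, which is what Gradio's File component provides.
#
# from types import SimpleNamespace
# resume, report = customize_resume_with_verification(
#     job_scope="Business Analyst with AI/ML focus",
#     resume_file=SimpleNamespace(name="my_resume.pdf"),  # hypothetical path
#     temperature=0.3,
#     enable_verification=True,
# )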
# Enhanced Gradio interface with verification
def resume_customizer():
    with gr.Blocks() as app:
        gr.Markdown("# AI Resume Customizer with Attentive Reasoning")
        gr.Markdown("Generate hallucination-free customized resumes using Attentive Reasoning Query")

        with gr.Row():
            with gr.Column():
                job_scope_input = gr.Textbox(
                    label="Enter Job Scope or Requirement",
                    placeholder="e.g., Business Analyst with AI/ML focus",
                    info="Be specific about required skills and experience"
                )
                resume_input = gr.File(
                    label="Upload Resume (PDF or DOCX)",
                    file_types=[".pdf", ".docx"]
                )
                gr.Markdown("**Upload your original resume for customization**")
                temperature_slider = gr.Slider(
                    label="Creativity Control (Lower = More Factual)",
                    minimum=0.1,
                    maximum=1.5,
                    value=0.5,
                    step=0.1,
                    info="0.1-0.5: Highly factual, 0.6-1.0: Balanced, 1.1-1.5: Creative"
                )
                verification_checkbox = gr.Checkbox(
                    label="Enable Hallucination Verification",
                    value=True,
                    info="Additional check for factual accuracy"
                )
                generate_btn = gr.Button("Generate Verified Resume", variant="primary")
            with gr.Column():
                resume_output = gr.Textbox(
                    label="Customized Resume (Attentive Reasoning Generated)",
                    lines=15,
                    info="Resume generated with attentive reasoning to prevent hallucinations"
                )
                verification_output = gr.Textbox(
                    label="Hallucination Verification Report",
                    lines=8,
                    info="Detailed analysis of factual accuracy"
                )

        # Examples section
        with gr.Accordion("Example Job Scopes for Testing", open=False):
            gr.Markdown("""
**Business Analyst (AI/ML Focus):**
```
Seeking Business Analyst with 5+ years experience in AI/ML projects,
proficiency in Python, SQL, and data analysis tools. Experience with
machine learning model deployment and stakeholder management.
```

**Data Scientist:**
```
Data Scientist role requiring expertise in statistical analysis,
machine learning algorithms, and big data technologies. Experience
with TensorFlow/PyTorch and cloud platforms preferred.
```

**AI Engineer:**
```
AI Engineer position focusing on developing and deploying machine
learning models. Required skills: Python, ML frameworks, MLOps,
and experience with LLM applications.
```
""")

        generate_btn.click(
            customize_resume_with_verification,
            inputs=[job_scope_input, resume_input, temperature_slider, verification_checkbox],
            outputs=[resume_output, verification_output]
        )

        gr.Markdown("""
### How Attentive Reasoning Reduces Hallucinations:
**1. Source Grounding**: Every claim is traced back to the original resume text
**2. Multi-Phase Validation**: Systematic checking before content generation
**3. Explicit Evidence Tracking**: Source references for all information
**4. Gap Acknowledgment**: Missing information is noted rather than invented
**5. Verification Layer**: Optional second LLM call for factual accuracy check

**Expected Hallucination Reduction**: 70-85% compared to standard prompting
""")

    app.launch(share=True)
# Launch the enhanced resume customizer
if __name__ == "__main__":
    resume_customizer()