Spaces:

adriiita
/

TeachingAssistant

Sleeping

App Files Files Community

adriiita committed on Nov 20, 2024

Commit

c347d26

1 Parent(s): 55bc5a4

Initial commit

Browse files

Files changed (6) hide show

.gitignore +12 -0
app.py +112 -0
core/note_generator.py +29 -0
core/quiz_generator.py +32 -0
processors/input_processor.py +74 -0
requirements.txt +29 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,12 @@

+__pycache__/
+*.py[cod]
+*$py.class
+.env
+.venv
+env/
+venv/
+ENV/
+*.pdf
+*.docx
+*.txt
+!requirements.txt

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import gradio as gr
+from processors.input_processor import ContentProcessor
+from core.note_generator import NoteGenerator
+from core.quiz_generator import QuizGenerator
+import os
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
+# Verify API key is loaded
+api_key = os.getenv("GROQ_API_KEY")
+if not api_key:
+    # Try getting from HF secret
+    api_key = os.getenv("GROQ_API_KEY")
+    if not api_key:
+        raise ValueError("GROQ_API_KEY not found in environment variables")
+processor = ContentProcessor()
+note_gen = NoteGenerator(api_key)
+quiz_gen = QuizGenerator(api_key)
+def process_pdf(pdf_file, num_questions):
+    if pdf_file is None:
+        return "Please upload a PDF file.", ""
+    # Save uploaded file temporarily
+    temp_path = pdf_file.name
+    # Process content
+    documents = processor.process_pdf(temp_path)
+    content = "\n".join([doc.page_content for doc in documents])
+    # Generate outputs
+    notes = note_gen.generate_notes(content)
+    quiz = quiz_gen.generate_quiz(content, num_questions)
+    return notes, quiz
+def process_youtube(youtube_url, num_questions):
+    if not youtube_url:
+        return "Please enter a YouTube URL.", ""
+    try:
+        documents = processor.process_youtube(youtube_url)
+        content = "\n".join([doc.page_content for doc in documents])
+        notes = note_gen.generate_notes(content)
+        quiz = quiz_gen.generate_quiz(content, num_questions)
+        return notes, quiz
+    except Exception as e:
+        return f"Error processing YouTube URL: {str(e)}", ""
+# Create Gradio interface: a two-tab layout (PDF / YouTube). Each tab has its
+# own input, a question-count slider, a trigger button and two output boxes.
+with gr.Blocks(title="AI Teaching Assistant") as demo:
+    gr.Markdown("# AI Teaching Assistant")
+    gr.Markdown("Generate study notes and quizzes from PDFs or YouTube videos")
+    with gr.Tabs():
+        # --- Tab 1: notes and quiz from an uploaded PDF ---
+        with gr.TabItem("PDF Processing"):
+            # Input row: file picker, question count and the trigger button.
+            with gr.Row():
+                with gr.Column():
+                    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+                    pdf_num_questions = gr.Slider(
+                        minimum=1,
+                        maximum=10,
+                        value=5,
+                        step=1,
+                        label="Number of Quiz Questions"
+                    )
+                    pdf_button = gr.Button("Process PDF")
+            # Output row: notes on the left, quiz on the right.
+            with gr.Row():
+                with gr.Column():
+                    pdf_notes_output = gr.Textbox(label="Generated Notes", lines=10)
+                with gr.Column():
+                    pdf_quiz_output = gr.Textbox(label="Generated Quiz", lines=10)
+            # process_pdf returns (notes, quiz), matching the outputs order.
+            pdf_button.click(
+                fn=process_pdf,
+                inputs=[pdf_input, pdf_num_questions],
+                outputs=[pdf_notes_output, pdf_quiz_output]
+            )
+        # --- Tab 2: notes and quiz from a YouTube URL ---
+        with gr.TabItem("YouTube Processing"):
+            # Input row: URL box, question count and the trigger button.
+            with gr.Row():
+                with gr.Column():
+                    youtube_input = gr.Textbox(label="YouTube URL")
+                    youtube_num_questions = gr.Slider(
+                        minimum=1,
+                        maximum=10,
+                        value=5,
+                        step=1,
+                        label="Number of Quiz Questions"
+                    )
+                    youtube_button = gr.Button("Process YouTube Video")
+            # Output row: notes on the left, quiz on the right.
+            with gr.Row():
+                with gr.Column():
+                    youtube_notes_output = gr.Textbox(label="Generated Notes", lines=10)
+                with gr.Column():
+                    youtube_quiz_output = gr.Textbox(label="Generated Quiz", lines=10)
+            # process_youtube returns (notes, quiz), matching the outputs order.
+            youtube_button.click(
+                fn=process_youtube,
+                inputs=[youtube_input, youtube_num_questions],
+                outputs=[youtube_notes_output, youtube_quiz_output]
+            )
+# Launch the app only when this file is run as a script; share=False means no
+# public share link is requested.
+if __name__ == "__main__":
+    demo.launch(share=False)

core/note_generator.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from langchain_groq import ChatGroq
+from langchain.prompts import PromptTemplate
+class NoteGenerator:
+    def __init__(self, api_key):
+        self.llm = ChatGroq(
+            temperature=0.7,
+            groq_api_key=api_key,
+            model_name="llama2-70b-4096"  # Groq currently supports Llama2, not Llama3
+        )
+        self.note_prompt = PromptTemplate(
+            input_variables=["content"],
+            template="""
+            Create detailed, structured notes from the following content:
+            {content}
+            Format the notes with:
+            1. Main topics and subtopics
+            2. Key points and definitions
+            3. Important examples
+            4. Summary
+            """
+        )
+        self.chain = self.note_prompt | self.llm
+    def generate_notes(self, content):
+        return self.chain.invoke({"content": content}).content

core/quiz_generator.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from langchain_groq import ChatGroq
+from langchain.prompts import PromptTemplate
+class QuizGenerator:
+    def __init__(self, api_key):
+        self.llm = ChatGroq(
+            temperature=0.7,
+            groq_api_key=api_key,
+            model_name="llama2-70b-4096"  # Groq currently supports Llama2, not Llama3
+        )
+        self.quiz_prompt = PromptTemplate(
+            input_variables=["content", "num_questions"],
+            template="""
+            Create {num_questions} multiple-choice questions based on this content:
+            {content}
+            For each question:
+            1. Provide the question
+            2. List 4 possible answers
+            3. Indicate the correct answer
+            4. Add a brief explanation
+            """
+        )
+        self.chain = self.quiz_prompt | self.llm
+    def generate_quiz(self, content, num_questions=5):
+        return self.chain.invoke({
+            "content": content,
+            "num_questions": num_questions
+        }).content

processors/input_processor.py ADDED Viewed

	@@ -0,0 +1,74 @@

+from langchain_community.document_loaders import (
+    PyPDFLoader,
+    UnstructuredWordDocumentLoader,
+    YoutubeLoader
+)
+from langchain_community.document_loaders.generic import GenericLoader
+from langchain_community.document_loaders.parsers.audio import OpenAIWhisperParser
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from youtube_transcript_api import YouTubeTranscriptApi
+import re
+class ContentProcessor:
+    def __init__(self):
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200
+        )
+    def process_pdf(self, file_path):
+        loader = PyPDFLoader(file_path)
+        pages = loader.load_and_split(self.text_splitter)
+        return pages
+    def process_docx(self, file_path):
+        loader = UnstructuredWordDocumentLoader(file_path)
+        pages = loader.load_and_split(self.text_splitter)
+        return pages
+    def process_youtube(self, video_url):
+        # Extract video ID from URL
+        video_id = self._extract_video_id(video_url)
+        if not video_id:
+            raise ValueError("Invalid YouTube URL")
+        try:
+            # Get transcript directly using youtube_transcript_api
+            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+            # Combine all transcript pieces
+            full_transcript = " ".join([entry['text'] for entry in transcript_list])
+            # Create a document-like structure
+            from langchain.schema import Document
+            doc = Document(
+                page_content=full_transcript,
+                metadata={"source": video_url}
+            )
+            # Split the document
+            return self.text_splitter.split_documents([doc])
+        except Exception as e:
+            raise Exception(f"Error getting transcript: {str(e)}")
+    def _extract_video_id(self, url):
+        # Handle different YouTube URL formats
+        patterns = [
+            r'(?:youtube\.com\/watch\?v=|youtu.be\/|youtube.com\/embed\/)([^&\n?]*)',
+            r'(?:youtube\.com\/shorts\/)([^&\n?]*)'
+        ]
+        for pattern in patterns:
+            match = re.search(pattern, url)
+            if match:
+                return match.group(1)
+        return None
+    def process_audio(self, audio_file):
+        loader = GenericLoader(
+            audio_file,
+            parser=OpenAIWhisperParser()
+        )
+        transcript = loader.load()
+        return self.text_splitter.split_documents(transcript)

requirements.txt ADDED Viewed

	@@ -0,0 +1,29 @@

+# Core dependencies
+langchain>=0.1.0
+langchain-openai>=0.0.2
+openai>=1.12.0
+python-dotenv>=1.0.0
+langchain-community>=0.0.1
+# Web UI (imported by app.py; needed for local runs)
+gradio
+# Document processing
+PyPDF2>=3.0.0
+# PyPDFLoader from langchain-community imports pypdf, not PyPDF2
+pypdf>=3.4.0
+unstructured>=0.10.0
+python-docx>=0.8.11
+# YouTube processing
+youtube-transcript-api>=0.6.1
+pytube>=15.0.0
+# Text processing
+tiktoken>=0.5.1
+# Audio processing (optional, for future audio features)
+openai-whisper>=20231117
+# Development tools
+uvicorn>=0.27.0
+python-multipart>=0.0.9
+# Groq dependencies
+groq>=0.4.0
+# Provides langchain_groq.ChatGroq used by core/note_generator.py and core/quiz_generator.py
+langchain-groq>=0.1.0