deenasun commited on
Commit
dbca390
·
1 Parent(s): 288f6d2

set up app.py with fully integrated text to ASL video flow

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .DS_Store
2
+ .env
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
__pycache__/asl_gloss.cpython-311.pyc ADDED
Binary file (14.3 kB). View file
 
__pycache__/asl_glosser.cpython-311.pyc ADDED
Binary file (12.1 kB). View file
 
__pycache__/document_parser.cpython-311.pyc ADDED
Binary file (12.4 kB). View file
 
__pycache__/document_parsing.cpython-311.pyc ADDED
Binary file (12.4 kB). View file
 
__pycache__/document_parsing.cpython-313.pyc ADDED
Binary file (10.6 kB). View file
 
__pycache__/document_to_gloss.cpython-311.pyc ADDED
Binary file (12.1 kB). View file
 
__pycache__/document_to_gloss.cpython-313.pyc ADDED
Binary file (10.6 kB). View file
 
__pycache__/vectorizer.cpython-311.pyc ADDED
Binary file (7.07 kB). View file
 
__pycache__/video_gen.cpython-311.pyc ADDED
Binary file (29.8 kB). View file
 
__pycache__/word2vec.cpython-311.pyc ADDED
Binary file (7.56 kB). View file
 
app.py CHANGED
@@ -1,7 +1,176 @@
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from document_to_gloss import DocumentToASLConverter
2
+ from vectorizer import Vectorizer
3
+ from video_gen import create_multi_stitched_video
4
  import gradio as gr
5
+ import asyncio
6
+ import re
7
+ import boto3
8
+ import os
9
+ from botocore.config import Config
10
+ from dotenv import load_dotenv
11
+ import requests
12
+ import tempfile
13
 
14
# Load environment variables from .env file
load_dotenv()

# Load R2/S3 environment secrets
R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
R2_ACCESS_KEY_ID = os.environ.get("R2_ACCESS_KEY_ID")
R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")

# Validate that required environment variables are set.
# Failing fast at import time beats a confusing error on the first request.
if not all([R2_ENDPOINT, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY]):
    raise ValueError("Missing required R2 environment variables. Please check your .env file.")

# Gradio UI metadata and widgets.
title = "AI-SL"
description = "Convert text to ASL!"
article = ("<p style='text-align: center'><a href='https://github.com/deenasun' "
           "target='_blank'>Deena Sun on Github</a></p>")
inputs = gr.File(label="Upload Document (pdf, txt, docx, or epub)")
outputs = [
    gr.JSON(label="Processing Results"),
    gr.Video(label="ASL Video Output")
]

# Module-level pipeline components, created once per process and shared
# across requests.
asl_converter = DocumentToASLConverter()
vectorizer = Vectorizer()
session = boto3.session.Session()

# S3-compatible client pointed at the R2 endpoint.
# NOTE(review): 's3v4' signing appears to be what this endpoint expects —
# confirm against the bucket provider's docs.
s3 = session.client(
    service_name='s3',
    region_name='auto',
    endpoint_url=R2_ENDPOINT,
    aws_access_key_id=R2_ACCESS_KEY_ID,
    aws_secret_access_key=R2_SECRET_ACCESS_KEY,
    config=Config(signature_version='s3v4')
)
+
49
def clean_gloss_token(token):
    """Normalize one raw gloss token.

    Removes square brackets and CR/LF characters, collapses runs of
    whitespace into single spaces, trims the ends, and lowercases the
    result. May return an empty string (callers should skip those).
    """
    # Strip bracket and newline characters outright.
    stripped = re.sub(r'[\[\]\n\r]', '', token)
    # Collapse internal whitespace, trim, then lowercase in one chain.
    return re.sub(r'\s+', ' ', stripped).strip().lower()
59
+
60
async def parse_vectorize_and_search(file):
    """Document -> gloss -> sign-video pipeline.

    Converts the uploaded document to ASL gloss, cleans each gloss token,
    queries the vector store for a matching sign video per token, downloads
    the matches, and stitches them into one clip.

    Args:
        file: Upload from gr.File.
            NOTE(review): passed straight to convert_document — confirm it
            accepts the Gradio file object (vs. a plain path).

    Returns:
        Tuple of (summary dict with status/videos/video_count/gloss/
        cleaned_tokens, path to the stitched video or None).
    """
    print(file)
    gloss = asl_converter.convert_document(file)
    print("ASL", gloss)

    # Split by spaces and clean each token
    gloss_tokens = gloss.split()
    cleaned_tokens = []

    for token in gloss_tokens:
        cleaned = clean_gloss_token(token)
        if cleaned:  # Only add non-empty tokens
            cleaned_tokens.append(cleaned)

    print("Cleaned tokens:", cleaned_tokens)

    videos = []  # remote URLs of matched sign videos (for the summary dict)
    video_files = []  # Store local file paths for stitching

    for g in cleaned_tokens:
        print(f"Processing {g}")
        try:
            # Vector similarity lookup of the token against the sign database.
            result = await vectorizer.vector_query_from_supabase(query=g)
            print("result", result)
            if result.get("match", False):
                video_url = result["video_url"]
                videos.append(video_url)

                # Download the video
                local_path = download_video_from_url(video_url)
                if local_path:
                    video_files.append(local_path)

        except Exception as e:
            # A failing token is skipped so one bad lookup doesn't abort
            # the whole document.
            print(f"Error processing {g}: {e}")
            continue

    # Create stitched video if we have multiple videos
    stitched_video_path = None
    if len(video_files) > 1:
        try:
            print(f"Creating stitched video from {len(video_files)} videos...")
            stitched_video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
            create_multi_stitched_video(video_files, stitched_video_path)
            print(f"Stitched video created: {stitched_video_path}")
        except Exception as e:
            print(f"Error creating stitched video: {e}")
            stitched_video_path = None
    elif len(video_files) == 1:
        # If only one video, just use it directly
        stitched_video_path = video_files[0]

    # Clean up individual video files after stitching
    for video_file in video_files:
        if video_file != stitched_video_path:  # Don't delete the final output
            cleanup_temp_video(video_file)

    return {
        "status": "success",
        "videos": videos,
        "video_count": len(videos),
        "gloss": gloss,
        "cleaned_tokens": cleaned_tokens
    }, stitched_video_path
124
+
125
# Create a synchronous wrapper for Gradio
def parse_vectorize_and_search_sync(file):
    """Blocking entry point: runs the async pipeline to completion so the
    (synchronous) Gradio callback machinery can call it directly."""
    pipeline = parse_vectorize_and_search(file)
    return asyncio.run(pipeline)
128
+
129
def download_video_from_url(video_url):
    """
    Download a video from a public R2 URL.

    Args:
        video_url: Publicly reachable URL of the video.

    Returns:
        Local path of the downloaded .mp4 temp file, or None on any
        failure (connection error, HTTP error status, filesystem error).
    """
    try:
        # Create a temporary file with .mp4 extension; close it right away
        # so the write below can reopen it by path on any platform.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        temp_path = temp_file.name
        temp_file.close()

        # Download the video. The timeout (connect, read) is the fix: the
        # original call had none, so a stalled server could hang the Gradio
        # worker indefinitely.
        print(f"Downloading video from: {video_url}")
        response = requests.get(video_url, stream=True, timeout=(10, 60))
        response.raise_for_status()

        # Save to temporary file in chunks to bound memory use.
        with open(temp_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Video downloaded to: {temp_path}")
        return temp_path

    except Exception as e:
        # Best-effort contract: callers test for None rather than catching.
        print(f"Error downloading video: {e}")
        return None
156
+
157
def cleanup_temp_video(file_path):
    """Best-effort removal of a temporary video file.

    Accepts None, an empty string, or an already-missing path without
    complaint; any OS error is printed and swallowed so cleanup can never
    abort the surrounding request.
    """
    try:
        removable = bool(file_path) and os.path.exists(file_path)
        if removable:
            os.unlink(file_path)
            print(f"Cleaned up: {file_path}")
    except Exception as e:
        print(f"Error cleaning up file: {e}")
167
+
168
# Wire the synchronous pipeline wrapper into a single-page Gradio app:
# one file input, a JSON summary plus the stitched video as outputs.
intf = gr.Interface(
    fn=parse_vectorize_and_search_sync,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article
)
# share=True also publishes a temporary public *.gradio.live URL.
intf.launch(share=True)
asl_gloss.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
ASL Gloss Converter using Claude API

This script converts English text input to ASL gloss format.
ASL gloss is a written representation of sign language
that preserves the spatial and grammatical structure of ASL.
"""
9
+
10
+ import os
11
+ import sys
12
+ import argparse
13
+ import json
14
+ from typing import Optional, Dict, Any
15
+ from pathlib import Path
16
+
17
+ try:
18
+ import anthropic
19
+ except ImportError:
20
+ print("Error: anthropic package not found. Please install it with:")
21
+ print("pip install anthropic")
22
+ sys.exit(1)
23
+
24
+
25
class ASLGlossConverter:
    """
    Converts English text to ASL gloss using Claude's API.

    ASL gloss preserves the spatial and grammatical structure of American Sign Language,
    including features like:
    - Topic-comment structure
    - Spatial referencing
    - Non-manual markers (facial expressions, head movements)
    - Classifier predicates
    - Time indicators
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the ASL gloss converter.

        Args:
            api_key: Anthropic API key. If not provided, will look for ANTHROPIC_API_KEY env var.

        Raises:
            ValueError: if no API key is given and ANTHROPIC_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv('ANTHROPIC_API_KEY')
        if not self.api_key:
            raise ValueError(
                "API key not provided. Set ANTHROPIC_API_KEY environment variable "
                "or pass api_key parameter."
            )

        self.client = anthropic.Anthropic(api_key=self.api_key)

        # System prompt that defines ASL gloss conversion rules
        # (sent verbatim with every request; do not edit casually — the
        # output format downstream parsing expects is defined here).
        self.system_prompt = """You are an expert in American Sign Language (ASL) and ASL gloss. Your task is to convert English text to ASL gloss format with bracketed phrases.

ASL GLOSS RULES:
1. Use ALL CAPS for all signs
2. Group related words/concepts into bracketed phrases [PHRASE]
3. Use underscores (_) to connect words within a phrase that are signed together
4. Use classifiers (CL:1, CL:3, CL:C, etc.) for spatial relationships
5. Use pronouns: I (first person), YOU (second person), HE/SHE/THEY (third person)
6. Use time indicators: PAST, FUTURE, NOW, ALWAYS, NEVER
7. Use topic-comment structure: TOPIC COMMENT
8. Use rhetorical questions: RHQ
9. Use conditional markers: IF-THEN
10. Use negation: NOT, NONE, CAN'T, DON'T-WANT
11. Use aspect markers: FINISH, CONTINUE, REPEAT
12. Use directional verbs: GIVE-TO, TELL-TO, ASK-TO
13. Use location markers: HERE, THERE, WHERE
14. Use manner adverbs: FAST, SLOW, CAREFUL, HARD

PHRASE GROUPING GUIDELINES:
- Group compound expressions: [GOOD_MORNING], [THANK_YOU], [HOW_ARE_YOU]
- Keep names as single phrases: [JOHN], [NATALIIA], [CHRISTOPHER_ROBIN]
- Group related concepts: [MY_NAME], [YOUR_HOUSE], [LAST_WEEK]
- Keep simple words separate: [I] [LOVE] [YOU]

IMPORTANT: Output ONLY the bracketed ASL phrases. Each phrase should be in ALL CAPS with underscores connecting related words.

EXAMPLES:
- "Good morning, Brian" → [GOOD_MORNING] [BRIAN]
- "My name is Nataliia" → [I] [NAME] [NATALIIA]
- "I love you" → [I] [LOVE] [YOU]
- "What is your name?" → [YOU] [NAME] [WHAT]
- "I don't understand" → [I] [UNDERSTAND] [NOT]
- "Where is the bathroom?" → [BATHROOM] [WHERE]
- "I want to go home" → [I] [WANT] [GO] [HOME]
- "The cat is sleeping" → [CAT] [SLEEP]
- "I finished my homework" → [I] [HOMEWORK] [FINISH]
- "Do you want coffee?" → [YOU] [WANT] [COFFEE]
- "I can't hear you" → [I] [HEAR] [YOU] [CAN'T]
- "The weather is nice today" → [TODAY] [WEATHER] [NICE]
- "Thank you very much" → [THANK_YOU] [VERY_MUCH]
- "How are you doing?" → [HOW_ARE_YOU] [DOING]
- "See you later" → [SEE_YOU_LATER]
- "I work at Google" → [I] [WORK] [GOOGLE]

Convert the given English text to proper ASL gloss format with bracketed phrases, maintaining the meaning and intent while following ASL grammar and structure."""

    def convert_text(self, english_text: str) -> str:
        """
        Convert English text to ASL gloss using Anthropic v1.x messages API.

        Args:
            english_text: Plain English input text.

        Returns:
            The model's gloss output, stripped of surrounding whitespace.

        Raises:
            Exception: wraps any API error with a descriptive message.
        """
        try:
            message = self.client.messages.create(
                model="claude-3-5-sonnet-20240620",
                max_tokens=1000,
                system=self.system_prompt,
                messages=[
                    {"role": "user", "content": f"Convert this English text to ASL gloss:\n\n{english_text}"}
                ]
            )
            # First content block is assumed to be the text response.
            return message.content[0].text.strip()
        except Exception as e:
            raise Exception(f"Error converting text to ASL gloss: {str(e)}")

    def convert_file(self, input_file: str, output_file: Optional[str] = None) -> str:
        """
        Convert text from a file to ASL gloss.

        Args:
            input_file: Path to input text file
            output_file: Path to output file (optional)

        Returns:
            The ASL gloss text

        Raises:
            Exception: on missing/empty input or any conversion failure.
        """
        try:
            # Read input file
            with open(input_file, 'r', encoding='utf-8') as f:
                english_text = f.read().strip()

            if not english_text:
                raise ValueError("Input file is empty")

            # Convert to ASL gloss
            asl_gloss = self.convert_text(english_text)

            # Write to output file if specified
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"ASL gloss saved to: {output_file}")

            return asl_gloss

        except FileNotFoundError:
            raise Exception(f"Input file not found: {input_file}")
        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}")

    def batch_convert(self, input_files: list, output_dir: Optional[str] = None) -> Dict[str, str]:
        """
        Convert multiple files to ASL gloss.

        Args:
            input_files: List of input file paths
            output_dir: Directory to save output files (optional)

        Returns:
            Dictionary mapping input files to their ASL gloss.
            Per-file failures are recorded as "ERROR: ..." values
            rather than raised, so one bad file doesn't stop the batch.
        """
        results = {}

        for input_file in input_files:
            try:
                print(f"Converting: {input_file}")

                if output_dir:
                    # Create output filename: <stem>_asl_gloss<original suffix>
                    input_path = Path(input_file)
                    output_filename = f"{input_path.stem}_asl_gloss{input_path.suffix}"
                    output_file = Path(output_dir) / output_filename
                else:
                    output_file = None

                asl_gloss = self.convert_file(input_file, str(output_file) if output_file else None)
                results[input_file] = asl_gloss

                print(f"✓ Completed: {input_file}")

            except Exception as e:
                print(f"✗ Error processing {input_file}: {str(e)}")
                results[input_file] = f"ERROR: {str(e)}"

        return results
188
+
189
+
190
def main():
    """Main function for command-line usage.

    Parses CLI arguments and dispatches to interactive, batch, file, or
    direct-text conversion.

    Returns:
        Process exit code: 0 on success, 1 on error or when no action
        was requested.
    """
    parser = argparse.ArgumentParser(
        description="Convert English text to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert text directly
  python asl_gloss.py "Hello, how are you?"

  # Convert from file
  python asl_gloss.py -f input.txt

  # Convert from file with output
  python asl_gloss.py -f input.txt -o output.txt

  # Batch convert multiple files
  python asl_gloss.py -b file1.txt file2.txt -d output_dir/

  # Interactive mode
  python asl_gloss.py -i
        """
    )

    parser.add_argument(
        'text',
        nargs='?',
        help='English text to convert to ASL gloss'
    )

    parser.add_argument(
        '-f', '--file',
        help='Input file containing English text'
    )

    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )

    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple files'
    )

    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )

    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )

    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        # Initialize converter (raises if no API key is available).
        converter = ASLGlossConverter(api_key=args.api_key)

        if args.interactive:
            print("ASL Gloss Converter - Interactive Mode")
            print("Enter English text to convert to ASL gloss (or 'quit' to exit):")
            print("-" * 50)

            while True:
                try:
                    text = input("\nEnglish text: ").strip()
                    if text.lower() in ['quit', 'exit', 'q']:
                        break

                    if not text:
                        continue

                    print("Converting...")
                    asl_gloss = converter.convert_text(text)
                    print(f"ASL Gloss: {asl_gloss}")

                except KeyboardInterrupt:
                    print("\nExiting...")
                    break
                except Exception as e:
                    # Keep the REPL alive on per-input failures.
                    print(f"Error: {str(e)}")

        elif args.batch:
            # FIX: the original re-checked `if not args.batch` here, which is
            # unreachable inside this branch (args.batch is truthy) — removed.
            print(f"Batch converting {len(args.batch)} files...")
            results = converter.batch_convert(args.batch, args.output_dir)

            print("\nResults:")
            for input_file, result in results.items():
                print(f"\n{input_file}:")
                print(result)

        elif args.file:
            asl_gloss = converter.convert_file(args.file, args.output)
            if not args.output:
                print("ASL Gloss:")
                print(asl_gloss)

        elif args.text:
            asl_gloss = converter.convert_text(args.text)
            print("ASL Gloss:")
            print(asl_gloss)

            if args.output:
                with open(args.output, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"\nSaved to: {args.output}")

        else:
            # No action specified: show help and signal failure.
            parser.print_help()
            return 1

        return 0

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())
document_parsing.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Optional, Union
5
+ import logging
6
+
7
+ # Import document parsing libraries
8
+ try:
9
+ import PyPDF2
10
+ from docx import Document
11
+ import ebooklib
12
+ from ebooklib import epub
13
+ from bs4 import BeautifulSoup
14
+ except ImportError as e:
15
+ print(f"Missing required dependency: {e}")
16
+ print("Please install dependencies with: pip install -r requirements.txt")
17
+ sys.exit(1)
18
+
19
# Configure logging
# Module-level logger at INFO so parsing progress is visible by default.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
22
+
23
+
24
class DocumentParser:
    """
    A class to parse and extract text from various document formats.
    Supports PDF, TXT, DOC, DOCX, and EPUB files.
    """

    def __init__(self):
        # Dispatch table: MIME type -> bound parser method.
        self.supported_formats = {
            'application/pdf': self._parse_pdf,
            'text/plain': self._parse_txt,
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._parse_docx,
            'application/msword': self._parse_doc,
            'application/epub+zip': self._parse_epub
        }

    def get_file_type(self, file_path: Union[str, Path]) -> str:
        """
        Detect the MIME type of a file using file extension.

        Args:
            file_path: Path to the file

        Returns:
            MIME type string ('unknown' for unrecognized extensions)
        """
        return self._get_mime_from_extension(file_path)

    def _get_mime_from_extension(self, file_path: Union[str, Path]) -> str:
        """
        Determine MIME type from file extension.

        Args:
            file_path: Path to the file

        Returns:
            MIME type string
        """
        extension = Path(file_path).suffix.lower()
        extension_map = {
            '.pdf': 'application/pdf',
            '.txt': 'text/plain',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.doc': 'application/msword',
            '.epub': 'application/epub+zip'
        }
        return extension_map.get(extension, 'unknown')

    def extract_text(self, file_path: Union[str, Path]) -> Optional[str]:
        """
        Extract text from a document file.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text as string, or None if extraction fails
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return None

        try:
            mime_type = self.get_file_type(file_path)
            logger.info(f"Detected file type: {mime_type}")

            if mime_type in self.supported_formats:
                return self.supported_formats[mime_type](file_path)
            else:
                logger.error(f"Unsupported file type: {mime_type}")
                return None

        except Exception as e:
            # Individual parsers raise on failure; normalize everything to
            # None here so callers have a single failure signal.
            logger.error(f"Error extracting text from {file_path}: {e}")
            return None

    def _parse_pdf(self, file_path: Path) -> str:
        """
        Extract text from PDF file.

        Args:
            file_path: Path to PDF file

        Returns:
            Extracted text (pages joined with newlines)
        """
        text = ""
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)

                for page_num in range(len(pdf_reader.pages)):
                    page = pdf_reader.pages[page_num]
                    page_text = page.extract_text()
                    # extract_text() can return None/empty for image-only pages.
                    if page_text:
                        text += page_text + "\n"

        except Exception as e:
            logger.error(f"Error parsing PDF {file_path}: {e}")
            raise

        return text.strip()

    def _parse_txt(self, file_path: Path) -> str:
        """
        Extract text from plain text file.

        Args:
            file_path: Path to text file

        Returns:
            Extracted text
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except UnicodeDecodeError:
            # Try with different encoding — latin-1 maps every byte, so
            # this fallback cannot itself fail to decode.
            try:
                with open(file_path, 'r', encoding='latin-1') as file:
                    return file.read()
            except Exception as e:
                logger.error(f"Error reading text file {file_path}: {e}")
                raise
        except Exception as e:
            logger.error(f"Error reading text file {file_path}: {e}")
            raise

    def _parse_docx(self, file_path: Path) -> str:
        """
        Extract text from DOCX file.

        Args:
            file_path: Path to DOCX file

        Returns:
            Extracted text (paragraphs joined with newlines)
        """
        try:
            doc = Document(file_path)
            text = ""

            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"

            return text.strip()

        except Exception as e:
            logger.error(f"Error parsing DOCX {file_path}: {e}")
            raise

    def _parse_doc(self, file_path: Path) -> str:
        """
        Extract text from DOC file (legacy Word format).
        Note: This requires additional dependencies like antiword or catdoc.

        Args:
            file_path: Path to DOC file

        Returns:
            Extracted text

        Raises:
            Exception: if neither antiword nor catdoc is installed or
                both converters fail.
        """
        try:
            # Try using antiword if available
            import subprocess
            result = subprocess.run(['antiword', str(file_path)],
                                    capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip()

            # Fallback: try catdoc
            result = subprocess.run(['catdoc', str(file_path)],
                                    capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip()

            raise Exception("Neither antiword nor catdoc found. Please install one of them.")

        except FileNotFoundError:
            # subprocess raises FileNotFoundError when the binary is absent.
            raise Exception("antiword or catdoc not found. Please install one of them for DOC file support.")
        except Exception as e:
            logger.error(f"Error parsing DOC {file_path}: {e}")
            raise

    def _parse_epub(self, file_path: Path) -> str:
        """
        Extract text from EPUB file.

        Args:
            file_path: Path to EPUB file

        Returns:
            Extracted text (HTML stripped via BeautifulSoup)
        """
        try:
            book = epub.read_epub(file_path)
            text = ""

            for item in book.get_items():
                if item.get_type() == ebooklib.ITEM_DOCUMENT:
                    content = item.get_content().decode('utf-8')
                    soup = BeautifulSoup(content, 'html.parser')
                    text += soup.get_text() + "\n"

            return text.strip()

        except Exception as e:
            logger.error(f"Error parsing EPUB {file_path}: {e}")
            raise
234
+
235
+
236
def main():
    """CLI demo: extract and print the text of the file named on the
    command line; exits 1 on usage error or extraction failure."""
    if len(sys.argv) != 2:
        print("Usage: python document_parsing.py <file_path>")
        print("Supported formats: PDF, TXT, DOC, DOCX, EPUB")
        sys.exit(1)

    file_path = sys.argv[1]
    parser = DocumentParser()

    print(f"Extracting text from: {file_path}")
    print("-" * 50)

    extracted_text = parser.extract_text(file_path)

    # Guard clause: bail out on failure, then report success unindented.
    if not extracted_text:
        print("Failed to extract text from the file.")
        sys.exit(1)

    print("Extracted text:")
    print(extracted_text)
    print(f"\nTotal characters: {len(extracted_text)}")


if __name__ == "__main__":
    main()
document_to_gloss.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Document to ASL Gloss Converter
4
+
5
+ This script combines document parsing and ASL glossing to convert
6
+ uploaded documents (PDF, TXT, DOC, DOCX, EPUB) directly to ASL gloss format.
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ import argparse
12
+ from typing import Optional, Dict, Any
13
+ from pathlib import Path
14
+
15
+ # Import our existing modules
16
+ from document_parsing import DocumentParser
17
+ from asl_gloss import ASLGlossConverter
18
+
19
+
20
+ class DocumentToASLConverter:
21
+ """
22
+ Combines document parsing and ASL glossing functionality.
23
+ Extracts text from various document formats and converts to ASL gloss.
24
+ """
25
+
26
+ def __init__(self, api_key: Optional[str] = None):
27
+ """
28
+ Initialize the document to ASL converter.
29
+
30
+ Args:
31
+ api_key: Anthropic API key. If not provided, will look for ANTHROPIC_API_KEY env var.
32
+ """
33
+ self.document_parser = DocumentParser()
34
+ self.asl_converter = ASLGlossConverter(api_key=api_key)
35
+
36
+ def convert_document(self, document_path: str, output_file: Optional[str] = None) -> str:
37
+ """
38
+ Convert a document file to ASL gloss.
39
+
40
+ Args:
41
+ document_path: Path to the document file
42
+ output_file: Path to output file (optional)
43
+
44
+ Returns:
45
+ The ASL gloss text
46
+ """
47
+ try:
48
+ print(f"Processing document: {document_path}")
49
+
50
+ # Step 1: Extract text from document
51
+ print("Step 1: Extracting text from document...")
52
+ extracted_text = self.document_parser.extract_text(document_path)
53
+
54
+ if not extracted_text:
55
+ raise Exception("Failed to extract text from document")
56
+
57
+ print(f"✓ Extracted {len(extracted_text)} characters")
58
+
59
+ # Step 2: Convert text to ASL gloss
60
+ print("Step 2: Converting to ASL gloss...")
61
+ asl_gloss = self.asl_converter.convert_text(extracted_text)
62
+
63
+ print("✓ ASL gloss conversion completed")
64
+
65
+ # Step 3: Save to output file if specified
66
+ if output_file:
67
+ with open(output_file, 'w', encoding='utf-8') as f:
68
+ f.write(asl_gloss)
69
+ print(f"✓ ASL gloss saved to: {output_file}")
70
+
71
+ return asl_gloss
72
+
73
+ except Exception as e:
74
+ raise Exception(f"Error processing document: {str(e)}")
75
+
76
def batch_convert_documents(self, document_paths: list, output_dir: Optional[str] = None) -> Dict[str, str]:
    """
    Convert multiple documents to ASL gloss, one at a time.

    Args:
        document_paths: List of document file paths
        output_dir: Directory to save output files (optional)

    Returns:
        Dictionary mapping input files to their ASL gloss; a failed
        conversion maps to a string beginning with "ERROR:".
    """
    results: Dict[str, str] = {}
    banner = '=' * 50

    for document_path in document_paths:
        try:
            print(f"\n{banner}")
            print(f"Converting: {document_path}")
            print(f"{banner}")

            # Derive a per-document output path only when an output
            # directory was requested; otherwise pass None through.
            output_file = None
            if output_dir:
                source = Path(document_path)
                output_file = Path(output_dir) / f"{source.stem}_asl_gloss.txt"

            gloss = self.convert_document(
                document_path,
                str(output_file) if output_file else None,
            )
            results[document_path] = gloss

            print(f"✓ Completed: {document_path}")

        except Exception as e:
            # Record the failure instead of aborting the whole batch.
            print(f"✗ Error processing {document_path}: {str(e)}")
            results[document_path] = f"ERROR: {str(e)}"

    return results
113
+
114
def get_supported_formats(self) -> list:
    """
    Return the document file extensions this converter accepts.

    Returns:
        List of supported file extensions (with leading dots).
    """
    return [ext for ext in ('.pdf', '.txt', '.docx', '.doc', '.epub')]
122
+
123
+
124
def _show_formats(converter) -> int:
    """Print the converter's supported document formats; always returns 0."""
    print("Supported Document Formats:")
    print("=" * 30)
    formats = converter.get_supported_formats()
    for fmt in formats:
        print(f"  • {fmt}")
    print("\nExamples: .pdf, .txt, .docx, .doc, .epub")
    return 0


def _run_interactive(converter) -> int:
    """Prompt for document paths in a loop and convert each one; returns 0."""
    print("Document to ASL Gloss Converter - Interactive Mode")
    print("Enter document file paths to convert (or 'quit' to exit):")
    print("-" * 60)

    while True:
        try:
            doc_path = input("\nDocument path: ").strip()
            if doc_path.lower() in ['quit', 'exit', 'q']:
                break
            if not doc_path:
                continue
            if not os.path.exists(doc_path):
                print(f"Error: File not found: {doc_path}")
                continue

            # Optional output file; empty answer means print to stdout.
            output_file = input("Output file (optional, press Enter to skip): ").strip()
            if not output_file:
                output_file = None

            print("Converting...")
            asl_gloss = converter.convert_document(doc_path, output_file)

            if not output_file:
                print("\nASL Gloss:")
                print("-" * 20)
                print(asl_gloss)

        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            # Keep the interactive session alive on per-document errors.
            print(f"Error: {str(e)}")

    return 0


def _run_batch(converter, document_paths, output_dir) -> int:
    """Convert several documents and print a per-document summary; returns 0."""
    print(f"Batch converting {len(document_paths)} documents...")
    results = converter.batch_convert_documents(document_paths, output_dir)

    print("\n" + "=" * 60)
    print("BATCH CONVERSION RESULTS")
    print("=" * 60)
    for doc_path, result in results.items():
        print(f"\nDocument: {doc_path}")
        print("-" * 40)
        if result.startswith("ERROR:"):
            print(f"❌ {result}")
        else:
            print("✅ Conversion successful")
            if not output_dir:
                print("ASL Gloss:")
                # Truncate long glosses so the summary stays readable.
                print(result[:500] + "..." if len(result) > 500 else result)
    return 0


def main():
    """Command-line entry point; returns a process exit code (0 ok, 1 error)."""
    parser = argparse.ArgumentParser(
        description="Convert documents to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert a single document
  python document_to_asl.py document.pdf

  # Convert document with output file
  python document_to_asl.py document.pdf -o output.txt

  # Batch convert multiple documents
  python document_to_asl.py -b doc1.pdf doc2.docx doc3.txt -d output_dir/

  # Interactive mode
  python document_to_asl.py -i

  # Show supported formats
  python document_to_asl.py --formats
        """
    )

    parser.add_argument(
        'document',
        nargs='?',
        help='Document file to convert to ASL gloss'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )
    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple documents'
    )
    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )
    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )
    parser.add_argument(
        '--formats',
        action='store_true',
        help='Show supported document formats'
    )
    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        converter = DocumentToASLConverter(api_key=args.api_key)

        if args.formats:
            return _show_formats(converter)

        if args.interactive:
            return _run_interactive(converter)

        # NOTE: the original code re-checked `if not args.batch` inside this
        # branch; that check was unreachable and has been removed.
        if args.batch:
            return _run_batch(converter, args.batch, args.output_dir)

        if args.document:
            asl_gloss = converter.convert_document(args.document, args.output)
            if not args.output:
                print("\nASL Gloss:")
                print("-" * 20)
                print(asl_gloss)
            return 0

        # No action requested: show usage and signal failure.
        parser.print_help()
        return 1

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1
277
+
278
+
279
if __name__ == "__main__":
    # Propagate main()'s integer return value as the process exit code.
    sys.exit(main())
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ anthropic==0.54.0
2
+ beautifulsoup4==4.13.4
3
+ boto3==1.38.41
4
+ botocore==1.38.41
5
+ ebooklib==0.19
6
+ gensim==4.3.3
7
+ gradio==5.34.2
8
+ mediapipe==0.10.21
9
+ numpy<2.0
10
+ opencv-python
11
+ pandas<2.0
12
+ PyPDF2==3.0.1
13
+ python-dotenv==1.1.0
14
+ python-docx
15
+ requests==2.32.4
16
+ supabase==2.15.3
test.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify the fixes for the ASL gloss processing
4
+ """
5
+
6
+ import asyncio
7
+ import re
8
+ from vectorizer import Vectorizer
9
+
10
+
11
+ def clean_gloss_token(token):
12
+ """
13
+ Clean a gloss token by removing brackets, newlines, and extra whitespace
14
+ """
15
+ # Remove brackets and newlines
16
+ cleaned = re.sub(r'[\[\]\n\r]', '', token)
17
+ # Remove extra whitespace
18
+ cleaned = re.sub(r'\s+', ' ', cleaned).strip()
19
+ return cleaned.lower()
20
+
21
+
22
def test_gloss_parsing():
    """Exercise token cleaning on a sample multi-line gloss and return the tokens."""
    sample_gloss = ("ASL [BEAR] [NAME] [OSKI] [LOVE] [HONEY] [BUT] [ALWAYS] "
                    "[GET-STUCK] [TREE]\n\n[ONE_DAY] [HE] [DISCOVER] [LADDER]\n\n"
                    "[PROBLEM] [SOLVE] [FINISH]")

    print("Original gloss:")
    print(sample_gloss)
    print("\n" + "="*50 + "\n")

    # Tokenize on whitespace, clean each piece, and keep only non-empty results.
    cleaned_tokens = [
        cleaned
        for cleaned in (clean_gloss_token(raw) for raw in sample_gloss.split())
        if cleaned
    ]

    print("Cleaned tokens:")
    for position, token in enumerate(cleaned_tokens, start=1):
        print(f"{position:2d}. {token}")

    return cleaned_tokens
47
+
48
+
49
async def test_vectorizer():
    """Smoke-test the Vectorizer against a handful of gloss words."""
    try:
        vectorizer = Vectorizer()

        # Words expected to appear in the embedding vocabulary.
        for word in ("BEAR", "LOVE", "TREE", "HE", "FINISH"):
            print(f"\nTesting word: {word}")
            outcome = await vectorizer.vector_query_from_supabase(word)
            print(f"Result: {outcome}")

    except Exception as e:
        print(f"Error testing vectorizer: {e}")
64
+
65
+
66
async def main():
    """Run the gloss-parsing and vectorizer smoke tests in sequence."""
    divider = "=" * 50
    print("Testing ASL Gloss Processing Fixes")
    print(divider)

    # Step 1: token cleaning.
    print("\n1. Testing gloss parsing...")
    tokens = test_gloss_parsing()
    print(f"Total cleaned tokens: {len(tokens)}")

    # Step 2: embedding lookup (requires env/credentials to be configured).
    print("\n2. Testing vectorizer...")
    await test_vectorizer()

    print("\n" + divider)
    print("Test completed!")
82
+
83
+
84
if __name__ == "__main__":
    # Entry point: drive the async test suite with asyncio's event loop.
    asyncio.run(main())
vectorizer.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gensim
2
+ import gensim.downloader
3
+ import numpy as np
4
+ import pandas as pd
5
+ import os
6
+ from supabase import acreate_client, AsyncClient
7
+ from dotenv import load_dotenv
8
+
9
class Vectorizer:
    """
    A class to:
    - Generate embeddings of words (via a gensim KeyedVectors model)
    - Query for words from Supabase database based on vector similarity
    - Return matching ASL videos for words
    """

    def load_kv(self, model_name='word2vec-google-news-300'):
        """
        Return a KeyedVectors object downloaded via gensim, or None on failure.

        Args:
            model_name: gensim-downloader model identifier.
        """
        try:
            kv = gensim.downloader.load(model_name)  # returns a keyedvector
            return kv
        except Exception as e:
            # Fail soft: callers must handle a None self.kv.
            print(f"Unable to load embedding model from gensim: {e}")
            return None

    async def initialize_supabase(self):
        # Credentials come from the environment (populated by load_dotenv()
        # in __init__). NOTE(review): no check that the vars are set — a
        # missing env var reaches acreate_client as None.
        url: str = os.environ.get("SUPABASE_URL")
        key: str = os.environ.get("SUPABASE_KEY")
        supabase: AsyncClient = await acreate_client(url, key)
        return supabase

    def __init__(self):
        # Load .env so SUPABASE_URL/SUPABASE_KEY are available later.
        load_dotenv()
        self.kv = self.load_kv()
        self.supabase = None  # Will be initialized lazily (async) when needed

    async def ensure_supabase_initialized(self):
        """Create the Supabase client on first use (idempotent)."""
        if self.supabase is None:
            self.supabase = await self.initialize_supabase()

    def encode(self, word):
        """Return the embedding vector for `word`, or None if out of vocabulary."""
        print(f"encoding {word}")
        if self.kv is not None and word in self.kv.key_to_index:
            return self.kv[word]
        else:
            print(f"Error: {word} is not in the KeyedVector's vocabulary")
            return None

    def encode_and_format(self, word):
        """
        Encode `word` and render the vector as a bracketed, comma-separated
        string (the format Supabase expects for vector columns).
        Returns None when the word is out of vocabulary.
        """
        enc = self.encode(word)
        return "[" + ",".join(map(str, enc.tolist())) + "]" if enc is not None else None

    async def vector_query_from_supabase(self, query):
        """
        Look up the nearest stored word/video for `query` via the
        `match_vector` RPC. Returns a dict with at least a "match" bool;
        on success it also carries query, matching_word, video_url,
        similarity (keys supplied by the RPC's result rows).
        """
        try:
            await self.ensure_supabase_initialized()
            query_embedding = self.encode(query)
            if query_embedding is None:
                return {
                    "match": False,
                    "error": f"'{query}' not in vocabulary"
                }

            # RPC expects a plain list, not a numpy array.
            query_embedding = query_embedding.tolist()

            if self.supabase is not None:
                # match_threshold 0.0 means the best row is always returned
                # when the table is non-empty; match_count limits to 1 row.
                result = await self.supabase.rpc(
                    "match_vector",
                    {
                        "query_embedding": query_embedding,
                        "match_threshold": 0.0,
                        "match_count": 1
                    }
                ).execute()

                data = result.data
                if data:
                    match = data[0]
                    return {
                        "match": True,
                        "query": query,
                        "matching_word": match["word"],
                        "video_url": match["video_url"],
                        "similarity": match["similarity"]
                    }
                else:
                    return {"match": False}
            else:
                return {"match": False, "error": "Supabase not initialized"}

        except Exception as e:
            # Network/RPC failures are reported in-band rather than raised.
            print(f"RPC call failed: {e}")
            return {"match": False, "error": str(e)}
100
+
101
+
102
def load_filtered_kv(model_name='word2vec-google-news-300', vocab=None):
    """
    Load a gensim KeyedVectors model, optionally restricted to `vocab`.

    Args:
        model_name: gensim-downloader model identifier.
        vocab: Iterable of words to keep. If falsy/None, the full model
            is returned unchanged.

    Returns:
        A KeyedVectors instance (filtered when `vocab` was given), or
        None if the model could not be loaded.
    """
    if vocab is None:
        vocab = []
    try:
        # gensim.downloader.load returns a KeyedVectors
        original_kv = gensim.downloader.load(model_name)
        if not vocab:
            return original_kv

        # Keep only the requested words that exist in the model.
        filtered_key2vec_map = {}
        for key in vocab:
            if key in original_kv.key_to_index:
                filtered_key2vec_map[key] = original_kv[key]

        new_kv = gensim.models.KeyedVectors(
            vector_size=original_kv.vector_size)
        new_kv.add_vectors(list(filtered_key2vec_map.keys()),
                           np.array(list(filtered_key2vec_map.values())))
        # BUG FIX: previously returned original_kv here, silently
        # discarding the filtered model that was just built.
        return new_kv
    except Exception as e:
        print(f"Unable to load embedding model from gensim: {e}")
        return None
128
+
129
+
130
async def main():
    """Demo: encode a word, then run two similarity queries against Supabase."""
    vectorizer = Vectorizer()

    vector = vectorizer.encode("test")
    print(vector)

    for animal in ("dog", "cat"):
        result = await vectorizer.vector_query_from_supabase(animal)
        print(result)

    # NOTE: a one-off ingestion pipeline previously lived here (commented
    # out): read 'videos_rows.csv', add an 'embedding' column via
    # encode_and_format, drop rows without embeddings, and write
    # word/video_url/embedding to vectors.csv.
151
+
152
+
153
if __name__ == "__main__":
    # Run the async demo when executed directly.
    import asyncio
    asyncio.run(main())
video_gen.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import mediapipe as mp
3
+ import numpy as np
4
+
5
def extract_keypoints_from_video(video_path, verbose=False):
    """
    Run MediaPipe Pose + Hands over every frame of a video.

    Args:
        video_path: Path to the input video file (anything cv2.VideoCapture accepts).
        verbose: When True, print a line per processed frame.

    Returns:
        List of (pose, left_hand, right_hand) tuples, one per frame where a
        pose was detected: pose is a (33, 2) array of normalized x/y
        landmarks; each hand is a (21, 2) array, all-zero when not detected.
        Frames with no detected pose are skipped entirely.
    """
    mp_pose = mp.solutions.pose
    mp_hands = mp.solutions.hands

    pose_model = mp_pose.Pose()
    hands_model = mp_hands.Hands(static_image_mode=False, max_num_hands=2)

    cap = cv2.VideoCapture(video_path)
    keypoints_sequence = []

    frame_idx = 0

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # MediaPipe expects RGB; OpenCV delivers BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        h, w, _ = frame.shape

        # Pose estimation
        pose_results = pose_model.process(frame_rgb)
        if not pose_results.pose_landmarks:
            # No person found: drop the frame but keep the index advancing.
            frame_idx += 1
            continue

        # Extract 33 body keypoints (normalized coordinates).
        pose_landmarks = pose_results.pose_landmarks.landmark
        pose = np.array([[lm.x, lm.y] for lm in pose_landmarks])  # shape (33, 2)

        # Hand tracking — zeros mean "hand not detected this frame".
        left_hand = np.zeros((21, 2))
        right_hand = np.zeros((21, 2))

        hand_results = hands_model.process(frame_rgb)
        if hand_results.multi_hand_landmarks and hand_results.multi_handedness:
            for hand_landmarks, hand_info in zip(hand_results.multi_hand_landmarks, hand_results.multi_handedness):
                label = hand_info.classification[0].label  # 'Left' or 'Right'
                hand_array = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark])
                if label == "Left":
                    left_hand = hand_array
                else:
                    right_hand = hand_array

        keypoints_sequence.append((pose, left_hand, right_hand))

        if verbose:
            print(f"Processed frame {frame_idx}")
        frame_idx += 1

    # Release camera/file handle and MediaPipe graph resources.
    cap.release()
    pose_model.close()
    hands_model.close()

    return keypoints_sequence
60
+
61
def render_person(frame, pose, left_hand, right_hand):
    """
    Draw a cartoon bear character onto `frame` from MediaPipe keypoints.

    Args:
        frame: BGR image (numpy array) to draw on, modified in place.
        pose: (33, 2) array of normalized pose landmarks (MediaPipe Pose order).
        left_hand / right_hand: (21, 2) arrays of normalized hand landmarks
            (MediaPipe Hands order); all-zero arrays mean "not detected".

    Returns:
        The same `frame`, with the character drawn on it.

    Note: zero coordinates are used throughout as the "landmark missing"
    sentinel, so each drawing step guards on `> 0`.
    """
    h, w = frame.shape[:2]

    # MediaPipe Pose keypoint indices.
    # Face
    NOSE = 0
    LEFT_EYE = 2
    RIGHT_EYE = 5
    LEFT_EAR = 7
    RIGHT_EAR = 8

    # Body
    LEFT_SHOULDER = 11
    RIGHT_SHOULDER = 12
    LEFT_ELBOW = 13
    RIGHT_ELBOW = 14
    LEFT_WRIST = 15
    RIGHT_WRIST = 16
    LEFT_HIP = 23
    RIGHT_HIP = 24
    LEFT_KNEE = 25
    RIGHT_KNEE = 26
    LEFT_ANKLE = 27
    RIGHT_ANKLE = 28

    # MediaPipe Hands keypoint indices.
    # Thumb: 0-4, Index: 5-8, Middle: 9-12, Ring: 13-16, Pinky: 17-20
    THUMB_TIP = 4
    INDEX_TIP = 8
    MIDDLE_TIP = 12
    RING_TIP = 16
    PINKY_TIP = 20

    # Bone connections within one hand (pairs of landmark indices).
    finger_connections = [
        # Thumb
        (0, 1), (1, 2), (2, 3), (3, 4),
        # Index finger
        (0, 5), (5, 6), (6, 7), (7, 8),
        # Middle finger
        (0, 9), (9, 10), (10, 11), (11, 12),
        # Ring finger
        (0, 13), (13, 14), (14, 15), (15, 16),
        # Pinky
        (0, 17), (17, 18), (18, 19), (19, 20)
    ]

    # Color palette — all tuples are BGR (OpenCV convention).
    skin_color = (173, 216, 230)     # light tan fur
    outline_color = (40, 40, 40)     # soft dark outline
    shirt_color = (205, 170, 125)    # blue-ish jacket

    pants_color = (135, 206, 235)    # tan-ish pants
    bow_tie_color = (255, 255, 255)  # white bow tie
    eye_color = (255, 255, 255)      # white eyes
    pupil_color = (0, 0, 0)          # black pupils

    # Draw body parts as filled shapes.

    # 1. Head (face)
    if len(pose) > max(LEFT_EYE, RIGHT_EYE, LEFT_EAR, RIGHT_EAR):
        # Head center anchored at the nose landmark (pixel coordinates).
        head_center_x = pose[NOSE][0] * w
        head_center_y = pose[NOSE][1] * h

        # Estimate head size from the inter-eye distance when both eyes
        # are present; otherwise fall back to a fixed radius.
        if pose[LEFT_EYE][0] > 0 and pose[RIGHT_EYE][0] > 0:
            eye_distance = abs(pose[LEFT_EYE][0] - pose[RIGHT_EYE][0]) * w
            head_radius = eye_distance * 1.8  # Larger head for friendlier look
        else:
            head_radius = 35

        # Draw bear ears first so the head circle overlaps them.
        ear_radius = int(head_radius * 0.4)
        # Left ear
        left_ear_x = int(head_center_x - head_radius * 0.6)
        left_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, outline_color, 2)
        # Inner ear detail
        cv2.circle(frame, (left_ear_x, left_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Right ear
        right_ear_x = int(head_center_x + head_radius * 0.6)
        right_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, outline_color, 2)
        # Inner ear detail
        cv2.circle(frame, (right_ear_x, right_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Head disc with outline.
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), skin_color, -1)
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), outline_color, 2)

        # Eyes: white sclera, pupil, outline, and a small shine highlight.
        if pose[LEFT_EYE][0] > 0 and pose[LEFT_EYE][1] > 0:
            eye_x, eye_y = int(pose[LEFT_EYE][0] * w), int(pose[LEFT_EYE][1] * h)
            # Larger white eye
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)
            # Larger pupil
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)
            # Eye outline
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)
            # Eye shine
            cv2.circle(frame, (eye_x-3, eye_y-3), 3, (255, 255, 255), -1)

        if pose[RIGHT_EYE][0] > 0 and pose[RIGHT_EYE][1] > 0:
            eye_x, eye_y = int(pose[RIGHT_EYE][0] * w), int(pose[RIGHT_EYE][1] * h)
            # Larger white eye
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)
            # Larger pupil
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)
            # Eye outline
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)
            # Eye shine
            cv2.circle(frame, (eye_x-3, eye_y-3), 3, (255, 255, 255), -1)

        # Round bear nose slightly below the head center.
        nose_x = int(head_center_x)
        nose_y = int(head_center_y + head_radius * 0.1)
        cv2.circle(frame, (nose_x, nose_y), 6, (80, 40, 20), -1)  # dark nose (BGR)
        cv2.circle(frame, (nose_x, nose_y), 6, outline_color, 1)

        # Friendly smile: lower half of an ellipse (0..180 degrees).
        smile_center_x = int(head_center_x)
        smile_center_y = int(head_center_y + head_radius * 0.3)
        smile_radius = int(head_radius * 0.6)
        cv2.ellipse(frame, (smile_center_x, smile_center_y), (smile_radius, smile_radius//2),
                   0, 0, 180, outline_color, 3)

    # 2. Torso as a filled quadrilateral between shoulders and hips.
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER, LEFT_HIP, RIGHT_HIP):
        left_shoulder = (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h))
        right_shoulder = (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h))
        left_hip = (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h))
        right_hip = (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h))

        torso_points = np.array([left_shoulder, right_shoulder, right_hip, left_hip], np.int32)
        cv2.fillPoly(frame, [torso_points], shirt_color)
        cv2.polylines(frame, [torso_points], True, outline_color, 2)

    # 3. Arms: thick colored segment plus a thin outline stroke per bone.
    # Left arm
    if len(pose) > max(LEFT_SHOULDER, LEFT_ELBOW, LEFT_WRIST):
        if pose[LEFT_SHOULDER][0] > 0 and pose[LEFT_ELBOW][0] > 0:
            # Upper arm (thickness 36 px)
            cv2.line(frame,
                    (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                    (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                    skin_color, 36)
            cv2.line(frame,
                    (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                    (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                    outline_color, 2)

            # Lower arm (thickness 30 px)
            if pose[LEFT_WRIST][0] > 0:
                cv2.line(frame,
                        (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                        (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                        skin_color, 30)
                cv2.line(frame,
                        (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                        (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                        outline_color, 2)

    # Right arm (mirror of the left)
    if len(pose) > max(RIGHT_SHOULDER, RIGHT_ELBOW, RIGHT_WRIST):
        if pose[RIGHT_SHOULDER][0] > 0 and pose[RIGHT_ELBOW][0] > 0:
            # Upper arm (thickness 36 px)
            cv2.line(frame,
                    (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                    (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                    skin_color, 36)
            cv2.line(frame,
                    (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                    (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                    outline_color, 2)

            # Lower arm (thickness 30 px)
            if pose[RIGHT_WRIST][0] > 0:
                cv2.line(frame,
                        (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                        (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                        skin_color, 30)
                cv2.line(frame,
                        (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                        (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                        outline_color, 2)

    # 4. Legs: same two-stroke technique with pants color.
    # Left leg
    if len(pose) > max(LEFT_HIP, LEFT_KNEE, LEFT_ANKLE):
        if pose[LEFT_HIP][0] > 0 and pose[LEFT_KNEE][0] > 0:
            # Upper leg
            cv2.line(frame,
                    (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                    (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                    pants_color, 14)
            cv2.line(frame,
                    (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                    (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                    outline_color, 2)

            # Lower leg
            if pose[LEFT_ANKLE][0] > 0:
                cv2.line(frame,
                        (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                        (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                        pants_color, 12)
                cv2.line(frame,
                        (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                        (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                        outline_color, 2)

    # Right leg
    if len(pose) > max(RIGHT_HIP, RIGHT_KNEE, RIGHT_ANKLE):
        if pose[RIGHT_HIP][0] > 0 and pose[RIGHT_KNEE][0] > 0:
            # Upper leg
            cv2.line(frame,
                    (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                    (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                    pants_color, 14)
            cv2.line(frame,
                    (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                    (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                    outline_color, 2)

            # Lower leg
            if pose[RIGHT_ANKLE][0] > 0:
                cv2.line(frame,
                        (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                        (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                        pants_color, 12)
                cv2.line(frame,
                        (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                        (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                        outline_color, 2)

    # 1.5. Neck connecting head to shoulders (drawn after limbs so it
    # overlaps the torso edge), plus the bow tie underneath it.
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER):
        if pose[LEFT_SHOULDER][0] > 0 and pose[RIGHT_SHOULDER][0] > 0:
            # Midpoint of the shoulders in pixel space.
            neck_center_x = (pose[LEFT_SHOULDER][0] + pose[RIGHT_SHOULDER][0]) / 2 * w
            neck_center_y = (pose[LEFT_SHOULDER][1] + pose[RIGHT_SHOULDER][1]) / 2 * h

            # Position neck slightly above shoulders
            neck_y = neck_center_y - 15

            # Neck width scales with shoulder distance (~30%).
            shoulder_distance = abs(pose[LEFT_SHOULDER][0] - pose[RIGHT_SHOULDER][0]) * w
            neck_width = shoulder_distance * 0.3  # Neck is about 30% of shoulder width
            neck_height = 25

            # Rectangle corners of the neck.
            neck_left = int(neck_center_x - neck_width / 2)
            neck_right = int(neck_center_x + neck_width / 2)
            neck_top = int(neck_y - neck_height / 2)
            neck_bottom = int(neck_y + neck_height / 2)

            # Draw neck with skin color
            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), skin_color, -1)
            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), outline_color, 2)

            # Bow tie just below the neck rectangle.
            bow_center_x = int(neck_center_x)
            bow_center_y = int(neck_y + neck_height / 2 + 5)
            bow_width = 20
            bow_height = 12

            # Left wing of the bow tie.
            left_bow_points = np.array([
                [bow_center_x - bow_width//2, bow_center_y - bow_height//2],
                [bow_center_x - bow_width//2 - 8, bow_center_y],
                [bow_center_x - bow_width//2, bow_center_y + bow_height//2],
                [bow_center_x - 2, bow_center_y + bow_height//2],
                [bow_center_x - 2, bow_center_y - bow_height//2]
            ], np.int32)
            cv2.fillPoly(frame, [left_bow_points], bow_tie_color)
            cv2.polylines(frame, [left_bow_points], True, outline_color, 1)

            # Right wing of the bow tie.
            right_bow_points = np.array([
                [bow_center_x + bow_width//2, bow_center_y - bow_height//2],
                [bow_center_x + bow_width//2 + 8, bow_center_y],
                [bow_center_x + bow_width//2, bow_center_y + bow_height//2],
                [bow_center_x + 2, bow_center_y + bow_height//2],
                [bow_center_x + 2, bow_center_y - bow_height//2]
            ], np.int32)
            cv2.fillPoly(frame, [right_bow_points], bow_tie_color)
            cv2.polylines(frame, [right_bow_points], True, outline_color, 1)

            # Center knot of the bow tie.
            knot_points = np.array([
                [bow_center_x - 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y + 3],
                [bow_center_x - 2, bow_center_y + 3]
            ], np.int32)
            cv2.fillPoly(frame, [knot_points], bow_tie_color)
            cv2.polylines(frame, [knot_points], True, outline_color, 1)

    # 5. Hands drawn last so they always appear in front of the body.
    # hand_color: blue channel for left, red for right (BGR tuples).
    for hand, hand_color in [(left_hand, (255, 0, 0)), (right_hand, (0, 0, 255))]:
        if np.any(hand != 0):  # Only draw if hand is detected
            # Palm: convex hull over wrist + finger bases.
            palm_points = []
            palm_indices = [0, 5, 9, 13, 17]  # Wrist and base of each finger
            for idx in palm_indices:
                if idx < len(hand) and hand[idx][0] > 0 and hand[idx][1] > 0:
                    palm_points.append([int(hand[idx][0] * w), int(hand[idx][1] * h)])

            if len(palm_points) > 3:
                palm_points = np.array(palm_points, np.int32)
                hull = cv2.convexHull(palm_points)
                cv2.fillPoly(frame, [hull], (255, 182, 193))  # palm fill (BGR)
                cv2.polylines(frame, [hull], True, outline_color, 2)

            # Finger bones: thick fill stroke plus thin outline.
            for connection in finger_connections:
                start_idx, end_idx = connection
                if (start_idx < len(hand) and end_idx < len(hand) and
                    hand[start_idx][0] > 0 and hand[start_idx][1] > 0 and
                    hand[end_idx][0] > 0 and hand[end_idx][1] > 0):

                    start_point = (int(hand[start_idx][0] * w), int(hand[start_idx][1] * h))
                    end_point = (int(hand[end_idx][0] * w), int(hand[end_idx][1] * h))

                    # Draw finger bone
                    cv2.line(frame, start_point, end_point, (255, 182, 193), 9)
                    cv2.line(frame, start_point, end_point, outline_color, 1)

            # Fingertips emphasized with a filled dot, outline, and highlight.
            finger_tips = [THUMB_TIP, INDEX_TIP, MIDDLE_TIP, RING_TIP, PINKY_TIP]
            for tip_idx in finger_tips:
                if tip_idx < len(hand) and hand[tip_idx][0] > 0 and hand[tip_idx][1] > 0:
                    tip_x, tip_y = int(hand[tip_idx][0] * w), int(hand[tip_idx][1] * h)
                    cv2.circle(frame, (tip_x, tip_y), 4, (255, 182, 193), -1)
                    cv2.circle(frame, (tip_x, tip_y), 4, outline_color, 2)
                    # Add a small highlight
                    cv2.circle(frame, (tip_x-1, tip_y-1), 1, (255, 255, 255), -1)

            # All hand keypoints as small colored dots (per-hand color).
            for i, (x, y) in enumerate(hand):
                if x > 0 and y > 0:
                    point_x, point_y = int(x * w), int(y * h)
                    # Fingertips get a slightly larger dot.
                    if i in finger_tips:
                        cv2.circle(frame, (point_x, point_y), 2, hand_color, -1)
                    else:
                        cv2.circle(frame, (point_x, point_y), 1, hand_color, -1)

    return frame
424
+
425
def _blend_hand(hand_a, hand_b, alpha):
    """Blend two hand keypoint arrays; fall back to whichever side was detected.

    All-zero arrays are the "hand not detected" sentinel, so blending only
    happens when both endpoints actually saw the hand.
    """
    a_detected = np.any(hand_a != 0)
    b_detected = np.any(hand_b != 0)
    if a_detected and b_detected:
        return (1 - alpha) * hand_a + alpha * hand_b
    if a_detected:
        return hand_a  # Keep the last known position
    if b_detected:
        return hand_b  # Use the new position
    return np.zeros((21, 2))  # No hands detected on either side


def interpolate_keypoints(kptsA, kptsB, steps):
    """
    Linearly interpolate between two keypoint frames.

    Args:
        kptsA: (pose, left_hand, right_hand) tuple for the start frame.
        kptsB: (pose, left_hand, right_hand) tuple for the end frame.
        steps: Number of intermediate frames to generate.

    Returns:
        List of `steps` (pose, left_hand, right_hand) tuples. Poses are
        always blended; hands fall back to the detected side (see
        _blend_hand).

    NOTE: the per-frame debug prints that used to fire for every detected
    hand pair have been removed — they flooded stdout during rendering.
    """
    poseA, leftA, rightA = kptsA
    poseB, leftB, rightB = kptsB

    frames = []
    for t in range(1, steps + 1):
        alpha = t / (steps + 1)
        interp_pose = (1 - alpha) * poseA + alpha * poseB
        frames.append((interp_pose,
                       _blend_hand(leftA, leftB, alpha),
                       _blend_hand(rightA, rightB, alpha)))
    return frames
464
+
465
def create_stitched_video(videoA_path, videoB_path, output_path="stitched_output.mp4"):
    """Stitch exactly two keypoint videos with an interpolated transition.

    This is a thin convenience wrapper: the render/transition/render loop
    was a line-for-line duplicate of `create_multi_stitched_video`, so it
    now delegates to that shared implementation with the same 15-frame
    transition the original hard-coded.

    Args:
        videoA_path: Path to the first MP4 video.
        videoB_path: Path to the second MP4 video.
        output_path: Where to write the stitched skeleton video.
    """
    create_multi_stitched_video(
        [videoA_path, videoB_path],
        output_path=output_path,
        transition_steps=15,
    )
492
+
493
def _blank_frame():
    """Return a fresh white 1280x720 canvas to render a skeleton onto."""
    return np.ones((720, 1280, 3), dtype=np.uint8) * 255


def create_multi_stitched_video(video_paths, output_path="multi_stitched_output.mp4", transition_steps=15):
    """
    Create a stitched video from multiple video files.

    Keypoints are extracted from every input video, each video's frames are
    rendered as skeletons on a white canvas, and consecutive videos are
    joined with ``transition_steps`` interpolated frames.

    Args:
        video_paths (list): List of paths to MP4 video files
        output_path (str): Output path for the final video
        transition_steps (int): Number of frames for transitions between videos
    """
    if len(video_paths) < 2:
        print("Need at least 2 videos to stitch together!")
        return

    print(f"Processing {len(video_paths)} videos...")

    # Extract keypoints from all videos up front so transitions can look
    # ahead to the first frame of the next video.
    all_keypoints = []
    for i, video_path in enumerate(video_paths):
        print(f"Extracting keypoints from video {i+1}/{len(video_paths)}: {video_path}")
        keypoints = extract_keypoints_from_video(video_path)
        all_keypoints.append(keypoints)
        print(f" - Extracted {len(keypoints)} frames")

    # Create video writer (30 fps, fixed 720p canvas).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, 30.0, (1280, 720))

    total_frames = 0
    try:
        for i, keypoints in enumerate(all_keypoints):
            print(f"Rendering video {i+1}/{len(all_keypoints)}...")

            # Render all frames from the current video.
            for pose, l, r in keypoints:
                out.write(render_person(_blank_frame(), pose, l, r))
                total_frames += 1

            # Add a transition to the next video (skipped after the last).
            if i < len(all_keypoints) - 1:
                print(f" Adding transition to next video...")
                next_keypoints = all_keypoints[i + 1]

                # Interpolate between the last frame of this video and the
                # first frame of the next one.
                interp = interpolate_keypoints(keypoints[-1], next_keypoints[0], steps=transition_steps)
                for pose, l, r in interp:
                    out.write(render_person(_blank_frame(), pose, l, r))
                    total_frames += 1
    finally:
        # Release the writer even if extraction/rendering raises, so the
        # container is finalized and the file handle is not leaked.
        out.release()

    print(f"Multi-stitched video saved to {output_path}")
    print(f"Total frames rendered: {total_frames}")
    print(f"Video duration: {total_frames/30:.2f} seconds")
548
+
549
+ if __name__ == "__main__":
550
+ # Example usage for multiple videos
551
+ video_list = [
552
+ "/Users/ethantam/desktop/35304.mp4",
553
+ "/Users/ethantam/desktop/23978.mp4",
554
+ "/Users/ethantam/desktop/23106.mp4",
555
+ # Add more video paths here as needed
556
+ ]
557
+
558
+ # Create multi-stitched video
559
+ create_multi_stitched_video(video_list, "multi_stitched_output_1.mp4")
560
+
561
+ # Or use the original 2-video function
562
+ # create_stitched_video("/Users/ethantam/desktop/35304.mp4", "/Users/ethantam/desktop/23978.mp4")
videos_rows.csv ADDED
The diff for this file is too large to render. See raw diff