deenasun commited on
Commit
dbca390
·
1 Parent(s): 288f6d2

set up app.py with fully integrated text to ASL video flow

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .DS_Store
2
+ .env
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
__pycache__/asl_gloss.cpython-311.pyc ADDED
Binary file (14.3 kB). View file
 
__pycache__/asl_glosser.cpython-311.pyc ADDED
Binary file (12.1 kB). View file
 
__pycache__/document_parser.cpython-311.pyc ADDED
Binary file (12.4 kB). View file
 
__pycache__/document_parsing.cpython-311.pyc ADDED
Binary file (12.4 kB). View file
 
__pycache__/document_parsing.cpython-313.pyc ADDED
Binary file (10.6 kB). View file
 
__pycache__/document_to_gloss.cpython-311.pyc ADDED
Binary file (12.1 kB). View file
 
__pycache__/document_to_gloss.cpython-313.pyc ADDED
Binary file (10.6 kB). View file
 
__pycache__/vectorizer.cpython-311.pyc ADDED
Binary file (7.07 kB). View file
 
__pycache__/video_gen.cpython-311.pyc ADDED
Binary file (29.8 kB). View file
 
__pycache__/word2vec.cpython-311.pyc ADDED
Binary file (7.56 kB). View file
 
app.py CHANGED
@@ -1,7 +1,176 @@
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from document_to_gloss import DocumentToASLConverter
2
+ from vectorizer import Vectorizer
3
+ from video_gen import create_multi_stitched_video
4
  import gradio as gr
5
+ import asyncio
6
+ import re
7
+ import boto3
8
+ import os
9
+ from botocore.config import Config
10
+ from dotenv import load_dotenv
11
+ import requests
12
+ import tempfile
13
 
14
# Load environment variables from .env file
load_dotenv()

# Load R2/S3 environment secrets
R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
R2_ACCESS_KEY_ID = os.environ.get("R2_ACCESS_KEY_ID")
R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")

# Validate that required environment variables are set.
# Failing fast at import time beats a confusing error on the first request.
if not all([R2_ENDPOINT, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY]):
    raise ValueError("Missing required R2 environment variables. Please check your .env file.")

# Gradio UI metadata and widgets.
title = "AI-SL"
description = "Convert text to ASL!"
article = ("<p style='text-align: center'><a href='https://github.com/deenasun' "
           "target='_blank'>Deena Sun on Github</a></p>")
inputs = gr.File(label="Upload Document (pdf, txt, docx, or epub)")
outputs = [
    gr.JSON(label="Processing Results"),
    gr.Video(label="ASL Video Output")
]

# Module-level pipeline components, created once per process and shared
# across requests.
asl_converter = DocumentToASLConverter()
vectorizer = Vectorizer()
session = boto3.session.Session()

# S3-compatible client pointed at the R2 endpoint.
# NOTE(review): 's3v4' signing appears to be what this endpoint expects —
# confirm against the bucket provider's docs.
s3 = session.client(
    service_name='s3',
    region_name='auto',
    endpoint_url=R2_ENDPOINT,
    aws_access_key_id=R2_ACCESS_KEY_ID,
    aws_secret_access_key=R2_SECRET_ACCESS_KEY,
    config=Config(signature_version='s3v4')
)
+
49
def clean_gloss_token(token):
    """Normalize one raw gloss token.

    Removes square brackets and CR/LF characters, collapses runs of
    whitespace into single spaces, trims the ends, and lowercases the
    result. May return an empty string (callers should skip those).
    """
    # Strip bracket and newline characters outright.
    stripped = re.sub(r'[\[\]\n\r]', '', token)
    # Collapse internal whitespace, trim, then lowercase in one chain.
    return re.sub(r'\s+', ' ', stripped).strip().lower()
59
+
60
async def parse_vectorize_and_search(file):
    """Document -> gloss -> sign-video pipeline.

    Converts the uploaded document to ASL gloss, cleans each gloss token,
    queries the vector store for a matching sign video per token, downloads
    the matches, and stitches them into one clip.

    Args:
        file: Upload from gr.File.
            NOTE(review): passed straight to convert_document — confirm it
            accepts the Gradio file object (vs. a plain path).

    Returns:
        Tuple of (summary dict with status/videos/video_count/gloss/
        cleaned_tokens, path to the stitched video or None).
    """
    print(file)
    gloss = asl_converter.convert_document(file)
    print("ASL", gloss)

    # Split by spaces and clean each token
    gloss_tokens = gloss.split()
    cleaned_tokens = []

    for token in gloss_tokens:
        cleaned = clean_gloss_token(token)
        if cleaned:  # Only add non-empty tokens
            cleaned_tokens.append(cleaned)

    print("Cleaned tokens:", cleaned_tokens)

    videos = []  # remote URLs of matched sign videos (for the summary dict)
    video_files = []  # Store local file paths for stitching

    for g in cleaned_tokens:
        print(f"Processing {g}")
        try:
            # Vector similarity lookup of the token against the sign database.
            result = await vectorizer.vector_query_from_supabase(query=g)
            print("result", result)
            if result.get("match", False):
                video_url = result["video_url"]
                videos.append(video_url)

                # Download the video
                local_path = download_video_from_url(video_url)
                if local_path:
                    video_files.append(local_path)

        except Exception as e:
            # A failing token is skipped so one bad lookup doesn't abort
            # the whole document.
            print(f"Error processing {g}: {e}")
            continue

    # Create stitched video if we have multiple videos
    stitched_video_path = None
    if len(video_files) > 1:
        try:
            print(f"Creating stitched video from {len(video_files)} videos...")
            stitched_video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
            create_multi_stitched_video(video_files, stitched_video_path)
            print(f"Stitched video created: {stitched_video_path}")
        except Exception as e:
            print(f"Error creating stitched video: {e}")
            stitched_video_path = None
    elif len(video_files) == 1:
        # If only one video, just use it directly
        stitched_video_path = video_files[0]

    # Clean up individual video files after stitching
    for video_file in video_files:
        if video_file != stitched_video_path:  # Don't delete the final output
            cleanup_temp_video(video_file)

    return {
        "status": "success",
        "videos": videos,
        "video_count": len(videos),
        "gloss": gloss,
        "cleaned_tokens": cleaned_tokens
    }, stitched_video_path
124
+
125
# Create a synchronous wrapper for Gradio
def parse_vectorize_and_search_sync(file):
    """Blocking entry point: runs the async pipeline to completion so the
    (synchronous) Gradio callback machinery can call it directly."""
    pipeline = parse_vectorize_and_search(file)
    return asyncio.run(pipeline)
128
+
129
def download_video_from_url(video_url):
    """
    Download a video from a public R2 URL.

    Args:
        video_url: Publicly reachable URL of the video.

    Returns:
        Local path of the downloaded .mp4 temp file, or None on any
        failure (connection error, HTTP error status, filesystem error).
    """
    try:
        # Create a temporary file with .mp4 extension; close it right away
        # so the write below can reopen it by path on any platform.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        temp_path = temp_file.name
        temp_file.close()

        # Download the video. The timeout (connect, read) is the fix: the
        # original call had none, so a stalled server could hang the Gradio
        # worker indefinitely.
        print(f"Downloading video from: {video_url}")
        response = requests.get(video_url, stream=True, timeout=(10, 60))
        response.raise_for_status()

        # Save to temporary file in chunks to bound memory use.
        with open(temp_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Video downloaded to: {temp_path}")
        return temp_path

    except Exception as e:
        # Best-effort contract: callers test for None rather than catching.
        print(f"Error downloading video: {e}")
        return None
156
+
157
def cleanup_temp_video(file_path):
    """Best-effort removal of a temporary video file.

    Accepts None, an empty string, or an already-missing path without
    complaint; any OS error is printed and swallowed so cleanup can never
    abort the surrounding request.
    """
    try:
        removable = bool(file_path) and os.path.exists(file_path)
        if removable:
            os.unlink(file_path)
            print(f"Cleaned up: {file_path}")
    except Exception as e:
        print(f"Error cleaning up file: {e}")
167
+
168
# Wire the synchronous pipeline wrapper into a single-page Gradio app:
# one file input, a JSON summary plus the stitched video as outputs.
intf = gr.Interface(
    fn=parse_vectorize_and_search_sync,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article
)
# share=True also publishes a temporary public *.gradio.live URL.
intf.launch(share=True)
asl_gloss.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
ASL Gloss Converter using Claude API

This script converts English text input to ASL gloss format.
ASL gloss is a written representation of sign language
that preserves the spatial and grammatical structure of ASL.
"""
9
+
10
+ import os
11
+ import sys
12
+ import argparse
13
+ import json
14
+ from typing import Optional, Dict, Any
15
+ from pathlib import Path
16
+
17
+ try:
18
+ import anthropic
19
+ except ImportError:
20
+ print("Error: anthropic package not found. Please install it with:")
21
+ print("pip install anthropic")
22
+ sys.exit(1)
23
+
24
+
25
class ASLGlossConverter:
    """
    Converts English text to ASL gloss using Claude's API.

    ASL gloss preserves the spatial and grammatical structure of American Sign Language,
    including features like:
    - Topic-comment structure
    - Spatial referencing
    - Non-manual markers (facial expressions, head movements)
    - Classifier predicates
    - Time indicators
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the ASL gloss converter.

        Args:
            api_key: Anthropic API key. If not provided, will look for ANTHROPIC_API_KEY env var.

        Raises:
            ValueError: if no API key is given and ANTHROPIC_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv('ANTHROPIC_API_KEY')
        if not self.api_key:
            raise ValueError(
                "API key not provided. Set ANTHROPIC_API_KEY environment variable "
                "or pass api_key parameter."
            )

        self.client = anthropic.Anthropic(api_key=self.api_key)

        # System prompt that defines ASL gloss conversion rules
        # (sent verbatim with every request; do not edit casually — the
        # output format downstream parsing expects is defined here).
        self.system_prompt = """You are an expert in American Sign Language (ASL) and ASL gloss. Your task is to convert English text to ASL gloss format with bracketed phrases.

ASL GLOSS RULES:
1. Use ALL CAPS for all signs
2. Group related words/concepts into bracketed phrases [PHRASE]
3. Use underscores (_) to connect words within a phrase that are signed together
4. Use classifiers (CL:1, CL:3, CL:C, etc.) for spatial relationships
5. Use pronouns: I (first person), YOU (second person), HE/SHE/THEY (third person)
6. Use time indicators: PAST, FUTURE, NOW, ALWAYS, NEVER
7. Use topic-comment structure: TOPIC COMMENT
8. Use rhetorical questions: RHQ
9. Use conditional markers: IF-THEN
10. Use negation: NOT, NONE, CAN'T, DON'T-WANT
11. Use aspect markers: FINISH, CONTINUE, REPEAT
12. Use directional verbs: GIVE-TO, TELL-TO, ASK-TO
13. Use location markers: HERE, THERE, WHERE
14. Use manner adverbs: FAST, SLOW, CAREFUL, HARD

PHRASE GROUPING GUIDELINES:
- Group compound expressions: [GOOD_MORNING], [THANK_YOU], [HOW_ARE_YOU]
- Keep names as single phrases: [JOHN], [NATALIIA], [CHRISTOPHER_ROBIN]
- Group related concepts: [MY_NAME], [YOUR_HOUSE], [LAST_WEEK]
- Keep simple words separate: [I] [LOVE] [YOU]

IMPORTANT: Output ONLY the bracketed ASL phrases. Each phrase should be in ALL CAPS with underscores connecting related words.

EXAMPLES:
- "Good morning, Brian" → [GOOD_MORNING] [BRIAN]
- "My name is Nataliia" → [I] [NAME] [NATALIIA]
- "I love you" → [I] [LOVE] [YOU]
- "What is your name?" → [YOU] [NAME] [WHAT]
- "I don't understand" → [I] [UNDERSTAND] [NOT]
- "Where is the bathroom?" → [BATHROOM] [WHERE]
- "I want to go home" → [I] [WANT] [GO] [HOME]
- "The cat is sleeping" → [CAT] [SLEEP]
- "I finished my homework" → [I] [HOMEWORK] [FINISH]
- "Do you want coffee?" → [YOU] [WANT] [COFFEE]
- "I can't hear you" → [I] [HEAR] [YOU] [CAN'T]
- "The weather is nice today" → [TODAY] [WEATHER] [NICE]
- "Thank you very much" → [THANK_YOU] [VERY_MUCH]
- "How are you doing?" → [HOW_ARE_YOU] [DOING]
- "See you later" → [SEE_YOU_LATER]
- "I work at Google" → [I] [WORK] [GOOGLE]

Convert the given English text to proper ASL gloss format with bracketed phrases, maintaining the meaning and intent while following ASL grammar and structure."""

    def convert_text(self, english_text: str) -> str:
        """
        Convert English text to ASL gloss using Anthropic v1.x messages API.

        Args:
            english_text: Plain English input text.

        Returns:
            The model's gloss output, stripped of surrounding whitespace.

        Raises:
            Exception: wraps any API error with a descriptive message.
        """
        try:
            message = self.client.messages.create(
                model="claude-3-5-sonnet-20240620",
                max_tokens=1000,
                system=self.system_prompt,
                messages=[
                    {"role": "user", "content": f"Convert this English text to ASL gloss:\n\n{english_text}"}
                ]
            )
            # First content block is assumed to be the text response.
            return message.content[0].text.strip()
        except Exception as e:
            raise Exception(f"Error converting text to ASL gloss: {str(e)}")

    def convert_file(self, input_file: str, output_file: Optional[str] = None) -> str:
        """
        Convert text from a file to ASL gloss.

        Args:
            input_file: Path to input text file
            output_file: Path to output file (optional)

        Returns:
            The ASL gloss text

        Raises:
            Exception: on missing/empty input or any conversion failure.
        """
        try:
            # Read input file
            with open(input_file, 'r', encoding='utf-8') as f:
                english_text = f.read().strip()

            if not english_text:
                raise ValueError("Input file is empty")

            # Convert to ASL gloss
            asl_gloss = self.convert_text(english_text)

            # Write to output file if specified
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"ASL gloss saved to: {output_file}")

            return asl_gloss

        except FileNotFoundError:
            raise Exception(f"Input file not found: {input_file}")
        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}")

    def batch_convert(self, input_files: list, output_dir: Optional[str] = None) -> Dict[str, str]:
        """
        Convert multiple files to ASL gloss.

        Args:
            input_files: List of input file paths
            output_dir: Directory to save output files (optional)

        Returns:
            Dictionary mapping input files to their ASL gloss.
            Per-file failures are recorded as "ERROR: ..." values
            rather than raised, so one bad file doesn't stop the batch.
        """
        results = {}

        for input_file in input_files:
            try:
                print(f"Converting: {input_file}")

                if output_dir:
                    # Create output filename: <stem>_asl_gloss<original suffix>
                    input_path = Path(input_file)
                    output_filename = f"{input_path.stem}_asl_gloss{input_path.suffix}"
                    output_file = Path(output_dir) / output_filename
                else:
                    output_file = None

                asl_gloss = self.convert_file(input_file, str(output_file) if output_file else None)
                results[input_file] = asl_gloss

                print(f"✓ Completed: {input_file}")

            except Exception as e:
                print(f"✗ Error processing {input_file}: {str(e)}")
                results[input_file] = f"ERROR: {str(e)}"

        return results
188
+
189
+
190
def main():
    """Main function for command-line usage.

    Parses CLI arguments and dispatches to interactive, batch, file, or
    direct-text conversion.

    Returns:
        Process exit code: 0 on success, 1 on error or when no action
        was requested.
    """
    parser = argparse.ArgumentParser(
        description="Convert English text to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert text directly
  python asl_gloss.py "Hello, how are you?"

  # Convert from file
  python asl_gloss.py -f input.txt

  # Convert from file with output
  python asl_gloss.py -f input.txt -o output.txt

  # Batch convert multiple files
  python asl_gloss.py -b file1.txt file2.txt -d output_dir/

  # Interactive mode
  python asl_gloss.py -i
        """
    )

    parser.add_argument(
        'text',
        nargs='?',
        help='English text to convert to ASL gloss'
    )

    parser.add_argument(
        '-f', '--file',
        help='Input file containing English text'
    )

    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )

    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple files'
    )

    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )

    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )

    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        # Initialize converter (raises if no API key is available).
        converter = ASLGlossConverter(api_key=args.api_key)

        if args.interactive:
            print("ASL Gloss Converter - Interactive Mode")
            print("Enter English text to convert to ASL gloss (or 'quit' to exit):")
            print("-" * 50)

            while True:
                try:
                    text = input("\nEnglish text: ").strip()
                    if text.lower() in ['quit', 'exit', 'q']:
                        break

                    if not text:
                        continue

                    print("Converting...")
                    asl_gloss = converter.convert_text(text)
                    print(f"ASL Gloss: {asl_gloss}")

                except KeyboardInterrupt:
                    print("\nExiting...")
                    break
                except Exception as e:
                    # Keep the REPL alive on per-input failures.
                    print(f"Error: {str(e)}")

        elif args.batch:
            # FIX: the original re-checked `if not args.batch` here, which is
            # unreachable inside this branch (args.batch is truthy) — removed.
            print(f"Batch converting {len(args.batch)} files...")
            results = converter.batch_convert(args.batch, args.output_dir)

            print("\nResults:")
            for input_file, result in results.items():
                print(f"\n{input_file}:")
                print(result)

        elif args.file:
            asl_gloss = converter.convert_file(args.file, args.output)
            if not args.output:
                print("ASL Gloss:")
                print(asl_gloss)

        elif args.text:
            asl_gloss = converter.convert_text(args.text)
            print("ASL Gloss:")
            print(asl_gloss)

            if args.output:
                with open(args.output, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"\nSaved to: {args.output}")

        else:
            # No action specified: show help and signal failure.
            parser.print_help()
            return 1

        return 0

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())
document_parsing.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Optional, Union
5
+ import logging
6
+
7
+ # Import document parsing libraries
8
+ try:
9
+ import PyPDF2
10
+ from docx import Document
11
+ import ebooklib
12
+ from ebooklib import epub
13
+ from bs4 import BeautifulSoup
14
+ except ImportError as e:
15
+ print(f"Missing required dependency: {e}")
16
+ print("Please install dependencies with: pip install -r requirements.txt")
17
+ sys.exit(1)
18
+
19
# Configure logging
# Module-level logger at INFO so parsing progress is visible by default.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
22
+
23
+
24
class DocumentParser:
    """
    A class to parse and extract text from various document formats.
    Supports PDF, TXT, DOC, DOCX, and EPUB files.
    """

    def __init__(self):
        # Dispatch table: MIME type -> bound parser method.
        self.supported_formats = {
            'application/pdf': self._parse_pdf,
            'text/plain': self._parse_txt,
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._parse_docx,
            'application/msword': self._parse_doc,
            'application/epub+zip': self._parse_epub
        }

    def get_file_type(self, file_path: Union[str, Path]) -> str:
        """
        Detect the MIME type of a file using file extension.

        Args:
            file_path: Path to the file

        Returns:
            MIME type string ('unknown' for unrecognized extensions)
        """
        return self._get_mime_from_extension(file_path)

    def _get_mime_from_extension(self, file_path: Union[str, Path]) -> str:
        """
        Determine MIME type from file extension.

        Args:
            file_path: Path to the file

        Returns:
            MIME type string
        """
        extension = Path(file_path).suffix.lower()
        extension_map = {
            '.pdf': 'application/pdf',
            '.txt': 'text/plain',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.doc': 'application/msword',
            '.epub': 'application/epub+zip'
        }
        return extension_map.get(extension, 'unknown')

    def extract_text(self, file_path: Union[str, Path]) -> Optional[str]:
        """
        Extract text from a document file.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text as string, or None if extraction fails
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return None

        try:
            mime_type = self.get_file_type(file_path)
            logger.info(f"Detected file type: {mime_type}")

            if mime_type in self.supported_formats:
                return self.supported_formats[mime_type](file_path)
            else:
                logger.error(f"Unsupported file type: {mime_type}")
                return None

        except Exception as e:
            # Individual parsers raise on failure; normalize everything to
            # None here so callers have a single failure signal.
            logger.error(f"Error extracting text from {file_path}: {e}")
            return None

    def _parse_pdf(self, file_path: Path) -> str:
        """
        Extract text from PDF file.

        Args:
            file_path: Path to PDF file

        Returns:
            Extracted text (pages joined with newlines)
        """
        text = ""
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)

                for page_num in range(len(pdf_reader.pages)):
                    page = pdf_reader.pages[page_num]
                    page_text = page.extract_text()
                    # extract_text() can return None/empty for image-only pages.
                    if page_text:
                        text += page_text + "\n"

        except Exception as e:
            logger.error(f"Error parsing PDF {file_path}: {e}")
            raise

        return text.strip()

    def _parse_txt(self, file_path: Path) -> str:
        """
        Extract text from plain text file.

        Args:
            file_path: Path to text file

        Returns:
            Extracted text
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except UnicodeDecodeError:
            # Try with different encoding — latin-1 maps every byte, so
            # this fallback cannot itself fail to decode.
            try:
                with open(file_path, 'r', encoding='latin-1') as file:
                    return file.read()
            except Exception as e:
                logger.error(f"Error reading text file {file_path}: {e}")
                raise
        except Exception as e:
            logger.error(f"Error reading text file {file_path}: {e}")
            raise

    def _parse_docx(self, file_path: Path) -> str:
        """
        Extract text from DOCX file.

        Args:
            file_path: Path to DOCX file

        Returns:
            Extracted text (paragraphs joined with newlines)
        """
        try:
            doc = Document(file_path)
            text = ""

            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"

            return text.strip()

        except Exception as e:
            logger.error(f"Error parsing DOCX {file_path}: {e}")
            raise

    def _parse_doc(self, file_path: Path) -> str:
        """
        Extract text from DOC file (legacy Word format).
        Note: This requires additional dependencies like antiword or catdoc.

        Args:
            file_path: Path to DOC file

        Returns:
            Extracted text

        Raises:
            Exception: if neither antiword nor catdoc is installed or
                both converters fail.
        """
        try:
            # Try using antiword if available
            import subprocess
            result = subprocess.run(['antiword', str(file_path)],
                                    capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip()

            # Fallback: try catdoc
            result = subprocess.run(['catdoc', str(file_path)],
                                    capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip()

            raise Exception("Neither antiword nor catdoc found. Please install one of them.")

        except FileNotFoundError:
            # subprocess raises FileNotFoundError when the binary is absent.
            raise Exception("antiword or catdoc not found. Please install one of them for DOC file support.")
        except Exception as e:
            logger.error(f"Error parsing DOC {file_path}: {e}")
            raise

    def _parse_epub(self, file_path: Path) -> str:
        """
        Extract text from EPUB file.

        Args:
            file_path: Path to EPUB file

        Returns:
            Extracted text (HTML stripped via BeautifulSoup)
        """
        try:
            book = epub.read_epub(file_path)
            text = ""

            for item in book.get_items():
                if item.get_type() == ebooklib.ITEM_DOCUMENT:
                    content = item.get_content().decode('utf-8')
                    soup = BeautifulSoup(content, 'html.parser')
                    text += soup.get_text() + "\n"

            return text.strip()

        except Exception as e:
            logger.error(f"Error parsing EPUB {file_path}: {e}")
            raise
234
+
235
+
236
def main():
    """CLI demo: extract and print the text of the file named on the
    command line; exits 1 on usage error or extraction failure."""
    if len(sys.argv) != 2:
        print("Usage: python document_parsing.py <file_path>")
        print("Supported formats: PDF, TXT, DOC, DOCX, EPUB")
        sys.exit(1)

    file_path = sys.argv[1]
    parser = DocumentParser()

    print(f"Extracting text from: {file_path}")
    print("-" * 50)

    extracted_text = parser.extract_text(file_path)

    # Guard clause: bail out on failure, then report success unindented.
    if not extracted_text:
        print("Failed to extract text from the file.")
        sys.exit(1)

    print("Extracted text:")
    print(extracted_text)
    print(f"\nTotal characters: {len(extracted_text)}")


if __name__ == "__main__":
    main()
document_to_gloss.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Document to ASL Gloss Converter
4
+
5
+ This script combines document parsing and ASL glossing to convert
6
+ uploaded documents (PDF, TXT, DOC, DOCX, EPUB) directly to ASL gloss format.
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ import argparse
12
+ from typing import Optional, Dict, Any
13
+ from pathlib import Path
14
+
15
+ # Import our existing modules
16
+ from document_parsing import DocumentParser
17
+ from asl_gloss import ASLGlossConverter
18
+
19
+
20
+ class DocumentToASLConverter:
21
+ """
22
+ Combines document parsing and ASL glossing functionality.
23
+ Extracts text from various document formats and converts to ASL gloss.
24
+ """
25
+
26
+ def __init__(self, api_key: Optional[str] = None):
27
+ """
28
+ Initialize the document to ASL converter.
29
+
30
+ Args:
31
+ api_key: Anthropic API key. If not provided, will look for ANTHROPIC_API_KEY env var.
32
+ """
33
+ self.document_parser = DocumentParser()
34
+ self.asl_converter = ASLGlossConverter(api_key=api_key)
35
+
36
+ def convert_document(self, document_path: str, output_file: Optional[str] = None) -> str:
37
+ """
38
+ Convert a document file to ASL gloss.
39
+
40
+ Args:
41
+ document_path: Path to the document file
42
+ output_file: Path to output file (optional)
43
+
44
+ Returns:
45
+ The ASL gloss text
46
+ """
47
+ try:
48
+ print(f"Processing document: {document_path}")
49
+
50
+ # Step 1: Extract text from document
51
+ print("Step 1: Extracting text from document...")
52
+ extracted_text = self.document_parser.extract_text(document_path)
53
+
54
+ if not extracted_text:
55
+ raise Exception("Failed to extract text from document")
56
+
57
+ print(f"✓ Extracted {len(extracted_text)} characters")
58
+
59
+ # Step 2: Convert text to ASL gloss
60
+ print("Step 2: Converting to ASL gloss...")
61
+ asl_gloss = self.asl_converter.convert_text(extracted_text)
62
+
63
+ print("✓ ASL gloss conversion completed")
64
+
65
+ # Step 3: Save to output file if specified
66
+ if output_file:
67
+ with open(output_file, 'w', encoding='utf-8') as f:
68
+ f.write(asl_gloss)
69
+ print(f"✓ ASL gloss saved to: {output_file}")
70
+
71
+ return asl_gloss
72
+
73
+ except Exception as e:
74
+ raise Exception(f"Error processing document: {str(e)}")
75
+
76
def batch_convert_documents(self, document_paths: list, output_dir: Optional[str] = None) -> Dict[str, str]:
    """
    Convert multiple documents to ASL gloss, one at a time.

    Args:
        document_paths: List of document file paths
        output_dir: Directory to save output files (optional)

    Returns:
        Dictionary mapping input files to their ASL gloss; a failed
        conversion maps to a string beginning with "ERROR:".
    """
    results: Dict[str, str] = {}
    banner = '=' * 50

    for document_path in document_paths:
        try:
            print(f"\n{banner}")
            print(f"Converting: {document_path}")
            print(f"{banner}")

            # Derive a per-document output path only when an output
            # directory was requested; otherwise pass None through.
            output_file = None
            if output_dir:
                source = Path(document_path)
                output_file = Path(output_dir) / f"{source.stem}_asl_gloss.txt"

            gloss = self.convert_document(
                document_path,
                str(output_file) if output_file else None,
            )
            results[document_path] = gloss

            print(f"✓ Completed: {document_path}")

        except Exception as e:
            # Record the failure instead of aborting the whole batch.
            print(f"✗ Error processing {document_path}: {str(e)}")
            results[document_path] = f"ERROR: {str(e)}"

    return results
113
+
114
def get_supported_formats(self) -> list:
    """
    Return the document file extensions this converter accepts.

    Returns:
        List of supported file extensions (with leading dots).
    """
    return [ext for ext in ('.pdf', '.txt', '.docx', '.doc', '.epub')]
122
+
123
+
124
def _show_formats(converter) -> int:
    """Print the converter's supported document formats; always returns 0."""
    print("Supported Document Formats:")
    print("=" * 30)
    formats = converter.get_supported_formats()
    for fmt in formats:
        print(f"  • {fmt}")
    print("\nExamples: .pdf, .txt, .docx, .doc, .epub")
    return 0


def _run_interactive(converter) -> int:
    """Prompt for document paths in a loop and convert each one; returns 0."""
    print("Document to ASL Gloss Converter - Interactive Mode")
    print("Enter document file paths to convert (or 'quit' to exit):")
    print("-" * 60)

    while True:
        try:
            doc_path = input("\nDocument path: ").strip()
            if doc_path.lower() in ['quit', 'exit', 'q']:
                break
            if not doc_path:
                continue
            if not os.path.exists(doc_path):
                print(f"Error: File not found: {doc_path}")
                continue

            # Optional output file; empty answer means print to stdout.
            output_file = input("Output file (optional, press Enter to skip): ").strip()
            if not output_file:
                output_file = None

            print("Converting...")
            asl_gloss = converter.convert_document(doc_path, output_file)

            if not output_file:
                print("\nASL Gloss:")
                print("-" * 20)
                print(asl_gloss)

        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            # Keep the interactive session alive on per-document errors.
            print(f"Error: {str(e)}")

    return 0


def _run_batch(converter, document_paths, output_dir) -> int:
    """Convert several documents and print a per-document summary; returns 0."""
    print(f"Batch converting {len(document_paths)} documents...")
    results = converter.batch_convert_documents(document_paths, output_dir)

    print("\n" + "=" * 60)
    print("BATCH CONVERSION RESULTS")
    print("=" * 60)
    for doc_path, result in results.items():
        print(f"\nDocument: {doc_path}")
        print("-" * 40)
        if result.startswith("ERROR:"):
            print(f"❌ {result}")
        else:
            print("✅ Conversion successful")
            if not output_dir:
                print("ASL Gloss:")
                # Truncate long glosses so the summary stays readable.
                print(result[:500] + "..." if len(result) > 500 else result)
    return 0


def main():
    """Command-line entry point; returns a process exit code (0 ok, 1 error)."""
    parser = argparse.ArgumentParser(
        description="Convert documents to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert a single document
  python document_to_asl.py document.pdf

  # Convert document with output file
  python document_to_asl.py document.pdf -o output.txt

  # Batch convert multiple documents
  python document_to_asl.py -b doc1.pdf doc2.docx doc3.txt -d output_dir/

  # Interactive mode
  python document_to_asl.py -i

  # Show supported formats
  python document_to_asl.py --formats
        """
    )

    parser.add_argument(
        'document',
        nargs='?',
        help='Document file to convert to ASL gloss'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )
    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple documents'
    )
    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )
    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )
    parser.add_argument(
        '--formats',
        action='store_true',
        help='Show supported document formats'
    )
    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        converter = DocumentToASLConverter(api_key=args.api_key)

        if args.formats:
            return _show_formats(converter)

        if args.interactive:
            return _run_interactive(converter)

        # NOTE: the original code re-checked `if not args.batch` inside this
        # branch; that check was unreachable and has been removed.
        if args.batch:
            return _run_batch(converter, args.batch, args.output_dir)

        if args.document:
            asl_gloss = converter.convert_document(args.document, args.output)
            if not args.output:
                print("\nASL Gloss:")
                print("-" * 20)
                print(asl_gloss)
            return 0

        # No action requested: show usage and signal failure.
        parser.print_help()
        return 1

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1
277
+
278
+
279
if __name__ == "__main__":
    # Propagate main()'s integer return value as the process exit code.
    sys.exit(main())
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ anthropic==0.54.0
2
+ beautifulsoup4==4.13.4
3
+ boto3==1.38.41
4
+ botocore==1.38.41
5
+ ebooklib==0.19
6
+ gensim==4.3.3
7
+ gradio==5.34.2
8
+ mediapipe==0.10.21
9
+ numpy<2.0
10
+ opencv-python
11
+ pandas<2.0
12
+ PyPDF2==3.0.1
13
+ python-dotenv==1.1.0
14
+ python-docx
15
+ requests==2.32.4
16
+ supabase==2.15.3
test.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify the fixes for the ASL gloss processing
4
+ """
5
+
6
+ import asyncio
7
+ import re
8
+ from vectorizer import Vectorizer
9
+
10
+
11
+ def clean_gloss_token(token):
12
+ """
13
+ Clean a gloss token by removing brackets, newlines, and extra whitespace
14
+ """
15
+ # Remove brackets and newlines
16
+ cleaned = re.sub(r'[\[\]\n\r]', '', token)
17
+ # Remove extra whitespace
18
+ cleaned = re.sub(r'\s+', ' ', cleaned).strip()
19
+ return cleaned.lower()
20
+
21
+
22
def test_gloss_parsing():
    """Exercise token cleaning on a sample multi-line gloss and return the tokens."""
    sample_gloss = ("ASL [BEAR] [NAME] [OSKI] [LOVE] [HONEY] [BUT] [ALWAYS] "
                    "[GET-STUCK] [TREE]\n\n[ONE_DAY] [HE] [DISCOVER] [LADDER]\n\n"
                    "[PROBLEM] [SOLVE] [FINISH]")

    print("Original gloss:")
    print(sample_gloss)
    print("\n" + "="*50 + "\n")

    # Tokenize on whitespace, clean each piece, and keep only non-empty results.
    cleaned_tokens = [
        cleaned
        for cleaned in (clean_gloss_token(raw) for raw in sample_gloss.split())
        if cleaned
    ]

    print("Cleaned tokens:")
    for position, token in enumerate(cleaned_tokens, start=1):
        print(f"{position:2d}. {token}")

    return cleaned_tokens
47
+
48
+
49
async def test_vectorizer():
    """Smoke-test the Vectorizer against a handful of gloss words."""
    try:
        vectorizer = Vectorizer()

        # Words expected to appear in the embedding vocabulary.
        for word in ("BEAR", "LOVE", "TREE", "HE", "FINISH"):
            print(f"\nTesting word: {word}")
            outcome = await vectorizer.vector_query_from_supabase(word)
            print(f"Result: {outcome}")

    except Exception as e:
        print(f"Error testing vectorizer: {e}")
64
+
65
+
66
async def main():
    """Run the gloss-parsing and vectorizer smoke tests in sequence."""
    divider = "=" * 50
    print("Testing ASL Gloss Processing Fixes")
    print(divider)

    # Step 1: token cleaning.
    print("\n1. Testing gloss parsing...")
    tokens = test_gloss_parsing()
    print(f"Total cleaned tokens: {len(tokens)}")

    # Step 2: embedding lookup (requires env/credentials to be configured).
    print("\n2. Testing vectorizer...")
    await test_vectorizer()

    print("\n" + divider)
    print("Test completed!")
82
+
83
+
84
if __name__ == "__main__":
    # Entry point: drive the async test suite with asyncio's event loop.
    asyncio.run(main())
vectorizer.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gensim
2
+ import gensim.downloader
3
+ import numpy as np
4
+ import pandas as pd
5
+ import os
6
+ from supabase import acreate_client, AsyncClient
7
+ from dotenv import load_dotenv
8
+
9
class Vectorizer:
    """
    A class to:
    - Generate embeddings of words (via a gensim KeyedVectors model)
    - Query for words from Supabase database based on vector similarity
    - Return matching ASL videos for words
    """

    def load_kv(self, model_name='word2vec-google-news-300'):
        """
        Return a KeyedVectors object downloaded via gensim, or None on failure.

        Args:
            model_name: gensim-downloader model identifier.
        """
        try:
            kv = gensim.downloader.load(model_name)  # returns a keyedvector
            return kv
        except Exception as e:
            # Fail soft: callers must handle a None self.kv.
            print(f"Unable to load embedding model from gensim: {e}")
            return None

    async def initialize_supabase(self):
        # Credentials come from the environment (populated by load_dotenv()
        # in __init__). NOTE(review): no check that the vars are set — a
        # missing env var reaches acreate_client as None.
        url: str = os.environ.get("SUPABASE_URL")
        key: str = os.environ.get("SUPABASE_KEY")
        supabase: AsyncClient = await acreate_client(url, key)
        return supabase

    def __init__(self):
        # Load .env so SUPABASE_URL/SUPABASE_KEY are available later.
        load_dotenv()
        self.kv = self.load_kv()
        self.supabase = None  # Will be initialized lazily (async) when needed

    async def ensure_supabase_initialized(self):
        """Create the Supabase client on first use (idempotent)."""
        if self.supabase is None:
            self.supabase = await self.initialize_supabase()

    def encode(self, word):
        """Return the embedding vector for `word`, or None if out of vocabulary."""
        print(f"encoding {word}")
        if self.kv is not None and word in self.kv.key_to_index:
            return self.kv[word]
        else:
            print(f"Error: {word} is not in the KeyedVector's vocabulary")
            return None

    def encode_and_format(self, word):
        """
        Encode `word` and render the vector as a bracketed, comma-separated
        string (the format Supabase expects for vector columns).
        Returns None when the word is out of vocabulary.
        """
        enc = self.encode(word)
        return "[" + ",".join(map(str, enc.tolist())) + "]" if enc is not None else None

    async def vector_query_from_supabase(self, query):
        """
        Look up the nearest stored word/video for `query` via the
        `match_vector` RPC. Returns a dict with at least a "match" bool;
        on success it also carries query, matching_word, video_url,
        similarity (keys supplied by the RPC's result rows).
        """
        try:
            await self.ensure_supabase_initialized()
            query_embedding = self.encode(query)
            if query_embedding is None:
                return {
                    "match": False,
                    "error": f"'{query}' not in vocabulary"
                }

            # RPC expects a plain list, not a numpy array.
            query_embedding = query_embedding.tolist()

            if self.supabase is not None:
                # match_threshold 0.0 means the best row is always returned
                # when the table is non-empty; match_count limits to 1 row.
                result = await self.supabase.rpc(
                    "match_vector",
                    {
                        "query_embedding": query_embedding,
                        "match_threshold": 0.0,
                        "match_count": 1
                    }
                ).execute()

                data = result.data
                if data:
                    match = data[0]
                    return {
                        "match": True,
                        "query": query,
                        "matching_word": match["word"],
                        "video_url": match["video_url"],
                        "similarity": match["similarity"]
                    }
                else:
                    return {"match": False}
            else:
                return {"match": False, "error": "Supabase not initialized"}

        except Exception as e:
            # Network/RPC failures are reported in-band rather than raised.
            print(f"RPC call failed: {e}")
            return {"match": False, "error": str(e)}
100
+
101
+
102
def load_filtered_kv(model_name='word2vec-google-news-300', vocab=None):
    """
    Load a gensim KeyedVectors model, optionally restricted to `vocab`.

    Args:
        model_name: gensim-downloader model identifier.
        vocab: Iterable of words to keep. If falsy/None, the full model
            is returned unchanged.

    Returns:
        A KeyedVectors instance (filtered when `vocab` was given), or
        None if the model could not be loaded.
    """
    if vocab is None:
        vocab = []
    try:
        # gensim.downloader.load returns a KeyedVectors
        original_kv = gensim.downloader.load(model_name)
        if not vocab:
            return original_kv

        # Keep only the requested words that exist in the model.
        filtered_key2vec_map = {}
        for key in vocab:
            if key in original_kv.key_to_index:
                filtered_key2vec_map[key] = original_kv[key]

        new_kv = gensim.models.KeyedVectors(
            vector_size=original_kv.vector_size)
        new_kv.add_vectors(list(filtered_key2vec_map.keys()),
                           np.array(list(filtered_key2vec_map.values())))
        # BUG FIX: previously returned original_kv here, silently
        # discarding the filtered model that was just built.
        return new_kv
    except Exception as e:
        print(f"Unable to load embedding model from gensim: {e}")
        return None
128
+
129
+
130
async def main():
    """Demo: encode a word, then run two similarity queries against Supabase."""
    vectorizer = Vectorizer()

    vector = vectorizer.encode("test")
    print(vector)

    for animal in ("dog", "cat"):
        result = await vectorizer.vector_query_from_supabase(animal)
        print(result)

    # NOTE: a one-off ingestion pipeline previously lived here (commented
    # out): read 'videos_rows.csv', add an 'embedding' column via
    # encode_and_format, drop rows without embeddings, and write
    # word/video_url/embedding to vectors.csv.
151
+
152
+
153
if __name__ == "__main__":
    # Run the async demo when executed directly.
    import asyncio
    asyncio.run(main())
video_gen.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import mediapipe as mp
3
+ import numpy as np
4
+
5
def extract_keypoints_from_video(video_path, verbose=False):
    """
    Run MediaPipe Pose + Hands over every frame of a video.

    Args:
        video_path: Path to the input video file (anything cv2.VideoCapture accepts).
        verbose: When True, print a line per processed frame.

    Returns:
        List of (pose, left_hand, right_hand) tuples, one per frame where a
        pose was detected: pose is a (33, 2) array of normalized x/y
        landmarks; each hand is a (21, 2) array, all-zero when not detected.
        Frames with no detected pose are skipped entirely.
    """
    mp_pose = mp.solutions.pose
    mp_hands = mp.solutions.hands

    pose_model = mp_pose.Pose()
    hands_model = mp_hands.Hands(static_image_mode=False, max_num_hands=2)

    cap = cv2.VideoCapture(video_path)
    keypoints_sequence = []

    frame_idx = 0

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # MediaPipe expects RGB; OpenCV delivers BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        h, w, _ = frame.shape

        # Pose estimation
        pose_results = pose_model.process(frame_rgb)
        if not pose_results.pose_landmarks:
            # No person found: drop the frame but keep the index advancing.
            frame_idx += 1
            continue

        # Extract 33 body keypoints (normalized coordinates).
        pose_landmarks = pose_results.pose_landmarks.landmark
        pose = np.array([[lm.x, lm.y] for lm in pose_landmarks])  # shape (33, 2)

        # Hand tracking — zeros mean "hand not detected this frame".
        left_hand = np.zeros((21, 2))
        right_hand = np.zeros((21, 2))

        hand_results = hands_model.process(frame_rgb)
        if hand_results.multi_hand_landmarks and hand_results.multi_handedness:
            for hand_landmarks, hand_info in zip(hand_results.multi_hand_landmarks, hand_results.multi_handedness):
                label = hand_info.classification[0].label  # 'Left' or 'Right'
                hand_array = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark])
                if label == "Left":
                    left_hand = hand_array
                else:
                    right_hand = hand_array

        keypoints_sequence.append((pose, left_hand, right_hand))

        if verbose:
            print(f"Processed frame {frame_idx}")
        frame_idx += 1

    # Release camera/file handle and MediaPipe graph resources.
    cap.release()
    pose_model.close()
    hands_model.close()

    return keypoints_sequence
60
+
61
def render_person(frame, pose, left_hand, right_hand):
    """
    Draw a cartoon bear character onto `frame` from MediaPipe keypoints.

    Args:
        frame: BGR image (numpy array) to draw on, modified in place.
        pose: (33, 2) array of normalized pose landmarks (MediaPipe Pose order).
        left_hand / right_hand: (21, 2) arrays of normalized hand landmarks
            (MediaPipe Hands order); all-zero arrays mean "not detected".

    Returns:
        The same `frame`, with the character drawn on it.

    Note: zero coordinates are used throughout as the "landmark missing"
    sentinel, so each drawing step guards on `> 0`.
    """
    h, w = frame.shape[:2]

    # MediaPipe Pose keypoint indices.
    # Face
    NOSE = 0
    LEFT_EYE = 2
    RIGHT_EYE = 5
    LEFT_EAR = 7
    RIGHT_EAR = 8

    # Body
    LEFT_SHOULDER = 11
    RIGHT_SHOULDER = 12
    LEFT_ELBOW = 13
    RIGHT_ELBOW = 14
    LEFT_WRIST = 15
    RIGHT_WRIST = 16
    LEFT_HIP = 23
    RIGHT_HIP = 24
    LEFT_KNEE = 25
    RIGHT_KNEE = 26
    LEFT_ANKLE = 27
    RIGHT_ANKLE = 28

    # MediaPipe Hands keypoint indices.
    # Thumb: 0-4, Index: 5-8, Middle: 9-12, Ring: 13-16, Pinky: 17-20
    THUMB_TIP = 4
    INDEX_TIP = 8
    MIDDLE_TIP = 12
    RING_TIP = 16
    PINKY_TIP = 20

    # Bone connections within one hand (pairs of landmark indices).
    finger_connections = [
        # Thumb
        (0, 1), (1, 2), (2, 3), (3, 4),
        # Index finger
        (0, 5), (5, 6), (6, 7), (7, 8),
        # Middle finger
        (0, 9), (9, 10), (10, 11), (11, 12),
        # Ring finger
        (0, 13), (13, 14), (14, 15), (15, 16),
        # Pinky
        (0, 17), (17, 18), (18, 19), (19, 20)
    ]

    # Color palette — all tuples are BGR (OpenCV convention).
    skin_color = (173, 216, 230)     # light tan fur
    outline_color = (40, 40, 40)     # soft dark outline
    shirt_color = (205, 170, 125)    # blue-ish jacket

    pants_color = (135, 206, 235)    # tan-ish pants
    bow_tie_color = (255, 255, 255)  # white bow tie
    eye_color = (255, 255, 255)      # white eyes
    pupil_color = (0, 0, 0)          # black pupils

    # Draw body parts as filled shapes.

    # 1. Head (face)
    if len(pose) > max(LEFT_EYE, RIGHT_EYE, LEFT_EAR, RIGHT_EAR):
        # Head center anchored at the nose landmark (pixel coordinates).
        head_center_x = pose[NOSE][0] * w
        head_center_y = pose[NOSE][1] * h

        # Estimate head size from the inter-eye distance when both eyes
        # are present; otherwise fall back to a fixed radius.
        if pose[LEFT_EYE][0] > 0 and pose[RIGHT_EYE][0] > 0:
            eye_distance = abs(pose[LEFT_EYE][0] - pose[RIGHT_EYE][0]) * w
            head_radius = eye_distance * 1.8  # Larger head for friendlier look
        else:
            head_radius = 35

        # Draw bear ears first so the head circle overlaps them.
        ear_radius = int(head_radius * 0.4)
        # Left ear
        left_ear_x = int(head_center_x - head_radius * 0.6)
        left_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, outline_color, 2)
        # Inner ear detail
        cv2.circle(frame, (left_ear_x, left_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Right ear
        right_ear_x = int(head_center_x + head_radius * 0.6)
        right_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, outline_color, 2)
        # Inner ear detail
        cv2.circle(frame, (right_ear_x, right_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Head disc with outline.
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), skin_color, -1)
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), outline_color, 2)

        # Eyes: white sclera, pupil, outline, and a small shine highlight.
        if pose[LEFT_EYE][0] > 0 and pose[LEFT_EYE][1] > 0:
            eye_x, eye_y = int(pose[LEFT_EYE][0] * w), int(pose[LEFT_EYE][1] * h)
            # Larger white eye
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)
            # Larger pupil
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)
            # Eye outline
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)
            # Eye shine
            cv2.circle(frame, (eye_x-3, eye_y-3), 3, (255, 255, 255), -1)

        if pose[RIGHT_EYE][0] > 0 and pose[RIGHT_EYE][1] > 0:
            eye_x, eye_y = int(pose[RIGHT_EYE][0] * w), int(pose[RIGHT_EYE][1] * h)
            # Larger white eye
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)
            # Larger pupil
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)
            # Eye outline
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)
            # Eye shine
            cv2.circle(frame, (eye_x-3, eye_y-3), 3, (255, 255, 255), -1)

        # Round bear nose slightly below the head center.
        nose_x = int(head_center_x)
        nose_y = int(head_center_y + head_radius * 0.1)
        cv2.circle(frame, (nose_x, nose_y), 6, (80, 40, 20), -1)  # dark nose (BGR)
        cv2.circle(frame, (nose_x, nose_y), 6, outline_color, 1)

        # Friendly smile: lower half of an ellipse (0..180 degrees).
        smile_center_x = int(head_center_x)
        smile_center_y = int(head_center_y + head_radius * 0.3)
        smile_radius = int(head_radius * 0.6)
        cv2.ellipse(frame, (smile_center_x, smile_center_y), (smile_radius, smile_radius//2),
                   0, 0, 180, outline_color, 3)

    # 2. Torso as a filled quadrilateral between shoulders and hips.
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER, LEFT_HIP, RIGHT_HIP):
        left_shoulder = (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h))
        right_shoulder = (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h))
        left_hip = (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h))
        right_hip = (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h))

        torso_points = np.array([left_shoulder, right_shoulder, right_hip, left_hip], np.int32)
        cv2.fillPoly(frame, [torso_points], shirt_color)
        cv2.polylines(frame, [torso_points], True, outline_color, 2)

    # 3. Arms: thick colored segment plus a thin outline stroke per bone.
    # Left arm
    if len(pose) > max(LEFT_SHOULDER, LEFT_ELBOW, LEFT_WRIST):
        if pose[LEFT_SHOULDER][0] > 0 and pose[LEFT_ELBOW][0] > 0:
            # Upper arm (thickness 36 px)
            cv2.line(frame,
                    (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                    (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                    skin_color, 36)
            cv2.line(frame,
                    (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                    (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                    outline_color, 2)

            # Lower arm (thickness 30 px)
            if pose[LEFT_WRIST][0] > 0:
                cv2.line(frame,
                        (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                        (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                        skin_color, 30)
                cv2.line(frame,
                        (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                        (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                        outline_color, 2)

    # Right arm (mirror of the left)
    if len(pose) > max(RIGHT_SHOULDER, RIGHT_ELBOW, RIGHT_WRIST):
        if pose[RIGHT_SHOULDER][0] > 0 and pose[RIGHT_ELBOW][0] > 0:
            # Upper arm (thickness 36 px)
            cv2.line(frame,
                    (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                    (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                    skin_color, 36)
            cv2.line(frame,
                    (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                    (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                    outline_color, 2)

            # Lower arm (thickness 30 px)
            if pose[RIGHT_WRIST][0] > 0:
                cv2.line(frame,
                        (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                        (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                        skin_color, 30)
                cv2.line(frame,
                        (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                        (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                        outline_color, 2)

    # 4. Legs: same two-stroke technique with pants color.
    # Left leg
    if len(pose) > max(LEFT_HIP, LEFT_KNEE, LEFT_ANKLE):
        if pose[LEFT_HIP][0] > 0 and pose[LEFT_KNEE][0] > 0:
            # Upper leg
            cv2.line(frame,
                    (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                    (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                    pants_color, 14)
            cv2.line(frame,
                    (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                    (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                    outline_color, 2)

            # Lower leg
            if pose[LEFT_ANKLE][0] > 0:
                cv2.line(frame,
                        (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                        (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                        pants_color, 12)
                cv2.line(frame,
                        (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                        (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                        outline_color, 2)

    # Right leg
    if len(pose) > max(RIGHT_HIP, RIGHT_KNEE, RIGHT_ANKLE):
        if pose[RIGHT_HIP][0] > 0 and pose[RIGHT_KNEE][0] > 0:
            # Upper leg
            cv2.line(frame,
                    (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                    (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                    pants_color, 14)
            cv2.line(frame,
                    (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                    (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                    outline_color, 2)

            # Lower leg
            if pose[RIGHT_ANKLE][0] > 0:
                cv2.line(frame,
                        (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                        (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                        pants_color, 12)
                cv2.line(frame,
                        (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                        (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                        outline_color, 2)

    # 1.5. Neck connecting head to shoulders (drawn after limbs so it
    # overlaps the torso edge), plus the bow tie underneath it.
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER):
        if pose[LEFT_SHOULDER][0] > 0 and pose[RIGHT_SHOULDER][0] > 0:
            # Midpoint of the shoulders in pixel space.
            neck_center_x = (pose[LEFT_SHOULDER][0] + pose[RIGHT_SHOULDER][0]) / 2 * w
            neck_center_y = (pose[LEFT_SHOULDER][1] + pose[RIGHT_SHOULDER][1]) / 2 * h

            # Position neck slightly above shoulders
            neck_y = neck_center_y - 15

            # Neck width scales with shoulder distance (~30%).
            shoulder_distance = abs(pose[LEFT_SHOULDER][0] - pose[RIGHT_SHOULDER][0]) * w
            neck_width = shoulder_distance * 0.3  # Neck is about 30% of shoulder width
            neck_height = 25

            # Rectangle corners of the neck.
            neck_left = int(neck_center_x - neck_width / 2)
            neck_right = int(neck_center_x + neck_width / 2)
            neck_top = int(neck_y - neck_height / 2)
            neck_bottom = int(neck_y + neck_height / 2)

            # Draw neck with skin color
            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), skin_color, -1)
            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), outline_color, 2)

            # Bow tie just below the neck rectangle.
            bow_center_x = int(neck_center_x)
            bow_center_y = int(neck_y + neck_height / 2 + 5)
            bow_width = 20
            bow_height = 12

            # Left wing of the bow tie.
            left_bow_points = np.array([
                [bow_center_x - bow_width//2, bow_center_y - bow_height//2],
                [bow_center_x - bow_width//2 - 8, bow_center_y],
                [bow_center_x - bow_width//2, bow_center_y + bow_height//2],
                [bow_center_x - 2, bow_center_y + bow_height//2],
                [bow_center_x - 2, bow_center_y - bow_height//2]
            ], np.int32)
            cv2.fillPoly(frame, [left_bow_points], bow_tie_color)
            cv2.polylines(frame, [left_bow_points], True, outline_color, 1)

            # Right wing of the bow tie.
            right_bow_points = np.array([
                [bow_center_x + bow_width//2, bow_center_y - bow_height//2],
                [bow_center_x + bow_width//2 + 8, bow_center_y],
                [bow_center_x + bow_width//2, bow_center_y + bow_height//2],
                [bow_center_x + 2, bow_center_y + bow_height//2],
                [bow_center_x + 2, bow_center_y - bow_height//2]
            ], np.int32)
            cv2.fillPoly(frame, [right_bow_points], bow_tie_color)
            cv2.polylines(frame, [right_bow_points], True, outline_color, 1)

            # Center knot of the bow tie.
            knot_points = np.array([
                [bow_center_x - 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y + 3],
                [bow_center_x - 2, bow_center_y + 3]
            ], np.int32)
            cv2.fillPoly(frame, [knot_points], bow_tie_color)
            cv2.polylines(frame, [knot_points], True, outline_color, 1)

    # 5. Hands drawn last so they always appear in front of the body.
    # hand_color: blue channel for left, red for right (BGR tuples).
    for hand, hand_color in [(left_hand, (255, 0, 0)), (right_hand, (0, 0, 255))]:
        if np.any(hand != 0):  # Only draw if hand is detected
            # Palm: convex hull over wrist + finger bases.
            palm_points = []
            palm_indices = [0, 5, 9, 13, 17]  # Wrist and base of each finger
            for idx in palm_indices:
                if idx < len(hand) and hand[idx][0] > 0 and hand[idx][1] > 0:
                    palm_points.append([int(hand[idx][0] * w), int(hand[idx][1] * h)])

            if len(palm_points) > 3:
                palm_points = np.array(palm_points, np.int32)
                hull = cv2.convexHull(palm_points)
                cv2.fillPoly(frame, [hull], (255, 182, 193))  # palm fill (BGR)
                cv2.polylines(frame, [hull], True, outline_color, 2)

            # Finger bones: thick fill stroke plus thin outline.
            for connection in finger_connections:
                start_idx, end_idx = connection
                if (start_idx < len(hand) and end_idx < len(hand) and
                    hand[start_idx][0] > 0 and hand[start_idx][1] > 0 and
                    hand[end_idx][0] > 0 and hand[end_idx][1] > 0):

                    start_point = (int(hand[start_idx][0] * w), int(hand[start_idx][1] * h))
                    end_point = (int(hand[end_idx][0] * w), int(hand[end_idx][1] * h))

                    # Draw finger bone
                    cv2.line(frame, start_point, end_point, (255, 182, 193), 9)
                    cv2.line(frame, start_point, end_point, outline_color, 1)

            # Fingertips emphasized with a filled dot, outline, and highlight.
            finger_tips = [THUMB_TIP, INDEX_TIP, MIDDLE_TIP, RING_TIP, PINKY_TIP]
            for tip_idx in finger_tips:
                if tip_idx < len(hand) and hand[tip_idx][0] > 0 and hand[tip_idx][1] > 0:
                    tip_x, tip_y = int(hand[tip_idx][0] * w), int(hand[tip_idx][1] * h)
                    cv2.circle(frame, (tip_x, tip_y), 4, (255, 182, 193), -1)
                    cv2.circle(frame, (tip_x, tip_y), 4, outline_color, 2)
                    # Add a small highlight
                    cv2.circle(frame, (tip_x-1, tip_y-1), 1, (255, 255, 255), -1)

            # All hand keypoints as small colored dots (per-hand color).
            for i, (x, y) in enumerate(hand):
                if x > 0 and y > 0:
                    point_x, point_y = int(x * w), int(y * h)
                    # Fingertips get a slightly larger dot.
                    if i in finger_tips:
                        cv2.circle(frame, (point_x, point_y), 2, hand_color, -1)
                    else:
                        cv2.circle(frame, (point_x, point_y), 1, hand_color, -1)

    return frame
424
+
425
def _blend_hand(hand_a, hand_b, alpha):
    """Blend two hand keypoint arrays; fall back to whichever side was detected.

    All-zero arrays are the "hand not detected" sentinel, so blending only
    happens when both endpoints actually saw the hand.
    """
    a_detected = np.any(hand_a != 0)
    b_detected = np.any(hand_b != 0)
    if a_detected and b_detected:
        return (1 - alpha) * hand_a + alpha * hand_b
    if a_detected:
        return hand_a  # Keep the last known position
    if b_detected:
        return hand_b  # Use the new position
    return np.zeros((21, 2))  # No hands detected on either side


def interpolate_keypoints(kptsA, kptsB, steps):
    """
    Linearly interpolate between two keypoint frames.

    Args:
        kptsA: (pose, left_hand, right_hand) tuple for the start frame.
        kptsB: (pose, left_hand, right_hand) tuple for the end frame.
        steps: Number of intermediate frames to generate.

    Returns:
        List of `steps` (pose, left_hand, right_hand) tuples. Poses are
        always blended; hands fall back to the detected side (see
        _blend_hand).

    NOTE: the per-frame debug prints that used to fire for every detected
    hand pair have been removed — they flooded stdout during rendering.
    """
    poseA, leftA, rightA = kptsA
    poseB, leftB, rightB = kptsB

    frames = []
    for t in range(1, steps + 1):
        alpha = t / (steps + 1)
        interp_pose = (1 - alpha) * poseA + alpha * poseB
        frames.append((interp_pose,
                       _blend_hand(leftA, leftB, alpha),
                       _blend_hand(rightA, rightB, alpha)))
    return frames
464
+
465
def create_stitched_video(videoA_path, videoB_path, output_path="stitched_output.mp4"):
    """Stitch exactly two keypoint videos with an interpolated transition.

    This is a thin convenience wrapper: the render/transition/render loop
    was a line-for-line duplicate of `create_multi_stitched_video`, so it
    now delegates to that shared implementation with the same 15-frame
    transition the original hard-coded.

    Args:
        videoA_path: Path to the first MP4 video.
        videoB_path: Path to the second MP4 video.
        output_path: Where to write the stitched skeleton video.
    """
    create_multi_stitched_video(
        [videoA_path, videoB_path],
        output_path=output_path,
        transition_steps=15,
    )
492
+
493
def _blank_frame():
    """Return a fresh white 1280x720 canvas to render a skeleton onto."""
    return np.ones((720, 1280, 3), dtype=np.uint8) * 255


def create_multi_stitched_video(video_paths, output_path="multi_stitched_output.mp4", transition_steps=15):
    """
    Create a stitched video from multiple video files.

    Keypoints are extracted from every input video, each video's frames are
    rendered as skeletons on a white canvas, and consecutive videos are
    joined with ``transition_steps`` interpolated frames.

    Args:
        video_paths (list): List of paths to MP4 video files
        output_path (str): Output path for the final video
        transition_steps (int): Number of frames for transitions between videos
    """
    if len(video_paths) < 2:
        print("Need at least 2 videos to stitch together!")
        return

    print(f"Processing {len(video_paths)} videos...")

    # Extract keypoints from all videos up front so transitions can look
    # ahead to the first frame of the next video.
    all_keypoints = []
    for i, video_path in enumerate(video_paths):
        print(f"Extracting keypoints from video {i+1}/{len(video_paths)}: {video_path}")
        keypoints = extract_keypoints_from_video(video_path)
        all_keypoints.append(keypoints)
        print(f" - Extracted {len(keypoints)} frames")

    # Create video writer (30 fps, fixed 720p canvas).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, 30.0, (1280, 720))

    total_frames = 0
    try:
        for i, keypoints in enumerate(all_keypoints):
            print(f"Rendering video {i+1}/{len(all_keypoints)}...")

            # Render all frames from the current video.
            for pose, l, r in keypoints:
                out.write(render_person(_blank_frame(), pose, l, r))
                total_frames += 1

            # Add a transition to the next video (skipped after the last).
            if i < len(all_keypoints) - 1:
                print(f" Adding transition to next video...")
                next_keypoints = all_keypoints[i + 1]

                # Interpolate between the last frame of this video and the
                # first frame of the next one.
                interp = interpolate_keypoints(keypoints[-1], next_keypoints[0], steps=transition_steps)
                for pose, l, r in interp:
                    out.write(render_person(_blank_frame(), pose, l, r))
                    total_frames += 1
    finally:
        # Release the writer even if extraction/rendering raises, so the
        # container is finalized and the file handle is not leaked.
        out.release()

    print(f"Multi-stitched video saved to {output_path}")
    print(f"Total frames rendered: {total_frames}")
    print(f"Video duration: {total_frames/30:.2f} seconds")
548
+
549
+ if __name__ == "__main__":
550
+ # Example usage for multiple videos
551
+ video_list = [
552
+ "/Users/ethantam/desktop/35304.mp4",
553
+ "/Users/ethantam/desktop/23978.mp4",
554
+ "/Users/ethantam/desktop/23106.mp4",
555
+ # Add more video paths here as needed
556
+ ]
557
+
558
+ # Create multi-stitched video
559
+ create_multi_stitched_video(video_list, "multi_stitched_output_1.mp4")
560
+
561
+ # Or use the original 2-video function
562
+ # create_stitched_video("/Users/ethantam/desktop/35304.mp4", "/Users/ethantam/desktop/23978.mp4")
videos_rows.csv ADDED
The diff for this file is too large to render. See raw diff