set up app.py with fully integrated text to ASL video flow
- .gitignore +2 -0
- .gradio/certificate.pem +31 -0
- __pycache__/asl_gloss.cpython-311.pyc +0 -0
- __pycache__/asl_glosser.cpython-311.pyc +0 -0
- __pycache__/document_parser.cpython-311.pyc +0 -0
- __pycache__/document_parsing.cpython-311.pyc +0 -0
- __pycache__/document_parsing.cpython-313.pyc +0 -0
- __pycache__/document_to_gloss.cpython-311.pyc +0 -0
- __pycache__/document_to_gloss.cpython-313.pyc +0 -0
- __pycache__/vectorizer.cpython-311.pyc +0 -0
- __pycache__/video_gen.cpython-311.pyc +0 -0
- __pycache__/word2vec.cpython-311.pyc +0 -0
- app.py +173 -4
- asl_gloss.py +323 -0
- document_parsing.py +263 -0
- document_to_gloss.py +280 -0
- requirements.txt +16 -0
- test.py +85 -0
- vectorizer.py +155 -0
- video_gen.py +562 -0
- videos_rows.csv +0 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
.DS_Store
.env
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
-----BEGIN CERTIFICATE-----
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
-----END CERTIFICATE-----
__pycache__/asl_gloss.cpython-311.pyc
ADDED
Binary file (14.3 kB)

__pycache__/asl_glosser.cpython-311.pyc
ADDED
Binary file (12.1 kB)

__pycache__/document_parser.cpython-311.pyc
ADDED
Binary file (12.4 kB)

__pycache__/document_parsing.cpython-311.pyc
ADDED
Binary file (12.4 kB)

__pycache__/document_parsing.cpython-313.pyc
ADDED
Binary file (10.6 kB)

__pycache__/document_to_gloss.cpython-311.pyc
ADDED
Binary file (12.1 kB)

__pycache__/document_to_gloss.cpython-313.pyc
ADDED
Binary file (10.6 kB)

__pycache__/vectorizer.cpython-311.pyc
ADDED
Binary file (7.07 kB)

__pycache__/video_gen.cpython-311.pyc
ADDED
Binary file (29.8 kB)

__pycache__/word2vec.cpython-311.pyc
ADDED
Binary file (7.56 kB)
app.py
CHANGED
@@ -1,7 +1,176 @@
from document_to_gloss import DocumentToASLConverter
from vectorizer import Vectorizer
from video_gen import create_multi_stitched_video
import gradio as gr
import asyncio
import re
import boto3
import os
from botocore.config import Config
from dotenv import load_dotenv
import requests
import tempfile

# Load environment variables from .env file
load_dotenv()

# Load R2/S3 environment secrets
R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
R2_ACCESS_KEY_ID = os.environ.get("R2_ACCESS_KEY_ID")
R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")

# Validate that required environment variables are set
if not all([R2_ENDPOINT, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY]):
    raise ValueError("Missing required R2 environment variables. Please check your .env file.")

title = "AI-SL"
description = "Convert text to ASL!"
article = ("<p style='text-align: center'><a href='https://github.com/deenasun' "
           "target='_blank'>Deena Sun on Github</a></p>")
inputs = gr.File(label="Upload Document (pdf, txt, docx, or epub)")
outputs = [
    gr.JSON(label="Processing Results"),
    gr.Video(label="ASL Video Output")
]

asl_converter = DocumentToASLConverter()
vectorizer = Vectorizer()
session = boto3.session.Session()

s3 = session.client(
    service_name='s3',
    region_name='auto',
    endpoint_url=R2_ENDPOINT,
    aws_access_key_id=R2_ACCESS_KEY_ID,
    aws_secret_access_key=R2_SECRET_ACCESS_KEY,
    config=Config(signature_version='s3v4')
)

def clean_gloss_token(token):
    """
    Clean a gloss token by removing brackets, newlines, and extra whitespace
    """
    # Remove brackets and newlines
    cleaned = re.sub(r'[\[\]\n\r]', '', token)
    # Remove extra whitespace
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    cleaned = cleaned.lower()
    return cleaned

async def parse_vectorize_and_search(file):
    print(file)
    gloss = asl_converter.convert_document(file)
    print("ASL", gloss)

    # Split by spaces and clean each token
    gloss_tokens = gloss.split()
    cleaned_tokens = []

    for token in gloss_tokens:
        cleaned = clean_gloss_token(token)
        if cleaned:  # Only add non-empty tokens
            cleaned_tokens.append(cleaned)

    print("Cleaned tokens:", cleaned_tokens)

    videos = []
    video_files = []  # Store local file paths for stitching

    for g in cleaned_tokens:
        print(f"Processing {g}")
        try:
            result = await vectorizer.vector_query_from_supabase(query=g)
            print("result", result)
            if result.get("match", False):
                video_url = result["video_url"]
                videos.append(video_url)

                # Download the video
                local_path = download_video_from_url(video_url)
                if local_path:
                    video_files.append(local_path)

        except Exception as e:
            print(f"Error processing {g}: {e}")
            continue

    # Create stitched video if we have multiple videos
    stitched_video_path = None
    if len(video_files) > 1:
        try:
            print(f"Creating stitched video from {len(video_files)} videos...")
            stitched_video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
            create_multi_stitched_video(video_files, stitched_video_path)
            print(f"Stitched video created: {stitched_video_path}")
        except Exception as e:
            print(f"Error creating stitched video: {e}")
            stitched_video_path = None
    elif len(video_files) == 1:
        # If only one video, just use it directly
        stitched_video_path = video_files[0]

    # Clean up individual video files after stitching
    for video_file in video_files:
        if video_file != stitched_video_path:  # Don't delete the final output
            cleanup_temp_video(video_file)

    return {
        "status": "success",
        "videos": videos,
        "video_count": len(videos),
        "gloss": gloss,
        "cleaned_tokens": cleaned_tokens
    }, stitched_video_path

# Create a synchronous wrapper for Gradio
def parse_vectorize_and_search_sync(file):
    return asyncio.run(parse_vectorize_and_search(file))

def download_video_from_url(video_url):
    """
    Download a video from a public R2 URL
    Returns the local file path where the video is saved
    """
    try:
        # Create a temporary file with .mp4 extension
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        temp_path = temp_file.name
        temp_file.close()

        # Download the video
        print(f"Downloading video from: {video_url}")
        response = requests.get(video_url, stream=True)
        response.raise_for_status()

        # Save to temporary file
        with open(temp_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Video downloaded to: {temp_path}")
        return temp_path

    except Exception as e:
        print(f"Error downloading video: {e}")
        return None

def cleanup_temp_video(file_path):
    """
    Clean up temporary video file
    """
    try:
        if file_path and os.path.exists(file_path):
            os.unlink(file_path)
            print(f"Cleaned up: {file_path}")
    except Exception as e:
        print(f"Error cleaning up file: {e}")

intf = gr.Interface(
    fn=parse_vectorize_and_search_sync,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article
)
intf.launch(share=True)
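The new app.py wires the whole flow together: parse the document, gloss it with Claude, clean the tokens, look each one up by vector similarity, then download and stitch the matching clips. A minimal sketch of driving the first stages of that pipeline without launching the Gradio UI; sample.pdf and a populated .env (Anthropic and Supabase credentials) are assumptions, not part of this commit:

import asyncio
from document_to_gloss import DocumentToASLConverter
from vectorizer import Vectorizer

async def smoke_test(path):
    # Document -> gloss, then one cleaned token -> vector lookup,
    # mirroring what parse_vectorize_and_search does per token.
    gloss = DocumentToASLConverter().convert_document(path)
    token = gloss.split()[0].strip("[]\n").lower()  # same idea as clean_gloss_token
    result = await Vectorizer().vector_query_from_supabase(query=token)
    print(gloss)
    print(result)  # e.g. {"match": True, "video_url": ...} on a hit

asyncio.run(smoke_test("sample.pdf"))  # "sample.pdf" is a hypothetical local file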
asl_gloss.py
ADDED
@@ -0,0 +1,323 @@
#!/usr/bin/env python3
"""
ASL Gloss Converter using Claude API

This script takes English text input and converts it to ASL gloss format.
ASL gloss is a written representation of sign language
that preserves the spatial and grammatical structure of ASL.
"""

import os
import sys
import argparse
import json
from typing import Optional, Dict, Any
from pathlib import Path

try:
    import anthropic
except ImportError:
    print("Error: anthropic package not found. Please install it with:")
    print("pip install anthropic")
    sys.exit(1)


class ASLGlossConverter:
    """
    Converts English text to ASL gloss using Claude's API.

    ASL gloss preserves the spatial and grammatical structure of American Sign Language,
    including features like:
    - Topic-comment structure
    - Spatial referencing
    - Non-manual markers (facial expressions, head movements)
    - Classifier predicates
    - Time indicators
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the ASL gloss converter.

        Args:
            api_key: Anthropic API key. If not provided, will look for ANTHROPIC_API_KEY env var.
        """
        self.api_key = api_key or os.getenv('ANTHROPIC_API_KEY')
        if not self.api_key:
            raise ValueError(
                "API key not provided. Set ANTHROPIC_API_KEY environment variable "
                "or pass api_key parameter."
            )

        self.client = anthropic.Anthropic(api_key=self.api_key)

        # System prompt that defines ASL gloss conversion rules
        self.system_prompt = """You are an expert in American Sign Language (ASL) and ASL gloss. Your task is to convert English text to ASL gloss format with bracketed phrases.

ASL GLOSS RULES:
1. Use ALL CAPS for all signs
2. Group related words/concepts into bracketed phrases [PHRASE]
3. Use underscores (_) to connect words within a phrase that are signed together
4. Use classifiers (CL:1, CL:3, CL:C, etc.) for spatial relationships
5. Use pronouns: I (first person), YOU (second person), HE/SHE/THEY (third person)
6. Use time indicators: PAST, FUTURE, NOW, ALWAYS, NEVER
7. Use topic-comment structure: TOPIC COMMENT
8. Use rhetorical questions: RHQ
9. Use conditional markers: IF-THEN
10. Use negation: NOT, NONE, CAN'T, DON'T-WANT
11. Use aspect markers: FINISH, CONTINUE, REPEAT
12. Use directional verbs: GIVE-TO, TELL-TO, ASK-TO
13. Use location markers: HERE, THERE, WHERE
14. Use manner adverbs: FAST, SLOW, CAREFUL, HARD

PHRASE GROUPING GUIDELINES:
- Group compound expressions: [GOOD_MORNING], [THANK_YOU], [HOW_ARE_YOU]
- Keep names as single phrases: [JOHN], [NATALIIA], [CHRISTOPHER_ROBIN]
- Group related concepts: [MY_NAME], [YOUR_HOUSE], [LAST_WEEK]
- Keep simple words separate: [I] [LOVE] [YOU]

IMPORTANT: Output ONLY the bracketed ASL phrases. Each phrase should be in ALL CAPS with underscores connecting related words.

EXAMPLES:
- "Good morning, Brian" → [GOOD_MORNING] [BRIAN]
- "My name is Nataliia" → [I] [NAME] [NATALIIA]
- "I love you" → [I] [LOVE] [YOU]
- "What is your name?" → [YOU] [NAME] [WHAT]
- "I don't understand" → [I] [UNDERSTAND] [NOT]
- "Where is the bathroom?" → [BATHROOM] [WHERE]
- "I want to go home" → [I] [WANT] [GO] [HOME]
- "The cat is sleeping" → [CAT] [SLEEP]
- "I finished my homework" → [I] [HOMEWORK] [FINISH]
- "Do you want coffee?" → [YOU] [WANT] [COFFEE]
- "I can't hear you" → [I] [HEAR] [YOU] [CAN'T]
- "The weather is nice today" → [TODAY] [WEATHER] [NICE]
- "Thank you very much" → [THANK_YOU] [VERY_MUCH]
- "How are you doing?" → [HOW_ARE_YOU] [DOING]
- "See you later" → [SEE_YOU_LATER]
- "I work at Google" → [I] [WORK] [GOOGLE]

Convert the given English text to proper ASL gloss format with bracketed phrases, maintaining the meaning and intent while following ASL grammar and structure."""

    def convert_text(self, english_text: str) -> str:
        """
        Convert English text to ASL gloss using Anthropic v1.x messages API.
        """
        try:
            message = self.client.messages.create(
                model="claude-3-5-sonnet-20240620",
                max_tokens=1000,
                system=self.system_prompt,
                messages=[
                    {"role": "user", "content": f"Convert this English text to ASL gloss:\n\n{english_text}"}
                ]
            )
            return message.content[0].text.strip()
        except Exception as e:
            raise Exception(f"Error converting text to ASL gloss: {str(e)}")

    def convert_file(self, input_file: str, output_file: Optional[str] = None) -> str:
        """
        Convert text from a file to ASL gloss.

        Args:
            input_file: Path to input text file
            output_file: Path to output file (optional)

        Returns:
            The ASL gloss text
        """
        try:
            # Read input file
            with open(input_file, 'r', encoding='utf-8') as f:
                english_text = f.read().strip()

            if not english_text:
                raise ValueError("Input file is empty")

            # Convert to ASL gloss
            asl_gloss = self.convert_text(english_text)

            # Write to output file if specified
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"ASL gloss saved to: {output_file}")

            return asl_gloss

        except FileNotFoundError:
            raise Exception(f"Input file not found: {input_file}")
        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}")

    def batch_convert(self, input_files: list, output_dir: Optional[str] = None) -> Dict[str, str]:
        """
        Convert multiple files to ASL gloss.

        Args:
            input_files: List of input file paths
            output_dir: Directory to save output files (optional)

        Returns:
            Dictionary mapping input files to their ASL gloss
        """
        results = {}

        for input_file in input_files:
            try:
                print(f"Converting: {input_file}")

                if output_dir:
                    # Create output filename
                    input_path = Path(input_file)
                    output_filename = f"{input_path.stem}_asl_gloss{input_path.suffix}"
                    output_file = Path(output_dir) / output_filename
                else:
                    output_file = None

                asl_gloss = self.convert_file(input_file, str(output_file) if output_file else None)
                results[input_file] = asl_gloss

                print(f"✓ Completed: {input_file}")

            except Exception as e:
                print(f"✗ Error processing {input_file}: {str(e)}")
                results[input_file] = f"ERROR: {str(e)}"

        return results


def main():
    """Main function for command-line usage."""
    parser = argparse.ArgumentParser(
        description="Convert English text to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert text directly
  python asl_gloss.py "Hello, how are you?"

  # Convert from file
  python asl_gloss.py -f input.txt

  # Convert from file with output
  python asl_gloss.py -f input.txt -o output.txt

  # Batch convert multiple files
  python asl_gloss.py -b file1.txt file2.txt -d output_dir/

  # Interactive mode
  python asl_gloss.py -i
        """
    )

    parser.add_argument(
        'text',
        nargs='?',
        help='English text to convert to ASL gloss'
    )

    parser.add_argument(
        '-f', '--file',
        help='Input file containing English text'
    )

    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )

    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple files'
    )

    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )

    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )

    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        # Initialize converter
        converter = ASLGlossConverter(api_key=args.api_key)

        if args.interactive:
            print("ASL Gloss Converter - Interactive Mode")
            print("Enter English text to convert to ASL gloss (or 'quit' to exit):")
            print("-" * 50)

            while True:
                try:
                    text = input("\nEnglish text: ").strip()
                    if text.lower() in ['quit', 'exit', 'q']:
                        break

                    if not text:
                        continue

                    print("Converting...")
                    asl_gloss = converter.convert_text(text)
                    print(f"ASL Gloss: {asl_gloss}")

                except KeyboardInterrupt:
                    print("\nExiting...")
                    break
                except Exception as e:
                    print(f"Error: {str(e)}")

        elif args.batch:
            if not args.batch:
                print("Error: No files specified for batch conversion")
                return 1

            print(f"Batch converting {len(args.batch)} files...")
            results = converter.batch_convert(args.batch, args.output_dir)

            print("\nResults:")
            for input_file, result in results.items():
                print(f"\n{input_file}:")
                print(result)

        elif args.file:
            asl_gloss = converter.convert_file(args.file, args.output)
            if not args.output:
                print("ASL Gloss:")
                print(asl_gloss)

        elif args.text:
            asl_gloss = converter.convert_text(args.text)
            print("ASL Gloss:")
            print(asl_gloss)

            if args.output:
                with open(args.output, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"\nSaved to: {args.output}")

        else:
            parser.print_help()
            return 1

        return 0

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())
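Beyond the CLI entry points in main(), the converter class can be used directly. A minimal sketch, assuming ANTHROPIC_API_KEY is set in the environment (the exact gloss returned depends on the model):

from asl_gloss import ASLGlossConverter

converter = ASLGlossConverter()  # raises ValueError if no API key is available
gloss = converter.convert_text("I want to go home")
print(gloss)  # per the system prompt above, something like: [I] [WANT] [GO] [HOME]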
document_parsing.py
ADDED
@@ -0,0 +1,263 @@
import os
import sys
from pathlib import Path
from typing import Optional, Union
import logging

# Import document parsing libraries
try:
    import PyPDF2
    from docx import Document
    import ebooklib
    from ebooklib import epub
    from bs4 import BeautifulSoup
except ImportError as e:
    print(f"Missing required dependency: {e}")
    print("Please install dependencies with: pip install -r requirements.txt")
    sys.exit(1)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class DocumentParser:
    """
    A class to parse and extract text from various document formats.
    Supports PDF, TXT, DOC, DOCX, and EPUB files.
    """

    def __init__(self):
        self.supported_formats = {
            'application/pdf': self._parse_pdf,
            'text/plain': self._parse_txt,
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._parse_docx,
            'application/msword': self._parse_doc,
            'application/epub+zip': self._parse_epub
        }

    def get_file_type(self, file_path: Union[str, Path]) -> str:
        """
        Detect the MIME type of a file using its file extension.

        Args:
            file_path: Path to the file

        Returns:
            MIME type string
        """
        return self._get_mime_from_extension(file_path)

    def _get_mime_from_extension(self, file_path: Union[str, Path]) -> str:
        """
        Determine MIME type from file extension.

        Args:
            file_path: Path to the file

        Returns:
            MIME type string
        """
        extension = Path(file_path).suffix.lower()
        extension_map = {
            '.pdf': 'application/pdf',
            '.txt': 'text/plain',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.doc': 'application/msword',
            '.epub': 'application/epub+zip'
        }
        return extension_map.get(extension, 'unknown')

    def extract_text(self, file_path: Union[str, Path]) -> Optional[str]:
        """
        Extract text from a document file.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text as string, or None if extraction fails
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return None

        try:
            mime_type = self.get_file_type(file_path)
            logger.info(f"Detected file type: {mime_type}")

            if mime_type in self.supported_formats:
                return self.supported_formats[mime_type](file_path)
            else:
                logger.error(f"Unsupported file type: {mime_type}")
                return None

        except Exception as e:
            logger.error(f"Error extracting text from {file_path}: {e}")
            return None

    def _parse_pdf(self, file_path: Path) -> str:
        """
        Extract text from PDF file.

        Args:
            file_path: Path to PDF file

        Returns:
            Extracted text
        """
        text = ""
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)

                for page_num in range(len(pdf_reader.pages)):
                    page = pdf_reader.pages[page_num]
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"

        except Exception as e:
            logger.error(f"Error parsing PDF {file_path}: {e}")
            raise

        return text.strip()

    def _parse_txt(self, file_path: Path) -> str:
        """
        Extract text from plain text file.

        Args:
            file_path: Path to text file

        Returns:
            Extracted text
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except UnicodeDecodeError:
            # Try with a different encoding
            try:
                with open(file_path, 'r', encoding='latin-1') as file:
                    return file.read()
            except Exception as e:
                logger.error(f"Error reading text file {file_path}: {e}")
                raise
        except Exception as e:
            logger.error(f"Error reading text file {file_path}: {e}")
            raise

    def _parse_docx(self, file_path: Path) -> str:
        """
        Extract text from DOCX file.

        Args:
            file_path: Path to DOCX file

        Returns:
            Extracted text
        """
        try:
            doc = Document(file_path)
            text = ""

            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"

            return text.strip()

        except Exception as e:
            logger.error(f"Error parsing DOCX {file_path}: {e}")
            raise

    def _parse_doc(self, file_path: Path) -> str:
        """
        Extract text from DOC file (legacy Word format).
        Note: This requires additional dependencies like antiword or catdoc.

        Args:
            file_path: Path to DOC file

        Returns:
            Extracted text
        """
        try:
            # Try using antiword if available
            import subprocess
            result = subprocess.run(['antiword', str(file_path)],
                                    capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip()

            # Fallback: try catdoc
            result = subprocess.run(['catdoc', str(file_path)],
                                    capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip()

            raise Exception("Neither antiword nor catdoc found. Please install one of them.")

        except FileNotFoundError:
            raise Exception("antiword or catdoc not found. Please install one of them for DOC file support.")
        except Exception as e:
            logger.error(f"Error parsing DOC {file_path}: {e}")
            raise

    def _parse_epub(self, file_path: Path) -> str:
        """
        Extract text from EPUB file.

        Args:
            file_path: Path to EPUB file

        Returns:
            Extracted text
        """
        try:
            book = epub.read_epub(file_path)
            text = ""

            for item in book.get_items():
                if item.get_type() == ebooklib.ITEM_DOCUMENT:
                    content = item.get_content().decode('utf-8')
                    soup = BeautifulSoup(content, 'html.parser')
                    text += soup.get_text() + "\n"

            return text.strip()

        except Exception as e:
            logger.error(f"Error parsing EPUB {file_path}: {e}")
            raise


def main():
    """
    Main function to demonstrate usage of the DocumentParser.
    """
    if len(sys.argv) != 2:
        print("Usage: python document_parsing.py <file_path>")
        print("Supported formats: PDF, TXT, DOC, DOCX, EPUB")
        sys.exit(1)

    file_path = sys.argv[1]
    parser = DocumentParser()

    print(f"Extracting text from: {file_path}")
    print("-" * 50)

    extracted_text = parser.extract_text(file_path)

    if extracted_text:
        print("Extracted text:")
        print(extracted_text)
        print(f"\nTotal characters: {len(extracted_text)}")
    else:
        print("Failed to extract text from the file.")
        sys.exit(1)


if __name__ == "__main__":
    main()
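Dispatch is purely extension-based, so an unrecognized extension falls through to 'unknown' and extract_text() returns None instead of raising. A small sketch of that behavior; 'notes.md' is a made-up name used only to show the fallback:

from document_parsing import DocumentParser

parser = DocumentParser()
print(parser.get_file_type("book.epub"))  # application/epub+zip
print(parser.get_file_type("notes.md"))   # unknown
print(parser.extract_text("notes.md"))    # None (missing file or unsupported type; reason is logged)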
document_to_gloss.py
ADDED
@@ -0,0 +1,280 @@
#!/usr/bin/env python3
"""
Document to ASL Gloss Converter

This script combines document parsing and ASL glossing to convert
uploaded documents (PDF, TXT, DOC, DOCX, EPUB) directly to ASL gloss format.
"""

import os
import sys
import argparse
from typing import Optional, Dict, Any
from pathlib import Path

# Import our existing modules
from document_parsing import DocumentParser
from asl_gloss import ASLGlossConverter


class DocumentToASLConverter:
    """
    Combines document parsing and ASL glossing functionality.
    Extracts text from various document formats and converts to ASL gloss.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the document to ASL converter.

        Args:
            api_key: Anthropic API key. If not provided, will look for ANTHROPIC_API_KEY env var.
        """
        self.document_parser = DocumentParser()
        self.asl_converter = ASLGlossConverter(api_key=api_key)

    def convert_document(self, document_path: str, output_file: Optional[str] = None) -> str:
        """
        Convert a document file to ASL gloss.

        Args:
            document_path: Path to the document file
            output_file: Path to output file (optional)

        Returns:
            The ASL gloss text
        """
        try:
            print(f"Processing document: {document_path}")

            # Step 1: Extract text from document
            print("Step 1: Extracting text from document...")
            extracted_text = self.document_parser.extract_text(document_path)

            if not extracted_text:
                raise Exception("Failed to extract text from document")

            print(f"✓ Extracted {len(extracted_text)} characters")

            # Step 2: Convert text to ASL gloss
            print("Step 2: Converting to ASL gloss...")
            asl_gloss = self.asl_converter.convert_text(extracted_text)

            print("✓ ASL gloss conversion completed")

            # Step 3: Save to output file if specified
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"✓ ASL gloss saved to: {output_file}")

            return asl_gloss

        except Exception as e:
            raise Exception(f"Error processing document: {str(e)}")

    def batch_convert_documents(self, document_paths: list, output_dir: Optional[str] = None) -> Dict[str, str]:
        """
        Convert multiple documents to ASL gloss.

        Args:
            document_paths: List of document file paths
            output_dir: Directory to save output files (optional)

        Returns:
            Dictionary mapping input files to their ASL gloss
        """
        results = {}

        for document_path in document_paths:
            try:
                print(f"\n{'='*50}")
                print(f"Converting: {document_path}")
                print(f"{'='*50}")

                if output_dir:
                    # Create output filename
                    input_path = Path(document_path)
                    output_filename = f"{input_path.stem}_asl_gloss.txt"
                    output_file = Path(output_dir) / output_filename
                else:
                    output_file = None

                asl_gloss = self.convert_document(document_path, str(output_file) if output_file else None)
                results[document_path] = asl_gloss

                print(f"✓ Completed: {document_path}")

            except Exception as e:
                print(f"✗ Error processing {document_path}: {str(e)}")
                results[document_path] = f"ERROR: {str(e)}"

        return results

    def get_supported_formats(self) -> list:
        """
        Get list of supported document formats.

        Returns:
            List of supported file extensions
        """
        return ['.pdf', '.txt', '.docx', '.doc', '.epub']


def main():
    """Main function for command-line usage."""
    parser = argparse.ArgumentParser(
        description="Convert documents to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert a single document
  python document_to_asl.py document.pdf

  # Convert document with output file
  python document_to_asl.py document.pdf -o output.txt

  # Batch convert multiple documents
  python document_to_asl.py -b doc1.pdf doc2.docx doc3.txt -d output_dir/

  # Interactive mode
  python document_to_asl.py -i

  # Show supported formats
  python document_to_asl.py --formats
        """
    )

    parser.add_argument(
        'document',
        nargs='?',
        help='Document file to convert to ASL gloss'
    )

    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )

    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple documents'
    )

    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )

    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )

    parser.add_argument(
        '--formats',
        action='store_true',
        help='Show supported document formats'
    )

    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        # Initialize converter
        converter = DocumentToASLConverter(api_key=args.api_key)

        if args.formats:
            print("Supported Document Formats:")
            print("=" * 30)
            formats = converter.get_supported_formats()
            for fmt in formats:
                print(f"  • {fmt}")
            print("\nExamples: .pdf, .txt, .docx, .doc, .epub")
            return 0

        if args.interactive:
            print("Document to ASL Gloss Converter - Interactive Mode")
            print("Enter document file paths to convert (or 'quit' to exit):")
            print("-" * 60)

            while True:
                try:
                    doc_path = input("\nDocument path: ").strip()
                    if doc_path.lower() in ['quit', 'exit', 'q']:
                        break

                    if not doc_path:
                        continue

                    if not os.path.exists(doc_path):
                        print(f"Error: File not found: {doc_path}")
                        continue

                    # Ask for output file
                    output_file = input("Output file (optional, press Enter to skip): ").strip()
                    if not output_file:
                        output_file = None

                    print("Converting...")
                    asl_gloss = converter.convert_document(doc_path, output_file)

                    if not output_file:
                        print("\nASL Gloss:")
                        print("-" * 20)
                        print(asl_gloss)

                except KeyboardInterrupt:
                    print("\nExiting...")
                    break
                except Exception as e:
                    print(f"Error: {str(e)}")

        elif args.batch:
            if not args.batch:
                print("Error: No documents specified for batch conversion")
                return 1

            print(f"Batch converting {len(args.batch)} documents...")
            results = converter.batch_convert_documents(args.batch, args.output_dir)

            print("\n" + "="*60)
            print("BATCH CONVERSION RESULTS")
            print("="*60)
            for doc_path, result in results.items():
                print(f"\nDocument: {doc_path}")
                print("-" * 40)
                if result.startswith("ERROR:"):
                    print(f"❌ {result}")
                else:
                    print("✅ Conversion successful")
                    if not args.output_dir:
                        print("ASL Gloss:")
                        print(result[:500] + "..." if len(result) > 500 else result)

        elif args.document:
            asl_gloss = converter.convert_document(args.document, args.output)
            if not args.output:
                print("\nASL Gloss:")
                print("-" * 20)
                print(asl_gloss)

        else:
            parser.print_help()
            return 1

        return 0

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())
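convert_document() is a thin composition of the two modules above: DocumentParser.extract_text() followed by ASLGlossConverter.convert_text(). A minimal programmatic sketch; ANTHROPIC_API_KEY being set and story.pdf existing locally are both assumptions:

from document_to_gloss import DocumentToASLConverter

converter = DocumentToASLConverter()
print(converter.get_supported_formats())  # ['.pdf', '.txt', '.docx', '.doc', '.epub']
# Parses story.pdf, glosses it, and also writes the result to story_asl_gloss.txt
gloss = converter.convert_document("story.pdf", output_file="story_asl_gloss.txt")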
requirements.txt
ADDED
@@ -0,0 +1,16 @@
anthropic==0.54.0
beautifulsoup4==4.13.4
boto3==1.38.41
botocore==1.38.41
ebooklib==0.19
gensim==4.3.3
gradio==5.34.2
mediapipe==0.10.21
numpy<2.0
opencv-python
pandas<2.0
PyPDF2==3.0.1
python-dotenv==1.1.0
python-docx
requests==2.32.4
supabase==2.15.3
test.py
ADDED
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""
Test script to verify the fixes for the ASL gloss processing
"""

import asyncio
import re
from vectorizer import Vectorizer


def clean_gloss_token(token):
    """
    Clean a gloss token by removing brackets, newlines, and extra whitespace
    """
    # Remove brackets and newlines
    cleaned = re.sub(r'[\[\]\n\r]', '', token)
    # Remove extra whitespace
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    return cleaned.lower()


def test_gloss_parsing():
    """Test the gloss parsing functionality"""
    # Sample gloss output from the notebook
    sample_gloss = ("ASL [BEAR] [NAME] [OSKI] [LOVE] [HONEY] [BUT] [ALWAYS] "
                    "[GET-STUCK] [TREE]\n\n[ONE_DAY] [HE] [DISCOVER] [LADDER]\n\n"
                    "[PROBLEM] [SOLVE] [FINISH]")

    print("Original gloss:")
    print(sample_gloss)
    print("\n" + "="*50 + "\n")

    # Split by spaces and clean each token
    gloss_tokens = sample_gloss.split()
    cleaned_tokens = []

    for token in gloss_tokens:
        cleaned = clean_gloss_token(token)
        if cleaned:  # Only add non-empty tokens
            cleaned_tokens.append(cleaned)

    print("Cleaned tokens:")
    for i, token in enumerate(cleaned_tokens):
        print(f"{i+1:2d}. {token}")

    return cleaned_tokens


async def test_vectorizer():
    """Test the vectorizer functionality"""
    try:
        vectorizer = Vectorizer()

        # Test with a simple word that should be in the vocabulary
        test_words = ["BEAR", "LOVE", "TREE", "HE", "FINISH"]

        for word in test_words:
            print(f"\nTesting word: {word}")
            result = await vectorizer.vector_query_from_supabase(word)
            print(f"Result: {result}")

    except Exception as e:
        print(f"Error testing vectorizer: {e}")


async def main():
    """Main test function"""
    print("Testing ASL Gloss Processing Fixes")
    print("=" * 50)

    # Test 1: Gloss parsing
    print("\n1. Testing gloss parsing...")
    cleaned_tokens = test_gloss_parsing()
    print(f"Total cleaned tokens: {len(cleaned_tokens)}")

    # Test 2: Vectorizer (if environment is set up)
    print("\n2. Testing vectorizer...")
    await test_vectorizer()

    print("\n" + "=" * 50)
    print("Test completed!")


if __name__ == "__main__":
    asyncio.run(main())
vectorizer.py
ADDED
@@ -0,0 +1,155 @@
import gensim
import gensim.downloader
import numpy as np
import pandas as pd
import os
from supabase import acreate_client, AsyncClient
from dotenv import load_dotenv

class Vectorizer:
    """
    A class to:
    - Generate embeddings of words
    - Query for words from Supabase database based on vector similarity
    - Return matching ASL videos for words
    """

    def load_kv(self, model_name='word2vec-google-news-300'):
        """
        Returns a KeyedVectors object loaded from gensim
        """
        try:
            kv = gensim.downloader.load(model_name)  # returns a KeyedVectors
            return kv
        except Exception as e:
            print(f"Unable to load embedding model from gensim: {e}")
            return None

    async def initialize_supabase(self):
        url: str = os.environ.get("SUPABASE_URL")
        key: str = os.environ.get("SUPABASE_KEY")
        supabase: AsyncClient = await acreate_client(url, key)
        return supabase

    def __init__(self):
        load_dotenv()
        self.kv = self.load_kv()
        self.supabase = None  # Will be initialized when needed

    async def ensure_supabase_initialized(self):
        """Ensure Supabase client is initialized"""
        if self.supabase is None:
            self.supabase = await self.initialize_supabase()

    def encode(self, word):
        print(f"encoding {word}")
        if self.kv is not None and word in self.kv.key_to_index:
            return self.kv[word]
        else:
            print(f"Error: {word} is not in the KeyedVectors' vocabulary")
            return None

    def encode_and_format(self, word):
        """
        Apply encoding function to each word.
        Prettify the encoding to match the expected format for Supabase vectors
        """
        enc = self.encode(word)
        return "[" + ",".join(map(str, enc.tolist())) + "]" if enc is not None else None

    async def vector_query_from_supabase(self, query):
        try:
            await self.ensure_supabase_initialized()
            query_embedding = self.encode(query)
            if query_embedding is None:
                return {
                    "match": False,
                    "error": f"'{query}' not in vocabulary"
                }

            query_embedding = query_embedding.tolist()

            if self.supabase is not None:
                result = await self.supabase.rpc(
                    "match_vector",
                    {
                        "query_embedding": query_embedding,
                        "match_threshold": 0.0,
                        "match_count": 1
                    }
                ).execute()

                data = result.data
                if data:
                    match = data[0]
                    return {
                        "match": True,
                        "query": query,
                        "matching_word": match["word"],
                        "video_url": match["video_url"],
                        "similarity": match["similarity"]
                    }
                else:
                    return {"match": False}
            else:
                return {"match": False, "error": "Supabase not initialized"}

        except Exception as e:
            print(f"RPC call failed: {e}")
            return {"match": False, "error": str(e)}


def load_filtered_kv(model_name='word2vec-google-news-300', vocab=None):
    """
    Returns a KeyedVectors object whose vocabulary
    consists of the words in vocab
    """
    if vocab is None:
        vocab = []
    try:
        # gensim.downloader.load returns a KeyedVectors
        original_kv = gensim.downloader.load(model_name)
        if vocab:
            filtered_key2vec_map = {}
            for key in vocab:
                if key in original_kv.key_to_index:
                    filtered_key2vec_map[key] = original_kv[key]

            new_kv = gensim.models.KeyedVectors(
                vector_size=original_kv.vector_size)
            new_kv.add_vectors(list(filtered_key2vec_map.keys()),
                               np.array(list(filtered_key2vec_map.values())))
            # Return the filtered KeyedVectors rather than the full model
            return new_kv
        else:
            return original_kv
    except Exception as e:
        print(f"Unable to load embedding model from gensim: {e}")
        return None


async def main():
    vectorizer = Vectorizer()

    vector = vectorizer.encode("test")
    print(vector)
    result = await vectorizer.vector_query_from_supabase("dog")
    print(result)
    result = await vectorizer.vector_query_from_supabase("cat")
    print(result)

    # read word list
    # df = pd.read_csv('videos_rows.csv')

    # # Add embeddings column - apply encode to each word
    # df['embedding'] = df['word'].apply(vectorizer.encode_and_format)

    # # Drop any rows that don't have an embedding
    # df = df.dropna(subset=['embedding'])
    # print(df.head())

    # df.to_csv("vectors.csv", index=False, columns=["word", "video_url", "embedding"], header=True)


if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
video_gen.py
ADDED
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import cv2
import mediapipe as mp
import numpy as np


def extract_keypoints_from_video(video_path, verbose=False):
    mp_pose = mp.solutions.pose
    mp_hands = mp.solutions.hands

    pose_model = mp_pose.Pose()
    hands_model = mp_hands.Hands(static_image_mode=False, max_num_hands=2)

    cap = cv2.VideoCapture(video_path)
    keypoints_sequence = []

    frame_idx = 0

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Pose estimation
        pose_results = pose_model.process(frame_rgb)
        if not pose_results.pose_landmarks:
            frame_idx += 1
            continue

        # Extract 33 body keypoints (x, y normalized to [0, 1])
        pose_landmarks = pose_results.pose_landmarks.landmark
        pose = np.array([[lm.x, lm.y] for lm in pose_landmarks])  # shape (33, 2)

        # Hand tracking; zeros mean "no hand detected in this frame"
        left_hand = np.zeros((21, 2))
        right_hand = np.zeros((21, 2))

        hand_results = hands_model.process(frame_rgb)
        if hand_results.multi_hand_landmarks and hand_results.multi_handedness:
            for hand_landmarks, hand_info in zip(hand_results.multi_hand_landmarks,
                                                 hand_results.multi_handedness):
                label = hand_info.classification[0].label  # 'Left' or 'Right'
                hand_array = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark])
                if label == "Left":
                    left_hand = hand_array
                else:
                    right_hand = hand_array

        keypoints_sequence.append((pose, left_hand, right_hand))

        if verbose:
            print(f"Processed frame {frame_idx}")
        frame_idx += 1

    cap.release()
    pose_model.close()
    hands_model.close()

    return keypoints_sequence
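
For reference, a minimal sketch of how this extractor's output is typically consumed (the clip path is a placeholder, and it assumes at least one frame had a detected pose): each element of the returned sequence is a (pose, left_hand, right_hand) tuple of NumPy arrays with shapes (33, 2), (21, 2), and (21, 2), holding coordinates normalized to [0, 1], so they must be scaled by the frame width and height before drawing.

    # Sketch: inspect the keypoints of one clip ("hello.mp4" is a hypothetical path)
    seq = extract_keypoints_from_video("hello.mp4", verbose=True)
    print(f"{len(seq)} frames with detected poses")
    pose, left_hand, right_hand = seq[0]
    print(pose.shape, left_hand.shape, right_hand.shape)  # (33, 2) (21, 2) (21, 2)
    print(pose[0])  # nose position, normalized to [0, 1]
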

def render_person(frame, pose, left_hand, right_hand):
    h, w = frame.shape[:2]

    # MediaPipe Pose keypoint indices
    # Face
    NOSE = 0
    LEFT_EYE = 2
    RIGHT_EYE = 5
    LEFT_EAR = 7
    RIGHT_EAR = 8

    # Body
    LEFT_SHOULDER = 11
    RIGHT_SHOULDER = 12
    LEFT_ELBOW = 13
    RIGHT_ELBOW = 14
    LEFT_WRIST = 15
    RIGHT_WRIST = 16
    LEFT_HIP = 23
    RIGHT_HIP = 24
    LEFT_KNEE = 25
    RIGHT_KNEE = 26
    LEFT_ANKLE = 27
    RIGHT_ANKLE = 28

    # MediaPipe Hands keypoint indices
    # Thumb: 0-4, Index: 5-8, Middle: 9-12, Ring: 13-16, Pinky: 17-20
    THUMB_TIP = 4
    INDEX_TIP = 8
    MIDDLE_TIP = 12
    RING_TIP = 16
    PINKY_TIP = 20

    # Finger connections (pairs of hand-landmark indices)
    finger_connections = [
        # Thumb
        (0, 1), (1, 2), (2, 3), (3, 4),
        # Index finger
        (0, 5), (5, 6), (6, 7), (7, 8),
        # Middle finger
        (0, 9), (9, 10), (10, 11), (11, 12),
        # Ring finger
        (0, 13), (13, 14), (14, 15), (15, 16),
        # Pinky
        (0, 17), (17, 18), (18, 19), (19, 20)
    ]

    # Friendly color palette (OpenCV colors are BGR tuples)
    skin_color = (173, 216, 230)     # light brown bear fur
    outline_color = (40, 40, 40)     # soft dark outline
    shirt_color = (205, 170, 125)    # light blue tuxedo jacket
    pants_color = (135, 206, 235)    # tuxedo pants
    bow_tie_color = (255, 255, 255)  # white bow tie
    eye_color = (255, 255, 255)      # white eyes
    pupil_color = (0, 0, 0)          # black pupils

    # Draw body parts as filled shapes

    # 1. Head (face) with friendly styling
    if len(pose) > max(LEFT_EYE, RIGHT_EYE, LEFT_EAR, RIGHT_EAR):
        # Head center and size
        head_center_x = pose[NOSE][0] * w
        head_center_y = pose[NOSE][1] * h

        # Estimate head size from the eye distance when both eyes are detected
        if pose[LEFT_EYE][0] > 0 and pose[RIGHT_EYE][0] > 0:
            eye_distance = abs(pose[LEFT_EYE][0] - pose[RIGHT_EYE][0]) * w
            head_radius = eye_distance * 1.8  # larger head for a friendlier look
        else:
            head_radius = 35

        # Draw bear ears first (behind the head)
        ear_radius = int(head_radius * 0.4)
        # Left ear
        left_ear_x = int(head_center_x - head_radius * 0.6)
        left_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, outline_color, 2)
        # Inner ear detail
        cv2.circle(frame, (left_ear_x, left_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Right ear
        right_ear_x = int(head_center_x + head_radius * 0.6)
        right_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, outline_color, 2)
        # Inner ear detail
        cv2.circle(frame, (right_ear_x, right_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Head
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), skin_color, -1)
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), outline_color, 2)

        # Large, friendly bear eyes
        if pose[LEFT_EYE][0] > 0 and pose[LEFT_EYE][1] > 0:
            eye_x, eye_y = int(pose[LEFT_EYE][0] * w), int(pose[LEFT_EYE][1] * h)
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)            # white of the eye
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)           # pupil
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)         # outline
            cv2.circle(frame, (eye_x - 3, eye_y - 3), 3, (255, 255, 255), -1)  # shine

        if pose[RIGHT_EYE][0] > 0 and pose[RIGHT_EYE][1] > 0:
            eye_x, eye_y = int(pose[RIGHT_EYE][0] * w), int(pose[RIGHT_EYE][1] * h)
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)
            cv2.circle(frame, (eye_x - 3, eye_y - 3), 3, (255, 255, 255), -1)

        # Round bear nose
        nose_x = int(head_center_x)
        nose_y = int(head_center_y + head_radius * 0.1)
        cv2.circle(frame, (nose_x, nose_y), 6, (80, 40, 20), -1)  # dark brown nose
        cv2.circle(frame, (nose_x, nose_y), 6, outline_color, 1)

        # Friendly smile arc
        smile_center_x = int(head_center_x)
        smile_center_y = int(head_center_y + head_radius * 0.3)
        smile_radius = int(head_radius * 0.6)
        cv2.ellipse(frame, (smile_center_x, smile_center_y), (smile_radius, smile_radius // 2),
                    0, 0, 180, outline_color, 3)

    # 2. Torso with shirt
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER, LEFT_HIP, RIGHT_HIP):
        left_shoulder = (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h))
        right_shoulder = (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h))
        left_hip = (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h))
        right_hip = (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h))

        # Torso as a filled polygon in the shirt color
        torso_points = np.array([left_shoulder, right_shoulder, right_hip, left_hip], np.int32)
        cv2.fillPoly(frame, [torso_points], shirt_color)
        cv2.polylines(frame, [torso_points], True, outline_color, 2)

    # 3. Arms drawn as thick strokes so they read as limbs, not sticks
    # Left arm
    if len(pose) > max(LEFT_SHOULDER, LEFT_ELBOW, LEFT_WRIST):
        if pose[LEFT_SHOULDER][0] > 0 and pose[LEFT_ELBOW][0] > 0:
            # Upper arm
            cv2.line(frame,
                     (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                     (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                     skin_color, 36)
            cv2.line(frame,
                     (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                     (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                     outline_color, 2)

            # Lower arm
            if pose[LEFT_WRIST][0] > 0:
                cv2.line(frame,
                         (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                         (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                         skin_color, 30)
                cv2.line(frame,
                         (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                         (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                         outline_color, 2)

    # Right arm
    if len(pose) > max(RIGHT_SHOULDER, RIGHT_ELBOW, RIGHT_WRIST):
        if pose[RIGHT_SHOULDER][0] > 0 and pose[RIGHT_ELBOW][0] > 0:
            # Upper arm
            cv2.line(frame,
                     (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                     (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                     skin_color, 36)
            cv2.line(frame,
                     (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                     (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                     outline_color, 2)

            # Lower arm
            if pose[RIGHT_WRIST][0] > 0:
                cv2.line(frame,
                         (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                         (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                         skin_color, 30)
                cv2.line(frame,
                         (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                         (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                         outline_color, 2)

    # 4. Legs with pants
    # Left leg
    if len(pose) > max(LEFT_HIP, LEFT_KNEE, LEFT_ANKLE):
        if pose[LEFT_HIP][0] > 0 and pose[LEFT_KNEE][0] > 0:
            # Upper leg
            cv2.line(frame,
                     (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                     (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                     pants_color, 14)
            cv2.line(frame,
                     (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                     (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                     outline_color, 2)

            # Lower leg
            if pose[LEFT_ANKLE][0] > 0:
                cv2.line(frame,
                         (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                         (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                         pants_color, 12)
                cv2.line(frame,
                         (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                         (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                         outline_color, 2)

    # Right leg
    if len(pose) > max(RIGHT_HIP, RIGHT_KNEE, RIGHT_ANKLE):
        if pose[RIGHT_HIP][0] > 0 and pose[RIGHT_KNEE][0] > 0:
            # Upper leg
            cv2.line(frame,
                     (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                     (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                     pants_color, 14)
            cv2.line(frame,
                     (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                     (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                     outline_color, 2)

            # Lower leg
            if pose[RIGHT_ANKLE][0] > 0:
                cv2.line(frame,
                         (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                         (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                         pants_color, 12)
                cv2.line(frame,
                         (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                         (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                         outline_color, 2)

    # 1.5. Neck connecting head to shoulders (drawn late so it overlaps the torso cleanly)
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER):
        if pose[LEFT_SHOULDER][0] > 0 and pose[RIGHT_SHOULDER][0] > 0:
            # Neck position and size
            neck_center_x = (pose[LEFT_SHOULDER][0] + pose[RIGHT_SHOULDER][0]) / 2 * w
            neck_center_y = (pose[LEFT_SHOULDER][1] + pose[RIGHT_SHOULDER][1]) / 2 * h

            # Position the neck slightly above the shoulder midpoint
            neck_y = neck_center_y - 15

            # Neck width derived from shoulder distance
            shoulder_distance = abs(pose[LEFT_SHOULDER][0] - pose[RIGHT_SHOULDER][0]) * w
            neck_width = shoulder_distance * 0.3  # neck is about 30% of shoulder width
            neck_height = 25

            # Draw the neck as a filled rectangle
            neck_left = int(neck_center_x - neck_width / 2)
            neck_right = int(neck_center_x + neck_width / 2)
            neck_top = int(neck_y - neck_height / 2)
            neck_bottom = int(neck_y + neck_height / 2)

            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), skin_color, -1)
            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), outline_color, 2)

            # Bow tie below the neck
            bow_center_x = int(neck_center_x)
            bow_center_y = int(neck_y + neck_height / 2 + 5)
            bow_width = 20
            bow_height = 12

            # Left side of the bow tie
            left_bow_points = np.array([
                [bow_center_x - bow_width // 2, bow_center_y - bow_height // 2],
                [bow_center_x - bow_width // 2 - 8, bow_center_y],
                [bow_center_x - bow_width // 2, bow_center_y + bow_height // 2],
                [bow_center_x - 2, bow_center_y + bow_height // 2],
                [bow_center_x - 2, bow_center_y - bow_height // 2]
            ], np.int32)
            cv2.fillPoly(frame, [left_bow_points], bow_tie_color)
            cv2.polylines(frame, [left_bow_points], True, outline_color, 1)

            # Right side of the bow tie
            right_bow_points = np.array([
                [bow_center_x + bow_width // 2, bow_center_y - bow_height // 2],
                [bow_center_x + bow_width // 2 + 8, bow_center_y],
                [bow_center_x + bow_width // 2, bow_center_y + bow_height // 2],
                [bow_center_x + 2, bow_center_y + bow_height // 2],
                [bow_center_x + 2, bow_center_y - bow_height // 2]
            ], np.int32)
            cv2.fillPoly(frame, [right_bow_points], bow_tie_color)
            cv2.polylines(frame, [right_bow_points], True, outline_color, 1)

            # Center knot of the bow tie
            knot_points = np.array([
                [bow_center_x - 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y + 3],
                [bow_center_x - 2, bow_center_y + 3]
            ], np.int32)
            cv2.fillPoly(frame, [knot_points], bow_tie_color)
            cv2.polylines(frame, [knot_points], True, outline_color, 1)

    # 5. Hands with clear finger definition (drawn last so they are always in front)
    for hand, hand_color in [(left_hand, (255, 0, 0)), (right_hand, (0, 0, 255))]:
        if np.any(hand != 0):  # only draw if the hand was detected
            # Palm as a filled convex shape
            palm_points = []
            # Use the wrist and the base of each finger for the palm
            palm_indices = [0, 5, 9, 13, 17]
            for idx in palm_indices:
                if idx < len(hand) and hand[idx][0] > 0 and hand[idx][1] > 0:
                    palm_points.append([int(hand[idx][0] * w), int(hand[idx][1] * h)])

            if len(palm_points) > 3:
                palm_points = np.array(palm_points, np.int32)
                hull = cv2.convexHull(palm_points)
                cv2.fillPoly(frame, [hull], (255, 182, 193))  # light pink palm
                cv2.polylines(frame, [hull], True, outline_color, 2)

            # Individual fingers along the landmark connections
            for connection in finger_connections:
                start_idx, end_idx = connection
                if (start_idx < len(hand) and end_idx < len(hand) and
                        hand[start_idx][0] > 0 and hand[start_idx][1] > 0 and
                        hand[end_idx][0] > 0 and hand[end_idx][1] > 0):

                    start_point = (int(hand[start_idx][0] * w), int(hand[start_idx][1] * h))
                    end_point = (int(hand[end_idx][0] * w), int(hand[end_idx][1] * h))

                    # Finger bone
                    cv2.line(frame, start_point, end_point, (255, 182, 193), 9)
                    cv2.line(frame, start_point, end_point, outline_color, 1)

            # Fingertips with emphasis
            finger_tips = [THUMB_TIP, INDEX_TIP, MIDDLE_TIP, RING_TIP, PINKY_TIP]
            for tip_idx in finger_tips:
                if tip_idx < len(hand) and hand[tip_idx][0] > 0 and hand[tip_idx][1] > 0:
                    tip_x, tip_y = int(hand[tip_idx][0] * w), int(hand[tip_idx][1] * h)
                    cv2.circle(frame, (tip_x, tip_y), 4, (255, 182, 193), -1)
                    cv2.circle(frame, (tip_x, tip_y), 4, outline_color, 2)
                    # Small highlight
                    cv2.circle(frame, (tip_x - 1, tip_y - 1), 1, (255, 255, 255), -1)

            # All hand keypoints as dots for clarity (red/blue per hand)
            for i, (x, y) in enumerate(hand):
                if x > 0 and y > 0:
                    point_x, point_y = int(x * w), int(y * h)
                    # Slightly larger dots on the fingertips
                    if i in finger_tips:
                        cv2.circle(frame, (point_x, point_y), 2, hand_color, -1)
                    else:
                        cv2.circle(frame, (point_x, point_y), 1, hand_color, -1)

    return frame
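
Since render_person draws onto any BGR canvas, a single frame can also be rendered standalone, which is handy for checking the character styling without writing a full video. A small sketch (the clip path and output filename are placeholders):

    # Sketch: render the first detected frame of a clip to a still image
    seq = extract_keypoints_from_video("hello.mp4")  # hypothetical path
    canvas = np.ones((720, 1280, 3), dtype=np.uint8) * 255  # white 720p canvas
    pose, left_hand, right_hand = seq[0]
    cv2.imwrite("preview.png", render_person(canvas, pose, left_hand, right_hand))
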

def interpolate_keypoints(kptsA, kptsB, steps):
    poseA, leftA, rightA = kptsA
    poseB, leftB, rightB = kptsB

    frames = []
    for t in range(1, steps + 1):
        alpha = t / (steps + 1)
        interp_pose = (1 - alpha) * poseA + alpha * poseB

        # Check whether each hand is detected (all-zero arrays mean no detection)
        leftA_detected = np.any(leftA != 0)
        rightA_detected = np.any(rightA != 0)
        leftB_detected = np.any(leftB != 0)
        rightB_detected = np.any(rightB != 0)

        # Interpolate the left hand only if both frames have a detected hand
        if leftA_detected and leftB_detected:
            interp_left = (1 - alpha) * leftA + alpha * leftB
        elif leftA_detected:
            interp_left = leftA  # keep the last known position
        elif leftB_detected:
            interp_left = leftB  # use the new position
        else:
            interp_left = np.zeros((21, 2))  # no hand detected

        # Interpolate the right hand only if both frames have a detected hand
        if rightA_detected and rightB_detected:
            interp_right = (1 - alpha) * rightA + alpha * rightB
        elif rightA_detected:
            interp_right = rightA  # keep the last known position
        elif rightB_detected:
            interp_right = rightB  # use the new position
        else:
            interp_right = np.zeros((21, 2))  # no hand detected

        frames.append((interp_pose, interp_left, interp_right))
    return frames
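
The transition is plain linear interpolation: each in-between frame blends the two endpoint skeletons as p(alpha) = (1 - alpha) * p_A + alpha * p_B, with alpha stepping from 1/(steps+1) to steps/(steps+1), so neither endpoint frame is duplicated in the output. A tiny worked example:

    # With steps=3, alpha takes the values 0.25, 0.5, 0.75.
    # A keypoint moving from x=0.2 to x=0.6 lands at 0.3, 0.4, 0.5:
    for t in range(1, 4):
        alpha = t / 4
        print(round((1 - alpha) * 0.2 + alpha * 0.6, 2))
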

def create_stitched_video(videoA_path, videoB_path, output_path="stitched_output.mp4"):
    # Extract keypoints from both videos
    videoA_keypoints = extract_keypoints_from_video(videoA_path)
    videoB_keypoints = extract_keypoints_from_video(videoB_path)

    # Create the video writer (30 fps, 1280x720)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, 30.0, (1280, 720))

    # Render original A
    for pose, l, r in videoA_keypoints:
        frame = np.ones((720, 1280, 3), dtype=np.uint8) * 255
        out.write(render_person(frame, pose, l, r))

    # Interpolated transition from the last frame of A to the first frame of B
    interp = interpolate_keypoints(videoA_keypoints[-1], videoB_keypoints[0], steps=15)
    for pose, l, r in interp:
        frame = np.ones((720, 1280, 3), dtype=np.uint8) * 255
        out.write(render_person(frame, pose, l, r))

    # Render original B
    for pose, l, r in videoB_keypoints:
        frame = np.ones((720, 1280, 3), dtype=np.uint8) * 255
        out.write(render_person(frame, pose, l, r))

    out.release()
    print(f"Video saved to {output_path}")

def create_multi_stitched_video(video_paths, output_path="multi_stitched_output.mp4", transition_steps=15):
    """
    Create a stitched video from multiple video files.

    Args:
        video_paths (list): List of paths to MP4 video files
        output_path (str): Output path for the final video
        transition_steps (int): Number of frames for transitions between videos
    """
    if len(video_paths) < 2:
        print("Need at least 2 videos to stitch together!")
        return

    print(f"Processing {len(video_paths)} videos...")

    # Extract keypoints from all videos
    all_keypoints = []
    for i, video_path in enumerate(video_paths):
        print(f"Extracting keypoints from video {i+1}/{len(video_paths)}: {video_path}")
        keypoints = extract_keypoints_from_video(video_path)
        all_keypoints.append(keypoints)
        print(f"  - Extracted {len(keypoints)} frames")

    # Create the video writer (30 fps, 1280x720)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, 30.0, (1280, 720))

    total_frames = 0

    # Process each video
    for i, keypoints in enumerate(all_keypoints):
        print(f"Rendering video {i+1}/{len(all_keypoints)}...")

        # Render all frames from the current video
        for pose, l, r in keypoints:
            frame = np.ones((720, 1280, 3), dtype=np.uint8) * 255
            out.write(render_person(frame, pose, l, r))
            total_frames += 1

        # Add a transition to the next video (except after the last one)
        if i < len(all_keypoints) - 1:
            print("  Adding transition to next video...")
            next_keypoints = all_keypoints[i + 1]

            # Interpolate between the last frame of this video and the first frame of the next
            interp = interpolate_keypoints(keypoints[-1], next_keypoints[0], steps=transition_steps)
            for pose, l, r in interp:
                frame = np.ones((720, 1280, 3), dtype=np.uint8) * 255
                out.write(render_person(frame, pose, l, r))
                total_frames += 1

    out.release()
    print(f"Multi-stitched video saved to {output_path}")
    print(f"Total frames rendered: {total_frames}")
    print(f"Video duration: {total_frames/30:.2f} seconds")


if __name__ == "__main__":
    # Example usage for multiple videos
    video_list = [
        "/Users/ethantam/desktop/35304.mp4",
        "/Users/ethantam/desktop/23978.mp4",
        "/Users/ethantam/desktop/23106.mp4",
        # Add more video paths here as needed
    ]

    # Create multi-stitched video
    create_multi_stitched_video(video_list, "multi_stitched_output_1.mp4")

    # Or use the original 2-video function
    # create_stitched_video("/Users/ethantam/desktop/35304.mp4", "/Users/ethantam/desktop/23978.mp4")
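
One practical note: OpenCV's 'mp4v' fourcc produces MPEG-4 Part 2 video, which some browsers (and therefore embedded web players) cannot decode. If that matters for the hosting environment, a common fix is to re-encode the finished file to H.264 with an external ffmpeg binary; a sketch, assuming ffmpeg is installed and on PATH:

    import subprocess

    # Re-encode to H.264 with a widely supported pixel format
    subprocess.run(
        ["ffmpeg", "-y", "-i", "multi_stitched_output_1.mp4",
         "-c:v", "libx264", "-pix_fmt", "yuv420p", "multi_stitched_output_1_h264.mp4"],
        check=True,
    )
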
videos_rows.csv
ADDED
The diff for this file is too large to render.
See raw diff