Spaces:

deenasun
/

ai-sl-api

Sleeping

File size: 5,664 Bytes

dbca390
 
 
288f6d2
dbca390
 
 
 
 
 
 
 
288f6d2
dbca390
 
288f6d2
dbca390

from document_to_gloss import DocumentToASLConverter
from vectorizer import Vectorizer
from video_gen import create_multi_stitched_video
import gradio as gr
import asyncio
import re
import boto3
import os
from botocore.config import Config
from dotenv import load_dotenv
import requests
import tempfile

# Read settings from the .env file into the process environment
load_dotenv()

# R2/S3 connection secrets; each one is None when the variable is not set
R2_ENDPOINT = os.getenv("R2_ENDPOINT")
R2_ACCESS_KEY_ID = os.getenv("R2_ACCESS_KEY_ID")
R2_SECRET_ACCESS_KEY = os.getenv("R2_SECRET_ACCESS_KEY")

# Stop at import time if any of the three secrets is missing or empty
if not (R2_ENDPOINT and R2_ACCESS_KEY_ID and R2_SECRET_ACCESS_KEY):
    raise ValueError("Missing required R2 environment variables. Please check your .env file.")

# Static text and input/output components shown on the Gradio page
title = "AI-SL"
description = "Convert text to ASL!"
article = (
    "<p style='text-align: center'>"
    "<a href='https://github.com/deenasun' target='_blank'>Deena Sun on Github</a>"
    "</p>"
)
inputs = gr.File(label="Upload Document (pdf, txt, docx, or epub)")
outputs = [
    gr.JSON(label="Processing Results"),
    gr.Video(label="ASL Video Output"),
]

# Shared service objects, created once when the module is loaded
asl_converter = DocumentToASLConverter()
vectorizer = Vectorizer()
session = boto3.session.Session()

# S3 client pointed at the R2 endpoint, configured for s3v4 request signing
s3 = session.client(
    's3',
    endpoint_url=R2_ENDPOINT,
    region_name='auto',
    aws_access_key_id=R2_ACCESS_KEY_ID,
    aws_secret_access_key=R2_SECRET_ACCESS_KEY,
    config=Config(signature_version='s3v4'),
)

def clean_gloss_token(token):
    """
    Normalize a gloss token: drop square brackets and line breaks, collapse
    whitespace runs to single spaces, trim both ends, and lowercase the result
    """
    # Delete "[", "]", newline and carriage-return characters in one pass
    without_brackets = token.translate({ord(ch): None for ch in "[]\n\r"})
    # Splitting on whitespace and re-joining collapses runs and trims the ends
    collapsed = " ".join(without_brackets.split())
    return collapsed.lower()

async def parse_vectorize_and_search(file):
    """
    Convert an uploaded document to ASL gloss, look up a video for each gloss
    token, and stitch the matched videos into a single clip.

    Args:
        file: The uploaded document, passed unchanged to
            asl_converter.convert_document.

    Returns:
        A (results, video_path) tuple. results is a dict with the keys
        "status", "videos" (matched video URLs), "video_count", "gloss" and
        "cleaned_tokens". video_path is the path of the stitched video, the
        single downloaded video when only one was downloaded, or None when
        no video was downloaded or stitching failed.
    """
    print(file)
    gloss = asl_converter.convert_document(file)
    print("ASL", gloss)

    # Split by whitespace, clean each token, and keep only non-empty results
    cleaned_tokens = [
        cleaned
        for cleaned in map(clean_gloss_token, gloss.split())
        if cleaned
    ]
    print("Cleaned tokens:", cleaned_tokens)

    videos = []
    video_files = []  # Store local file paths for stitching

    for g in cleaned_tokens:
        print(f"Processing {g}")
        try:
            result = await vectorizer.vector_query_from_supabase(query=g)
            print("result", result)
            if result.get("match", False):
                video_url = result["video_url"]
                videos.append(video_url)

                # Download the video; a failed download returns None and is skipped
                local_path = download_video_from_url(video_url)
                if local_path:
                    video_files.append(local_path)

        except Exception as e:
            # Best effort: one bad token must not abort the whole document
            print(f"Error processing {g}: {e}")

    # Create stitched video if we have multiple videos
    stitched_video_path = None
    if len(video_files) > 1:
        output_path = None
        try:
            print(f"Creating stitched video from {len(video_files)} videos...")
            # Reserve an output path and close the handle immediately so the
            # file descriptor is not leaked while the stitcher writes the file
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as output_file:
                output_path = output_file.name
            create_multi_stitched_video(video_files, output_path)
            print(f"Stitched video created: {output_path}")
            stitched_video_path = output_path
        except Exception as e:
            print(f"Error creating stitched video: {e}")
            # Do not leave the unusable output file behind on disk
            cleanup_temp_video(output_path)
    elif len(video_files) == 1:
        # If only one video, just use it directly
        stitched_video_path = video_files[0]

    # Clean up individual video files after stitching
    for video_file in video_files:
        if video_file != stitched_video_path:  # Don't delete the final output
            cleanup_temp_video(video_file)

    return {
        "status": "success",
        "videos": videos,
        "video_count": len(videos),
        "gloss": gloss,
        "cleaned_tokens": cleaned_tokens
    }, stitched_video_path

# Blocking entry point handed to Gradio
def parse_vectorize_and_search_sync(file):
    """
    Run parse_vectorize_and_search(file) to completion with asyncio.run and
    return its result
    """
    pipeline = parse_vectorize_and_search(file)
    return asyncio.run(pipeline)

def download_video_from_url(video_url, timeout=30):
    """
    Download a video from a public R2 URL into a temporary .mp4 file

    Args:
        video_url: URL of the video to fetch.
        timeout: Seconds passed to requests.get as its timeout, so a stalled
            server cannot block the caller indefinitely.

    Returns:
        The local file path where the video is saved, or None if the
        download failed. The file is not deleted automatically; the caller
        must remove it when done.
    """
    temp_path = None
    try:
        # Create a temporary file with .mp4 extension; the handle is closed
        # right away and the path is reopened below for writing
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
            temp_path = temp_file.name

        # Download the video
        print(f"Downloading video from: {video_url}")
        # Using the response as a context manager releases the connection
        # even when the status check or the write fails part-way
        with requests.get(video_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # Save to temporary file in fixed-size chunks
            with open(temp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print(f"Video downloaded to: {temp_path}")
        return temp_path

    except Exception as e:
        print(f"Error downloading video: {e}")
        # Remove the empty or partial temp file so failed downloads do not
        # accumulate on disk
        if temp_path and os.path.exists(temp_path):
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(f"Error cleaning up file: {cleanup_error}")
        return None

def cleanup_temp_video(file_path):
    """
    Delete a temporary video file if it exists; errors are printed, not raised
    """
    try:
        # Nothing to do for an empty/None path or a file that is already gone
        if not file_path or not os.path.exists(file_path):
            return
        os.unlink(file_path)
        print(f"Cleaned up: {file_path}")
    except Exception as e:
        print(f"Error cleaning up file: {e}")

# Build the Gradio UI around the synchronous pipeline wrapper, then serve it
intf = gr.Interface(
    parse_vectorize_and_search_sync,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
)
intf.launch(share=True)