#!/usr/bin/env python3
"""
Test script to verify the fixes for the ASL gloss processing.
"""
import asyncio
import re
from typing import Optional

from vectorizer import Vectorizer
def clean_gloss_token(token: Optional[str]) -> str:
    """Normalize a single gloss token.

    Square brackets, newlines and carriage returns are deleted, every
    remaining run of whitespace is collapsed to one space, surrounding
    whitespace is stripped, and the result is lower-cased.

    Args:
        token: Raw token text such as ``"[BEAR]"``. ``None`` or an empty
            string is treated as "no token".

    Returns:
        The cleaned, lower-cased token, or ``""`` when nothing is left.
    """
    if not token:
        # Nothing to clean; also avoids a TypeError from re.sub on None.
        return ""
    # Brackets and line breaks are deleted outright (not replaced by a
    # space), so text on either side of them is joined together.
    cleaned = re.sub(r'[\[\]\n\r]', '', token)
    # Collapse any remaining whitespace runs, then trim both ends.
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    return cleaned.lower()
def test_gloss_parsing():
    """Split a sample gloss string into cleaned tokens and print them.

    The sample is printed as-is, split on whitespace, and each piece is
    passed through ``clean_gloss_token``; pieces that come back empty are
    dropped. The surviving tokens are printed as a numbered list.

    Returns:
        list: The non-empty cleaned tokens, in their original order.
    """
    # Sample gloss output from the notebook
    sample_gloss = ("ASL [BEAR] [NAME] [OSKI] [LOVE] [HONEY] [BUT] [ALWAYS] "
                    "[GET-STUCK] [TREE]\n\n[ONE_DAY] [HE] [DISCOVER] [LADDER]\n\n"
                    "[PROBLEM] [SOLVE] [FINISH]")
    print("Original gloss:")
    print(sample_gloss)
    print("\n" + "="*50 + "\n")

    # split() with no argument splits on any whitespace run, so the blank
    # lines inside the sample do not produce empty pieces.
    cleaned_tokens = [
        cleaned
        for cleaned in map(clean_gloss_token, sample_gloss.split())
        if cleaned  # Only keep non-empty tokens
    ]

    print("Cleaned tokens:")
    for position, token in enumerate(cleaned_tokens, start=1):
        print(f"{position:2d}. {token}")
    return cleaned_tokens
async def test_vectorizer():
    """Query the vectorizer for a few sample words and print each result.

    This is a best-effort check: any exception raised while constructing
    the ``Vectorizer`` or while awaiting a query is caught and printed, and
    the remaining words are skipped rather than aborting the script.
    """
    # Test with simple words that should be in the vocabulary
    test_words = ["BEAR", "LOVE", "TREE", "HE", "FINISH"]
    try:
        vectorizer = Vectorizer()
        for word in test_words:
            print(f"\nTesting word: {word}")
            result = await vectorizer.vector_query_from_supabase(word)
            print(f"Result: {result}")
    except Exception as e:
        # Deliberately broad: report the failure and carry on.
        print(f"Error testing vectorizer: {e}")
async def main():
    """Run the gloss-parsing check and then the vectorizer check.

    Prints a banner, runs ``test_gloss_parsing`` and reports how many
    tokens it returned, awaits ``test_vectorizer``, and prints a closing
    banner.
    """
    print("Testing ASL Gloss Processing Fixes")
    print("=" * 50)

    # Test 1: Gloss parsing
    print("\n1. Testing gloss parsing...")
    cleaned_tokens = test_gloss_parsing()
    print(f"Total cleaned tokens: {len(cleaned_tokens)}")

    # Test 2: Vectorizer (if environment is set up)
    print("\n2. Testing vectorizer...")
    await test_vectorizer()

    print("\n" + "=" * 50)
    print("Test completed!")
if __name__ == "__main__":
    # Start the event loop only when executed as a script, not on import.
    asyncio.run(main())