"""
Test script to verify the updated African languages support.
"""
|
|
|
import requests |
|
import json |
|
|
|
def _check_african_language_count(api_url, timeout):
    """Print the African language count reported by the API plus up to 10 samples."""
    print("\n[TEST] African Languages Count")
    print("-" * 40)

    response = requests.get(f"{api_url}/api/v1/languages/african", timeout=timeout)
    if response.status_code != 200:
        print(f"[FAIL] Failed to get African languages: HTTP {response.status_code}")
        return

    data = response.json()
    african_count = data['total_count']
    print(f"[RESULT] Found {african_count} African languages")

    if african_count >= 50:
        print(f"[PASS] African language count meets FLORES-200 expectations ({african_count} >= 50)")
    else:
        print(f"[FAIL] African language count below expectations ({african_count} < 50)")

    print("\n[EXAMPLES] Sample African Languages:")
    # Only the first 10 entries are shown; slicing avoids walking the whole mapping.
    for code, info in list(data['languages'].items())[:10]:
        print(f" {code}: {info['name']} ({info['native_name']}) - {info['script']}")

    if african_count > 10:
        print(f" ... and {african_count - 10} more languages")


def _check_new_african_languages(api_url, timeout):
    """Look up each newly added language code individually and print a summary."""
    print("\n[TEST] Specific New African Languages")
    print("-" * 40)

    new_languages_to_test = [
        "aka_Latn",
        "bam_Latn",
        "bem_Latn",
        "dik_Latn",
        "dyu_Latn",
        "ewe_Latn",
        "fon_Latn",
        "fuv_Latn",
        "kab_Latn",
        "kam_Latn",
        "kbp_Latn",
        "kea_Latn",
        "kin_Latn",
        "kmb_Latn",
        "knc_Latn",
        "kon_Latn",
        "lua_Latn",
        "lus_Latn",
        "mos_Latn",
        "nso_Latn",
        "nus_Latn",
        "run_Latn",
        "sag_Latn",
        "taq_Latn",
        "taq_Tfng",
        "tum_Latn",
        "twi_Latn",
        "tzm_Tfng",
    ]

    found_count = 0
    missing_languages = []

    for lang_code in new_languages_to_test:
        response = requests.get(f"{api_url}/api/v1/languages/{lang_code}", timeout=timeout)

        if response.status_code == 200:
            data = response.json()
            print(f"[PASS] {lang_code}: {data['name']} ({data['native_name']})")
            found_count += 1
        else:
            # Any non-200 status is reported as "not found", as in the original script.
            print(f"[FAIL] {lang_code}: Not found")
            missing_languages.append(lang_code)

    print("\n[SUMMARY] New Languages Test Results:")
    print(f" Found: {found_count}/{len(new_languages_to_test)} languages")
    print(f" Success rate: {(found_count/len(new_languages_to_test))*100:.1f}%")

    if missing_languages:
        print(f" Missing languages: {', '.join(missing_languages)}")


def _check_african_language_search(api_url, timeout):
    """Query the search endpoint for a few language names and print the matches."""
    print("\n[TEST] African Language Search")
    print("-" * 40)

    search_terms = ["Akan", "Bambara", "Fulfulde", "Tamasheq", "Kanuri"]

    for term in search_terms:
        # Pass the term via ``params`` so requests URL-encodes it instead of
        # splicing raw text into the query string.
        response = requests.get(
            f"{api_url}/api/v1/languages/search",
            params={"q": term},
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"[FAIL] Search '{term}': HTTP {response.status_code}")
            continue

        data = response.json()
        if data['total_count'] > 0:
            print(f"[PASS] Search '{term}': Found {data['total_count']} result(s)")
            for code, info in data['languages'].items():
                print(f" {code}: {info['name']} ({info['native_name']})")
        else:
            print(f"[FAIL] Search '{term}': No results found")


def _check_language_statistics(api_url, timeout):
    """Print the statistics endpoint's totals and check the Africa region count."""
    print("\n[TEST] Updated Language Statistics")
    print("-" * 40)

    response = requests.get(f"{api_url}/api/v1/languages/stats", timeout=timeout)
    if response.status_code != 200:
        print(f"[FAIL] Failed to get language statistics: HTTP {response.status_code}")
        return

    data = response.json()

    print("[RESULT] Language Statistics:")
    print(f" Total languages: {data['total_languages']}")
    print(f" Total regions: {data['regions']}")
    print(f" Total scripts: {data['scripts']}")

    if 'by_region' in data:
        print(" Languages by region:")
        for region, count in data['by_region'].items():
            print(f" {region}: {count} languages")

    # A missing 'by_region' or 'Africa' entry counts as 0 and is reported as a failure.
    africa_count = data.get('by_region', {}).get('Africa', 0)
    if africa_count >= 50:
        print(f"[PASS] Africa region has {africa_count} languages (>= 50)")
    else:
        print(f"[FAIL] Africa region has only {africa_count} languages (< 50)")


def test_african_languages_count(api_url="https://sematech-sema-api.hf.space", timeout=30):
    """Test that we now have 50+ African languages.

    Runs four checks against the API and prints [PASS]/[FAIL] lines for each:
    the African language count, per-code lookups of the new languages, name
    search, and the language statistics. Results are only printed; nothing is
    returned or asserted.

    Args:
        api_url: Base URL of the API to test.
        timeout: Seconds passed as ``timeout`` to every HTTP request, so a
            stalled server raises instead of hanging the script forever.

    Raises:
        requests.RequestException: If a request fails at the transport level
            (connection error, timeout); such errors are not caught here.
    """
    print("[INFO] Testing Updated African Languages Support")
    print("=" * 60)

    _check_african_language_count(api_url, timeout)
    _check_new_african_languages(api_url, timeout)
    _check_african_language_search(api_url, timeout)
    _check_language_statistics(api_url, timeout)
|
|
|
def test_translation_with_new_languages(api_url="https://sematech-sema-api.hf.space", timeout=30):
    """Test translation with some of the newly added African languages.

    Posts a short English phrase to the translate endpoint for each target
    language and prints the outcome. A case counts as successful when the
    endpoint answers HTTP 200. Results are only printed; nothing is returned
    or asserted.

    Args:
        api_url: Base URL of the API to test.
        timeout: Seconds passed as ``timeout`` to each translation request.

    Raises:
        requests.RequestException: If a request fails at the transport level
            (connection error, timeout); such errors are not caught here.
    """
    print("\n[TEST] Translation with New African Languages")
    print("-" * 50)

    test_cases = [
        {
            "text": "Hello world",
            "target_language": "aka_Latn",
            "expected_lang": "Akan",
        },
        {
            "text": "Good morning",
            "target_language": "bam_Latn",
            "expected_lang": "Bambara",
        },
        {
            "text": "How are you?",
            "target_language": "fon_Latn",
            "expected_lang": "Fon",
        },
        {
            "text": "Thank you",
            "target_language": "twi_Latn",
            "expected_lang": "Twi",
        },
    ]

    successful_translations = 0

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n{i}. Testing translation to {test_case['expected_lang']} ({test_case['target_language']})")

        response = requests.post(
            f"{api_url}/api/v1/translate",
            headers={"Content-Type": "application/json"},
            json={
                "text": test_case["text"],
                "target_language": test_case["target_language"],
            },
            timeout=timeout,
        )

        if response.status_code == 200:
            data = response.json()
            print(f" Original: '{test_case['text']}'")
            print(f" Translation: '{data['translated_text']}'")
            print(f" Source: {data['source_language']}")
            print(f" Target: {data['target_language']}")
            print(" [PASS] Translation successful")
            successful_translations += 1
        else:
            print(f" [FAIL] Translation failed: HTTP {response.status_code}")
            try:
                error_data = response.json()
                print(f" Error: {error_data.get('detail', 'Unknown error')}")
            except (ValueError, AttributeError):
                # ValueError: the body is not valid JSON. AttributeError: the
                # JSON is not an object with .get(). Either way fall back to
                # the raw body, without swallowing KeyboardInterrupt or
                # SystemExit as a bare ``except:`` would.
                print(f" Error: {response.text}")

    print("\n[SUMMARY] Translation Test Results:")
    print(f" Successful: {successful_translations}/{len(test_cases)} translations")
    print(f" Success rate: {(successful_translations/len(test_cases))*100:.1f}%")
|
|
|
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default API base URL.
    cli_args = sys.argv[1:]
    api_url = cli_args[0] if cli_args else "https://sematech-sema-api.hf.space"

    print(f"[INFO] Testing updated African languages at: {api_url}")

    # Run the language-listing checks first, then the translation checks.
    test_african_languages_count(api_url)
    test_translation_with_new_languages(api_url)

    print("\n[SUCCESS] African languages update tests completed!")
    print("\nExpected results:")
    for expectation in (
        "- African languages count should be 50+ (was 23)",
        "- New languages like Akan, Bambara, Fon, Twi should be available",
        "- Search should find new African languages",
        "- Translation should work with new language codes",
    ):
        print(expectation)
|
|