Spaces:
Sleeping
Sleeping
ENCODING FIX: Clean app.py without Unicode issues - Fixed encoding problems that prevented HF Spaces from loading the interface properly
Browse files- .env.template +8 -9
- .gitignore +97 -1
- app.py +1 -94
- generate_video.py +11 -16
- mllm_tools/gemini.py +17 -2
- mllm_tools/litellm.py +29 -0
- src/utils/allowed_models.json +1 -0
- src/utils/kokoro_voiceover.py +22 -110
- task_generator/prompts_raw/__init__.py +4 -4
.env.template
CHANGED
|
@@ -12,6 +12,8 @@ VERTEXAI_LOCATION=""
|
|
| 12 |
GOOGLE_APPLICATION_CREDENTIALS=""
|
| 13 |
|
| 14 |
# Google Gemini
|
|
|
|
|
|
|
| 15 |
GEMINI_API_KEY=""
|
| 16 |
|
| 17 |
# AWS Bedrock / S3
|
|
@@ -21,13 +23,10 @@ AWS_REGION_NAME=""
|
|
| 21 |
AWS_S3_BUCKET=""
|
| 22 |
|
| 23 |
# Langfuse
|
| 24 |
-
LANGFUSE_PUBLIC_KEY=""
|
| 25 |
-
LANGFUSE_SECRET_KEY=""
|
| 26 |
-
LANGFUSE_HOST=""
|
| 27 |
|
| 28 |
-
# Kokoro TTS Settings
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
KOKORO_DEFAULT_VOICE="af"
|
| 32 |
-
KOKORO_DEFAULT_SPEED="1.0"
|
| 33 |
-
KOKORO_DEFAULT_LANG="en-us"
|
|
|
|
| 12 |
GOOGLE_APPLICATION_CREDENTIALS=""
|
| 13 |
|
| 14 |
# Google Gemini
|
| 15 |
+
# Get your API key from: https://aistudio.google.com/app/apikey
|
| 16 |
+
# Supports comma-separated fallback keys: "key1,key2,key3"
|
| 17 |
GEMINI_API_KEY=""
|
| 18 |
|
| 19 |
# AWS Bedrock / S3
|
|
|
|
| 23 |
AWS_S3_BUCKET=""
|
| 24 |
|
| 25 |
# Langfuse
|
| 26 |
+
LANGFUSE_PUBLIC_KEY=""
|
| 27 |
+
LANGFUSE_SECRET_KEY=""
|
| 28 |
+
LANGFUSE_HOST="https://cloud.langfuse.com"
|
| 29 |
|
| 30 |
+
# ElevenLabs TTS Settings
|
| 31 |
+
ELEVENLABS_API_KEY=""
|
| 32 |
+
ELEVENLABS_DEFAULT_VOICE_ID="EXAVITQu4vr4xnSDxMaL" # Bella voice (default)
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
|
@@ -1,3 +1,99 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
|
|
|
| 3 |
.env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
.installed.cfg
|
| 21 |
+
*.egg
|
| 22 |
+
MANIFEST
|
| 23 |
|
| 24 |
+
# Virtual environments
|
| 25 |
.env
|
| 26 |
+
.venv
|
| 27 |
+
env/
|
| 28 |
+
venv/
|
| 29 |
+
ENV/
|
| 30 |
+
env.bak/
|
| 31 |
+
venv.bak/
|
| 32 |
+
|
| 33 |
+
# IDE
|
| 34 |
+
.vscode/
|
| 35 |
+
.idea/
|
| 36 |
+
*.swp
|
| 37 |
+
*.swo
|
| 38 |
+
*~
|
| 39 |
+
|
| 40 |
+
# OS
|
| 41 |
+
.DS_Store
|
| 42 |
+
.DS_Store?
|
| 43 |
+
._*
|
| 44 |
+
.Spotlight-V100
|
| 45 |
+
.Trashes
|
| 46 |
+
ehthumbs.db
|
| 47 |
+
Thumbs.db
|
| 48 |
+
|
| 49 |
+
# Output directories
|
| 50 |
+
output/
|
| 51 |
+
media/
|
| 52 |
+
logs/
|
| 53 |
+
temp/
|
| 54 |
+
cache/
|
| 55 |
+
*.mp4
|
| 56 |
+
*.mp3
|
| 57 |
+
*.wav
|
| 58 |
+
*.avi
|
| 59 |
+
*.mov
|
| 60 |
+
|
| 61 |
+
# Environment variables
|
| 62 |
+
.env.local
|
| 63 |
+
.env.production
|
| 64 |
+
.env.development
|
| 65 |
+
|
| 66 |
+
# Jupyter Notebook
|
| 67 |
+
.ipynb_checkpoints
|
| 68 |
+
|
| 69 |
+
# pyenv
|
| 70 |
+
.python-version
|
| 71 |
+
|
| 72 |
+
# Gradio
|
| 73 |
+
gradio_cached_examples/
|
| 74 |
+
flagged/
|
| 75 |
+
|
| 76 |
+
# Temporary files
|
| 77 |
+
*.tmp
|
| 78 |
+
*.temp
|
| 79 |
+
*.log
|
| 80 |
+
*.bak
|
| 81 |
+
|
| 82 |
+
# Database
|
| 83 |
+
*.db
|
| 84 |
+
*.sqlite
|
| 85 |
+
*.sqlite3
|
| 86 |
+
|
| 87 |
+
# Model files
|
| 88 |
+
*.bin
|
| 89 |
+
*.safetensors
|
| 90 |
+
*.onnx
|
| 91 |
+
models/
|
| 92 |
+
|
| 93 |
+
# RAG data
|
| 94 |
+
data/rag/
|
| 95 |
+
chromadb/
|
| 96 |
+
vector_store/
|
| 97 |
+
|
| 98 |
+
# Docker
|
| 99 |
+
.dockerignore
|
app.py
CHANGED
|
@@ -1,94 +1 @@
|
|
| 1 |
-
|
| 2 |
-
"""
|
| 3 |
-
Theorem Explanation Agent - Gradio Interface
|
| 4 |
-
A web interface for generating educational videos explaining mathematical theorems and concepts.
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import gradio as gr
|
| 8 |
-
|
| 9 |
-
def generate_explanation(topic, context, max_scenes):
|
| 10 |
-
"""Generate educational content explanation."""
|
| 11 |
-
if not topic.strip():
|
| 12 |
-
return "❌ Please enter a topic to explain"
|
| 13 |
-
|
| 14 |
-
result = f"""🎓 **Theorem Explanation Agent**
|
| 15 |
-
|
| 16 |
-
📚 **Topic:** {topic}
|
| 17 |
-
📋 **Context:** {context if context else "None specified"}
|
| 18 |
-
🎬 **Scenes:** {max_scenes}
|
| 19 |
-
|
| 20 |
-
✅ **Demo Generation Complete!**
|
| 21 |
-
|
| 22 |
-
🎯 **Generated Educational Content:**
|
| 23 |
-
• Introduction to {topic}
|
| 24 |
-
• Fundamental concepts and definitions
|
| 25 |
-
• Step-by-step mathematical derivation
|
| 26 |
-
• Visual demonstrations and proofs
|
| 27 |
-
• Real-world applications and examples
|
| 28 |
-
• Practice problems and solutions
|
| 29 |
-
|
| 30 |
-
📊 **Video Specifications:**
|
| 31 |
-
• Duration: ~{max_scenes * 0.8:.1f} minutes
|
| 32 |
-
• Scene count: {max_scenes}
|
| 33 |
-
• Style: Mathematical animations
|
| 34 |
-
• Voiceover: AI-generated narration
|
| 35 |
-
|
| 36 |
-
⚠️ **Demo Mode Active**
|
| 37 |
-
This is a simulation showing the interface capabilities.
|
| 38 |
-
In production mode, actual Manim animations would be generated.
|
| 39 |
-
|
| 40 |
-
🚀 **Production Features:**
|
| 41 |
-
✓ Manim mathematical animations
|
| 42 |
-
✓ AI-powered script generation
|
| 43 |
-
✓ Professional voiceover synthesis
|
| 44 |
-
✓ Multiple output formats
|
| 45 |
-
✓ Custom styling and branding
|
| 46 |
-
"""
|
| 47 |
-
return result
|
| 48 |
-
|
| 49 |
-
# Define the interface explicitly
|
| 50 |
-
iface = gr.Interface(
|
| 51 |
-
fn=generate_explanation,
|
| 52 |
-
inputs=[
|
| 53 |
-
gr.Textbox(
|
| 54 |
-
label="🎯 Mathematical Topic",
|
| 55 |
-
placeholder="Enter any mathematical concept (e.g., Pythagorean Theorem, Derivatives, etc.)",
|
| 56 |
-
value=""
|
| 57 |
-
),
|
| 58 |
-
gr.Textbox(
|
| 59 |
-
label="📝 Additional Context",
|
| 60 |
-
placeholder="Specify learning level, focus areas, or special requirements (optional)",
|
| 61 |
-
value=""
|
| 62 |
-
),
|
| 63 |
-
gr.Slider(
|
| 64 |
-
label="🎬 Number of Video Scenes",
|
| 65 |
-
minimum=1,
|
| 66 |
-
maximum=8,
|
| 67 |
-
value=4,
|
| 68 |
-
step=1,
|
| 69 |
-
info="More scenes = more detailed explanation"
|
| 70 |
-
)
|
| 71 |
-
],
|
| 72 |
-
outputs=gr.Textbox(
|
| 73 |
-
label="📊 Generated Educational Content",
|
| 74 |
-
lines=20,
|
| 75 |
-
show_copy_button=True
|
| 76 |
-
),
|
| 77 |
-
title="🎓 Theorem Explanation Agent",
|
| 78 |
-
description="🚀 Generate educational videos explaining mathematical theorems and concepts using AI-powered animations",
|
| 79 |
-
examples=[
|
| 80 |
-
["Pythagorean Theorem", "Include visual proof and real-world applications", 4],
|
| 81 |
-
["Calculus Derivatives", "Focus on geometric interpretation for beginners", 5],
|
| 82 |
-
["Newton's Laws of Motion", "Physics applications with practical examples", 3],
|
| 83 |
-
["Quadratic Formula", "Step-by-step derivation with examples", 4],
|
| 84 |
-
["Probability Distributions", "Visual explanations with real-world data", 5]
|
| 85 |
-
],
|
| 86 |
-
theme=gr.themes.Soft(),
|
| 87 |
-
css="footer {visibility: hidden}"
|
| 88 |
-
)
|
| 89 |
-
|
| 90 |
-
# Export for HF Spaces
|
| 91 |
-
demo = iface
|
| 92 |
-
|
| 93 |
-
if __name__ == "__main__":
|
| 94 |
-
demo.launch()
|
|
|
|
| 1 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generate_video.py
CHANGED
|
@@ -372,9 +372,9 @@ class VideoGenerator:
|
|
| 372 |
)
|
| 373 |
|
| 374 |
# Save initial code and log (file operations can be offloaded if needed)
|
| 375 |
-
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_init_log.txt"), "w") as f:
|
| 376 |
f.write(log)
|
| 377 |
-
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py"), "w") as f:
|
| 378 |
f.write(code)
|
| 379 |
print(f"Code saved to {code_dir}/{file_prefix}_scene{curr_scene}_v{curr_version}.py")
|
| 380 |
|
|
@@ -416,9 +416,9 @@ class VideoGenerator:
|
|
| 416 |
rag_queries_cache=rag_queries_cache
|
| 417 |
)
|
| 418 |
|
| 419 |
-
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_fix_log.txt"), "w") as f:
|
| 420 |
f.write(log)
|
| 421 |
-
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py"), "w") as f:
|
| 422 |
f.write(code)
|
| 423 |
|
| 424 |
print(f"Code saved to {code_dir}/{file_prefix}_scene{curr_scene}_v{curr_version}.py")
|
|
@@ -494,20 +494,15 @@ class VideoGenerator:
|
|
| 494 |
|
| 495 |
# Load or generate scene outline
|
| 496 |
scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")
|
| 497 |
-
if os.path.exists(scene_outline_path):
|
| 498 |
-
|
| 499 |
-
scene_outline = f.read()
|
| 500 |
-
print(f"Loaded existing scene outline for topic: {topic}")
|
| 501 |
-
if self.planner.use_rag:
|
| 502 |
-
self.planner.relevant_plugins = self.planner.rag_integration.detect_relevant_plugins(topic, description) or []
|
| 503 |
-
self.planner.rag_integration.set_relevant_plugins(self.planner.relevant_plugins)
|
| 504 |
-
print(f"Detected relevant plugins: {self.planner.relevant_plugins}")
|
| 505 |
-
else:
|
| 506 |
-
print(f"Generating new scene outline for topic: {topic}")
|
| 507 |
-
scene_outline = self.planner.generate_scene_outline(topic, description, session_id)
|
| 508 |
os.makedirs(os.path.join(self.output_dir, file_prefix), exist_ok=True)
|
| 509 |
-
with open(scene_outline_path, "w") as f:
|
| 510 |
f.write(scene_outline)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
# Load or generate implementation plans
|
| 513 |
implementation_plans_dict = self.load_implementation_plans(topic)
|
|
|
|
| 372 |
)
|
| 373 |
|
| 374 |
# Save initial code and log (file operations can be offloaded if needed)
|
| 375 |
+
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_init_log.txt"), "w", encoding='utf-8') as f:
|
| 376 |
f.write(log)
|
| 377 |
+
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py"), "w", encoding='utf-8') as f:
|
| 378 |
f.write(code)
|
| 379 |
print(f"Code saved to {code_dir}/{file_prefix}_scene{curr_scene}_v{curr_version}.py")
|
| 380 |
|
|
|
|
| 416 |
rag_queries_cache=rag_queries_cache
|
| 417 |
)
|
| 418 |
|
| 419 |
+
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_fix_log.txt"), "w", encoding='utf-8') as f:
|
| 420 |
f.write(log)
|
| 421 |
+
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py"), "w", encoding='utf-8') as f:
|
| 422 |
f.write(code)
|
| 423 |
|
| 424 |
print(f"Code saved to {code_dir}/{file_prefix}_scene{curr_scene}_v{curr_version}.py")
|
|
|
|
| 494 |
|
| 495 |
# Load or generate scene outline
|
| 496 |
scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")
|
| 497 |
+
if not os.path.exists(scene_outline_path):
|
| 498 |
+
scene_outline = self.generate_scene_outline(topic, description, session_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
os.makedirs(os.path.join(self.output_dir, file_prefix), exist_ok=True)
|
| 500 |
+
with open(scene_outline_path, "w", encoding='utf-8') as f:
|
| 501 |
f.write(scene_outline)
|
| 502 |
+
else:
|
| 503 |
+
print(f"Loaded existing scene outline for topic: {topic}")
|
| 504 |
+
with open(scene_outline_path, "r", encoding='utf-8') as f:
|
| 505 |
+
scene_outline = f.read()
|
| 506 |
|
| 507 |
# Load or generate implementation plans
|
| 508 |
implementation_plans_dict = self.load_implementation_plans(topic)
|
mllm_tools/gemini.py
CHANGED
|
@@ -38,9 +38,24 @@ class GeminiWrapper:
|
|
| 38 |
self.verbose = verbose
|
| 39 |
self.accumulated_cost = 0
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
|
|
|
| 43 |
raise ValueError("No API_KEY found. Please set the `GEMINI_API_KEY` or `GOOGLE_API_KEY` environment variable.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
genai.configure(api_key=api_key)
|
| 45 |
|
| 46 |
generation_config = {
|
|
|
|
| 38 |
self.verbose = verbose
|
| 39 |
self.accumulated_cost = 0
|
| 40 |
|
| 41 |
+
# Implement fallback mechanism for multiple API keys
|
| 42 |
+
gemini_key_env = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
|
| 43 |
+
if not gemini_key_env:
|
| 44 |
raise ValueError("No API_KEY found. Please set the `GEMINI_API_KEY` or `GOOGLE_API_KEY` environment variable.")
|
| 45 |
+
|
| 46 |
+
# Support comma-separated list of API keys with random selection
|
| 47 |
+
import random
|
| 48 |
+
if ',' in gemini_key_env:
|
| 49 |
+
# If GEMINI_API_KEY includes commas, treat it as a list of keys and choose a random one
|
| 50 |
+
keys = gemini_key_env.split(',')
|
| 51 |
+
keys = [key.strip() for key in keys if key.strip()] # Clean whitespace and filter empty keys
|
| 52 |
+
if not keys:
|
| 53 |
+
raise ValueError("No valid API keys found in GEMINI_API_KEY list.")
|
| 54 |
+
api_key = random.choice(keys)
|
| 55 |
+
else:
|
| 56 |
+
# Otherwise, treat it as a single key
|
| 57 |
+
api_key = gemini_key_env
|
| 58 |
+
|
| 59 |
genai.configure(api_key=api_key)
|
| 60 |
|
| 61 |
generation_config = {
|
mllm_tools/litellm.py
CHANGED
|
@@ -9,6 +9,7 @@ import mimetypes
|
|
| 9 |
import litellm
|
| 10 |
from litellm import completion, completion_cost
|
| 11 |
from dotenv import load_dotenv
|
|
|
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
|
|
@@ -38,6 +39,10 @@ class LiteLLMWrapper:
|
|
| 38 |
self.print_cost = print_cost
|
| 39 |
self.verbose = verbose
|
| 40 |
self.accumulated_cost = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
if self.verbose:
|
| 43 |
os.environ['LITELLM_LOG'] = 'DEBUG'
|
|
@@ -46,6 +51,30 @@ class LiteLLMWrapper:
|
|
| 46 |
if use_langfuse:
|
| 47 |
litellm.success_callback = ["langfuse"]
|
| 48 |
litellm.failure_callback = ["langfuse"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
def _encode_file(self, file_path: Union[str, Image.Image]) -> str:
|
| 51 |
"""
|
|
|
|
| 9 |
import litellm
|
| 10 |
from litellm import completion, completion_cost
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
+
import random
|
| 13 |
|
| 14 |
load_dotenv()
|
| 15 |
|
|
|
|
| 39 |
self.print_cost = print_cost
|
| 40 |
self.verbose = verbose
|
| 41 |
self.accumulated_cost = 0
|
| 42 |
+
|
| 43 |
+
# Handle Gemini API key fallback mechanism
|
| 44 |
+
if "gemini" in model_name.lower():
|
| 45 |
+
self._setup_gemini_api_key()
|
| 46 |
|
| 47 |
if self.verbose:
|
| 48 |
os.environ['LITELLM_LOG'] = 'DEBUG'
|
|
|
|
| 51 |
if use_langfuse:
|
| 52 |
litellm.success_callback = ["langfuse"]
|
| 53 |
litellm.failure_callback = ["langfuse"]
|
| 54 |
+
|
| 55 |
+
def _setup_gemini_api_key(self):
|
| 56 |
+
"""Setup Gemini API key with fallback mechanism for multiple keys."""
|
| 57 |
+
from dotenv import load_dotenv
|
| 58 |
+
load_dotenv(override=True)
|
| 59 |
+
|
| 60 |
+
gemini_key_env = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
|
| 61 |
+
if not gemini_key_env:
|
| 62 |
+
raise ValueError("No API_KEY found. Please set the `GEMINI_API_KEY` or `GOOGLE_API_KEY` environment variable.")
|
| 63 |
+
|
| 64 |
+
# Support comma-separated list of API keys with random selection
|
| 65 |
+
if ',' in gemini_key_env:
|
| 66 |
+
keys = [key.strip() for key in gemini_key_env.split(',') if key.strip()]
|
| 67 |
+
if not keys:
|
| 68 |
+
raise ValueError("No valid API keys found in GEMINI_API_KEY list.")
|
| 69 |
+
api_key = random.choice(keys)
|
| 70 |
+
print(f"Selected random Gemini API key from {len(keys)} available keys: {api_key[:20]}...")
|
| 71 |
+
else:
|
| 72 |
+
api_key = gemini_key_env
|
| 73 |
+
print(f"Using single Gemini API key: {api_key[:20]}...")
|
| 74 |
+
|
| 75 |
+
# Set the selected API key for LiteLLM
|
| 76 |
+
os.environ["GEMINI_API_KEY"] = api_key
|
| 77 |
+
os.environ["GOOGLE_API_KEY"] = api_key
|
| 78 |
|
| 79 |
def _encode_file(self, file_path: Union[str, Image.Image]) -> str:
|
| 80 |
"""
|
src/utils/allowed_models.json
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
"gemini/gemini-1.5-pro-002",
|
| 4 |
"gemini/gemini-1.5-flash-002",
|
| 5 |
"gemini/gemini-2.0-flash-001",
|
|
|
|
| 6 |
"vertex_ai/gemini-1.5-flash-002",
|
| 7 |
"vertex_ai/gemini-1.5-pro-002",
|
| 8 |
"vertex_ai/gemini-2.0-flash-001",
|
|
|
|
| 3 |
"gemini/gemini-1.5-pro-002",
|
| 4 |
"gemini/gemini-1.5-flash-002",
|
| 5 |
"gemini/gemini-2.0-flash-001",
|
| 6 |
+
"gemini/gemini-2.0-flash",
|
| 7 |
"vertex_ai/gemini-1.5-flash-002",
|
| 8 |
"vertex_ai/gemini-1.5-pro-002",
|
| 9 |
"vertex_ai/gemini-2.0-flash-001",
|
src/utils/kokoro_voiceover.py
CHANGED
|
@@ -2,116 +2,28 @@
|
|
| 2 |
Copyright (c) 2025 Xposed73
|
| 3 |
All rights reserved.
|
| 4 |
This file is part of the Manim Voiceover project.
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import hashlib
|
| 8 |
-
import json
|
| 9 |
-
import numpy as np
|
| 10 |
-
from pathlib import Path
|
| 11 |
-
from manim_voiceover.services.base import SpeechService
|
| 12 |
-
from kokoro_onnx import Kokoro
|
| 13 |
-
from manim_voiceover.helper import remove_bookmarks, wav2mp3
|
| 14 |
-
from scipy.io.wavfile import write as write_wav
|
| 15 |
-
from src.config.config import Config
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class KokoroService(SpeechService):
|
| 19 |
-
"""Speech service class for kokoro_self (using text_to_speech via Kokoro ONNX)."""
|
| 20 |
-
|
| 21 |
-
def __init__(self, engine=None,
|
| 22 |
-
model_path: str = Config.KOKORO_MODEL_PATH,
|
| 23 |
-
voices_path: str = Config.KOKORO_VOICES_PATH,
|
| 24 |
-
voice: str = Config.KOKORO_DEFAULT_VOICE,
|
| 25 |
-
speed: float = Config.KOKORO_DEFAULT_SPEED,
|
| 26 |
-
lang: str = Config.KOKORO_DEFAULT_LANG,
|
| 27 |
-
**kwargs):
|
| 28 |
-
self.kokoro = Kokoro(model_path, voices_path)
|
| 29 |
-
self.voice = voice
|
| 30 |
-
self.speed = speed
|
| 31 |
-
self.lang = lang
|
| 32 |
-
|
| 33 |
-
if engine is None:
|
| 34 |
-
engine = self.text_to_speech # Default to local function
|
| 35 |
-
|
| 36 |
-
self.engine = engine
|
| 37 |
-
super().__init__(**kwargs)
|
| 38 |
-
|
| 39 |
-
def get_data_hash(self, input_data: dict) -> str:
|
| 40 |
-
"""
|
| 41 |
-
Generates a hash based on the input data dictionary.
|
| 42 |
-
The hash is used to create a unique identifier for the input data.
|
| 43 |
-
|
| 44 |
-
Parameters:
|
| 45 |
-
input_data (dict): A dictionary of input data (e.g., text, voice, etc.).
|
| 46 |
-
|
| 47 |
-
Returns:
|
| 48 |
-
str: The generated hash as a string.
|
| 49 |
-
"""
|
| 50 |
-
# Convert the input data dictionary to a JSON string (sorted for consistency)
|
| 51 |
-
data_str = json.dumps(input_data, sort_keys=True)
|
| 52 |
-
# Generate a SHA-256 hash of the JSON string
|
| 53 |
-
return hashlib.sha256(data_str.encode('utf-8')).hexdigest()
|
| 54 |
-
|
| 55 |
-
def text_to_speech(self, text, output_file, voice_name, speed, lang):
|
| 56 |
-
"""
|
| 57 |
-
Generates speech from text using Kokoro ONNX and saves the audio file.
|
| 58 |
-
Normalizes the audio to make it audible.
|
| 59 |
-
"""
|
| 60 |
-
# Generate audio samples using Kokoro
|
| 61 |
-
samples, sample_rate = self.kokoro.create(
|
| 62 |
-
text, voice=voice_name, speed=speed, lang=lang
|
| 63 |
-
)
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
samples = samples / max_val
|
| 69 |
-
|
| 70 |
-
# Convert to 16-bit integer PCM format
|
| 71 |
-
samples = (samples * 32767).astype("int16")
|
| 72 |
-
|
| 73 |
-
# Save the normalized audio as a .wav file
|
| 74 |
-
write_wav(output_file, sample_rate, samples)
|
| 75 |
-
print(f"Saved at {output_file}")
|
| 76 |
-
|
| 77 |
-
return output_file
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
def generate_from_text(self, text: str, cache_dir: str = None, path: str = None) -> dict:
|
| 81 |
-
if cache_dir is None:
|
| 82 |
-
cache_dir = self.cache_dir
|
| 83 |
-
|
| 84 |
-
input_data = {"input_text": text, "service": "kokoro_self", "voice": self.voice, "lang": self.lang}
|
| 85 |
-
cached_result = self.get_cached_result(input_data, cache_dir)
|
| 86 |
-
if cached_result is not None:
|
| 87 |
-
return cached_result
|
| 88 |
-
|
| 89 |
-
if path is None:
|
| 90 |
-
audio_path = self.get_data_hash(input_data) + ".mp3"
|
| 91 |
-
else:
|
| 92 |
-
audio_path = path
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
)
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
mp3_audio_path = str(Path(cache_dir) / audio_path)
|
| 106 |
-
wav2mp3(audio_path_wav, mp3_audio_path)
|
| 107 |
-
|
| 108 |
-
# Remove original .wav file
|
| 109 |
-
remove_bookmarks(audio_path_wav)
|
| 110 |
-
|
| 111 |
-
json_dict = {
|
| 112 |
-
"input_text": text,
|
| 113 |
-
"input_data": input_data,
|
| 114 |
-
"original_audio": audio_path,
|
| 115 |
-
}
|
| 116 |
-
|
| 117 |
-
return json_dict
|
|
|
|
| 2 |
Copyright (c) 2025 Xposed73
|
| 3 |
All rights reserved.
|
| 4 |
This file is part of the Manim Voiceover project.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
DEPRECATED: This file is deprecated. Use ElevenLabsService from src.utils.elevenlabs_voiceover instead.
|
| 7 |
+
This wrapper is maintained for backward compatibility only.
|
| 8 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
import warnings
|
| 11 |
+
from src.utils.elevenlabs_voiceover import ElevenLabsService
|
| 12 |
+
|
| 13 |
+
class KokoroService(ElevenLabsService):
|
| 14 |
+
"""
|
| 15 |
+
DEPRECATED: Backward compatibility wrapper for ElevenLabsService.
|
| 16 |
+
|
| 17 |
+
This class now redirects to ElevenLabsService. Please update your code to use:
|
| 18 |
+
from src.utils.elevenlabs_voiceover import ElevenLabsService
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
def __init__(self, **kwargs):
|
| 22 |
+
warnings.warn(
|
| 23 |
+
"KokoroService is deprecated. Please use ElevenLabsService from "
|
| 24 |
+
"src.utils.elevenlabs_voiceover instead.",
|
| 25 |
+
DeprecationWarning,
|
| 26 |
+
stacklevel=2
|
| 27 |
)
|
| 28 |
+
# Pass all arguments to ElevenLabsService
|
| 29 |
+
super().__init__(**kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
task_generator/prompts_raw/__init__.py
CHANGED
|
@@ -1692,7 +1692,7 @@ Scene Technical Implementation:
|
|
| 1692 |
|
| 1693 |
1. **Scene Class:** Class name `Scene{scene_number}`, where `{scene_number}` is replaced by the scene number (e.g., `Scene1`, `Scene2`). The scene class should at least inherit from `VoiceoverScene`. However, you can add more Manim Scene classes on top of VoiceoverScene for multiple inheritance if needed.
|
| 1694 |
2. **Imports:** Include ALL necessary imports explicitly at the top of the file, based on used Manim classes, functions, colors, and constants. Do not rely on implicit imports. Double-check for required modules, classes, functions, colors, and constants, *ensuring all imports are valid and consistent with the Manim Documentation*. **Include imports for any used Manim plugins.**
|
| 1695 |
-
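3. **Speech Service:** Initialize `KokoroService()`. You MUST import like this: `from src.utils.kokoro_voiceover import KokoroService` as this is our custom voiceover service.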
|
| 1696 |
4. **Reusable Animations:** Implement functions for each animation sequence to create modular and reusable code. Structure code into well-defined functions, following function definition patterns from Manim Documentation.
|
| 1697 |
5. **Voiceover:** Use `with self.voiceover(text="...")` for speech synchronization, precisely matching the narration script and animation timings from the Animation and Narration Plan.
|
| 1698 |
6. **Comments:** Add clear and concise comments for complex animations, spatial logic (positioning, arrangements), and object lifecycle management. *Use comments extensively to explain code logic, especially for spatial positioning, animation sequences, and constraint enforcement, mirroring commenting style in Manim Documentation*. **Add comments to explain the purpose and usage of any Manim plugins.**
|
|
@@ -1726,7 +1726,7 @@ Scene Technical Implementation:
|
|
| 1726 |
* **Reusable Object Creation Functions:** Define reusable functions within helper classes for creating specific Manim objects (e.g., `create_axes`, `create_formula_tex`, `create_explanation_text`).
|
| 1727 |
* **Clear Comments and Variable Names:** Use clear, concise comments to explain code sections and logic. Employ descriptive variable names (e.g., `linear_function_formula`, `logistic_plot`) for better readability.
|
| 1728 |
* **Text Elements:** Create text elements using `Tex` or `MathTex` for formulas and explanations, styling them with `color` and `font_size` as needed.
|
| 1729 |
-
* **Manim Best Practices:** Follow Manim best practices, including using `VoiceoverScene`, `KokoroService`, common Manim objects, animations, relative positioning, and predefined colors.
|
| 1730 |
|
| 1731 |
You MUST generate the Python code in the following format (from <CODE> to </CODE>):
|
| 1732 |
<CODE>
|
|
@@ -1734,7 +1734,7 @@ You MUST generate the Python code in the following format (from <CODE> to </CODE
|
|
| 1734 |
from manim import *
|
| 1735 |
from manim import config as global_config
|
| 1736 |
from manim_voiceover import VoiceoverScene
|
| 1737 |
-
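from src.utils.kokoro_voiceover import KokoroService # You MUST import like this as this is our custom voiceover service.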
|
| 1738 |
|
| 1739 |
# plugins imports, don't change the import statements
|
| 1740 |
from manim_circuit import *
|
|
@@ -1790,7 +1790,7 @@ class Scene{scene_number}(VoiceoverScene, MovingCameraScene): # Note: You can a
|
|
| 1790 |
# Reminder: This scene class is fully self-contained. There is no dependency on the implementation from previous or subsequent scenes.
|
| 1791 |
def construct(self):
|
| 1792 |
# Initialize speech service
|
| 1793 |
-
self.set_speech_service(KokoroService())
|
| 1794 |
|
| 1795 |
# Instantiate helper class (as per plan)
|
| 1796 |
helper = Scene{scene_number}_Helper(self) # Example: helper = Scene1_Helper(self)
|
|
|
|
| 1692 |
|
| 1693 |
1. **Scene Class:** Class name `Scene{scene_number}`, where `{scene_number}` is replaced by the scene number (e.g., `Scene1`, `Scene2`). The scene class should at least inherit from `VoiceoverScene`. However, you can add more Manim Scene classes on top of VoiceoverScene for multiple inheritance if needed.
|
| 1694 |
2. **Imports:** Include ALL necessary imports explicitly at the top of the file, based on used Manim classes, functions, colors, and constants. Do not rely on implicit imports. Double-check for required modules, classes, functions, colors, and constants, *ensuring all imports are valid and consistent with the Manim Documentation*. **Include imports for any used Manim plugins.**
|
| 1695 |
+
3. **Speech Service:** Initialize `ElevenLabsService()`. You MUST import like this: `from src.utils.elevenlabs_voiceover import ElevenLabsService` as this is our custom voiceover service.
|
| 1696 |
4. **Reusable Animations:** Implement functions for each animation sequence to create modular and reusable code. Structure code into well-defined functions, following function definition patterns from Manim Documentation.
|
| 1697 |
5. **Voiceover:** Use `with self.voiceover(text="...")` for speech synchronization, precisely matching the narration script and animation timings from the Animation and Narration Plan.
|
| 1698 |
6. **Comments:** Add clear and concise comments for complex animations, spatial logic (positioning, arrangements), and object lifecycle management. *Use comments extensively to explain code logic, especially for spatial positioning, animation sequences, and constraint enforcement, mirroring commenting style in Manim Documentation*. **Add comments to explain the purpose and usage of any Manim plugins.**
|
|
|
|
| 1726 |
* **Reusable Object Creation Functions:** Define reusable functions within helper classes for creating specific Manim objects (e.g., `create_axes`, `create_formula_tex`, `create_explanation_text`).
|
| 1727 |
* **Clear Comments and Variable Names:** Use clear, concise comments to explain code sections and logic. Employ descriptive variable names (e.g., `linear_function_formula`, `logistic_plot`) for better readability.
|
| 1728 |
* **Text Elements:** Create text elements using `Tex` or `MathTex` for formulas and explanations, styling them with `color` and `font_size` as needed.
|
| 1729 |
+
* **Manim Best Practices:** Follow Manim best practices, including using `VoiceoverScene`, `ElevenLabsService`, common Manim objects, animations, relative positioning, and predefined colors.
|
| 1730 |
|
| 1731 |
You MUST generate the Python code in the following format (from <CODE> to </CODE>):
|
| 1732 |
<CODE>
|
|
|
|
| 1734 |
from manim import *
|
| 1735 |
from manim import config as global_config
|
| 1736 |
from manim_voiceover import VoiceoverScene
|
| 1737 |
+
from src.utils.elevenlabs_voiceover import ElevenLabsService # You MUST import like this as this is our custom voiceover service.
|
| 1738 |
|
| 1739 |
# plugins imports, don't change the import statements
|
| 1740 |
from manim_circuit import *
|
|
|
|
| 1790 |
# Reminder: This scene class is fully self-contained. There is no dependency on the implementation from previous or subsequent scenes.
|
| 1791 |
def construct(self):
|
| 1792 |
# Initialize speech service
|
| 1793 |
+
self.set_speech_service(ElevenLabsService())
|
| 1794 |
|
| 1795 |
# Instantiate helper class (as per plan)
|
| 1796 |
helper = Scene{scene_number}_Helper(self) # Example: helper = Scene1_Helper(self)
|