Commit db8251f · committed by Pravin Barapatre
Pin dependencies for Hugging Face Spaces compatibility and remove submodule issue
- .gitignore +79 -0
- .gradio/certificate.pem +31 -0
- README.md +282 -0
- app.py +651 -0
- demo.py +57 -0
- requirements.txt +16 -0
- simple_generator.py +134 -0
- test_app.py +479 -0
- text-to-video-generator/.gitattributes +35 -0
- text-to-video-generator/README.md +12 -0
- text-to-video-generator/app.py +651 -0
- text-to-video-generator/requirements.txt +16 -0
- text_to_video.py +289 -0
.gitignore
ADDED
@@ -0,0 +1,79 @@
# Generated videos
*.mp4
*.avi
*.mov
*.mkv
*.webm

# Model caches
.cache/
models/
checkpoints/

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual environments
venv/
env/
ENV/
env.bak/
venv.bak/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Logs
*.log
logs/

# Temporary files
tmp/
temp/
*.tmp

# Hugging Face cache
.huggingface/

# Jupyter Notebook
.ipynb_checkpoints

# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
-----BEGIN CERTIFICATE-----
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
-----END CERTIFICATE-----
README.md
ADDED
@@ -0,0 +1,282 @@
# Text-to-Video Generation with Hugging Face Models

A powerful text-to-video generation application using state-of-the-art AI models from Hugging Face. Generate high-quality videos from text descriptions with an intuitive web interface or command-line tool.

## Features

- **Multiple Models**: Support for various text-to-video models including DAMO, Zeroscope, and Stable Video Diffusion
- **Web Interface**: Gradio-based web UI for easy interaction
- **Command Line**: Simple command-line interface for automation and scripting
- **GPU Optimization**: Automatic GPU detection and memory optimization
- **Customizable Parameters**: Control video length, quality, and generation parameters
- **Reproducible Results**: Seed-based generation for consistent outputs

## Supported Models

| Model | Description | Max Frames | FPS | Quality |
|-------|-------------|------------|-----|---------|
| `damo-vilab/text-to-video-ms-1.7b` | Fast and efficient text-to-video model | 16 | 8 | Good |
| `cerspense/zeroscope_v2_XL` | High-quality text-to-video model | 24 | 6 | Excellent |
| `stabilityai/stable-video-diffusion-img2vid-xt` | Image-to-video model (requires an initial image) | 25 | 6 | Excellent |

## Installation

### Prerequisites

- Python 3.8 or higher
- CUDA-compatible GPU (recommended for faster generation)
- At least 8GB RAM (16GB+ recommended)

### Setup

1. **Clone or download this repository**

2. **Install dependencies**:
   ```bash
   pip install -r requirements.txt
   ```

3. **Verify installation** (a follow-up check of the pinned versions is shown below):
   ```bash
   python -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')"
   ```

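If you want to confirm that the versions pinned in `requirements.txt` resolved correctly (those pins are the point of this commit), a quick follow-up check:

```python
# Print the installed versions of the core pinned dependencies
import torch, diffusers, transformers, gradio

print(torch.__version__)         # expected 2.2.2
print(diffusers.__version__)     # expected 0.27.2
print(transformers.__version__)  # expected 4.39.3
print(gradio.__version__)        # expected 4.25.0
```
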
## Usage

### Web Interface (Recommended)

Launch the interactive web interface:

```bash
python text_to_video.py
```

The interface will be available at `http://localhost:7860` and will also provide a public shareable link.

**Features of the web interface:**
- Intuitive parameter controls
- Real-time model information
- Example prompts to get started
- Live video preview
- Easy parameter adjustment

### Command Line Interface

For automation or scripting, use the command-line interface:

```bash
python simple_generator.py "A beautiful sunset over the ocean"
```

**Command-line options:**
```bash
python simple_generator.py --help
```

**Example commands:**

Basic generation:
```bash
python simple_generator.py "A cat playing with a ball of yarn"
```

Advanced generation with custom parameters:
```bash
python simple_generator.py "A futuristic city with flying cars" \
    --model cerspense/zeroscope_v2_XL \
    --frames 24 \
    --fps 6 \
    --steps 30 \
    --guidance 8.0 \
    --seed 42 \
    --output my_video.mp4
```

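The CLI is a thin wrapper around `generate_video_from_text()` in `simple_generator.py`, so the same parameters can also be driven from Python; a minimal sketch mirroring what `demo.py` in this commit does:

```python
from simple_generator import generate_video_from_text

# Generate a short clip with a fixed seed so the result is reproducible
output_path = generate_video_from_text(
    prompt="A cat playing with a ball of yarn",
    model_id="damo-vilab/text-to-video-ms-1.7b",  # fast default model
    num_frames=16,
    fps=8,
    num_inference_steps=25,
    guidance_scale=7.5,
    seed=42,
    output_path="cat_video.mp4",
)
print(f"Saved video to {output_path}")
```
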
## Free Hosting Options

### 1. Hugging Face Spaces (Recommended)

**Pros:**
- Completely free
- Optimized for AI applications
- Automatic GPU allocation
- Easy deployment
- Built-in model caching

**Deployment Steps:**

1. **Create a Hugging Face account** at https://huggingface.co

2. **Create a new Space:**
   - Go to https://huggingface.co/spaces
   - Click "Create new Space"
   - Choose "Gradio" as the SDK
   - Select "CPU" or "GPU" (GPU requires verification)

3. **Upload your files:**
   - Upload `app.py` (already created for you)
   - Upload `requirements.txt`
   - Upload `README.md`

4. **Your app will be live** at: `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`

### 2. Streamlit Cloud

**Pros:**
- Free tier available
- Easy deployment
- Good for data science apps

**Deployment:**
- Push the repository to GitHub and create the app from https://share.streamlit.io (Streamlit Community Cloud)
- Note: Streamlit Cloud expects a Streamlit entry point; the included `app.py` uses Gradio, so a small Streamlit wrapper would be needed

### 3. Railway

**Pros:**
- Free tier with $5 credit
- Easy deployment
- Good performance

**Deployment:**
```bash
npm install -g @railway/cli
railway login
railway init
railway up
```

### 4. Render

**Pros:**
- Free tier available
- Easy deployment
- Good documentation

**Deployment:**
- Connect your GitHub repository
- Choose "Web Service"
- Set build command and start command

### 5. Google Colab (For Testing)

**Pros:**
- Free GPU access
- Good for testing
- Jupyter notebook interface

**Usage:**
```python
!pip install gradio diffusers transformers
!git clone https://github.com/your-repo/text-to-video
%cd text-to-video
!python app.py
```

## Parameters Explained

- **Text Prompt**: The description of the video you want to generate
- **Model**: Choose from available Hugging Face models
- **Number of Frames**: Controls video length (more frames = longer video)
- **FPS**: Frames per second (affects playback speed)
- **Inference Steps**: Number of denoising steps (more steps = better quality but slower)
- **Guidance Scale**: How closely to follow the prompt (higher = more adherence)
- **Seed**: Random seed for reproducible results

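Frames and FPS together determine clip length: duration in seconds is simply the number of frames divided by the FPS, so the DAMO defaults (16 frames at 8 FPS) give a ~2-second clip and the Zeroscope defaults (24 frames at 6 FPS) give ~4 seconds. A quick check:

```python
def clip_duration_seconds(num_frames: int, fps: int) -> float:
    """Length of the exported clip in seconds: frames divided by frames-per-second."""
    return num_frames / fps

print(clip_duration_seconds(16, 8))  # 2.0 (DAMO defaults)
print(clip_duration_seconds(24, 6))  # 4.0 (Zeroscope defaults)
```
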
## Performance Tips

### For Faster Generation:
- Use fewer inference steps (10-20)
- Use the DAMO model for speed
- Reduce the number of frames
- Use a GPU if available

### For Better Quality:
- Increase inference steps (30-50)
- Use Zeroscope or Stable Video Diffusion models
- Increase guidance scale (8-12)
- Use more frames for longer videos

### Memory Optimization:
- The application automatically enables memory optimizations on GPU
- For limited GPU memory, use fewer frames and steps
- Consider using CPU if GPU memory is insufficient

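Concretely, the GPU optimizations mentioned above are the standard Diffusers ones that `app.py` and `simple_generator.py` in this commit enable at load time: half-precision weights, model CPU offload, and VAE slicing. A condensed sketch of that load path:

```python
import torch
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

pipeline = DiffusionPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,  # fp16 halves VRAM use on GPU
    variant="fp16" if device == "cuda" else None,
)
pipeline = pipeline.to(device)

if device == "cuda":
    pipeline.enable_model_cpu_offload()  # keep idle sub-models in CPU RAM
    pipeline.enable_vae_slicing()        # decode frames in slices to cap peak memory
```
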
## Troubleshooting

### Common Issues

1. **CUDA Out of Memory**:
   - Reduce the number of frames or inference steps
   - Use CPU instead of GPU (see the snippet after this list)
   - Close other GPU-intensive applications

2. **Model Loading Errors**:
   - Check your internet connection
   - Ensure sufficient disk space for model downloads
   - Try a different model

3. **Slow Generation**:
   - Use a GPU if available
   - Reduce inference steps
   - Use the DAMO model for speed

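For the out-of-memory case above, you can force CPU execution without editing the code by hiding the GPU from PyTorch before it is imported. A hypothetical launcher script (the `run_cpu_only.py` name is just for illustration):

```python
# run_cpu_only.py - hide the GPU so torch.cuda.is_available() returns False
import os

os.environ["CUDA_VISIBLE_DEVICES"] = ""  # must be set before torch is imported

import app  # app.py then selects "cpu" and launches the Gradio interface
```
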
### System Requirements

- **Minimum**: 8GB RAM, CPU-only
- **Recommended**: 16GB+ RAM, CUDA-compatible GPU with 8GB+ VRAM
- **Optimal**: 32GB+ RAM, RTX 3080/4080 or better

## Model Information

### DAMO Text-to-Video MS-1.7B
- **Best for**: Fast prototyping and quick results
- **Speed**: Very fast
- **Quality**: Good
- **Use case**: Quick demos, iterative testing

### Zeroscope v2 XL
- **Best for**: High-quality production videos
- **Speed**: Medium
- **Quality**: Excellent
- **Use case**: Final outputs, professional content

### Stable Video Diffusion XT
- **Best for**: Image-to-video generation
- **Speed**: Medium
- **Quality**: Excellent
- **Use case**: Animating static images

## Examples

Try these example prompts to get started:

- "A beautiful sunset over the ocean with waves crashing on the shore"
- "A cat playing with a ball of yarn in a cozy living room"
- "A futuristic city with flying cars and neon lights"
- "A butterfly emerging from a cocoon in a garden"
- "A rocket launching into space with fire and smoke"
- "A dancer performing ballet in a grand theater"
- "A robot walking through a snowy forest"
- "A flower blooming in time-lapse"

## Contributing

Feel free to contribute by:
- Adding new models
- Improving the interface
- Optimizing performance
- Adding new features

## License

This project uses open-source models and libraries. Please check the individual model licenses on Hugging Face for commercial usage restrictions.

## Resources

- [Hugging Face Diffusers Documentation](https://huggingface.co/docs/diffusers/index)
- [Text-to-Video Models on Hugging Face](https://huggingface.co/models?pipeline_tag=text-to-video)
- [PyTorch Documentation](https://pytorch.org/docs/)
- [Gradio Documentation](https://gradio.app/docs/)
app.py
ADDED
@@ -0,0 +1,651 @@
1 |
+
import torch
|
2 |
+
import gradio as gr
|
3 |
+
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
|
4 |
+
from diffusers.utils import export_to_video
|
5 |
+
import numpy as np
|
6 |
+
import os
|
7 |
+
import logging
|
8 |
+
from gtts import gTTS
|
9 |
+
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
|
10 |
+
import tempfile
|
11 |
+
|
12 |
+
# Set up logging
|
13 |
+
logging.basicConfig(level=logging.INFO)
|
14 |
+
logger = logging.getLogger(__name__)
|
15 |
+
|
16 |
+
class TextToVideoGenerator:
|
17 |
+
def __init__(self):
|
18 |
+
self.pipeline = None
|
19 |
+
self.current_model = None
|
20 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
21 |
+
logger.info(f"Using device: {self.device}")
|
22 |
+
|
23 |
+
# Available models - including the advanced Wan2.1 model
|
24 |
+
self.models = {
|
25 |
+
"damo-vilab/text-to-video-ms-1.7b": {
|
26 |
+
"name": "DAMO Text-to-Video MS-1.7B",
|
27 |
+
"description": "Fast and efficient text-to-video model",
|
28 |
+
"max_frames": 16,
|
29 |
+
"fps": 8,
|
30 |
+
"quality": "Good",
|
31 |
+
"speed": "Fast"
|
32 |
+
},
|
33 |
+
"cerspense/zeroscope_v2_XL": {
|
34 |
+
"name": "Zeroscope v2 XL",
|
35 |
+
"description": "High-quality text-to-video model",
|
36 |
+
"max_frames": 24,
|
37 |
+
"fps": 6,
|
38 |
+
"quality": "Excellent",
|
39 |
+
"speed": "Medium"
|
40 |
+
},
|
41 |
+
"Wan-AI/Wan2.1-T2V-14B": {
|
42 |
+
"name": "Wan2.1-T2V-14B (SOTA)",
|
43 |
+
"description": "State-of-the-art text-to-video model with 14B parameters",
|
44 |
+
"max_frames": 32,
|
45 |
+
"fps": 8,
|
46 |
+
"quality": "SOTA",
|
47 |
+
"speed": "Medium",
|
48 |
+
"resolutions": ["480P", "720P"],
|
49 |
+
"features": ["Chinese & English text", "High motion dynamics", "Best quality"]
|
50 |
+
}
|
51 |
+
}
|
52 |
+
|
53 |
+
# Voice options (gTTS only supports language, not gender/age)
|
54 |
+
self.voices = {
|
55 |
+
"Default (English)": "en"
|
56 |
+
}
|
57 |
+
|
58 |
+
def generate_audio(self, text, voice_type):
|
59 |
+
"""Generate audio from text using gTTS"""
|
60 |
+
try:
|
61 |
+
lang = self.voices[voice_type]
|
62 |
+
tts = gTTS(text=text, lang=lang)
|
63 |
+
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
|
64 |
+
audio_path = temp_audio.name
|
65 |
+
tts.save(audio_path)
|
66 |
+
logger.info(f"Audio generated successfully: {audio_path}")
|
67 |
+
return audio_path
|
68 |
+
except Exception as e:
|
69 |
+
logger.error(f"Error generating audio: {str(e)}")
|
70 |
+
return None
|
71 |
+
|
72 |
+
def merge_audio_video(self, video_path, audio_path, output_path):
|
73 |
+
"""Merge audio and video using moviepy"""
|
74 |
+
try:
|
75 |
+
# Load video and audio
|
76 |
+
video_clip = VideoFileClip(video_path)
|
77 |
+
audio_clip = AudioFileClip(audio_path)
|
78 |
+
|
79 |
+
# Ensure audio duration matches video duration
|
80 |
+
if audio_clip.duration > video_clip.duration:
|
81 |
+
audio_clip = audio_clip.subclip(0, video_clip.duration)
|
82 |
+
elif audio_clip.duration < video_clip.duration:
|
83 |
+
# Loop audio if it's shorter than video
|
84 |
+
loops_needed = int(video_clip.duration / audio_clip.duration) + 1
|
85 |
+
audio_clip = CompositeAudioClip([audio_clip] * loops_needed).subclip(0, video_clip.duration)
|
86 |
+
|
87 |
+
# Merge audio and video
|
88 |
+
final_clip = video_clip.set_audio(audio_clip)
|
89 |
+
|
90 |
+
# Write final video with audio
|
91 |
+
final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
|
92 |
+
|
93 |
+
# Clean up
|
94 |
+
video_clip.close()
|
95 |
+
audio_clip.close()
|
96 |
+
final_clip.close()
|
97 |
+
|
98 |
+
logger.info(f"Audio and video merged successfully: {output_path}")
|
99 |
+
return output_path
|
100 |
+
|
101 |
+
except Exception as e:
|
102 |
+
logger.error(f"Error merging audio and video: {str(e)}")
|
103 |
+
return None
|
104 |
+
|
105 |
+
def load_model(self, model_id):
|
106 |
+
"""Load the specified model"""
|
107 |
+
if self.current_model == model_id and self.pipeline is not None:
|
108 |
+
return f"Model {self.models[model_id]['name']} is already loaded"
|
109 |
+
|
110 |
+
try:
|
111 |
+
logger.info(f"Loading model: {model_id}")
|
112 |
+
|
113 |
+
# Clear GPU memory if needed
|
114 |
+
if torch.cuda.is_available():
|
115 |
+
torch.cuda.empty_cache()
|
116 |
+
|
117 |
+
# Special handling for Wan2.1 model
|
118 |
+
if model_id == "Wan-AI/Wan2.1-T2V-14B":
|
119 |
+
# Wan2.1 requires specific configuration
|
120 |
+
self.pipeline = DiffusionPipeline.from_pretrained(
|
121 |
+
model_id,
|
122 |
+
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
|
123 |
+
variant="fp16" if self.device == "cuda" else None,
|
124 |
+
use_safetensors=True
|
125 |
+
)
|
126 |
+
else:
|
127 |
+
# Standard loading for other models
|
128 |
+
self.pipeline = DiffusionPipeline.from_pretrained(
|
129 |
+
model_id,
|
130 |
+
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
|
131 |
+
variant="fp16" if self.device == "cuda" else None
|
132 |
+
)
|
133 |
+
|
134 |
+
# Move to device
|
135 |
+
self.pipeline = self.pipeline.to(self.device)
|
136 |
+
|
137 |
+
# Optimize scheduler for faster inference
|
138 |
+
if hasattr(self.pipeline, 'scheduler'):
|
139 |
+
self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
|
140 |
+
self.pipeline.scheduler.config
|
141 |
+
)
|
142 |
+
|
143 |
+
# Enable memory efficient attention if available
|
144 |
+
if self.device == "cuda":
|
145 |
+
self.pipeline.enable_model_cpu_offload()
|
146 |
+
self.pipeline.enable_vae_slicing()
|
147 |
+
|
148 |
+
self.current_model = model_id
|
149 |
+
logger.info(f"Successfully loaded model: {model_id}")
|
150 |
+
return f"Successfully loaded {self.models[model_id]['name']}"
|
151 |
+
|
152 |
+
except Exception as e:
|
153 |
+
logger.error(f"Error loading model: {str(e)}")
|
154 |
+
return f"Error loading model: {str(e)}"
|
155 |
+
|
156 |
+
def generate_video(self, prompt, model_id, num_frames=16, fps=8, num_inference_steps=25, guidance_scale=7.5, seed=None, resolution="480P", voice_script="", voice_type="Default (English)", add_voice=True):
|
157 |
+
"""Generate video from text prompt with optional voice"""
|
158 |
+
try:
|
159 |
+
# Use prompt as voice script if voice_script is empty
|
160 |
+
if not voice_script.strip() and add_voice:
|
161 |
+
voice_script = prompt
|
162 |
+
|
163 |
+
# Load model if not already loaded
|
164 |
+
if self.current_model != model_id:
|
165 |
+
load_result = self.load_model(model_id)
|
166 |
+
if "Error" in load_result:
|
167 |
+
return None, load_result
|
168 |
+
|
169 |
+
# Set seed for reproducibility
|
170 |
+
if seed is not None:
|
171 |
+
torch.manual_seed(seed)
|
172 |
+
if torch.cuda.is_available():
|
173 |
+
torch.cuda.manual_seed(seed)
|
174 |
+
|
175 |
+
# Get model config
|
176 |
+
model_config = self.models[model_id]
|
177 |
+
num_frames = min(num_frames, model_config["max_frames"])
|
178 |
+
fps = model_config["fps"]
|
179 |
+
|
180 |
+
# Special handling for Wan2.1 model
|
181 |
+
if model_id == "Wan-AI/Wan2.1-T2V-14B":
|
182 |
+
# Wan2.1 specific parameters
|
183 |
+
if resolution == "720P":
|
184 |
+
width, height = 1280, 720
|
185 |
+
else: # 480P
|
186 |
+
width, height = 832, 480
|
187 |
+
|
188 |
+
logger.info(f"Generating Wan2.1 video with prompt: {prompt}")
|
189 |
+
logger.info(f"Parameters: frames={num_frames}, fps={fps}, steps={num_inference_steps}, resolution={resolution}")
|
190 |
+
|
191 |
+
# Generate video with Wan2.1 specific settings
|
192 |
+
result = self.pipeline(
|
193 |
+
prompt,
|
194 |
+
num_inference_steps=num_inference_steps,
|
195 |
+
guidance_scale=guidance_scale,
|
196 |
+
num_frames=num_frames,
|
197 |
+
width=width,
|
198 |
+
height=height
|
199 |
+
)
|
200 |
+
video_frames = result['frames'] if isinstance(result, dict) else result.frames
|
201 |
+
else:
|
202 |
+
# Standard generation for other models
|
203 |
+
logger.info(f"Generating video with prompt: {prompt}")
|
204 |
+
logger.info(f"Parameters: frames={num_frames}, fps={fps}, steps={num_inference_steps}")
|
205 |
+
|
206 |
+
result = self.pipeline(
|
207 |
+
prompt,
|
208 |
+
num_inference_steps=num_inference_steps,
|
209 |
+
guidance_scale=guidance_scale,
|
210 |
+
num_frames=num_frames
|
211 |
+
)
|
212 |
+
video_frames = result['frames'] if isinstance(result, dict) else result.frames
|
213 |
+
|
214 |
+
# Convert to numpy array
|
215 |
+
video_frames = np.array(video_frames)
|
216 |
+
|
217 |
+
# Save video
|
218 |
+
output_path = f"generated_video_{seed if seed else 'random'}.mp4"
|
219 |
+
export_to_video(video_frames, output_path, fps=fps)
|
220 |
+
|
221 |
+
logger.info(f"Video saved to: {output_path}")
|
222 |
+
|
223 |
+
# Add voice if requested
|
224 |
+
if add_voice and voice_script.strip():
|
225 |
+
logger.info(f"Generating voice for script: {voice_script}")
|
226 |
+
|
227 |
+
# Generate audio
|
228 |
+
audio_path = self.generate_audio(voice_script, voice_type)
|
229 |
+
|
230 |
+
if audio_path:
|
231 |
+
# Create final output path with voice
|
232 |
+
final_output_path = f"generated_video_with_voice_{seed if seed else 'random'}.mp4"
|
233 |
+
|
234 |
+
# Merge audio and video
|
235 |
+
final_path = self.merge_audio_video(output_path, audio_path, final_output_path)
|
236 |
+
|
237 |
+
# Clean up temporary files
|
238 |
+
try:
|
239 |
+
os.unlink(audio_path)
|
240 |
+
os.unlink(output_path)
|
241 |
+
except:
|
242 |
+
pass
|
243 |
+
|
244 |
+
if final_path:
|
245 |
+
return final_path, f"Video with voice generated successfully! Saved as {final_path}"
|
246 |
+
else:
|
247 |
+
return output_path, f"Video generated but voice merging failed. Saved as {output_path}"
|
248 |
+
else:
|
249 |
+
return output_path, f"Video generated but voice generation failed. Saved as {output_path}"
|
250 |
+
else:
|
251 |
+
return output_path, f"Video generated successfully! Saved as {output_path}"
|
252 |
+
|
253 |
+
except Exception as e:
|
254 |
+
logger.error(f"Error generating video: {str(e)}")
|
255 |
+
return None, f"Error generating video: {str(e)}"
|
256 |
+
|
257 |
+
def get_available_models(self):
|
258 |
+
"""Get list of available models"""
|
259 |
+
return list(self.models.keys())
|
260 |
+
|
261 |
+
def get_model_info(self, model_id):
|
262 |
+
"""Get information about a specific model"""
|
263 |
+
if model_id in self.models:
|
264 |
+
return self.models[model_id]
|
265 |
+
return None
|
266 |
+
|
267 |
+
def get_available_voices(self):
|
268 |
+
"""Get list of available voices"""
|
269 |
+
return list(self.voices.keys())
|
270 |
+
|
271 |
+
# Initialize the generator
|
272 |
+
generator = TextToVideoGenerator()
|
273 |
+
|
274 |
+
def create_interface():
|
275 |
+
"""Create Gradio interface"""
|
276 |
+
|
277 |
+
def generate_video_interface(prompt, model_id, num_frames, fps, num_inference_steps, guidance_scale, seed, resolution, voice_script, voice_type, add_voice):
|
278 |
+
if not prompt.strip():
|
279 |
+
return None, "Please enter a prompt"
|
280 |
+
|
281 |
+
return generator.generate_video(
|
282 |
+
prompt=prompt,
|
283 |
+
model_id=model_id,
|
284 |
+
num_frames=num_frames,
|
285 |
+
fps=fps,
|
286 |
+
num_inference_steps=num_inference_steps,
|
287 |
+
guidance_scale=guidance_scale,
|
288 |
+
seed=seed,
|
289 |
+
resolution=resolution,
|
290 |
+
voice_script=voice_script,
|
291 |
+
voice_type=voice_type,
|
292 |
+
add_voice=add_voice
|
293 |
+
)
|
294 |
+
|
295 |
+
# Custom CSS for professional styling
|
296 |
+
custom_css = """
|
297 |
+
.gradio-container {
|
298 |
+
max-width: 1200px !important;
|
299 |
+
margin: 0 auto !important;
|
300 |
+
}
|
301 |
+
|
302 |
+
.header {
|
303 |
+
text-align: center;
|
304 |
+
padding: 2rem 0;
|
305 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
306 |
+
color: white;
|
307 |
+
border-radius: 15px;
|
308 |
+
margin-bottom: 2rem;
|
309 |
+
}
|
310 |
+
|
311 |
+
.header h1 {
|
312 |
+
font-size: 2.5rem;
|
313 |
+
font-weight: 700;
|
314 |
+
margin: 0;
|
315 |
+
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
|
316 |
+
}
|
317 |
+
|
318 |
+
.header p {
|
319 |
+
font-size: 1.1rem;
|
320 |
+
margin: 0.5rem 0 0 0;
|
321 |
+
opacity: 0.9;
|
322 |
+
}
|
323 |
+
|
324 |
+
.feature-card {
|
325 |
+
background: white;
|
326 |
+
border-radius: 10px;
|
327 |
+
padding: 1.5rem;
|
328 |
+
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
|
329 |
+
margin-bottom: 1rem;
|
330 |
+
border-left: 4px solid #667eea;
|
331 |
+
}
|
332 |
+
|
333 |
+
.feature-card h3 {
|
334 |
+
color: #333;
|
335 |
+
margin: 0 0 0.5rem 0;
|
336 |
+
font-size: 1.2rem;
|
337 |
+
}
|
338 |
+
|
339 |
+
.feature-card p {
|
340 |
+
color: #666;
|
341 |
+
margin: 0;
|
342 |
+
font-size: 0.9rem;
|
343 |
+
}
|
344 |
+
|
345 |
+
.model-info {
|
346 |
+
background: #f8f9fa;
|
347 |
+
border-radius: 8px;
|
348 |
+
padding: 1rem;
|
349 |
+
border: 1px solid #e9ecef;
|
350 |
+
}
|
351 |
+
|
352 |
+
.model-info h4 {
|
353 |
+
color: #495057;
|
354 |
+
margin: 0 0 0.5rem 0;
|
355 |
+
font-size: 1rem;
|
356 |
+
}
|
357 |
+
|
358 |
+
.model-info p {
|
359 |
+
color: #6c757d;
|
360 |
+
margin: 0.25rem 0;
|
361 |
+
font-size: 0.85rem;
|
362 |
+
}
|
363 |
+
|
364 |
+
.generate-btn {
|
365 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
366 |
+
border: none !important;
|
367 |
+
color: white !important;
|
368 |
+
font-weight: 600 !important;
|
369 |
+
padding: 1rem 2rem !important;
|
370 |
+
border-radius: 10px !important;
|
371 |
+
font-size: 1.1rem !important;
|
372 |
+
transition: all 0.3s ease !important;
|
373 |
+
}
|
374 |
+
|
375 |
+
.generate-btn:hover {
|
376 |
+
transform: translateY(-2px) !important;
|
377 |
+
box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
|
378 |
+
}
|
379 |
+
|
380 |
+
.example-card {
|
381 |
+
background: #f8f9fa;
|
382 |
+
border-radius: 8px;
|
383 |
+
padding: 1rem;
|
384 |
+
margin: 0.5rem 0;
|
385 |
+
border: 1px solid #e9ecef;
|
386 |
+
cursor: pointer;
|
387 |
+
transition: all 0.2s ease;
|
388 |
+
}
|
389 |
+
|
390 |
+
.example-card:hover {
|
391 |
+
background: #e9ecef;
|
392 |
+
transform: translateX(5px);
|
393 |
+
}
|
394 |
+
|
395 |
+
.status-box {
|
396 |
+
background: #e3f2fd;
|
397 |
+
border: 1px solid #2196f3;
|
398 |
+
border-radius: 8px;
|
399 |
+
padding: 1rem;
|
400 |
+
}
|
401 |
+
|
402 |
+
.pricing-info {
|
403 |
+
background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
|
404 |
+
border-radius: 10px;
|
405 |
+
padding: 1rem;
|
406 |
+
text-align: center;
|
407 |
+
margin: 1rem 0;
|
408 |
+
}
|
409 |
+
|
410 |
+
.pricing-info h4 {
|
411 |
+
color: #d84315;
|
412 |
+
margin: 0 0 0.5rem 0;
|
413 |
+
}
|
414 |
+
|
415 |
+
.pricing-info p {
|
416 |
+
color: #bf360c;
|
417 |
+
margin: 0;
|
418 |
+
font-size: 0.9rem;
|
419 |
+
}
|
420 |
+
"""
|
421 |
+
|
422 |
+
# Create interface
|
423 |
+
with gr.Blocks(title="AI Video Creator Pro", theme=gr.themes.Soft(), css=custom_css) as interface:
|
424 |
+
|
425 |
+
# Professional Header
|
426 |
+
with gr.Group(elem_classes="header"):
|
427 |
+
gr.Markdown("""
|
428 |
+
# 🎬 AI Video Creator Pro
|
429 |
+
### Transform Your Ideas Into Stunning Videos with AI-Powered Generation
|
430 |
+
""")
|
431 |
+
|
432 |
+
with gr.Row():
|
433 |
+
with gr.Column(scale=2):
|
434 |
+
# Main Input Section
|
435 |
+
with gr.Group(elem_classes="feature-card"):
|
436 |
+
gr.Markdown("## 🎯 Video Generation")
|
437 |
+
|
438 |
+
prompt = gr.Textbox(
|
439 |
+
label="📝 Video Description",
|
440 |
+
placeholder="Describe the video you want to create... (e.g., 'A majestic dragon soaring through a mystical forest with glowing mushrooms')",
|
441 |
+
lines=3,
|
442 |
+
max_lines=5,
|
443 |
+
container=True
|
444 |
+
)
|
445 |
+
|
446 |
+
with gr.Row():
|
447 |
+
model_id = gr.Dropdown(
|
448 |
+
choices=generator.get_available_models(),
|
449 |
+
value=generator.get_available_models()[0],
|
450 |
+
label="🤖 AI Model",
|
451 |
+
info="Choose the AI model for video generation",
|
452 |
+
container=True
|
453 |
+
)
|
454 |
+
|
455 |
+
resolution = gr.Dropdown(
|
456 |
+
choices=["480P", "720P"],
|
457 |
+
value="480P",
|
458 |
+
label="📐 Resolution (Wan2.1 only)",
|
459 |
+
info="Select video resolution",
|
460 |
+
visible=False,
|
461 |
+
container=True
|
462 |
+
)
|
463 |
+
|
464 |
+
with gr.Row():
|
465 |
+
num_frames = gr.Slider(
|
466 |
+
minimum=8,
|
467 |
+
maximum=32,
|
468 |
+
value=16,
|
469 |
+
step=1,
|
470 |
+
label="🎞️ Video Length (Frames)",
|
471 |
+
info="More frames = longer video"
|
472 |
+
)
|
473 |
+
|
474 |
+
fps = gr.Slider(
|
475 |
+
minimum=4,
|
476 |
+
maximum=12,
|
477 |
+
value=8,
|
478 |
+
step=1,
|
479 |
+
label="⚡ FPS",
|
480 |
+
info="Frames per second"
|
481 |
+
)
|
482 |
+
|
483 |
+
with gr.Row():
|
484 |
+
num_inference_steps = gr.Slider(
|
485 |
+
minimum=10,
|
486 |
+
maximum=50,
|
487 |
+
value=25,
|
488 |
+
step=1,
|
489 |
+
label="🎨 Quality Steps",
|
490 |
+
info="More steps = better quality but slower"
|
491 |
+
)
|
492 |
+
|
493 |
+
guidance_scale = gr.Slider(
|
494 |
+
minimum=1.0,
|
495 |
+
maximum=20.0,
|
496 |
+
value=7.5,
|
497 |
+
step=0.5,
|
498 |
+
label="🎯 Guidance Scale",
|
499 |
+
info="Higher values = more prompt adherence"
|
500 |
+
)
|
501 |
+
|
502 |
+
seed = gr.Number(
|
503 |
+
label="🎲 Seed (Optional)",
|
504 |
+
value=None,
|
505 |
+
info="Set for reproducible results",
|
506 |
+
container=True
|
507 |
+
)
|
508 |
+
|
509 |
+
# Voice Section
|
510 |
+
with gr.Group(elem_classes="feature-card"):
|
511 |
+
gr.Markdown("## 🎤 Voice & Audio")
|
512 |
+
|
513 |
+
with gr.Row():
|
514 |
+
add_voice = gr.Checkbox(
|
515 |
+
label="🎵 Add Voice Narration",
|
516 |
+
value=True,
|
517 |
+
info="Enable to add professional voice-over"
|
518 |
+
)
|
519 |
+
|
520 |
+
voice_type = gr.Dropdown(
|
521 |
+
choices=generator.get_available_voices(),
|
522 |
+
value="Default (English)",
|
523 |
+
label="🗣️ Voice Type",
|
524 |
+
info="Select the voice for narration",
|
525 |
+
container=True
|
526 |
+
)
|
527 |
+
|
528 |
+
voice_script = gr.Textbox(
|
529 |
+
label="📜 Narration Script (Optional)",
|
530 |
+
placeholder="Enter your narration script here... (Leave blank to use video description)",
|
531 |
+
lines=2,
|
532 |
+
max_lines=3,
|
533 |
+
info="If left blank, the video description will be used as narration",
|
534 |
+
container=True
|
535 |
+
)
|
536 |
+
|
537 |
+
# Generate Button
|
538 |
+
generate_btn = gr.Button("🚀 Generate Professional Video", variant="primary", size="lg", elem_classes="generate-btn")
|
539 |
+
|
540 |
+
# Output Section
|
541 |
+
with gr.Group(elem_classes="feature-card"):
|
542 |
+
gr.Markdown("## 📺 Generated Video")
|
543 |
+
status_text = gr.Textbox(label="📊 Status", interactive=False, elem_classes="status-box")
|
544 |
+
video_output = gr.Video(label="🎬 Your Video", elem_classes="status-box")
|
545 |
+
|
546 |
+
with gr.Column(scale=1):
|
547 |
+
# Model Information
|
548 |
+
with gr.Group(elem_classes="model-info"):
|
549 |
+
gr.Markdown("## 🤖 AI Model Details")
|
550 |
+
model_info = gr.JSON(label="Current Model Specifications", elem_classes="model-info")
|
551 |
+
|
552 |
+
# Pricing Information
|
553 |
+
with gr.Group(elem_classes="pricing-info"):
|
554 |
+
gr.Markdown("## 💰 Pricing")
|
555 |
+
gr.Markdown("""
|
556 |
+
**Free Tier:** 5 videos per day
|
557 |
+
|
558 |
+
**Pro Plan:** $9.99/month
|
559 |
+
- Unlimited videos
|
560 |
+
- Priority processing
|
561 |
+
- HD quality
|
562 |
+
- Advanced features
|
563 |
+
|
564 |
+
**Enterprise:** Contact us
|
565 |
+
""")
|
566 |
+
|
567 |
+
# Examples
|
568 |
+
with gr.Group():
|
569 |
+
gr.Markdown("## 💡 Inspiration Examples")
|
570 |
+
examples = [
|
571 |
+
["A beautiful sunset over the ocean with waves crashing on the shore"],
|
572 |
+
["A cat playing with a ball of yarn in a cozy living room"],
|
573 |
+
["A futuristic city with flying cars and neon lights"],
|
574 |
+
["A butterfly emerging from a cocoon in a garden"],
|
575 |
+
["A rocket launching into space with fire and smoke"],
|
576 |
+
["Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"],
|
577 |
+
["A majestic dragon soaring through a mystical forest with glowing mushrooms"]
|
578 |
+
]
|
579 |
+
gr.Examples(
|
580 |
+
examples=examples,
|
581 |
+
inputs=prompt,
|
582 |
+
label="Click to try these examples",
|
583 |
+
elem_classes="example-card"
|
584 |
+
)
|
585 |
+
|
586 |
+
# Features
|
587 |
+
with gr.Group():
|
588 |
+
gr.Markdown("## ✨ Features")
|
589 |
+
gr.Markdown("""
|
590 |
+
🎬 **Multiple AI Models**
|
591 |
+
- State-of-the-art video generation
|
592 |
+
- Quality vs speed options
|
593 |
+
|
594 |
+
🎤 **Professional Voice-Over**
|
595 |
+
- Multiple voice types
|
596 |
+
- Custom narration scripts
|
597 |
+
|
598 |
+
🎨 **Advanced Controls**
|
599 |
+
- Quality settings
|
600 |
+
- Resolution options
|
601 |
+
- Reproducible results
|
602 |
+
|
603 |
+
⚡ **Fast Processing**
|
604 |
+
- GPU acceleration
|
605 |
+
- Optimized pipelines
|
606 |
+
""")
|
607 |
+
|
608 |
+
# Event handlers
|
609 |
+
generate_btn.click(
|
610 |
+
fn=generate_video_interface,
|
611 |
+
inputs=[prompt, model_id, num_frames, fps, num_inference_steps, guidance_scale, seed, resolution, voice_script, voice_type, add_voice],
|
612 |
+
outputs=[video_output, status_text]
|
613 |
+
)
|
614 |
+
|
615 |
+
# Update model info when model changes
|
616 |
+
def update_model_info(model_id):
|
617 |
+
info = generator.get_model_info(model_id)
|
618 |
+
return info
|
619 |
+
|
620 |
+
# Show/hide resolution selector based on model
|
621 |
+
def update_resolution_visibility(model_id):
|
622 |
+
if model_id == "Wan-AI/Wan2.1-T2V-14B":
|
623 |
+
return gr.Dropdown(visible=True)
|
624 |
+
else:
|
625 |
+
return gr.Dropdown(visible=False)
|
626 |
+
|
627 |
+
model_id.change(
|
628 |
+
fn=update_model_info,
|
629 |
+
inputs=model_id,
|
630 |
+
outputs=model_info
|
631 |
+
)
|
632 |
+
|
633 |
+
model_id.change(
|
634 |
+
fn=update_resolution_visibility,
|
635 |
+
inputs=model_id,
|
636 |
+
outputs=resolution
|
637 |
+
)
|
638 |
+
|
639 |
+
# Load initial model info
|
640 |
+
interface.load(lambda: generator.get_model_info(generator.get_available_models()[0]), outputs=model_info)
|
641 |
+
|
642 |
+
return interface
|
643 |
+
|
644 |
+
# Create and launch the interface
|
645 |
+
interface = create_interface()
|
646 |
+
interface.launch(
|
647 |
+
server_name="0.0.0.0",
|
648 |
+
server_port=7860,
|
649 |
+
share=True,
|
650 |
+
show_error=True
|
651 |
+
)
|
demo.py
ADDED
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""
Demo script for text-to-video generation
This script demonstrates how to use the text-to-video generator with a simple example.
"""

import os
import sys
from simple_generator import generate_video_from_text

def main():
    print("Text-to-Video Generation Demo")
    print("=" * 40)

    # Demo prompt
    demo_prompt = "A beautiful butterfly flying through a colorful garden with flowers"

    print(f"Generating video for prompt: '{demo_prompt}'")
    print("This may take a few minutes depending on your hardware...")
    print()

    try:
        # Generate video with default settings
        output_path = generate_video_from_text(
            prompt=demo_prompt,
            model_id="damo-vilab/text-to-video-ms-1.7b",  # Fast model for demo
            num_frames=16,
            fps=8,
            num_inference_steps=20,  # Reduced for faster demo
            guidance_scale=7.5,
            seed=42,  # Fixed seed for reproducible demo
            output_path="demo_video.mp4"
        )

        print("=" * 40)
        print("Demo completed successfully!")
        print(f"Video saved as: {output_path}")
        print()
        print("You can now:")
        print("1. Open the video file to view the result")
        print("2. Run 'python text_to_video.py' for the web interface")
        print("3. Try different prompts with 'python simple_generator.py'")

    except Exception as e:
        print(f"Error during demo: {str(e)}")
        print()
        print("Troubleshooting tips:")
        print("- Make sure all dependencies are installed: pip install -r requirements.txt")
        print("- Check if you have sufficient disk space")
        print("- Ensure you have a stable internet connection for model download")
        print("- Try running with CPU if GPU memory is insufficient")
        return 1

    return 0

if __name__ == "__main__":
    exit(main())
requirements.txt
ADDED
@@ -0,0 +1,16 @@
torch==2.2.2
torchvision==0.17.2
diffusers==0.27.2
transformers==4.39.3
accelerate==0.28.0
safetensors==0.4.2
opencv-python==4.9.0.80
pillow==10.3.0
numpy==1.24.4
gradio==4.25.0
huggingface-hub==0.23.0
xformers==0.0.25
imageio==2.34.0
imageio-ffmpeg==0.4.9
gTTS==2.5.1
moviepy==1.0.3
simple_generator.py
ADDED
@@ -0,0 +1,134 @@
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
import numpy as np
import argparse
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def generate_video_from_text(
    prompt,
    model_id="damo-vilab/text-to-video-ms-1.7b",
    num_frames=16,
    fps=8,
    num_inference_steps=25,
    guidance_scale=7.5,
    seed=None,
    output_path="generated_video.mp4"
):
    """
    Generate a video from text prompt using Hugging Face models

    Args:
        prompt (str): Text description of the video
        model_id (str): Hugging Face model ID
        num_frames (int): Number of frames to generate
        fps (int): Frames per second
        num_inference_steps (int): Number of denoising steps
        guidance_scale (float): Guidance scale for generation
        seed (int): Random seed for reproducibility
        output_path (str): Output video file path
    """

    # Check device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Using device: {device}")

    try:
        # Set seed for reproducibility
        if seed is not None:
            torch.manual_seed(seed)
            if torch.cuda.is_available():
                torch.cuda.manual_seed(seed)

        logger.info(f"Loading model: {model_id}")

        # Load pipeline
        pipeline = DiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            variant="fp16" if device == "cuda" else None
        )

        # Move to device
        pipeline = pipeline.to(device)

        # Optimize scheduler for faster inference
        if hasattr(pipeline, 'scheduler'):
            pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
                pipeline.scheduler.config
            )

        # Enable memory efficient attention if available
        if device == "cuda":
            pipeline.enable_model_cpu_offload()
            pipeline.enable_vae_slicing()

        logger.info(f"Generating video with prompt: {prompt}")
        logger.info(f"Parameters: frames={num_frames}, fps={fps}, steps={num_inference_steps}")

        # Generate video
        video_frames = pipeline(
            prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            num_frames=num_frames
        ).frames

        # Convert to numpy array
        video_frames = np.array(video_frames)

        # Save video
        export_to_video(video_frames, output_path, fps=fps)

        logger.info(f"Video saved to: {output_path}")
        return output_path

    except Exception as e:
        logger.error(f"Error generating video: {str(e)}")
        raise

def main():
    parser = argparse.ArgumentParser(description="Generate video from text using Hugging Face models")
    parser.add_argument("prompt", help="Text description of the video to generate")
    parser.add_argument("--model", default="damo-vilab/text-to-video-ms-1.7b",
                        help="Hugging Face model ID to use")
    parser.add_argument("--frames", type=int, default=16,
                        help="Number of frames to generate (default: 16)")
    parser.add_argument("--fps", type=int, default=8,
                        help="Frames per second (default: 8)")
    parser.add_argument("--steps", type=int, default=25,
                        help="Number of inference steps (default: 25)")
    parser.add_argument("--guidance", type=float, default=7.5,
                        help="Guidance scale (default: 7.5)")
    parser.add_argument("--seed", type=int, default=None,
                        help="Random seed for reproducibility")
    parser.add_argument("--output", default="generated_video.mp4",
                        help="Output video file path (default: generated_video.mp4)")

    args = parser.parse_args()

    try:
        output_path = generate_video_from_text(
            prompt=args.prompt,
            model_id=args.model,
            num_frames=args.frames,
            fps=args.fps,
            num_inference_steps=args.steps,
            guidance_scale=args.guidance,
            seed=args.seed,
            output_path=args.output
        )
        print(f"Video generated successfully: {output_path}")

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1

    return 0

if __name__ == "__main__":
    exit(main())
test_app.py
ADDED
@@ -0,0 +1,479 @@
1 |
+
import gradio as gr
|
2 |
+
import logging
|
3 |
+
import tempfile
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Set up logging
|
7 |
+
logging.basicConfig(level=logging.INFO)
|
8 |
+
logger = logging.getLogger(__name__)
|
9 |
+
|
10 |
+
class TextToVideoGenerator:
|
11 |
+
def __init__(self):
|
12 |
+
self.device = "cpu" # Simplified for testing
|
13 |
+
|
14 |
+
# Available models - including the advanced Wan2.1 model
|
15 |
+
self.models = {
|
16 |
+
"damo-vilab/text-to-video-ms-1.7b": {
|
17 |
+
"name": "DAMO Text-to-Video MS-1.7B",
|
18 |
+
"description": "Fast and efficient text-to-video model",
|
19 |
+
"max_frames": 16,
|
20 |
+
"fps": 8,
|
21 |
+
"quality": "Good",
|
22 |
+
"speed": "Fast"
|
23 |
+
},
|
24 |
+
"cerspense/zeroscope_v2_XL": {
|
25 |
+
"name": "Zeroscope v2 XL",
|
26 |
+
"description": "High-quality text-to-video model",
|
27 |
+
"max_frames": 24,
|
28 |
+
"fps": 6,
|
29 |
+
"quality": "Excellent",
|
30 |
+
"speed": "Medium"
|
31 |
+
},
|
32 |
+
"Wan-AI/Wan2.1-T2V-14B": {
|
33 |
+
"name": "Wan2.1-T2V-14B (SOTA)",
|
34 |
+
"description": "State-of-the-art text-to-video model with 14B parameters",
|
35 |
+
"max_frames": 32,
|
36 |
+
"fps": 8,
|
37 |
+
"quality": "SOTA",
|
38 |
+
"speed": "Medium",
|
39 |
+
"resolutions": ["480P", "720P"],
|
40 |
+
"features": ["Chinese & English text", "High motion dynamics", "Best quality"]
|
41 |
+
}
|
42 |
+
}
|
43 |
+
|
44 |
+
# Voice options (gTTS only supports language, not gender/age)
|
45 |
+
self.voices = {
|
46 |
+
"Default (English)": "en"
|
47 |
+
}
|
48 |
+
|
49 |
+
def generate_video(self, prompt, model_id, num_frames=16, fps=8, num_inference_steps=25, guidance_scale=7.5, seed=None, resolution="480P", voice_script="", voice_type="Default (English)", add_voice=True):
|
50 |
+
"""Generate video from text prompt with optional voice (DEMO VERSION)"""
|
51 |
+
try:
|
52 |
+
# This is a demo version that simulates video generation
|
53 |
+
logger.info(f"DEMO: Would generate video with prompt: {prompt}")
|
54 |
+
logger.info(f"DEMO: Model: {model_id}, Frames: {num_frames}, FPS: {fps}")
|
55 |
            if add_voice and voice_script.strip():
                logger.info(f"DEMO: Would add voice narration: {voice_script}")

            # Create a dummy video file for demonstration
            dummy_video_path = "demo_video.mp4"

            # For demo purposes, return a success message
            return dummy_video_path, f"DEMO: Video generation completed! (This is a test version - no actual video generated)"

        except Exception as e:
            logger.error(f"Error in demo video generation: {str(e)}")
            return None, f"Demo error: {str(e)}"

    def get_available_models(self):
        """Get list of available models"""
        return list(self.models.keys())

    def get_model_info(self, model_id):
        """Get information about a specific model"""
        if model_id in self.models:
            return self.models[model_id]
        return None

    def get_available_voices(self):
        """Get list of available voices"""
        return list(self.voices.keys())

# Initialize the generator
generator = TextToVideoGenerator()

def create_interface():
    """Create Gradio interface"""

    def generate_video_interface(prompt, model_id, num_frames, fps, num_inference_steps, guidance_scale, seed, resolution, voice_script, voice_type, add_voice):
        if not prompt.strip():
            return None, "Please enter a prompt"

        return generator.generate_video(
            prompt=prompt,
            model_id=model_id,
            num_frames=num_frames,
            fps=fps,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            seed=seed,
            resolution=resolution,
            voice_script=voice_script,
            voice_type=voice_type,
            add_voice=add_voice
        )

    # Custom CSS for professional styling
    custom_css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: 0 auto !important;
    }

    .header {
        text-align: center;
        padding: 2rem 0;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 15px;
        margin-bottom: 2rem;
    }

    .header h1 {
        font-size: 2.5rem;
        font-weight: 700;
        margin: 0;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    }

    .header p {
        font-size: 1.1rem;
        margin: 0.5rem 0 0 0;
        opacity: 0.9;
    }

    .feature-card {
        background: white;
        border-radius: 10px;
        padding: 1.5rem;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        margin-bottom: 1rem;
        border-left: 4px solid #667eea;
    }

    .feature-card h3 {
        color: #333;
        margin: 0 0 0.5rem 0;
        font-size: 1.2rem;
    }

    .feature-card p {
        color: #666;
        margin: 0;
        font-size: 0.9rem;
    }

    .model-info {
        background: #f8f9fa;
        border-radius: 8px;
        padding: 1rem;
        border: 1px solid #e9ecef;
    }

    .model-info h4 {
        color: #495057;
        margin: 0 0 0.5rem 0;
        font-size: 1rem;
    }

    .model-info p {
        color: #6c757d;
        margin: 0.25rem 0;
        font-size: 0.85rem;
    }

    .generate-btn {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        border: none !important;
        color: white !important;
        font-weight: 600 !important;
        padding: 1rem 2rem !important;
        border-radius: 10px !important;
        font-size: 1.1rem !important;
        transition: all 0.3s ease !important;
    }

    .generate-btn:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
    }

    .example-card {
        background: #f8f9fa;
        border-radius: 8px;
        padding: 1rem;
        margin: 0.5rem 0;
        border: 1px solid #e9ecef;
        cursor: pointer;
        transition: all 0.2s ease;
    }

    .example-card:hover {
        background: #e9ecef;
        transform: translateX(5px);
    }

    .status-box {
        background: #e3f2fd;
        border: 1px solid #2196f3;
        border-radius: 8px;
        padding: 1rem;
    }

    .pricing-info {
        background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
        border-radius: 10px;
        padding: 1rem;
        text-align: center;
        margin: 1rem 0;
    }

    .pricing-info h4 {
        color: #d84315;
        margin: 0 0 0.5rem 0;
    }

    .pricing-info p {
        color: #bf360c;
        margin: 0;
        font-size: 0.9rem;
    }

    .demo-notice {
        background: linear-gradient(135deg, #fff3cd 0%, #ffeaa7 100%);
        border: 1px solid #ffc107;
        border-radius: 8px;
        padding: 1rem;
        margin: 1rem 0;
        text-align: center;
    }
    """

    # Create interface
    with gr.Blocks(title="AI Video Creator Pro - DEMO", theme=gr.themes.Soft(), css=custom_css) as interface:

        # Professional Header
        with gr.Group(elem_classes="header"):
            gr.Markdown("""
            # 🎬 AI Video Creator Pro
            ### Transform Your Ideas Into Stunning Videos with AI-Powered Generation
            """)

        # Demo Notice
        with gr.Group(elem_classes="demo-notice"):
            gr.Markdown("""
            ## 🚧 DEMO VERSION
            This is a demonstration of the professional UI. Video generation is simulated for testing purposes.
            The full version with actual AI video generation will be available once dependencies are resolved.
            """)

        with gr.Row():
            with gr.Column(scale=2):
                # Main Input Section
                with gr.Group(elem_classes="feature-card"):
                    gr.Markdown("## 🎯 Video Generation")

                    prompt = gr.Textbox(
                        label="📝 Video Description",
                        placeholder="Describe the video you want to create... (e.g., 'A majestic dragon soaring through a mystical forest with glowing mushrooms')",
                        lines=3,
                        max_lines=5,
                        container=True
                    )

                    with gr.Row():
                        model_id = gr.Dropdown(
                            choices=generator.get_available_models(),
                            value=generator.get_available_models()[0],
                            label="🤖 AI Model",
                            info="Choose the AI model for video generation",
                            container=True
                        )

                        resolution = gr.Dropdown(
                            choices=["480P", "720P"],
                            value="480P",
                            label="📐 Resolution (Wan2.1 only)",
                            info="Select video resolution",
                            visible=False,
                            container=True
                        )

                    with gr.Row():
                        num_frames = gr.Slider(
                            minimum=8,
                            maximum=32,
                            value=16,
                            step=1,
                            label="🎞️ Video Length (Frames)",
                            info="More frames = longer video"
                        )

                        fps = gr.Slider(
                            minimum=4,
                            maximum=12,
                            value=8,
                            step=1,
                            label="⚡ FPS",
                            info="Frames per second"
                        )

                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            minimum=10,
                            maximum=50,
                            value=25,
                            step=1,
                            label="🎨 Quality Steps",
                            info="More steps = better quality but slower"
                        )

                        guidance_scale = gr.Slider(
                            minimum=1.0,
                            maximum=20.0,
                            value=7.5,
                            step=0.5,
                            label="🎯 Guidance Scale",
                            info="Higher values = more prompt adherence"
                        )

                    seed = gr.Number(
                        label="🎲 Seed (Optional)",
                        value=None,
                        info="Set for reproducible results",
                        container=True
                    )

                # Voice Section
                with gr.Group(elem_classes="feature-card"):
                    gr.Markdown("## 🎤 Voice & Audio")

                    with gr.Row():
                        add_voice = gr.Checkbox(
                            label="🎵 Add Voice Narration",
                            value=True,
                            info="Enable to add professional voice-over"
                        )

                        voice_type = gr.Dropdown(
                            choices=generator.get_available_voices(),
                            value="Default (English)",
                            label="🗣️ Voice Type",
                            info="Select the voice for narration",
                            container=True
                        )

                    voice_script = gr.Textbox(
                        label="📜 Narration Script (Optional)",
                        placeholder="Enter your narration script here... (Leave blank to use video description)",
                        lines=2,
                        max_lines=3,
                        info="If left blank, the video description will be used as narration",
                        container=True
                    )

                # Generate Button
                generate_btn = gr.Button("🚀 Generate Professional Video (DEMO)", variant="primary", size="lg", elem_classes="generate-btn")

                # Output Section
                with gr.Group(elem_classes="feature-card"):
                    gr.Markdown("## 📺 Generated Video")
                    status_text = gr.Textbox(label="📊 Status", interactive=False, elem_classes="status-box")
                    video_output = gr.Video(label="🎬 Your Video", elem_classes="status-box")

            with gr.Column(scale=1):
                # Model Information
                with gr.Group(elem_classes="model-info"):
                    gr.Markdown("## 🤖 AI Model Details")
                    model_info = gr.JSON(label="Current Model Specifications", elem_classes="model-info")

                # Pricing Information
                with gr.Group(elem_classes="pricing-info"):
                    gr.Markdown("## 💰 Pricing")
                    gr.Markdown("""
                    **Free Tier:** 5 videos per day

                    **Pro Plan:** $9.99/month
                    - Unlimited videos
                    - Priority processing
                    - HD quality
                    - Advanced features

                    **Enterprise:** Contact us
                    """)

                # Examples
                with gr.Group():
                    gr.Markdown("## 💡 Inspiration Examples")
                    examples = [
                        ["A beautiful sunset over the ocean with waves crashing on the shore"],
                        ["A cat playing with a ball of yarn in a cozy living room"],
                        ["A futuristic city with flying cars and neon lights"],
                        ["A butterfly emerging from a cocoon in a garden"],
                        ["A rocket launching into space with fire and smoke"],
                        ["Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"],
                        ["A majestic dragon soaring through a mystical forest with glowing mushrooms"]
                    ]
                    gr.Examples(
                        examples=examples,
                        inputs=prompt,
                        label="Click to try these examples"
                    )

                # Features
                with gr.Group():
                    gr.Markdown("## ✨ Features")
                    gr.Markdown("""
                    🎬 **Multiple AI Models**
                    - State-of-the-art video generation
                    - Quality vs speed options

                    🎤 **Professional Voice-Over**
                    - Multiple voice types
                    - Custom narration scripts

                    🎨 **Advanced Controls**
                    - Quality settings
                    - Resolution options
                    - Reproducible results

                    ⚡ **Fast Processing**
                    - GPU acceleration
                    - Optimized pipelines
                    """)

        # Event handlers
        generate_btn.click(
            fn=generate_video_interface,
            inputs=[prompt, model_id, num_frames, fps, num_inference_steps, guidance_scale, seed, resolution, voice_script, voice_type, add_voice],
            outputs=[video_output, status_text]
        )

        # Update model info when model changes
        def update_model_info(model_id):
            info = generator.get_model_info(model_id)
            return info

        # Show/hide resolution selector based on model
        def update_resolution_visibility(model_id):
            if model_id == "Wan-AI/Wan2.1-T2V-14B":
                return gr.Dropdown(visible=True)
            else:
                return gr.Dropdown(visible=False)

        model_id.change(
            fn=update_model_info,
            inputs=model_id,
            outputs=model_info
        )

        model_id.change(
            fn=update_resolution_visibility,
            inputs=model_id,
            outputs=resolution
        )

        # Load initial model info
        interface.load(lambda: generator.get_model_info(generator.get_available_models()[0]), outputs=model_info)

    return interface

# Create and launch the interface
interface = create_interface()
interface.launch(
    server_name="0.0.0.0",
    server_port=7861,
    share=True,
    show_error=True
)
text-to-video-generator/.gitattributes
ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
text-to-video-generator/README.md
ADDED
@@ -0,0 +1,12 @@
---
title: Text To Video Generator
emoji: 🔥
colorFrom: gray
colorTo: gray
sdk: gradio
sdk_version: 5.37.0
app_file: app.py
pinned: false
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
text-to-video-generator/app.py
ADDED
@@ -0,0 +1,651 @@
text-to-video-generator/requirements.txt
ADDED
@@ -0,0 +1,16 @@
torch==2.2.2
torchvision==0.17.2
diffusers==0.27.2
transformers==4.39.3
accelerate==0.28.0
safetensors==0.4.2
opencv-python==4.9.0.80
pillow==10.3.0
numpy==1.24.4
gradio==4.25.0
huggingface-hub==0.23.0
xformers==0.0.25
imageio==2.34.0
imageio-ffmpeg==0.4.9
gTTS==2.5.1
moviepy==1.0.3
text_to_video.py
ADDED
@@ -0,0 +1,289 @@
import torch
import gradio as gr
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
import numpy as np
from PIL import Image
import os
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TextToVideoGenerator:
    def __init__(self):
        self.pipeline = None
        self.current_model = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {self.device}")

        # Available models
        self.models = {
            "damo-vilab/text-to-video-ms-1.7b": {
                "name": "DAMO Text-to-Video MS-1.7B",
                "description": "Fast and efficient text-to-video model",
                "max_frames": 16,
                "fps": 8
            },
            "cerspense/zeroscope_v2_XL": {
                "name": "Zeroscope v2 XL",
                "description": "High-quality text-to-video model",
                "max_frames": 24,
                "fps": 6
            },
            "stabilityai/stable-video-diffusion-img2vid-xt": {
                "name": "Stable Video Diffusion XT",
                "description": "Image-to-video model (requires initial image)",
                "max_frames": 25,
                "fps": 6
            }
        }

    def load_model(self, model_id):
        """Load the specified model"""
        if self.current_model == model_id and self.pipeline is not None:
            return f"Model {self.models[model_id]['name']} is already loaded"

        try:
            logger.info(f"Loading model: {model_id}")

            # Clear GPU memory if needed
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            # Load pipeline
            self.pipeline = DiffusionPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                variant="fp16" if self.device == "cuda" else None
            )

            # Move to device
            self.pipeline = self.pipeline.to(self.device)

            # Optimize scheduler for faster inference
            if hasattr(self.pipeline, 'scheduler'):
                self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
                    self.pipeline.scheduler.config
                )

            # Enable memory efficient attention if available
            if self.device == "cuda":
                self.pipeline.enable_model_cpu_offload()
                self.pipeline.enable_vae_slicing()

            self.current_model = model_id
            logger.info(f"Successfully loaded model: {model_id}")
            return f"Successfully loaded {self.models[model_id]['name']}"

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return f"Error loading model: {str(e)}"

    def generate_video(self, prompt, model_id, num_frames=16, fps=8, num_inference_steps=25, guidance_scale=7.5, seed=None):
        """Generate video from text prompt"""
        try:
            # Load model if not already loaded
            if self.current_model != model_id:
                load_result = self.load_model(model_id)
                if "Error" in load_result:
                    return None, load_result

            # Set seed for reproducibility
            if seed is not None:
                torch.manual_seed(seed)
                if torch.cuda.is_available():
                    torch.cuda.manual_seed(seed)

            # Get model config
            model_config = self.models[model_id]
            num_frames = min(num_frames, model_config["max_frames"])
            fps = model_config["fps"]

            logger.info(f"Generating video with prompt: {prompt}")
            logger.info(f"Parameters: frames={num_frames}, fps={fps}, steps={num_inference_steps}")

            # Generate video
            video_frames = self.pipeline(
                prompt,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                num_frames=num_frames
            ).frames

            # Convert to numpy array
            video_frames = np.array(video_frames)

            # Save video
            output_path = f"generated_video_{seed if seed else 'random'}.mp4"
            export_to_video(video_frames, output_path, fps=fps)

            logger.info(f"Video saved to: {output_path}")
            return output_path, f"Video generated successfully! Saved as {output_path}"

        except Exception as e:
            logger.error(f"Error generating video: {str(e)}")
            return None, f"Error generating video: {str(e)}"

    def get_available_models(self):
        """Get list of available models"""
        return list(self.models.keys())

    def get_model_info(self, model_id):
        """Get information about a specific model"""
        if model_id in self.models:
            return self.models[model_id]
        return None

# Initialize the generator
generator = TextToVideoGenerator()

def create_interface():
    """Create Gradio interface"""

    def generate_video_interface(prompt, model_id, num_frames, fps, num_inference_steps, guidance_scale, seed):
        if not prompt.strip():
            return None, "Please enter a prompt"

        return generator.generate_video(
            prompt=prompt,
            model_id=model_id,
            num_frames=num_frames,
            fps=fps,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            seed=seed
        )

    # Create interface
    with gr.Blocks(title="Text-to-Video Generator", theme=gr.themes.Soft()) as interface:
        gr.Markdown("# Text-to-Video Generation with Hugging Face Models")
        gr.Markdown("Generate videos from text descriptions using state-of-the-art AI models")

        with gr.Row():
            with gr.Column(scale=2):
                # Input section
                with gr.Group():
                    gr.Markdown("## Input Parameters")

                    prompt = gr.Textbox(
                        label="Text Prompt",
                        placeholder="Enter your video description here...",
                        lines=3,
                        max_lines=5
                    )

                    model_id = gr.Dropdown(
                        choices=generator.get_available_models(),
                        value=generator.get_available_models()[0],
                        label="Model",
                        info="Select the model to use for generation"
                    )

                    with gr.Row():
                        num_frames = gr.Slider(
                            minimum=8,
                            maximum=24,
                            value=16,
                            step=1,
                            label="Number of Frames",
                            info="More frames = longer video"
                        )

                        fps = gr.Slider(
                            minimum=4,
                            maximum=12,
                            value=8,
                            step=1,
                            label="FPS",
                            info="Frames per second"
                        )

                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            minimum=10,
                            maximum=50,
                            value=25,
                            step=1,
                            label="Inference Steps",
                            info="More steps = better quality but slower"
                        )

                        guidance_scale = gr.Slider(
                            minimum=1.0,
                            maximum=20.0,
                            value=7.5,
                            step=0.5,
                            label="Guidance Scale",
                            info="Higher values = more prompt adherence"
                        )

                    seed = gr.Number(
                        label="Seed (Optional)",
                        value=None,
                        info="Set for reproducible results"
                    )

                    generate_btn = gr.Button("Generate Video", variant="primary", size="lg")

                # Output section
                with gr.Group():
                    gr.Markdown("## Output")
                    status_text = gr.Textbox(label="Status", interactive=False)
                    video_output = gr.Video(label="Generated Video")

            with gr.Column(scale=1):
                # Model information
                with gr.Group():
                    gr.Markdown("## Model Information")
                    model_info = gr.JSON(label="Current Model Details")

                # Examples
                with gr.Group():
                    gr.Markdown("## Example Prompts")
                    examples = [
                        ["A beautiful sunset over the ocean with waves crashing on the shore"],
                        ["A cat playing with a ball of yarn in a cozy living room"],
                        ["A futuristic city with flying cars and neon lights"],
                        ["A butterfly emerging from a cocoon in a garden"],
                        ["A rocket launching into space with fire and smoke"]
                    ]
                    gr.Examples(
                        examples=examples,
                        inputs=prompt,
                        label="Try these examples"
                    )

        # Event handlers
        generate_btn.click(
            fn=generate_video_interface,
            inputs=[prompt, model_id, num_frames, fps, num_inference_steps, guidance_scale, seed],
            outputs=[video_output, status_text]
        )

        # Update model info when model changes
        def update_model_info(model_id):
            info = generator.get_model_info(model_id)
            return info

        model_id.change(
            fn=update_model_info,
            inputs=model_id,
            outputs=model_info
        )

        # Load initial model info
        interface.load(lambda: generator.get_model_info(generator.get_available_models()[0]), outputs=model_info)

    return interface

if __name__ == "__main__":
    # Create and launch the interface
    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True
    )