akhaliq (HF Staff) committed
Commit c82a12e · 2 parents: 773c6cb, 81f9012

Merge branch 'feature/text-to-music'

Files changed (1):
  1. app.py (+242, -17)
app.py CHANGED
@@ -146,6 +146,61 @@ def reap_old_videos(ttl_seconds: int = VIDEO_FILE_TTL_SECONDS) -> None:
         # Temp dir might not exist or be accessible; ignore
         pass
 
+# ---------------------------------------------------------------------------
+# Audio temp-file management (per-session tracking and cleanup)
+# ---------------------------------------------------------------------------
+AUDIO_TEMP_DIR = os.path.join(tempfile.gettempdir(), "anycoder_audio")
+AUDIO_FILE_TTL_SECONDS = 6 * 60 * 60  # 6 hours
+_SESSION_AUDIO_FILES: Dict[str, List[str]] = {}
+_AUDIO_FILES_LOCK = threading.Lock()
+
+
+def _ensure_audio_dir_exists() -> None:
+    try:
+        os.makedirs(AUDIO_TEMP_DIR, exist_ok=True)
+    except Exception:
+        pass
+
+
+def _register_audio_for_session(session_id: Optional[str], file_path: str) -> None:
+    if not session_id or not file_path:
+        return
+    with _AUDIO_FILES_LOCK:
+        if session_id not in _SESSION_AUDIO_FILES:
+            _SESSION_AUDIO_FILES[session_id] = []
+        _SESSION_AUDIO_FILES[session_id].append(file_path)
+
+
+def cleanup_session_audio(session_id: Optional[str]) -> None:
+    if not session_id:
+        return
+    with _AUDIO_FILES_LOCK:
+        file_list = _SESSION_AUDIO_FILES.pop(session_id, [])
+        for path in file_list:
+            try:
+                if path and os.path.exists(path):
+                    os.unlink(path)
+            except Exception:
+                pass
+
+
+def reap_old_audio(ttl_seconds: int = AUDIO_FILE_TTL_SECONDS) -> None:
+    try:
+        _ensure_audio_dir_exists()
+        now_ts = time.time()
+        for name in os.listdir(AUDIO_TEMP_DIR):
+            path = os.path.join(AUDIO_TEMP_DIR, name)
+            try:
+                if not os.path.isfile(path):
+                    continue
+                mtime = os.path.getmtime(path)
+                if now_ts - mtime > ttl_seconds:
+                    os.unlink(path)
+            except Exception:
+                pass
+    except Exception:
+        pass
+
 TRANSFORMERS_JS_SYSTEM_PROMPT = """You are an expert web developer creating a transformers.js application. You will generate THREE separate files: index.html, index.js, and style.css.
 
 IMPORTANT: You MUST output ALL THREE files in the following format:
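The added helpers follow a per-session registry plus a TTL sweep over a temp directory. A minimal, self-contained sketch of the TTL-reaping part of that pattern (the directory name, TTL, and file name below are arbitrary stand-ins, not values from app.py):

    import os
    import tempfile
    import time

    DEMO_DIR = os.path.join(tempfile.gettempdir(), "reap_demo")   # stand-in for AUDIO_TEMP_DIR
    DEMO_TTL_SECONDS = 60                                         # stand-in for AUDIO_FILE_TTL_SECONDS

    def reap_old_files(directory: str, ttl_seconds: int) -> int:
        """Delete regular files whose mtime is older than ttl_seconds; return how many were removed."""
        removed = 0
        now = time.time()
        for name in os.listdir(directory):
            path = os.path.join(directory, name)
            if os.path.isfile(path) and now - os.path.getmtime(path) > ttl_seconds:
                os.unlink(path)
                removed += 1
        return removed

    os.makedirs(DEMO_DIR, exist_ok=True)
    stale = os.path.join(DEMO_DIR, "stale.wav")
    open(stale, "wb").close()                                     # empty stand-in for a generated file
    os.utime(stale, (time.time() - 3600, time.time() - 3600))     # backdate mtime by one hour
    print(reap_old_files(DEMO_DIR, DEMO_TTL_SECONDS))             # prints 1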
@@ -1529,6 +1584,68 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
         print(f"Text-to-video generation error: {str(e)}")
         return f"Error generating video (text-to-video): {str(e)}"
 
+def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None) -> str:
+    """Generate music from a text prompt using ElevenLabs Music API and return an HTML <audio> tag.
+
+    Saves audio to a temp file and references it via file:// URL similar to videos.
+    Requires ELEVENLABS_API_KEY in the environment.
+    """
+    try:
+        api_key = os.getenv('ELEVENLABS_API_KEY')
+        if not api_key:
+            return "Error: ELEVENLABS_API_KEY environment variable is not set."
+
+        headers = {
+            'Content-Type': 'application/json',
+            'xi-api-key': api_key,
+        }
+        payload = {
+            'prompt': (prompt or 'Epic orchestral theme with soaring strings and powerful brass'),
+            'music_length_ms': int(music_length_ms) if music_length_ms else 30000,
+        }
+
+        resp = requests.post('https://api.elevenlabs.io/v1/music/compose', headers=headers, json=payload)
+        try:
+            resp.raise_for_status()
+        except Exception as e:
+            return f"Error generating music: {getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text}"
+
+        # Persist audio to temp file and return an <audio> element using file:// URL
+        _ensure_audio_dir_exists()
+        file_name = f"{uuid.uuid4()}.wav"
+        file_path = os.path.join(AUDIO_TEMP_DIR, file_name)
+        try:
+            with open(file_path, 'wb') as f:
+                f.write(resp.content)
+            _register_audio_for_session(session_id, file_path)
+        except Exception as save_exc:
+            return f"Error generating music: could not save audio file ({save_exc})"
+
+        # Build file URI
+        try:
+            from pathlib import Path
+            file_url = Path(file_path).as_uri()
+        except Exception:
+            if file_path.startswith('/'):
+                file_url = f"file:///{file_path.lstrip('/')}"
+            else:
+                file_url = f"file:///{file_path}"
+
+        audio_html = (
+            "<div class=\"anycoder-music\" style=\"max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)\">"
+            " <div style=\"font-size:13px;color:#374151;margin-bottom:8px;display:flex;align-items:center;gap:6px\">"
+            " <span>🎵 Generated music</span>"
+            " </div>"
+            f" <audio controls autoplay loop style=\"width:100%;outline:none;\">"
+            f" <source src=\"{file_url}\" type=\"audio/wav\" />"
+            " Your browser does not support the audio element."
+            " </audio>"
+            "</div>"
+        )
+        return audio_html
+    except Exception as e:
+        return f"Error generating music: {str(e)}"
+
 def extract_image_prompts_from_text(text: str, num_images_needed: int = 1) -> list:
     """Extract image generation prompts from the full text based on number of images needed"""
     # Use the entire text as the base prompt for image generation
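To exercise the ElevenLabs call in isolation, a minimal standalone sketch is below. The endpoint URL, the xi-api-key header, and the prompt/music_length_ms payload fields are the ones used in the hunk above; the output filename, the timeout, and the assumption that the response body is raw audio written out as a .wav (the same assumption generate_music_from_text makes) are additions of this example.

    import os
    import requests

    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise SystemExit("Set ELEVENLABS_API_KEY first")

    resp = requests.post(
        "https://api.elevenlabs.io/v1/music/compose",          # endpoint used by generate_music_from_text
        headers={"Content-Type": "application/json", "xi-api-key": api_key},
        json={"prompt": "Epic orchestral theme with soaring strings", "music_length_ms": 30000},
        timeout=120,
    )
    resp.raise_for_status()
    with open("music.wav", "wb") as f:                          # arbitrary local path
        f.write(resp.content)                                   # raw audio bytes, saved as-is
    print(f"wrote {len(resp.content)} bytes to music.wav")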
@@ -1821,6 +1938,53 @@ def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str
     # If no <body>, just append
     return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
 
+def create_music_replacement_blocks_text_to_music(html_content: str, prompt: str, session_id: Optional[str] = None) -> str:
+    """Create search/replace blocks that insert ONE generated <audio> near the top of <body>.
+
+    Unlike images/videos which replace placeholders, music doesn't map to an <img> tag.
+    We simply insert an <audio> player after the opening <body>.
+    """
+    if not prompt or not prompt.strip():
+        return ""
+
+    audio_html = generate_music_from_text(prompt, session_id=session_id)
+    if audio_html.startswith("Error"):
+        return ""
+
+    # Prefer inserting after the first <section>...</section> if present; else after <body>
+    import re
+    section_match = re.search(r"<section\b[\s\S]*?</section>", html_content, flags=re.IGNORECASE)
+    if section_match:
+        section_html = section_match.group(0)
+        section_clean = re.sub(r"\s+", " ", section_html.strip())
+        variations = [
+            section_html,
+            section_clean,
+            section_clean.replace('"', "'"),
+            section_clean.replace("'", '"'),
+            re.sub(r"\s+", " ", section_clean),
+        ]
+        blocks = []
+        for v in variations:
+            blocks.append(f"""{SEARCH_START}
+{v}
+{DIVIDER}
+{v}\n {audio_html}
+{REPLACE_END}""")
+        return "\n\n".join(blocks)
+    if '<body' in html_content:
+        body_end = html_content.find('>', html_content.find('<body')) + 1
+        insertion_point = html_content[:body_end] + '\n '
+        return f"""{SEARCH_START}
+{insertion_point}
+{DIVIDER}
+{insertion_point}
+{audio_html}
+{REPLACE_END}"""
+
+    # If no <body>, just append
+    return f"{SEARCH_START}\n\n{DIVIDER}\n{audio_html}\n{REPLACE_END}"
+
 def create_image_replacement_blocks_from_input_image(html_content: str, user_prompt: str, input_image_data, max_images: int = 1) -> str:
     """Create search/replace blocks using image-to-image generation with a provided input image.
 
@@ -1993,7 +2157,7 @@ def create_video_replacement_blocks_from_input_image(html_content: str, user_pro
     print("[Image2Video] No <body> tag; appending video via replacement block")
     return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
 
-def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: str | None = None) -> str:
+def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: str | None = None, enable_text_to_music: bool = False, text_to_music_prompt: str | None = None) -> str:
     """Apply text-to-image and/or image-to-image replacements to HTML content.
 
     If both toggles are enabled, text-to-image replacements run first, then image-to-image.
@@ -2002,7 +2166,7 @@ def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_t
     try:
         print(
             f"[MediaApply] enable_i2v={enable_image_to_video}, enable_i2i={enable_image_to_image}, "
-            f"enable_t2i={enable_text_to_image}, has_image={input_image_data is not None}"
+            f"enable_t2i={enable_text_to_image}, enable_t2v={enable_text_to_video}, enable_t2m={enable_text_to_music}, has_image={input_image_data is not None}"
        )
        # If image-to-video is enabled, replace the first image with a generated video and return.
        if enable_image_to_video and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
@@ -2040,6 +2204,18 @@ def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_t
                 print("[MediaApply] No t2v replacement blocks generated")
             return result
 
+        # If text-to-music is enabled, insert a generated audio player near the top of body and return.
+        if enable_text_to_music and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
+            t2m_prompt = (text_to_music_prompt or user_prompt or "").strip()
+            print(f"[MediaApply] Running text-to-music with prompt len={len(t2m_prompt)}")
+            blocks_tm = create_music_replacement_blocks_text_to_music(result, t2m_prompt, session_id=session_id)
+            if blocks_tm:
+                print("[MediaApply] Applying text-to-music replacement blocks")
+                result = apply_search_replace_changes(result, blocks_tm)
+            else:
+                print("[MediaApply] No t2m replacement blocks generated")
+            return result
+
         # If an input image is provided and image-to-image is enabled, we only replace one image
         # and skip text-to-image to satisfy the requirement to replace exactly the number of uploaded images.
         if enable_image_to_image and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
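The music blocks built above are consumed by apply_search_replace_changes, which, like the SEARCH_START/DIVIDER/REPLACE_END constants, is defined elsewhere in app.py. A hedged, self-contained sketch of how blocks of this shape are typically applied; the marker strings below are assumptions for illustration, not app.py's actual values:

    SEARCH_START = "<<<<<<< SEARCH"   # assumed marker values; app.py defines its own
    DIVIDER = "======="
    REPLACE_END = ">>>>>>> REPLACE"

    def apply_blocks(html: str, blocks: str) -> str:
        """Apply each SEARCH/REPLACE block by exact-substring replacement; an empty SEARCH appends."""
        for raw in blocks.split(REPLACE_END):
            if SEARCH_START not in raw or DIVIDER not in raw:
                continue
            search_part, replace_part = raw.split(DIVIDER, 1)
            search = search_part.split(SEARCH_START, 1)[1].strip("\n")
            replace = replace_part.strip("\n")
            if not search.strip():
                html = html + "\n" + replace              # the "no <body>, just append" fallback
            elif search in html:
                html = html.replace(search, replace, 1)   # replace only the first occurrence
        return html

    page = "<html><body>\n  <h1>Hi</h1>\n</body></html>"
    block = f"{SEARCH_START}\n  <h1>Hi</h1>\n{DIVIDER}\n  <h1>Hi</h1>\n  <audio controls></audio>\n{REPLACE_END}"
    print(apply_blocks(page, block))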
@@ -2888,7 +3064,7 @@ The HTML code above contains the complete original website structure with all im
 stop_generation = False
 
 
-def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None):
+def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None):
     if query is None:
         query = ''
     if _history is None:
@@ -2928,7 +3104,9 @@ def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_ima
     # On each generate, reap old global files and cleanup previous session files
     try:
         cleanup_session_videos(session_id)
+        cleanup_session_audio(session_id)
         reap_old_videos()
+        reap_old_audio()
     except Exception:
         pass
 
@@ -3028,9 +3206,9 @@ This will help me create a better design for you."""
 
        clean_code = remove_code_block(content)
 
-        # Apply image generation (text→image and/or image→image)
+        # Apply media generation (images/video/music)
        print("[Generate] Applying post-generation media to GLM-4.5 HTML output")
-        final_content = apply_generated_images_to_html(
+        final_content = apply_generated_media_to_html(
            content,
            query,
            enable_text_to_image=enable_image_generation,
@@ -3042,6 +3220,8 @@ This will help me create a better design for you."""
            session_id=session_id,
            enable_text_to_video=enable_text_to_video,
            text_to_video_prompt=text_to_video_prompt,
+            enable_text_to_music=enable_text_to_music,
+            text_to_music_prompt=text_to_music_prompt,
        )
 
        _history.append([query, final_content])
@@ -3195,9 +3375,9 @@ This will help me create a better design for you."""
        modified_content = apply_search_replace_changes(last_content, clean_code)
        clean_content = remove_code_block(modified_content)
 
-        # Apply image generation (text→image and/or image→image)
+        # Apply media generation (images/video/music)
        print("[Generate] Applying post-generation media to modified HTML content")
-        clean_content = apply_generated_images_to_html(
+        clean_content = apply_generated_media_to_html(
            clean_content,
            query,
            enable_text_to_image=enable_image_generation,
@@ -3209,6 +3389,8 @@ This will help me create a better design for you."""
            session_id=session_id,
            enable_text_to_video=enable_text_to_video,
            text_to_video_prompt=text_to_video_prompt,
+            enable_text_to_music=enable_text_to_music,
+            text_to_music_prompt=text_to_music_prompt,
        )
 
        yield {
@@ -3218,9 +3400,9 @@ This will help me create a better design for you."""
            history_output: history_to_chatbot_messages(_history),
        }
        else:
-            # Apply image generation (text→image and/or image→image)
+            # Apply media generation (images/video/music)
            print("[Generate] Applying post-generation media to new HTML content")
-            final_content = apply_generated_images_to_html(
+            final_content = apply_generated_media_to_html(
                clean_code,
                query,
                enable_text_to_image=enable_image_generation,
@@ -3233,6 +3415,8 @@ This will help me create a better design for you."""
                session_id=session_id,
                enable_text_to_video=enable_text_to_video,
                text_to_video_prompt=text_to_video_prompt,
+                enable_text_to_music=enable_text_to_music,
+                text_to_music_prompt=text_to_music_prompt,
            )
 
            preview_val = None
@@ -3620,9 +3804,9 @@ This will help me create a better design for you."""
        modified_content = apply_search_replace_changes(last_content, final_code)
        clean_content = remove_code_block(modified_content)
 
-        # Apply image generation (text→image and/or image→image)
+        # Apply media generation (images/video/music)
        print("[Generate] Applying post-generation media to follow-up HTML content")
-        clean_content = apply_generated_images_to_html(
+        clean_content = apply_generated_media_to_html(
            clean_content,
            query,
            enable_text_to_image=enable_image_generation,
@@ -3635,6 +3819,8 @@ This will help me create a better design for you."""
            text_to_image_prompt=text_to_image_prompt,
            enable_text_to_video=enable_text_to_video,
            text_to_video_prompt=text_to_video_prompt,
+            enable_text_to_music=enable_text_to_music,
+            text_to_music_prompt=text_to_music_prompt,
        )
 
        # Update history with the cleaned content
@@ -3649,9 +3835,9 @@ This will help me create a better design for you."""
        # Regular generation - use the content as is
        final_content = remove_code_block(content)
 
-        # Apply image generation (text→image and/or image→image)
+        # Apply media generation (images/video/music)
        print("[Generate] Applying post-generation media to final HTML content")
-        final_content = apply_generated_images_to_html(
+        final_content = apply_generated_media_to_html(
            final_content,
            query,
            enable_text_to_image=enable_image_generation,
@@ -3664,6 +3850,8 @@ This will help me create a better design for you."""
            session_id=session_id,
            enable_text_to_video=enable_text_to_video,
            text_to_video_prompt=text_to_video_prompt,
+            enable_text_to_music=enable_text_to_music,
+            text_to_music_prompt=text_to_music_prompt,
        )
 
        _history.append([query, final_content])
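All of the call sites above route through apply_generated_media_to_html, which handles at most one media type per call and returns early. A condensed, self-contained view of that early-return ordering as it reads from the hunks above (the real function also builds and applies the replacement blocks in each branch):

    def pick_media_branch(enable_i2v: bool, has_image: bool, enable_t2v: bool,
                          enable_t2m: bool, enable_i2i: bool, enable_t2i: bool) -> str:
        """Simplified mirror of the branch precedence: the first matching branch wins."""
        if enable_i2v and has_image:
            return "image-to-video"
        if enable_t2v:
            return "text-to-video"
        if enable_t2m:
            return "text-to-music"      # branch added by this merge
        if enable_i2i and has_image:
            return "image-to-image"
        if enable_t2i:
            return "text-to-image"
        return "no media"

    # Music wins over image generation when both toggles are set, but not over video:
    print(pick_media_branch(False, False, False, True, False, True))   # text-to-music
    print(pick_media_branch(False, False, True, True, False, False))   # text-to-video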
@@ -4858,6 +5046,20 @@ with gr.Blocks(
                    visible=False
                )
 
+                # Text-to-Music
+                text_to_music_toggle = gr.Checkbox(
+                    label="🎵 Generate Music (text → music)",
+                    value=False,
+                    visible=True,
+                    info="Compose short music from your prompt using ElevenLabs Music"
+                )
+                text_to_music_prompt = gr.Textbox(
+                    label="Text-to-Music Prompt",
+                    placeholder="Describe the music to generate (e.g., 'Epic orchestral theme with soaring strings and powerful brass')",
+                    lines=2,
+                    visible=False
+                )
+
                def on_image_to_image_toggle(toggled, beta_enabled):
                    # Only show in classic mode (beta disabled)
                    vis = bool(toggled) and not bool(beta_enabled)
@@ -4891,6 +5093,11 @@ with gr.Blocks(
                    inputs=[text_to_video_toggle, beta_toggle],
                    outputs=[text_to_video_prompt]
                )
+                text_to_music_toggle.change(
+                    on_text_to_image_toggle,
+                    inputs=[text_to_music_toggle, beta_toggle],
+                    outputs=[text_to_music_prompt]
+                )
                model_dropdown = gr.Dropdown(
                    choices=[model['name'] for model in AVAILABLE_MODELS],
                    value=DEFAULT_MODEL_NAME,
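The new toggle reuses on_text_to_image_toggle, so the prompt box is shown only in classic mode with the toggle on. A self-contained Gradio sketch of that show/hide pattern; the component labels are reused from the hunk, while the handler here is a local stand-in for on_text_to_image_toggle:

    import gradio as gr

    with gr.Blocks() as demo:
        beta_toggle = gr.Checkbox(label="Beta chat UI", value=False)
        text_to_music_toggle = gr.Checkbox(label="🎵 Generate Music (text → music)", value=False)
        text_to_music_prompt = gr.Textbox(label="Text-to-Music Prompt", lines=2, visible=False)

        def on_toggle(toggled, beta_enabled):
            # Prompt box only shows in classic mode (beta disabled) and when the toggle is on.
            return gr.update(visible=bool(toggled) and not bool(beta_enabled))

        text_to_music_toggle.change(
            on_toggle,
            inputs=[text_to_music_toggle, beta_toggle],
            outputs=[text_to_music_prompt],
        )

    if __name__ == "__main__":
        demo.launch()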
@@ -5141,7 +5348,7 @@ with gr.Blocks(
                show_progress="hidden",
            ).then(
                generation_code,
-                inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt],
+                inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, text_to_music_toggle, text_to_music_prompt],
                outputs=[code_output, history, sandbox, history_output]
            ).then(
                end_generation_ui,
@@ -5217,6 +5424,8 @@ with gr.Blocks(
            upd_t2v_prompt = gr.skip()
            upd_model_dropdown = gr.skip()
            upd_current_model = gr.skip()
+            upd_t2m_toggle = gr.skip()
+            upd_t2m_prompt = gr.skip()
 
            # Split by comma to separate main prompt and directives
            segments = [seg.strip() for seg in (text or "").split(",") if seg.strip()]
@@ -5282,6 +5491,13 @@ with gr.Blocks(
                    if p:
                        upd_t2v_prompt = gr.update(value=p)
 
+                # Text-to-music
+                if ("text to music" in seg_norm) or ("text-to-music" in seg_norm) or ("generate music" in seg_norm) or ("compose music" in seg_norm):
+                    upd_t2m_toggle = gr.update(value=True)
+                    p = after_colon(seg)
+                    if p:
+                        upd_t2m_prompt = gr.update(value=p)
+
                # URL (website redesign)
                url = _extract_url(seg)
                if url:
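The directive parser splits the chat message on commas and checks each normalized segment for music keywords; seg_norm and after_colon are defined elsewhere in app.py. A self-contained sketch that mirrors the apparent behavior for the music directive only:

    def parse_music_directive(text: str):
        """Return (enable_music, prompt_or_None) from a comma-separated chat message."""
        enable, prompt = False, None
        for seg in [s.strip() for s in (text or "").split(",") if s.strip()]:
            seg_norm = seg.lower()
            if any(k in seg_norm for k in ("text to music", "text-to-music", "generate music", "compose music")):
                enable = True
                if ":" in seg:                      # stand-in for after_colon(seg)
                    tail = seg.split(":", 1)[1].strip()
                    if tail:
                        prompt = tail
        return enable, prompt

    print(parse_music_directive("landing page for a synth brand, generate music: dreamy ambient pads"))
    # (True, 'dreamy ambient pads')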
@@ -5346,6 +5562,8 @@ with gr.Blocks(
                upd_t2v_prompt,
                upd_model_dropdown,
                upd_current_model,
+                upd_t2m_toggle,
+                upd_t2m_prompt,
            )
 
        # Wire chat submit -> apply settings -> run generation
@@ -5371,6 +5589,8 @@ with gr.Blocks(
                text_to_video_prompt,
                model_dropdown,
                current_model,
+                text_to_music_toggle,
+                text_to_music_prompt,
            ],
            queue=False,
        ).then(
@@ -5380,7 +5600,7 @@ with gr.Blocks(
            show_progress="hidden",
        ).then(
            generation_code,
-            inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt],
+            inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, text_to_music_toggle, text_to_music_prompt],
            outputs=[code_output, history, sandbox, history_output]
        ).then(
            end_generation_ui,
@@ -5397,12 +5617,13 @@ with gr.Blocks(
        )
 
        # Toggle between classic controls and beta chat UI
-        def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool):
+        def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool, t2m: bool):
            # Prompts only visible in classic mode and when their toggles are on
            t2i_vis = (not checked) and bool(t2i)
            i2i_vis = (not checked) and bool(i2i)
            i2v_vis = (not checked) and bool(i2v)
            t2v_vis = (not checked) and bool(t2v)
+            t2m_vis = (not checked) and bool(t2m)
 
            return (
                # Chat UI group
@@ -5426,6 +5647,8 @@ with gr.Blocks(
                gr.update(visible=i2v_vis), # image_to_video_prompt
                gr.update(visible=not checked), # text_to_video_toggle
                gr.update(visible=t2v_vis), # text_to_video_prompt
+                gr.update(visible=not checked), # text_to_music_toggle
+                gr.update(visible=t2m_vis), # text_to_music_prompt
                gr.update(visible=not checked), # model_dropdown
                gr.update(visible=not checked), # quick_start_md
                gr.update(visible=not checked), # quick_examples_col
@@ -5433,7 +5656,7 @@ with gr.Blocks(
 
        beta_toggle.change(
            toggle_beta,
-            inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle],
+            inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle, text_to_music_toggle],
            outputs=[
                sidebar_chatbot,
                sidebar_msg,
@@ -5454,6 +5677,8 @@ with gr.Blocks(
                image_to_video_prompt,
                text_to_video_toggle,
                text_to_video_prompt,
+                text_to_music_toggle,
+                text_to_music_prompt,
                model_dropdown,
                quick_start_md,
                quick_examples_col,
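One wiring detail worth noting: toggle_beta's return tuple and the beta_toggle.change outputs list are matched purely by position, so the two new gr.update entries must sit at the same indices as text_to_music_toggle and text_to_music_prompt in the outputs list. A small self-contained illustration of that positional contract, reduced to just the two music components:

    import gradio as gr

    with gr.Blocks() as demo:
        beta_toggle = gr.Checkbox(label="Beta chat UI")
        text_to_music_toggle = gr.Checkbox(label="🎵 Generate Music (text → music)", value=False)
        text_to_music_prompt = gr.Textbox(label="Text-to-Music Prompt", visible=False)

        def toggle_beta(checked, t2m):
            # Tuple order must match the outputs list below, element for element.
            return (
                gr.update(visible=not checked),                     # -> text_to_music_toggle
                gr.update(visible=(not checked) and bool(t2m)),     # -> text_to_music_prompt
            )

        beta_toggle.change(
            toggle_beta,
            inputs=[beta_toggle, text_to_music_toggle],
            outputs=[text_to_music_toggle, text_to_music_prompt],
        )

    if __name__ == "__main__":
        demo.launch()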
 