# Pastor_AI / app.py
# anabury's picture
# Create app.py
# 7d798c1 verified
import torch
import numpy as np
import subprocess
from yt_dlp import YoutubeDL
from sentence_transformers import SentenceTransformer, util
from TTS.api import TTS
import moviepy.editor as mp
import gradio as gr
import cv2
# -------------------------------
# 1. Configuration
# -------------------------------
# Source whose frame is used as the visual for every demo answer.
youtube_url = "https://www.youtube.com/@TheGodHouseCentre"

# Speech synthesizer; runs on the GPU only when CUDA is available.
tts = TTS(
    model_name="tts_models/en/vctk/vits",
    progress_bar=False,
    gpu=torch.cuda.is_available(),
)

# Sentence encoder used to compare user queries with the known questions.
text_model = SentenceTransformer("all-MiniLM-L6-v2")

# Demo knowledge base: one canned question/answer pair plus its video source.
dataset = [
    {
        "question": "Hello",
        "answer": "Hi there! How can I help you?",
        "video_url": youtube_url,
    },
]

# Encode every known question once at import time so lookups only need to
# embed the incoming query.
questions = [entry["question"] for entry in dataset]
question_embeddings = text_model.encode(questions, convert_to_tensor=True)
# -------------------------------
# 2. Retrieve Closest Answer
# -------------------------------
def retrieve_answer(query):
    """Return the dataset row whose question is most similar to ``query``.

    The query is embedded with the module-level ``text_model`` and compared
    by cosine similarity against the precomputed ``question_embeddings``;
    the row at the highest-scoring position is returned as-is.
    """
    query_vector = text_model.encode(query, convert_to_tensor=True)
    # cos_sim yields a (1, n_questions) matrix; row 0 holds this query's scores.
    similarities = util.cos_sim(query_vector, question_embeddings)[0]
    winner = int(similarities.argmax())
    return dataset[winner]
# -------------------------------
# 3. TTS Audio Generation
# -------------------------------
def generate_audio(text):
    """Synthesize ``text`` to a WAV file and return that file's path.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Path of the written WAV file (``/tmp/temp_audio.wav``; overwritten
        on every call).
    """
    audio_path = "/tmp/temp_audio.wav"
    synth_kwargs = {"text": text, "file_path": audio_path}

    # Coqui TTS refuses to synthesize with a multi-speaker model unless a
    # speaker is named, and the VCTK VITS checkpoint configured for this app
    # is multi-speaker. Pick the first available voice in that case.
    speakers = getattr(tts, "speakers", None)
    if getattr(tts, "is_multi_speaker", False) and speakers:
        synth_kwargs["speaker"] = speakers[0]

    tts.tts_to_file(**synth_kwargs)
    return audio_path
# -------------------------------
# 4. YouTube Streaming (single frame demo)
# -------------------------------
def stream_youtube_frame(url, width=640, height=360, timeout=60):
    """Grab a single RGB frame from the video behind ``url``.

    Args:
        url: Address of a video, or of a playlist/channel page, in which
            case the first resolvable entry is used.
        width: Width in pixels the frame is scaled to.
        height: Height in pixels the frame is scaled to.
        timeout: Seconds to wait for ffmpeg before giving up.

    Returns:
        A read-only ``numpy.uint8`` array of shape ``(height, width, 3)``
        in RGB channel order.

    Raises:
        RuntimeError: If no direct stream URL can be resolved, ffmpeg times
            out, or ffmpeg does not deliver a complete frame.
    """
    ydl_opts = {
        'format': 'best',
        'quiet': True,
        'noplaylist': True,
        # Only resolve the first item when the URL is a playlist/channel.
        'playlist_items': '1',
    }
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    # Playlist-like results carry their videos under 'entries' (possibly
    # nested); walk down to the first real entry, which holds the media URL.
    while info and info.get('entries') is not None:
        info = next((entry for entry in info['entries'] if entry), None)
    if not info or 'url' not in info:
        raise RuntimeError(f"No playable video stream found for {url!r}")
    stream_url = info['url']

    frame_size = width * height * 3
    command = [
        'ffmpeg', '-nostdin', '-loglevel', 'error',
        '-i', stream_url,
        # Stop after one frame so ffmpeg exits instead of decoding the whole
        # stream into a pipe nobody reads.
        '-frames:v', '1',
        # Force the output geometry to match the buffer layout used below.
        '-vf', f'scale={width}:{height}',
        '-f', 'image2pipe', '-pix_fmt', 'rgb24', '-vcodec', 'rawvideo', '-',
    ]
    try:
        # run() waits for the process and kills it on timeout, so no ffmpeg
        # child is left behind.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as err:
        raise RuntimeError(
            f"ffmpeg timed out after {timeout}s while reading {url!r}"
        ) from err

    raw_frame = result.stdout[:frame_size]
    if len(raw_frame) != frame_size:
        detail = result.stderr.decode(errors='replace').strip()
        raise RuntimeError(
            f"ffmpeg returned {len(raw_frame)} of {frame_size} expected bytes"
            + (f": {detail}" if detail else "")
        )
    return np.frombuffer(raw_frame, dtype=np.uint8).reshape((height, width, 3))
# -------------------------------
# 5. Video Generation with Audio
# -------------------------------
def generate_video_with_audio(source_frame, driving_audio):
    """Render a still-image video whose soundtrack is ``driving_audio``.

    Args:
        source_frame: Image array in RGB channel order (it is converted to
            BGR before being written with OpenCV).
        driving_audio: Path to the audio file used as the soundtrack.

    Returns:
        Path of the written MP4 file (``/tmp/output_video.mp4``; overwritten
        on every call).

    Raises:
        RuntimeError: If the intermediate frame image cannot be written.
    """
    tmp_frame = "/tmp/temp_frame.jpg"
    tmp_output = "/tmp/output_video.mp4"

    # imwrite reports failure through its return value rather than raising.
    if not cv2.imwrite(tmp_frame, cv2.cvtColor(source_frame, cv2.COLOR_RGB2BGR)):
        raise RuntimeError(f"Could not write frame image to {tmp_frame}")

    audio = mp.AudioFileClip(driving_audio)
    try:
        # Match the video length to the narration instead of a fixed 5 s, so
        # the picture lasts exactly as long as the speech.
        clip = mp.ImageClip(tmp_frame).set_duration(audio.duration).set_audio(audio)
        try:
            clip.write_videofile(tmp_output, fps=24, verbose=False, logger=None)
        finally:
            clip.close()
    finally:
        # Release the audio reader even when rendering fails.
        audio.close()
    return tmp_output
# -------------------------------
# 6. Main Pipeline
# -------------------------------
def answer_question(query):
    """Run the full pipeline for ``query`` and return the resulting video path.

    Steps: look up the closest dataset row, speak its answer, grab a frame
    from the row's video URL, then combine frame and speech into a video.
    """
    match = retrieve_answer(query)
    speech_path = generate_audio(match["answer"])
    still = stream_youtube_frame(match["video_url"])
    return generate_video_with_audio(still, speech_path)
# -------------------------------
# 7. Gradio Interface
# -------------------------------
# One text box in, one video player out; every submission runs answer_question.
iface = gr.Interface(
    answer_question,
    gr.Textbox(label="Ask a question"),
    gr.Video(label="Generated Video"),
    description="Ask a question and get a video+audio response generated from YouTube frames with TTS.",
    title="AI Video/Audio Generator",
)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    iface.launch(server_port=7860, server_name="0.0.0.0")