Solshine committed on
Commit
698f92d
·
verified ·
1 Parent(s): a9b6ab8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdfminer.high_level
3
+ import transformers
4
+ from transformers import pipeline
5
+ from pydub import AudioSegment
6
+ import tempfile
7
+
8
+ # Error handling function
9
def handle_error(message):
    """Report a failure and wrap it in the error payload used by this file.

    Args:
        message: Human-readable description of what went wrong.

    Returns:
        A dict with ``"audio"`` set to ``None`` and ``"error"`` set to
        ``message``.
    """
    payload = dict(audio=None, error=message)
    # The message is echoed to stdout before the payload is handed back.
    print(f"Error: {message}")
    return payload
12
+
13
+ # Function to extract text from PDF
14
def extract_text(pdf_path):
    """Read the PDF at ``pdf_path`` and return its text content.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        The string produced by ``pdfminer.high_level.extract_text`` on
        success. If opening or parsing raises, the dict returned by
        ``handle_error`` is returned instead, so callers must check the
        type of the result.
    """
    try:
        with open(pdf_path, "rb") as pdf_stream:
            return pdfminer.high_level.extract_text(pdf_stream)
    except Exception as exc:
        failure = f"Failed to extract text: {exc}"
        return handle_error(failure)
21
+
22
+ # Function to split text into chunks
23
def chunk_text(text, chunk_size=250):
    """Split ``text`` into consecutive chunks of at most ``chunk_size`` chars.

    Chunks are cut at whitespace where possible so that words are not split
    in the middle (a word cut in half is synthesized as two fragments).
    Whitespace is kept inside the chunks, so ``"".join(chunks) == text``
    always holds. A run of non-whitespace longer than ``chunk_size`` is
    still hard-split, exactly like plain fixed-width slicing.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each chunk; must be positive.

    Returns:
        A list of non-empty strings; an empty list for empty ``text``.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    chunks = []
    total = len(text)
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        # Only back off when the window would end in the middle of a word.
        if end < total and not text[end].isspace():
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            # Ignore whitespace sitting at the very start of the window:
            # cutting there would emit a whitespace-only chunk.
            if cut - 1 > start:
                end = cut
        chunks.append(text[start:end])
        start = end
    return chunks
29
+
30
+ # Function to perform text-to-speech and stitch audio
31
def convert_to_speech(text_chunks, language="en", speaker="0", model_name="tts-es-es1"):
    """Synthesize each text chunk and return the audio as pydub segments.

    Args:
        text_chunks: Iterable of strings to synthesize, in playback order.
        language: Requested language code. Currently not forwarded to the
            pipeline (see the review note in the body).
        speaker: Requested speaker id. Currently not forwarded either.
        model_name: Model identifier passed to ``transformers.pipeline``.
            The default is the placeholder value this file shipped with and
            has to be replaced by a real text-to-speech checkpoint.

    Returns:
        A list of ``AudioSegment`` objects, one per non-blank chunk. If
        anything raises, the dict returned by ``handle_error`` is returned
        instead, so callers must check the type of the result.
    """
    # Local import keeps the file's import block untouched; numpy is only
    # needed here to turn the pipeline's waveform into PCM bytes.
    import numpy as np

    try:
        tts_pipeline = pipeline("text-to-speech", model=model_name)
        audio_segments = []
        for chunk in text_chunks:
            # Whitespace-only chunks carry nothing to synthesize.
            if not chunk.strip():
                continue
            # NOTE(review): the text-to-speech pipeline takes the text as its
            # positional input and, per its documentation, does not accept
            # `lang=` / `speaker=` keywords, so `language` and `speaker` are
            # not forwarded. Language and voice are normally selected through
            # the checkpoint - confirm for the model you choose.
            result = tts_pipeline(chunk)
            # The pipeline returns a waveform plus its sampling rate, not
            # MP3 data. Assumes float samples in [-1, 1] and a single
            # channel - TODO confirm for the chosen model.
            samples = np.asarray(result["audio"], dtype=np.float32).reshape(-1)
            pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype(np.int16)
            audio_segments.append(
                AudioSegment(
                    data=pcm.tobytes(),
                    sample_width=2,  # bytes per sample for int16
                    frame_rate=int(result["sampling_rate"]),
                    channels=1,
                )
            )
        return audio_segments
    except Exception as e:
        return handle_error(f"Text-to-speech failed: {e}")
42
+
43
+ # Function to save and return audio file
44
def save_audio(audio_segments, filename=None, format="mp3"):
    """Concatenate audio segments and export them to a temporary file.

    Args:
        audio_segments: Non-empty sequence of segment objects that support
            ``+`` for concatenation and ``export(path, format=...)``.
        filename: Unused; kept (now optional) so existing positional calls
            keep working and calls that omit it no longer fail.
        format: Container/codec name passed to ``export`` and used as the
            temporary file's extension.

    Returns:
        ``{"audio_path": <path>}`` on success, otherwise the dict returned
        by ``handle_error``.
    """
    try:
        if not audio_segments:
            return handle_error("Failed to save audio: no audio segments to combine")

        combined_audio = audio_segments[0]
        for segment in audio_segments[1:]:
            combined_audio += segment

        # delete=False keeps the reserved file on disk after the handle is
        # closed, so the path stays ours until export writes to it. Taking
        # `.name` from a discarded NamedTemporaryFile would delete the file
        # and leave only a reusable name.
        with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as tmp:
            audio_path = tmp.name
        combined_audio.export(audio_path, format=format)
        return {"audio_path": audio_path}
    except Exception as e:
        return handle_error(f"Failed to save audio: {e}")
54
+
55
+ # Gradio interface definition
56
def pdf_to_speech(pdf_file):
    """Turn an uploaded PDF into a single spoken-audio file.

    Pipeline: extract text -> chunk -> synthesize -> stitch and save.

    Args:
        pdf_file: The uploaded file as delivered by Gradio: either a path
            string or an object exposing the path as ``.name``.

    Returns:
        A ``(audio_path, audio_path)`` tuple: the same file is handed to
        both output components of the interface (player and download).

    Raises:
        gr.Error: If no file was uploaded, the PDF has no extractable text,
            or any pipeline stage reports an error.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")

    # Accept both a plain path and a tempfile-like wrapper with `.name`.
    if isinstance(pdf_file, str) or hasattr(pdf_file, "__fspath__"):
        pdf_path = pdf_file
    else:
        pdf_path = getattr(pdf_file, "name", pdf_file)

    # Extract text from PDF. The helpers return a dict only on failure
    # (a str / list on success), so the type is what signals an error.
    text = extract_text(pdf_path)
    if isinstance(text, dict):
        raise gr.Error(text["error"])
    if not text or not text.strip():
        raise gr.Error("No extractable text was found in the PDF.")

    # Chunk text and convert to speech
    text_chunks = chunk_text(text)
    audio_result = convert_to_speech(text_chunks)
    if isinstance(audio_result, dict):
        raise gr.Error(audio_result["error"])

    # Save and return audio
    audio_data = save_audio(audio_result, "pdf_speech")
    if audio_data.get("error"):
        raise gr.Error(audio_data["error"])

    audio_path = audio_data["audio_path"]
    return audio_path, audio_path
71
+
72
+ # Create Gradio interface
73
# One PDF upload in; the generated audio goes out twice, once as an inline
# player and once as a downloadable file.
interface = gr.Interface(
    fn=pdf_to_speech,
    # `gr.File` is the upload component; `file_types` restricts the picker
    # to PDFs. NOTE(review): type="filepath" (handler receives a path
    # string) is the Gradio 4+ spelling - confirm against the pinned version.
    inputs=gr.File(label="PDF", file_types=[".pdf"], type="filepath"),
    outputs=[gr.Audio(label="Play"), gr.File(label="Download")],
)

# Launch Gradio app
interface.launch()