# News-Summarizer / app.py
# vicky4s4s's picture
# Update app.py
# 37b3c2b verified
import json
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline,set_seed
from utils import extract_data
from agents import generate_response
from gtts import gTTS
import gradio as gr
import os
import asyncio
from googletrans import Translator
import nltk
# Fetch the NLTK "punkt" resources at import time.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

# Fix the seed through the transformers helper.
set_seed(42)
def eng_to_hindi(text):
    """Translate English text to Hindi with googletrans.

    Args:
        text: English text to translate.

    Returns:
        The translated text, or ``""`` when ``text`` is empty or contains
        only whitespace (no translation request is made in that case).
    """
    # Nothing to translate: skip the translator round trip entirely.
    if not text or (isinstance(text, str) and not text.strip()):
        return ""

    translator = Translator()
    # ``translate`` is awaited as a coroutine. asyncio.run() creates a fresh
    # event loop for the call and always closes it afterwards, so repeated
    # calls no longer leave an unclosed loop installed on the calling thread.
    translated = asyncio.run(translator.translate(text, src="en", dest="hi"))
    return translated.text
def text_to_voice(text, complete_text):
    """Write the Hindi speech file and the plain-text report file.

    Args:
        text: English text; it is translated to Hindi and then spoken.
        complete_text: Content written verbatim to the text report.

    Returns:
        Tuple ``(audio_path, report_path)``, i.e. ``"output.mp3"`` and
        ``"output.txt"`` relative to the current working directory.
    """
    audio_path, report_path = "output.mp3", "output.txt"

    # Translate first, then synthesize the Hindi text and save it as MP3.
    speech = gTTS(text=eng_to_hindi(text), lang="hi")
    speech.save(audio_path)

    # Persist the full report next to the audio file.
    with open(report_path, "w", encoding="utf-8") as report_file:
        report_file.write(complete_text)

    return audio_path, report_path
_SENTIMENT_MODEL_ID = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
_sentiment_pipeline_cache = None


def _get_sentiment_pipeline():
    """Return the shared sentiment pipeline, building it on first use."""
    global _sentiment_pipeline_cache
    if _sentiment_pipeline_cache is None:
        # Loading the model is expensive; do it once instead of per request.
        _sentiment_pipeline_cache = pipeline(
            "sentiment-analysis",
            model=_SENTIMENT_MODEL_ID,
            tokenizer=_SENTIMENT_MODEL_ID,
        )
    return _sentiment_pipeline_cache


def sentiment_analysis(input_text):
    """Build the sentiment report for the news found for ``input_text``.

    Each article summary returned by ``extract_data`` is classified with the
    sentiment pipeline; the summaries and the positive/negative counts are
    then passed to ``generate_response``, whose reply is parsed as JSON.

    Args:
        input_text: Query passed to ``extract_data``; it is also stored under
            the ``"Company"`` key of the result.

    Returns:
        A dict with the keys ``"Company"``, ``"Articles"``,
        ``"Comparative Sentiment Score"``, ``"Coverage Differences"``,
        ``"Topic Overlap"``, ``"Final Sentiment Analysis"`` and
        ``"Overall sentiment summarizing report"``.

    Raises:
        json.JSONDecodeError: If ``generate_response`` does not return valid
            JSON text.
    """
    sentiment_pipeline = _get_sentiment_pipeline()
    data = extract_data(input_text)

    sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    summary_list = []
    all_articles = []

    # ``data`` is iterated as a sequence of sequences of article dicts that
    # provide the keys 'summary', 'title' and 'topics'.
    for sublist in data:
        for item in sublist:
            summary_text = item['summary']
            summary_list.append(summary_text)

            # truncation=True keeps over-long summaries within the model's
            # maximum input length instead of failing inside the model.
            results = sentiment_pipeline(summary_text, truncation=True)
            sentiment_label = results[0]['label'].upper()
            # Count via .get so an unexpected label cannot raise KeyError.
            sentiment_counts[sentiment_label] = (
                sentiment_counts.get(sentiment_label, 0) + 1
            )

            all_articles.append({
                "Title": item['title'],
                "Summary": summary_text,
                "Sentiment": sentiment_label,
                "Topics": item['topics'],
            })

    # Every summary is followed by " \n", exactly as before.
    clean_text = "".join(summary + " \n" for summary in summary_list)

    response = generate_response(
        summary_list,
        sentiment_counts["POSITIVE"],
        sentiment_counts["NEGATIVE"],
        clean_text,
    )
    response_dict = json.loads(response)

    coverage_differences = response_dict.get("Coverage Differences", [])
    topic_overlap = response_dict.get("Topic Overlap", [])
    final_sentiment = response_dict.get("Final Sentiment Analysis", [])
    # NOTE(review): the spelling "Ssummarizing" presumably mirrors the key that
    # generate_response emits; confirm against it before changing the key.
    summarizing_report = response_dict.get("Overall_Sentiment_Ssummarizing_Report", [])

    return {
        "Company": input_text,
        "Articles": all_articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": {
                "Positive": sentiment_counts["POSITIVE"],
                "Negative": sentiment_counts["NEGATIVE"],
            }
        },
        "Coverage Differences": coverage_differences,
        "Topic Overlap": topic_overlap,
        "Final Sentiment Analysis": final_sentiment,
        "Overall sentiment summarizing report": summarizing_report,
    }
def main(input_text):
    """Run the analysis for ``input_text`` and produce the output files.

    Args:
        input_text: Text entered by the user; forwarded to
            ``sentiment_analysis``.

    Returns:
        The ``(audio_path, text_path)`` pair returned by ``text_to_voice``.
    """
    report = sentiment_analysis(input_text)
    # The whole report is serialized for the text file ...
    report_json = json.dumps(report, indent=4)
    # ... while only the overall summary is turned into speech.
    spoken_summary = report["Overall sentiment summarizing report"]
    return text_to_voice(spoken_summary, report_json)
# Gradio UI: one text box in; an audio component and a file component out.
_input_box = gr.Textbox(label="Enter the input")
_output_widgets = [
    gr.Audio(label="Hindi Audio Output"),
    gr.File(label="complete summarization report"),
]

interface = gr.Interface(
    fn=main,
    inputs=_input_box,
    outputs=_output_widgets,
    title="News Summarizer",
    description="Enter text in English, and get a pure Hindi speech output along with a downloadable text file.",
)

# Launch the app as soon as the module is executed.
interface.launch()