"""Gradio app that analyses news sentiment and speaks the summary in Hindi.

For a given input string the app collects article summaries via
``utils.extract_data``, labels each with a sentiment classifier, asks
``agents.generate_response`` for a comparative report, and returns a Hindi
audio rendering of the overall report plus the full report as a text file.
"""
import asyncio
import json
import os
from functools import lru_cache

import gradio as gr
import nltk
import spacy
from googletrans import Translator
from gtts import gTTS
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline, set_seed

from utils import extract_data
from agents import generate_response

# NOTE(review): spacy, TfidfVectorizer, cosine_similarity and os are not
# referenced in this file; they are kept because other code may rely on them.

nltk.download("punkt")
nltk.download('punkt_tab')
set_seed(42)

SENTIMENT_MODEL_ID = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"


def eng_to_hindi(text):
    """Translate English ``text`` to Hindi and return the translated string.

    ``Translator.translate`` is a coroutine, so it is driven to completion
    with ``asyncio.run``, which creates a fresh event loop and closes it
    afterwards (the loop is no longer leaked on every call). The translator
    is used as an async context manager so its HTTP client is closed too.

    Raises:
        RuntimeError: if called from a thread that already runs an event loop.
    """

    async def _translate():
        async with Translator() as translator:
            result = await translator.translate(text, src="en", dest="hi")
        return result.text

    return asyncio.run(_translate())


def text_to_voice(text, complete_text):
    """Write Hindi speech for ``text`` and save ``complete_text`` to disk.

    Args:
        text: English text that is translated to Hindi and spoken.
        complete_text: Text written verbatim to the report file.

    Returns:
        A ``(audio_path, text_path)`` tuple: ``"output.mp3"`` and
        ``"output.txt"``, both relative to the current working directory.
    """
    output_audio = r"output.mp3"
    output_text = r"output.txt"

    hindi_text = eng_to_hindi(text)
    tts = gTTS(text=hindi_text, lang="hi")
    tts.save(output_audio)

    with open(output_text, "w", encoding="utf-8") as f:
        f.write(complete_text)

    return output_audio, output_text


@lru_cache(maxsize=1)
def _get_sentiment_pipeline():
    """Build the sentiment pipeline once and reuse it across requests."""
    return pipeline(
        "sentiment-analysis",
        model=SENTIMENT_MODEL_ID,
        tokenizer=SENTIMENT_MODEL_ID,
    )


def sentiment_analysis(input_text):
    """Build the comparative sentiment report for ``input_text``.

    Every item yielded by ``extract_data(input_text)`` (an iterable of
    sub-lists of dicts with ``'summary'``, ``'title'`` and ``'topics'`` keys)
    is classified, the label counts are tallied, and the summaries are handed
    to ``generate_response``, whose return value is parsed as JSON.

    Args:
        input_text: The user's input; stored under ``"Company"`` in the result.

    Returns:
        A dict with the per-article results, the positive/negative counts and
        the sections read from the generated response.

    Raises:
        json.JSONDecodeError: if ``generate_response`` does not return JSON.
    """
    sentiment_pipeline = _get_sentiment_pipeline()
    data = extract_data(input_text)

    sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    summary_list = []
    all_articles = []

    for sublist in data:
        for item in sublist:
            summary_text = item['summary']
            summary_list.append(summary_text)

            # truncation=True keeps over-long summaries from exceeding the
            # model's maximum input length and aborting the whole request.
            results = sentiment_pipeline(summary_text, truncation=True)
            sentiment_label = results[0]['label'].upper()
            # .get() tolerates a label outside the three pre-seeded ones.
            sentiment_counts[sentiment_label] = (
                sentiment_counts.get(sentiment_label, 0) + 1
            )

            all_articles.append({
                "Title": item['title'],
                "Summary": summary_text,
                "Sentiment": sentiment_label,
                "Topics": item['topics'],
            })

    clean_text = "".join(summary + " \n" for summary in summary_list)

    response = generate_response(
        summary_list,
        sentiment_counts["POSITIVE"],
        sentiment_counts["NEGATIVE"],
        clean_text,
    )
    response_dict = json.loads(response)

    coverage_differences = response_dict.get("Coverage Differences", [])
    topic_overlap = response_dict.get("Topic Overlap", [])
    final_sentiment = response_dict.get("Final Sentiment Analysis", [])
    # NOTE(review): the key spelling ("Ssummarizing") presumably has to match
    # what generate_response emits -- confirm before "fixing" it.
    summarizing_report = response_dict.get(
        "Overall_Sentiment_Ssummarizing_Report", []
    )

    final_output = {
        "Company": input_text,
        "Articles": all_articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": {
                "Positive": sentiment_counts["POSITIVE"],
                "Negative": sentiment_counts["NEGATIVE"],
            }
        },
        "Coverage Differences": coverage_differences,
        "Topic Overlap": topic_overlap,
        "Final Sentiment Analysis": final_sentiment,
        "Overall sentiment summarizing report": summarizing_report,
    }
    return final_output


def main(input_text):
    """Gradio callback: run the analysis and return the output file paths.

    Args:
        input_text: Text entered in the UI textbox.

    Returns:
        ``(audio_path, text_path)`` as produced by ``text_to_voice``.

    Raises:
        gr.Error: if the analysis produced no overall report to speak.
    """
    final_answer = sentiment_analysis(input_text)
    clean_text = json.dumps(final_answer, indent=4)

    # The report falls back to [] when the response lacks the key, and may
    # not be a plain string; normalise it before translation and speech.
    report = final_answer["Overall sentiment summarizing report"]
    if isinstance(report, (list, tuple)):
        report = " ".join(str(part) for part in report)
    elif not isinstance(report, str):
        report = str(report)
    if not report.strip():
        raise gr.Error("No overall sentiment report was generated for this input.")

    output_audio, output_text = text_to_voice(report, clean_text)
    return output_audio, output_text


interface = gr.Interface(
    fn=main,
    inputs=gr.Textbox(label="Enter the input"),
    outputs=[
        gr.Audio(label="Hindi Audio Output"),
        gr.File(label="complete summarization report"),
    ],
    title="News Summarizer",
    description="Enter text in English, and get a pure Hindi speech output along with a downloadable text file.",
)

# Launched at import time, as before, so existing ways of starting the app
# keep working.
interface.launch()