Spaces:

vicky4s4s
/

News-Summarizer

Sleeping

App Files Files Community

News-Summarizer / app.py

vicky4s4s

Update app.py

37b3c2b verified 5 months ago

raw

history blame contribute delete

3.79 kB

	import json
	import spacy
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	from transformers import pipeline,set_seed
	from utils import extract_data
	from agents import generate_response
	from gtts import gTTS
	import gradio as gr
	import os
	import asyncio
	from googletrans import Translator
	import nltk
	# Download the NLTK "punkt" tokenizer resources at import time.
	# NOTE(review): nothing in this file uses them directly; presumably the
	# helpers imported from utils/agents need them - confirm.
	for _nltk_resource in ("punkt", "punkt_tab"):
	    nltk.download(_nltk_resource)

	# Seed the transformers random number generators with a fixed value.
	set_seed(42)

	def eng_to_hindi(text):
	    """Translate English ``text`` to Hindi and return the translated string.

	    The awaitable returned by ``Translator.translate`` is driven to
	    completion on a private event loop so this function can be called from
	    synchronous code.

	    Args:
	        text: English text to translate.

	    Returns:
	        The ``.text`` attribute of the translation result.
	    """
	    translator = Translator()
	    loop = asyncio.new_event_loop()
	    try:
	        asyncio.set_event_loop(loop)
	        translated_text = loop.run_until_complete(
	            translator.translate(text, src="en", dest="hi")
	        )
	    finally:
	        # Close the loop on every path: the original leaked one open event
	        # loop per call. Clear the thread's current loop as well so later
	        # code never picks up a closed loop.
	        asyncio.set_event_loop(None)
	        loop.close()
	    return translated_text.text

	def text_to_voice(text, complete_text):
	    """Write the Hindi speech for ``text`` and the full report to disk.

	    ``text`` is translated to Hindi with ``eng_to_hindi`` and synthesized
	    with gTTS into ``output.mp3``; ``complete_text`` is written unchanged to
	    ``output.txt`` (UTF-8). Both files are created in the current working
	    directory and are overwritten on every call.

	    Args:
	        text: English text to translate and speak.
	        complete_text: Text written to the report file.

	    Returns:
	        Tuple ``(audio_path, report_path)`` of the two files written.
	    """
	    audio_path = "output.mp3"
	    report_path = "output.txt"
	    speech = gTTS(text=eng_to_hindi(text), lang="hi")
	    speech.save(audio_path)
	    with open(report_path, "w", encoding="utf-8") as report_file:
	        report_file.write(complete_text)
	    return audio_path, report_path

	def sentiment_analysis(input_text):
	    """Build a sentiment report for the articles found for ``input_text``.

	    Every article summary returned by ``extract_data`` is classified with a
	    DistilBERT SST-2 sentiment pipeline and the labels are tallied. The
	    summaries and the positive/negative counts are then passed to
	    ``generate_response``, whose JSON reply supplies the comparative
	    sections of the report.

	    Args:
	        input_text: Query handed to ``extract_data`` and echoed back under
	            the "Company" key (presumably a company name - confirm).

	    Returns:
	        dict with the keys "Company", "Articles", "Comparative Sentiment
	        Score", "Coverage Differences", "Topic Overlap", "Final Sentiment
	        Analysis" and "Overall sentiment summarizing report". Sections
	        missing from the ``generate_response`` reply default to ``[]``.

	    Raises:
	        json.JSONDecodeError: if ``generate_response`` returns a string
	            that is not valid JSON.
	    """
	    model_id = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
	    # NOTE(review): the pipeline is rebuilt on every call; consider caching
	    # it once concurrent use of a shared pipeline has been verified.
	    sentiment_pipeline = pipeline(
	        "sentiment-analysis",
	        model=model_id,
	        tokenizer=model_id,
	    )
	    data = extract_data(input_text)
	    sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
	    summary_list = []
	    all_articles = []
	    # extract_data is consumed as an iterable of lists of article dicts that
	    # carry 'title', 'summary' and 'topics' keys.
	    for sublist in data:
	        for item in sublist:
	            summary_text = item["summary"]
	            summary_list.append(summary_text)
	            results = sentiment_pipeline(summary_text)
	            sentiment_label = results[0]["label"].upper()
	            # Count labels that were not pre-seeded instead of raising
	            # KeyError if the model ever emits an unexpected label.
	            sentiment_counts[sentiment_label] = (
	                sentiment_counts.get(sentiment_label, 0) + 1
	            )
	            all_articles.append({
	                "Title": item["title"],
	                "Summary": summary_text,
	                "Sentiment": sentiment_label,
	                "Topics": item["topics"],
	            })
	    # One join instead of repeated "+=" concatenation in a loop.
	    clean_text = "".join(summary + " \n" for summary in summary_list)
	    response = generate_response(
	        summary_list,
	        sentiment_counts["POSITIVE"],
	        sentiment_counts["NEGATIVE"],
	        clean_text,
	    )
	    response_dict = json.loads(response)
	    coverage_differences = response_dict.get("Coverage Differences", [])
	    topic_overlap = response_dict.get("Topic Overlap", [])
	    final_sentiment = response_dict.get("Final Sentiment Analysis", [])
	    # NOTE(review): this key must match what generate_response emits; the
	    # "Ssummarizing" spelling is kept as-is - confirm it is intentional.
	    summarizing_report = response_dict.get(
	        "Overall_Sentiment_Ssummarizing_Report", []
	    )
	    return {
	        "Company": input_text,
	        "Articles": all_articles,
	        "Comparative Sentiment Score": {
	            "Sentiment Distribution": {
	                "Positive": sentiment_counts["POSITIVE"],
	                "Negative": sentiment_counts["NEGATIVE"],
	            }
	        },
	        "Coverage Differences": coverage_differences,
	        "Topic Overlap": topic_overlap,
	        "Final Sentiment Analysis": final_sentiment,
	        "Overall sentiment summarizing report": summarizing_report,
	    }


	def main(input_text):
	    """Gradio handler: analyze ``input_text`` and produce audio plus report.

	    Runs ``sentiment_analysis``, serializes the whole result as indented
	    JSON for the downloadable report, and hands the overall summarizing
	    report to ``text_to_voice`` for the Hindi audio.

	    Args:
	        input_text: Text entered in the UI, forwarded to
	            ``sentiment_analysis``.

	    Returns:
	        Tuple ``(audio_path, report_path)`` as returned by
	        ``text_to_voice``.
	    """
	    final_answer = sentiment_analysis(input_text)
	    # ensure_ascii=False keeps non-ASCII characters readable in the report
	    # (written as UTF-8) instead of turning them into \uXXXX escapes.
	    clean_text = json.dumps(final_answer, indent=4, ensure_ascii=False)
	    report = final_answer["Overall sentiment summarizing report"]
	    # The report defaults to a list when the key is missing and may arrive
	    # as a list of sentences; translation and speech need a single string.
	    if isinstance(report, (list, tuple)):
	        report = " ".join(str(part) for part in report)
	    return text_to_voice(report, clean_text)

	# Gradio UI: one text box in; the Hindi audio and the report file out.
	_query_box = gr.Textbox(label="Enter the input")
	_result_widgets = [
	    gr.Audio(label="Hindi Audio Output"),
	    gr.File(label="complete summarization report"),
	]
	interface = gr.Interface(
	    fn=main,
	    inputs=_query_box,
	    outputs=_result_widgets,
	    title="News Summarizer",
	    description="Enter text in English, and get a pure Hindi speech output along with a downloadable text file.",
	)

	# Start the web app as soon as the module is executed.
	interface.launch()