Spaces:

vicky4s4s
/

News-Summarizer

Sleeping

File size: 3,786 Bytes

import json
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline,set_seed
from utils import extract_data
from agents import generate_response
from gtts import gTTS
import gradio as gr
import os
import asyncio
from googletrans import Translator
import nltk
# Fetch the NLTK tokenizer resources when the module is loaded. Nothing else
# in the visible part of this file calls nltk directly; presumably the
# imported helpers rely on them -- TODO confirm.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

# Seed the transformers random number generators with a fixed value.
set_seed(42)

def eng_to_hindi(text):
    """Translate English text to Hindi with googletrans.

    The result of ``Translator.translate`` is awaited, so the call is driven
    to completion on a private event loop.

    Args:
        text: English text to translate.

    Returns:
        The ``text`` attribute of the translation result.

    Raises:
        RuntimeError: If the calling thread already has a running event loop
            (``asyncio.run`` cannot be nested).
    """
    translator = Translator()

    async def _translate():
        # Wrapping the call lets asyncio.run() accept any awaitable result.
        return await translator.translate(text, src="en", dest="hi")

    # asyncio.run() creates a fresh loop and always closes it afterwards.
    # Creating a loop by hand without closing it leaks one loop per call.
    return asyncio.run(_translate()).text

def text_to_voice(text,complete_text):
    """Create the Hindi audio file and the text report file.

    ``text`` is translated with ``eng_to_hindi`` and synthesised with gTTS
    using the ``"hi"`` language setting; ``complete_text`` is written
    unchanged as UTF-8. Both files use fixed names relative to the current
    working directory, so each call overwrites the previous output.

    Args:
        text: English text to be spoken in Hindi.
        complete_text: Full report text to store in the text file.

    Returns:
        Tuple ``(audio_path, text_path)``.
    """
    audio_path, report_path = r"output.mp3", r"output.txt"

    # Translate first, then synthesise and store the speech.
    speech = gTTS(text=eng_to_hindi(text), lang="hi")
    speech.save(audio_path)

    with open(report_path, "w", encoding="utf-8") as report_file:
        report_file.write(complete_text)

    return audio_path, report_path

def sentiment_analysis(input_text):
    """Build the sentiment report for the articles found for ``input_text``.

    Every article summary returned by ``extract_data`` is classified with the
    DistilBERT SST-2 sentiment pipeline. The summaries and the label counts
    are then passed to ``generate_response``, whose JSON reply supplies the
    comparative sections of the report.

    Args:
        input_text: Query handed to ``extract_data``; echoed back under the
            ``"Company"`` key of the result.

    Returns:
        A dict with the keys ``"Company"``, ``"Articles"`` (one entry per
        article with ``"Title"``, ``"Summary"``, ``"Sentiment"`` and
        ``"Topics"``), ``"Comparative Sentiment Score"`` (positive/negative
        counts), ``"Coverage Differences"``, ``"Topic Overlap"``,
        ``"Final Sentiment Analysis"`` and
        ``"Overall sentiment summarizing report"``. The last four default to
        ``[]`` when the reply from ``generate_response`` lacks them.

    Raises:
        json.JSONDecodeError: If ``generate_response`` does not return valid
            JSON text.
        KeyError: If an article lacks ``'summary'``, ``'title'`` or
            ``'topics'``, or the pipeline emits a label that is not one of
            the counted ones.
    """
    model_id = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
    # NOTE(review): the pipeline is rebuilt on every call; consider reusing
    # one instance if request latency matters.
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model=model_id,
        tokenizer=model_id,
    )
    data = extract_data(input_text)
    sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    summary_list = []
    all_articles = []
    # Assumes extract_data yields groups (lists) of article dicts, as the
    # nested iteration below implies -- verify against utils.extract_data.
    for sublist in data:
        for item in sublist:
            summary_text = item['summary']
            summary_list.append(summary_text)
            # truncation=True clips over-long summaries to the model's
            # maximum input length instead of failing inside the model.
            results = sentiment_pipeline(summary_text, truncation=True)
            sentiment_label = results[0]['label'].upper()
            sentiment_counts[sentiment_label] += 1
            all_articles.append({
                "Title": item['title'],
                "Summary": summary_text,
                "Sentiment": sentiment_label,
                "Topics": item['topics'],
            })

    # One join instead of repeated += keeps this linear in the total length.
    clean_text = "".join(summary + " \n" for summary in summary_list)
    response = generate_response(summary_list,
                                 sentiment_counts["POSITIVE"],
                                 sentiment_counts["NEGATIVE"],
                                 clean_text)
    response_dict = json.loads(response)
    coverage_differences = response_dict.get("Coverage Differences", [])
    topic_overlap = response_dict.get("Topic Overlap", [])
    final_sentiment = response_dict.get("Final Sentiment Analysis", [])
    # NOTE(review): "Ssummarizing" looks like a typo, but the key must match
    # whatever generate_response emits -- confirm there before renaming.
    summarizing_report = response_dict.get("Overall_Sentiment_Ssummarizing_Report", [])

    return {
        "Company": input_text,
        "Articles": all_articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": {
                "Positive": sentiment_counts["POSITIVE"],
                "Negative": sentiment_counts["NEGATIVE"],
            }
        },
        "Coverage Differences": coverage_differences,
        "Topic Overlap": topic_overlap,
        "Final Sentiment Analysis": final_sentiment,
        "Overall sentiment summarizing report": summarizing_report,
    }


def main(input_text):
    """Gradio callback: analyse ``input_text`` and produce the two outputs.

    Runs ``sentiment_analysis``, serialises the full result as indented JSON
    for the downloadable report, and passes the overall summarizing report to
    ``text_to_voice`` for the Hindi audio.

    Args:
        input_text: Text entered in the input box.

    Returns:
        Tuple ``(audio_path, text_path)`` as returned by ``text_to_voice``.

    Raises:
        gr.Error: If the analysis produced no summarizing report to speak.
    """
    final_answer = sentiment_analysis(input_text)
    # The report file is written as UTF-8, so keep non-ASCII characters
    # readable instead of escaping them as \uXXXX sequences.
    clean_text = json.dumps(final_answer, indent=4, ensure_ascii=False)

    report = final_answer["Overall sentiment summarizing report"]
    # sentiment_analysis defaults this field to [] when it is missing, while
    # translation and speech synthesis need plain text.
    if isinstance(report, (list, tuple)):
        report = " ".join(str(part) for part in report)
    elif report is None:
        report = ""
    elif not isinstance(report, str):
        report = str(report)
    if not report.strip():
        raise gr.Error(
            "No summarizing report was generated, so there is nothing to convert to speech."
        )

    output_audio, output_text = text_to_voice(report, clean_text)
    return output_audio, output_text

# Gradio UI: one text input wired to main(), which returns an audio file path
# and a text file path for the two output components below.
_query_box = gr.Textbox(label="Enter the input")
_result_components = [
    gr.Audio(label="Hindi Audio Output"),
    gr.File(label="complete summarization report"),
]

interface = gr.Interface(
    fn=main,
    inputs=_query_box,
    outputs=_result_components,
    title="News Summarizer",
    description="Enter text in English, and get a pure Hindi speech output along with a downloadable text file.",
)

# Launch the app as soon as the module is executed.
interface.launch()