import gradio as gr
import nltk
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import string
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Download required NLTK data files
nltk.download('punkt')
nltk.download('punkt_tab')  # tokenizer tables required by newer NLTK releases
nltk.download('stopwords')

# Load the fine-tuned Pegasus model and tokenizer
model_name = "pegasus-fine_tuned_model"  # Example Pegasus model (local directory or Hub ID)
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)


def preprocess_text(text):
    # Tokenize text into sentences
    sentences = sent_tokenize(text)

    # Remove stopwords and punctuation, and convert to lowercase
    stop_words = set(stopwords.words('english'))
    preprocessed_sentences = []
    for sentence in sentences:
        words = word_tokenize(sentence.lower())
        filtered_words = [
            word for word in words
            if word not in stop_words and word not in string.punctuation
        ]
        preprocessed_sentences.append(' '.join(filtered_words))

    return sentences, preprocessed_sentences


def build_similarity_matrix(sentences):
    # Represent each sentence as a TF-IDF vector and compute pairwise cosine similarity
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
    similarity_matrix = cosine_similarity(tfidf_matrix)
    return similarity_matrix


def textrank_summary(text, num_sentences=3):
    # Extractive summary: rank sentences with PageRank over the sentence-similarity graph
    original_sentences, preprocessed_sentences = preprocess_text(text)
    similarity_matrix = build_similarity_matrix(preprocessed_sentences)
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)
    ranked_sentences = sorted(
        ((scores[i], s) for i, s in enumerate(original_sentences)),
        reverse=True
    )
    summary = ' '.join(sentence for score, sentence in ranked_sentences[:num_sentences])
    return summary


def tfidf_summary(text, num_sentences=3):
    # Extractive summary: score each sentence by the sum of its TF-IDF weights
    original_sentences, preprocessed_sentences = preprocess_text(text)
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(preprocessed_sentences)
    sentence_scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
    ranked_sentences = [original_sentences[i] for i in np.argsort(sentence_scores)[::-1]]
    summary = ' '.join(ranked_sentences[:num_sentences])
    return summary


def pegasus_summary(text):
    # Abstractive summary: generate new text with the fine-tuned Pegasus model
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True, padding=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=250,  # Adjust max_length/min_length as needed
        min_length=30,
        num_beams=5,
        early_stopping=True
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary


def summarize_text(text, method):
    if method == "TF-IDF":
        return tfidf_summary(text)
    elif method == "TextRank":
        return textrank_summary(text)
    elif method == "Abstractive":
        return pegasus_summary(text)
    return "Unknown summarization method."


# Custom CSS for styling
custom_css = """
.gr-box {
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
    margin: 20px 0;
    background-color: #fff;
}
.gr-input, .gr-output {
    border: 1px solid #ccc;
    border-radius: 5px;
    padding: 10px;
    font-size: 16px;
}
.gr-button {
    background-color: #007bff;
    color: white;
    padding: 10px 20px;
    border: none;
    border-radius: 5px;
    font-size: 16px;
    cursor: pointer;
    transition: background-color 0.3s;
}
.gr-button:hover {
    background-color: #0056b3;
}
"""

# Create a visually appealing Gradio interface
interface = gr.Interface(
    fn=summarize_text,
    inputs=[
        gr.Textbox(
            lines=30,
            placeholder="Paste your text here...",
            label="Input Text",
            elem_classes="gr-input"  # Apply custom CSS class
        ),
        gr.Radio(
            choices=["TF-IDF", "TextRank", "Abstractive"],
            label="Summarization Method",
            value="Abstractive"
        )
    ],
    outputs=gr.Textbox(
        lines=30,
        label="Concise Summary",
        elem_classes="gr-output"  # Apply custom CSS class
    ),
    title="Pegasus Text Summarizer",
    description="Get a clear and concise summary of your text in seconds!",
    theme="default",  # Use a built-in theme
    css=custom_css  # Add custom CSS
)

# Launch the interface
interface.launch(
    share=True,
    debug=True  # Enable debug mode for error handling (optional)
)
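
# A minimal sketch of exercising the summarizers without the UI (assumes the
# "pegasus-fine_tuned_model" weights are available so the model loads):
#
#   sample = "Paste a few paragraphs of text here to try the summarizers."
#   print(summarize_text(sample, "TF-IDF"))
#   print(summarize_text(sample, "TextRank"))
#   print(summarize_text(sample, "Abstractive"))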