# WordCloud / app.py
# MD Rafiul Biswas
# app.py
# 196ce39
import gradio as gr
import re
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Make sure the NLTK stopword corpus is available; it supplies the English
# stopword list used by remove_stopwords().
nltk.download('stopwords')

# Arabic stopwords are loaded from a plain-text file, one entry per line.
file_path = "list.txt"

# Decode explicitly as UTF-8: without an encoding argument open() falls back
# to the platform default, which can garble or reject Arabic text.
with open(file_path, 'r', encoding='utf-8') as file:
    lines_list = file.readlines()

# Strip surrounding whitespace (including the trailing newline) from each entry.
arabic_stopwords = [line.strip() for line in lines_list]
# Function to clean text (removes usernames, retweet markers, URLs, and extra whitespace)
def clean_text(text):
    """Return ``text`` with Twitter-style noise removed.

    The following are stripped, in order:

    * ``@username`` mentions and the standalone retweet marker ``RT``;
    * URLs starting with ``http`` or ``www``;
    * runs of whitespace (collapsed to one space) and leading/trailing
      whitespace.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string; empty if nothing but noise was present.
    """
    # ``\bRT\b`` matches "RT" only as a whole word, so words that merely
    # contain those letters (e.g. "START", "PORT") are left intact.
    text = re.sub(r'@\w+|\bRT\b', '', text)
    text = re.sub(r'http\S+|www\S+', '', text)
    # Collapse whitespace last: the removals above leave gaps behind.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
# Function to remove Arabic and English stopwords
def remove_stopwords(text):
    """Return ``text`` with Arabic and English stopwords removed.

    The text is split on whitespace and every token found in the combined
    stopword set (NLTK's English list plus the module-level
    ``arabic_stopwords``) is dropped. The remaining tokens are joined back
    together with single spaces, keeping their original spelling.

    Args:
        text: Whitespace-separated text, normally already cleaned.

    Returns:
        The filtered text; empty if every token was a stopword.
    """
    english_stopwords = set(stopwords.words('english'))
    combined_stopwords = english_stopwords.union(arabic_stopwords)
    # NLTK's English stopwords are lowercase, so also test the lowercased
    # token; otherwise capitalised forms such as "The" would slip through.
    # The exact-form test is kept so that entries are still matched as written.
    return ' '.join(
        word
        for word in text.split()
        if word not in combined_stopwords
        and word.lower() not in combined_stopwords
    )
# Function to generate word cloud
def generate_wordcloud(text_array):
    """Render a word cloud image from a list of text fragments.

    The fragments are joined with spaces, passed through ``clean_text`` and
    ``remove_stopwords``, and rendered with the Amiri font on a white
    background. The picture is written to ``wordcloud.png`` in the current
    working directory.

    Args:
        text_array: Iterable of strings (e.g. one sentence per element).

    Returns:
        The path of the saved image, ``'wordcloud.png'``.

    Raises:
        ValueError: If no text is left after cleaning and stopword removal.
    """
    text_data = ' '.join(text_array)
    clean_text_data = remove_stopwords(clean_text(text_data))

    # Fail early with a clear message instead of a less specific error from
    # the word cloud library when there is nothing to draw.
    if not clean_text_data:
        raise ValueError(
            "No words left to plot after cleaning and stopword removal."
        )

    # Generate the word cloud
    wordcloud = WordCloud(
        font_path='Amiri-Regular.ttf',
        background_color='white',
        width=800,
        height=600,
        colormap='tab20c',
    ).generate(clean_text_data)

    # Draw on an explicit figure/axes pair rather than pyplot's implicit
    # "current figure", and always close the figure afterwards: pyplot keeps
    # every open figure alive, so an unclosed one per call leaks memory.
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        # NOTE(review): every call writes to the same file name, so
        # overlapping calls would overwrite each other's output; consider a
        # per-call temporary file if this is served to concurrent users.
        fig.savefig('wordcloud.png')
    finally:
        plt.close(fig)
    return 'wordcloud.png'
# Gradio callback
def gradio_interface(text_input):
    """Turn the textbox contents into a word cloud image.

    ``text_input`` is broken apart at newline characters and the resulting
    list is handed to ``generate_wordcloud``; whatever that returns is
    passed straight back to Gradio.
    """
    return generate_wordcloud(text_input.split('\n'))
# Assemble the web UI: a ten-line textbox feeding gradio_interface, whose
# result is displayed as an image.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Enter text data (one sentence per line)",
    ),
    outputs="image",
    title="Arabic Word Cloud Generator",
    description="Generate a word cloud from Arabic text after cleaning and stopword removal.",
)

# Start the Gradio app.
interface.launch()