Spaces:

rafiulbiswas
/

WordCloud

Sleeping

WordCloud / app.py

MD Rafiul Biswas

app.py

196ce39 11 months ago

2.58 kB

	import gradio as gr
	import re
	import nltk
	from nltk.corpus import stopwords
	from wordcloud import WordCloud
	import matplotlib.pyplot as plt

	# Download stopwords if not already present
	nltk.download('stopwords')

	# Arabic stopwords live in a plain-text file, one word per line.
	file_path = "list.txt"

	# Decode explicitly as UTF-8: without an explicit encoding, open() falls back
	# to the platform's locale encoding, which can mis-decode (or fail on) Arabic
	# text on systems whose default is not UTF-8.
	with open(file_path, 'r', encoding='utf-8') as file:
	    lines_list = file.readlines()

	# Strip surrounding whitespace/newlines from each line and skip blank lines,
	# so the stopword list contains only real words.
	arabic_stopwords = [line.strip() for line in lines_list if line.strip()]

	# Function to clean text (removes usernames, retweet markers, URLs, and extra whitespace)
	def clean_text(text):
	    """Return *text* with @mentions, "RT" markers and URLs removed.

	    Runs of whitespace are collapsed to a single space and the result is
	    stripped of leading/trailing whitespace.

	    Args:
	        text: The raw input string.

	    Returns:
	        The cleaned string (possibly empty).
	    """
	    # The "|" must be a bare alternation operator. Written as "\|" it matches
	    # a literal pipe character, so neither mentions nor URLs were ever removed.
	    # "\bRT\b" keeps the retweet marker from being cut out of longer words.
	    text = re.sub(r'@\w+|\bRT\b', '', text)  # Remove usernames and RT markers
	    text = re.sub(r'http\S+|www\S+', '', text)  # Remove URLs
	    return re.sub(r'\s+', ' ', text).strip()  # Collapse extra whitespace

	# Function to remove Arabic and English stopwords
	def remove_stopwords(text):
	    """Return *text* with Arabic and English stopwords removed.

	    The text is split on whitespace; every token that appears in the
	    module-level ``arabic_stopwords`` list or in NLTK's English stopword
	    list is dropped, and the remaining tokens are re-joined with single
	    spaces.

	    Args:
	        text: The (already cleaned) input string.

	    Returns:
	        The filtered string (possibly empty).
	    """
	    combined_stopwords = set(arabic_stopwords).union(stopwords.words('english'))
	    # Check the lowercase form as well as the exact token, so capitalised
	    # stopwords such as "The" or "And" are removed too. The exact-match test
	    # is kept so anything that was filtered before is still filtered.
	    return ' '.join(
	        word
	        for word in text.split()
	        if word not in combined_stopwords
	        and word.lower() not in combined_stopwords
	    )

	# Function to generate word cloud
	def generate_wordcloud(text_array):
	    """Render a word cloud for *text_array* and save it as ``wordcloud.png``.

	    The strings are joined with spaces, cleaned with ``clean_text`` and
	    filtered with ``remove_stopwords`` before being passed to ``WordCloud``.

	    Args:
	        text_array: An iterable of strings (e.g. one sentence per item).

	    Returns:
	        The path of the written image, always ``'wordcloud.png'``.

	    Raises:
	        ValueError: If no words remain after cleaning and stopword removal.
	    """
	    clean_text_data = remove_stopwords(clean_text(' '.join(text_array)))

	    # Fail early with a readable message instead of an error raised from
	    # inside WordCloud when there is nothing to plot.
	    if not clean_text_data:
	        raise ValueError(
	            "No words left to plot after cleaning and stopword removal."
	        )

	    # Generate the word cloud (font file is resolved relative to the
	    # working directory).
	    cloud = WordCloud(
	        font_path='Amiri-Regular.ttf',
	        background_color='white',
	        width=800,
	        height=600,
	        colormap='tab20c',
	    ).generate(clean_text_data)

	    fig, ax = plt.subplots(figsize=(10, 8))
	    try:
	        ax.imshow(cloud, interpolation='bilinear')
	        ax.axis('off')
	        fig.savefig('wordcloud.png')
	    finally:
	        # pyplot keeps every figure alive until it is closed explicitly;
	        # without this, each request would leak one figure.
	        plt.close(fig)
	    return 'wordcloud.png'

	# Gradio callback
	def gradio_interface(text_input):
	    """Split the textbox content on newlines and build the word cloud.

	    Args:
	        text_input: The raw string entered in the Gradio textbox.

	    Returns:
	        Whatever ``generate_wordcloud`` returns for the list of lines.
	    """
	    return generate_wordcloud(text_input.split('\n'))

	# Build the Gradio UI: a multi-line textbox in, an image out.
	interface = gr.Interface(
	    fn=gradio_interface,
	    inputs=gr.Textbox(
	        lines=10,
	        placeholder="Enter text data (one sentence per line)",
	    ),
	    outputs="image",
	    title="Arabic Word Cloud Generator",
	    description="Generate a word cloud from Arabic text after cleaning and stopword removal.",
	)

	# Start serving the interface.
	interface.launch()