"""Gradio app that renders a word cloud from Arabic/English text.

The input text is stripped of @usernames, retweet markers and URLs, then
Arabic stopwords (read from ``list.txt``) and NLTK's English stopwords are
removed before the remaining words are drawn with the ``wordcloud`` package.
"""
import re
from typing import Iterable

import gradio as gr
import matplotlib
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud

# Select the non-interactive Agg backend before pyplot is imported: figures
# are only ever written to a file, and the handler may run outside the main
# thread, where GUI backends are not safe to use.
matplotlib.use("Agg")
import matplotlib.pyplot as plt  # noqa: E402

# Download stopwords if not already present
nltk.download('stopwords')

# Arabic stopwords are read from a text file, one word per line.
file_path = "list.txt"

# Read explicitly as UTF-8 so the Arabic text does not depend on the platform
# default encoding; "utf-8-sig" also drops a leading BOM, which would
# otherwise stick to the first stopword and stop it from ever matching.
with open(file_path, 'r', encoding='utf-8-sig') as file:
    lines_list = file.readlines()

# Strip the trailing newline (and surrounding whitespace) from each line
arabic_stopwords = [line.strip() for line in lines_list]

# Arabic + English stopwords, built once at import instead of on every call.
_COMBINED_STOPWORDS = frozenset(arabic_stopwords).union(stopwords.words('english'))

# "RT" is matched only as a standalone token so that words which merely
# contain those letters (e.g. "START") are left intact.
_USERNAME_OR_RT_RE = re.compile(r'@\w+|\bRT\b')
_URL_RE = re.compile(r'http\S+|www\S+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text: str) -> str:
    """Return *text* without usernames, retweet markers, URLs or extra spaces.

    Args:
        text: Raw input text.

    Returns:
        The text with ``@username`` mentions, standalone ``RT`` tokens and
        ``http...``/``www...`` runs removed, every whitespace run collapsed
        to a single space, and leading/trailing whitespace stripped.
    """
    text = _USERNAME_OR_RT_RE.sub('', text)  # Remove usernames and RT markers
    text = _URL_RE.sub('', text)  # Remove URLs
    return _WHITESPACE_RE.sub(' ', text).strip()  # Remove extra whitespaces


def remove_stopwords(text: str) -> str:
    """Return *text* with Arabic and English stopwords removed.

    Args:
        text: Whitespace-separated words.

    Returns:
        The remaining words joined by single spaces.
    """
    # NLTK's English stopwords are lowercase, so compare on the lowercased
    # word; otherwise capitalised forms such as "The" would survive.
    return ' '.join(
        word for word in text.split()
        if word.lower() not in _COMBINED_STOPWORDS
    )


def generate_wordcloud(text_array: Iterable[str]) -> str:
    """Render a word cloud for the given lines of text and save it as a PNG.

    Args:
        text_array: Lines/sentences of text; they are joined with spaces.

    Returns:
        The path of the written image, ``'wordcloud.png'``.

    Raises:
        ValueError: If no words remain after cleaning and stopword removal
            (either detected here or reported by ``WordCloud.generate``).
    """
    text_data = ' '.join(text_array)
    clean_text_data = remove_stopwords(clean_text(text_data))
    if not clean_text_data:
        # Fail early with a clear message instead of a library-internal one.
        raise ValueError(
            "No words left to plot after cleaning and stopword removal."
        )

    # Generate the word cloud
    wordcloud = WordCloud(
        font_path='Amiri-Regular.ttf',
        background_color='white',
        width=800,
        height=600,
        colormap='tab20c',
    ).generate(clean_text_data)

    # Draw on an explicit figure and always close it: pyplot keeps every
    # figure alive until closed, which would leak memory on each request.
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        fig.savefig('wordcloud.png')
    finally:
        plt.close(fig)
    return 'wordcloud.png'


# Gradio Interface
def gradio_interface(text_input: str) -> str:
    """Gradio handler: build a word cloud from newline-separated text.

    Args:
        text_input: Text box content, one sentence per line.

    Returns:
        Path to the generated word cloud image.

    Raises:
        gr.Error: If the input contains no plottable words, so the UI shows
            a readable message.
    """
    text_array = text_input.split('\n')  # Split input by new lines
    try:
        return generate_wordcloud(text_array)
    except ValueError as exc:
        raise gr.Error(str(exc)) from exc


# Create Gradio Interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=10, placeholder="Enter text data (one sentence per line)"),
    outputs="image",
    title="Arabic Word Cloud Generator",
    description="Generate a word cloud from Arabic text after cleaning and stopword removal.",
)

# Launch Gradio Interface (only when run as a script, so the module can be
# imported, e.g. for testing, without starting a server)
if __name__ == "__main__":
    interface.launch()