import gradio as gr
import torch
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from textblob import TextBlob
import os
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import login

# Ensure required NLTK resources are downloaded
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

# Text cleaning function
def clean_text(text):
    """Clean and preprocess text"""
    if pd.isna(text):
        return ""
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    words = nltk.word_tokenize(text)
    words = [word for word in words if word not in stop_words]
    return ' '.join(words)

# Load models
model_name = "Charankarnati18/TASK4"  # Replace with your actual model path

# Initialize tokenizers and models
models_loaded = True

try:
    # Main classification model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    print("Main model loaded successfully.")
except Exception as e:
    print(f"Error loading main model: {e}")
    models_loaded = False

try:
    # Toxicity model for harmful intent detection
    toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
    toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
    print("Toxicity model loaded successfully.")
except Exception as e:
    print(f"Error loading toxicity model: {e}")
    models_loaded = False

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

if models_loaded:
    model = model.to(device)
    toxicity_model = toxicity_model.to(device)

# Context detection functions
def detect_toxicity(text):
    """Detect toxicity/harmful intent using a BERT-based model"""
    if not models_loaded:
        return "Model loading error"
    inputs = toxicity_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = toxicity_model(**inputs)
    probabilities = torch.sigmoid(outputs.logits).cpu().numpy()[0]
    # Check if any toxic class probability is above threshold
    return "harmful_intent" if any(p > 0.5 for p in probabilities) else "not_harmful"

def detect_sarcasm(text):
    """Detect sarcasm using sentiment analysis and punctuation patterns"""
    sentiment = TextBlob(text).sentiment
    # Sarcasm often has positive words but negative sentiment
    has_punctuation = "!" in text or "?" in text
    has_contradiction = sentiment.polarity > 0 and sentiment.subjectivity > 0.6
    return "sarcasm" if has_contradiction and has_punctuation else "not_sarcasm"

def detect_joke(text):
    """Detect jokes using simple heuristics"""
    joke_indicators = ["lol", "lmao", "rofl", "haha", "funny", "joke", "humor"]
    text_lower = text.lower()
    has_joke_words = any(indicator in text_lower for indicator in joke_indicators)
    ends_with_punchline = text.endswith("!") or text.endswith("?")
    return "joke" if has_joke_words or ends_with_punchline else "not_joke"

def classify_misogyny_type(text):
    """Classify misogyny type using the main model"""
    if not models_loaded:
        return "Model loading error", {}

    # Clean text
    cleaned_text = clean_text(text)

    # Tokenize input
    inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Make prediction
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probabilities = F.softmax(logits, dim=1)
        prediction = torch.argmax(logits, dim=1).item()

    # Map prediction to class labels
    class_labels = ["sarcasm", "joke", "harmful_intent"]
    probs_dict = {label: float(probabilities[0][i].item()) * 100 for i, label in enumerate(class_labels)}

    return class_labels[prediction], probs_dict

# Moderation functions
def flag_content(text, misogyny_type, probs):
    """Flag content based on detected context."""
    if misogyny_type == "harmful_intent":
        severity = "High" if probs.get("harmful_intent", 0) > 80 else "Medium"
        return f"🚫 FLAGGED: This content contains harmful intent. Severity: {severity}"
    elif misogyny_type == "sarcasm":
        return "⚠️ FLAGGED: This content contains sarcasm that may be problematic."
    elif misogyny_type == "joke":
        return "⚠️ FLAGGED: This content contains potentially inappropriate humor."
    else:
        return "✅ This content is clean."

def warn_user(misogyny_type, probs):
    """Warn the user based on flagged content."""
    if misogyny_type == "harmful_intent":
        return "WARNING: Your content has been flagged for inappropriate language. Please review our community guidelines."
    elif misogyny_type in ["sarcasm", "joke"]:
        return "NOTICE: Your content may be misinterpreted. Please ensure it aligns with our community guidelines."
    else:
        return "Your content is appropriate."

def educate_user(misogyny_type):
    """Provide educational feedback to the user."""
    if misogyny_type == "harmful_intent":
        return "Harmful content can hurt others. Here are some resources for positive communication."
    elif misogyny_type == "sarcasm":
        return "Sarcasm can be misunderstood in online contexts. Consider being more direct in your communication."
    elif misogyny_type == "joke":
        return "Humor is subjective - what seems funny to one person may be hurtful to another."
    else:
        return "Thank you for your positive contribution!"

def moderate_content(text):
    """Moderate content by classifying, flagging, warning, and educating."""
    # Check if models are loaded
    if not models_loaded:
        return {
            "Classification": "Error: Models not loaded properly",
            "Flag": "Unable to process",
            "Warning": "Unable to process",
            "Education": "Unable to process",
            "Confidence Scores": {}
        }

    # Classify misogyny type
    misogyny_type, probs = classify_misogyny_type(text)

    # Generate moderation responses
    flag_message = flag_content(text, misogyny_type, probs)
    warn_message = warn_user(misogyny_type, probs)
    educate_message = educate_user(misogyny_type)

    # Format probabilities as percentages
    formatted_probs = {k: f"{v:.2f}%" for k, v in probs.items()}

    # Return results
    return {
        "Classification": misogyny_type.capitalize(),
        "Flag": flag_message,
        "Warning": warn_message,
        "Education": educate_message,
        "Confidence Scores": formatted_probs
    }

# Create Gradio interface
with gr.Blocks(title="Misogyny Detection and Content Moderation") as app:
    gr.Markdown("# Misogyny Detection and Content Moderation")
    gr.Markdown("This app analyzes text to detect potential misogynistic content and provides moderation recommendations.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Enter text to analyze",
                placeholder="Type or paste text here...",
                lines=5
            )
            submit_btn = gr.Button("Analyze Text")
        with gr.Column():
            output = gr.JSON(label="Analysis Results")

    # Examples
    examples = [
        ["This is a positive and respectful comment about everyone."],
        ["Women are so emotional and can't handle leadership roles."],
        ["That's such a funny joke! Women drivers, am I right?"],
        ["I was being sarcastic when I said she was good at her job!"],
        ["All women deserve to be treated poorly because they're inferior."]
    ]
    gr.Examples(examples, input_text)

    # Set up event
    submit_btn.click(fn=moderate_content, inputs=input_text, outputs=output)

    gr.Markdown("## How It Works")
    gr.Markdown("""
    This app uses machine learning models to:
    1. Classify text into different types (sarcasm, joke, harmful intent)
    2. Flag potentially problematic content
    3. Provide warnings and educational resources

    The models are trained to detect nuanced forms of misogyny, including those disguised as humor or sarcasm.
    """)

# Launch the app
app.launch()