Upload 82 files
This view is limited to 50 files because it contains too many changes.
- .gitattributes +13 -0
- Dockerfile +18 -0
- app.py +214 -0
- config.yaml +27 -0
- config_p.yaml +54 -0
- demo_app.py +17 -0
- demo_app_p.py +132 -0
- main.py +335 -0
- social_moderation/__pycache__/main.cpython-310.pyc +0 -0
- social_moderation/__pycache__/main.cpython-311.pyc +0 -0
- social_moderation/__pycache__/main.cpython-313.pyc +0 -0
- social_moderation/app.py +214 -0
- social_moderation/config.yaml +27 -0
- social_moderation/config_p.yaml +54 -0
- social_moderation/data/output/images/1-s2.0-S0957417420305492-gr2_blurred.jpg +0 -0
- social_moderation/data/output/images/1000_F_1566391003_PcPMXVvR99sK3Rf9YxJaChFZRhD7XuAx_blurred.jpg +3 -0
- social_moderation/data/output/images/3a8ecb09-8121-4764-bdd6-cb4be203efa7_blurred.jpg +0 -0
- social_moderation/data/output/images/Screenshot 2025-10-12 162027_blurred.png +3 -0
- social_moderation/data/output/images/Screenshot 2025-10-12 164637_blurred.png +3 -0
- social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 1.42.43 AM (1)_blurred.jpeg +3 -0
- social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.50.22 PM_blurred.jpeg +3 -0
- social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.50.35 PM_blurred.jpeg +0 -0
- social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.50.50 PM_blurred.jpeg +3 -0
- social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.51.33 PM_blurred.jpeg +3 -0
- social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.51.37 PM_blurred.jpeg +0 -0
- social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.51.40 PM_blurred.jpeg +0 -0
- social_moderation/data/output/images/WordItOut-word-cloud-3116414-e1610584584107_blurred.png +3 -0
- social_moderation/data/output/images/WordItOut-word-cloud-3116414-e1610584584107_blurred_blurred.png +3 -0
- social_moderation/data/output/images/stock-photo-hand-of-man-injured-wound-from-accident-and-blood-bleeding-on-white-background-insurance-concept-519504064_blurred.jpg +3 -0
- social_moderation/data/output/images/test_faces_blurred.jpeg +3 -0
- social_moderation/data/output/videos/WhatsApp Video 2025-09-21 at 11.40.51 AM_blurred.mp4 +3 -0
- social_moderation/data/output/videos/WhatsApp Video 2025-09-21 at 11.40.59 AM_blurred.mp4 +3 -0
- social_moderation/demo_app.py +17 -0
- social_moderation/demo_app_p.py +132 -0
- social_moderation/detectors/__pycache__/hatebert_detector.cpython-311.pyc +0 -0
- social_moderation/detectors/__pycache__/nsfw_detector.cpython-310.pyc +0 -0
- social_moderation/detectors/__pycache__/nsfw_detector.cpython-311.pyc +0 -0
- social_moderation/detectors/__pycache__/nsfw_detector.cpython-313.pyc +0 -0
- social_moderation/detectors/__pycache__/offensive_word_detector.cpython-311.pyc +0 -0
- social_moderation/detectors/__pycache__/text_detector.cpython-310.pyc +0 -0
- social_moderation/detectors/__pycache__/text_detector.cpython-311.pyc +0 -0
- social_moderation/detectors/__pycache__/yolov8_face.cpython-310.pyc +0 -0
- social_moderation/detectors/__pycache__/yolov8_face.cpython-311.pyc +0 -0
- social_moderation/detectors/__pycache__/yolov8_standard.cpython-311.pyc +0 -0
- social_moderation/detectors/__pycache__/yolov8_standard.cpython-313.pyc +0 -0
- social_moderation/detectors/hatebert_detector.py +91 -0
- social_moderation/detectors/nsfw_detector.py +193 -0
- social_moderation/detectors/offensive_word_detector.py +160 -0
- social_moderation/detectors/opencv_face.py +41 -0
- social_moderation/detectors/opencv_face_p.py +69 -0
.gitattributes
CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/1000_F_1566391003_PcPMXVvR99sK3Rf9YxJaChFZRhD7XuAx_blurred.jpg filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/Screenshot[[:space:]]2025-10-12[[:space:]]162027_blurred.png filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/Screenshot[[:space:]]2025-10-12[[:space:]]164637_blurred.png filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/stock-photo-hand-of-man-injured-wound-from-accident-and-blood-bleeding-on-white-background-insurance-concept-519504064_blurred.jpg filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/test_faces_blurred.jpeg filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/WhatsApp[[:space:]]Image[[:space:]]2025-10-12[[:space:]]at[[:space:]]1.42.43[[:space:]]AM[[:space:]](1)_blurred.jpeg filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/WhatsApp[[:space:]]Image[[:space:]]2025-10-12[[:space:]]at[[:space:]]4.50.22[[:space:]]PM_blurred.jpeg filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/WhatsApp[[:space:]]Image[[:space:]]2025-10-12[[:space:]]at[[:space:]]4.50.50[[:space:]]PM_blurred.jpeg filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/WhatsApp[[:space:]]Image[[:space:]]2025-10-12[[:space:]]at[[:space:]]4.51.33[[:space:]]PM_blurred.jpeg filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/WordItOut-word-cloud-3116414-e1610584584107_blurred_blurred.png filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/images/WordItOut-word-cloud-3116414-e1610584584107_blurred.png filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/videos/WhatsApp[[:space:]]Video[[:space:]]2025-09-21[[:space:]]at[[:space:]]11.40.51[[:space:]]AM_blurred.mp4 filter=lfs diff=lfs merge=lfs -text
+social_moderation/data/output/videos/WhatsApp[[:space:]]Video[[:space:]]2025-09-21[[:space:]]at[[:space:]]11.40.59[[:space:]]AM_blurred.mp4 filter=lfs diff=lfs merge=lfs -text
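Git writes the [[:space:]] escape into .gitattributes for tracked paths that contain spaces. A quick way to confirm the LFS filter attribute resolves for one of those paths is sketched below; this is not part of the commit, and it assumes the git CLI is on PATH and is run from the repository root.

# Hedged check that the LFS filter applies to a path containing spaces.
import subprocess

path = "social_moderation/data/output/images/Screenshot 2025-10-12 162027_blurred.png"
result = subprocess.run(
    ["git", "check-attr", "filter", "--", path],
    capture_output=True, text=True, check=True,
)
print(result.stdout.strip())  # expected: "<path>: filter: lfs"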
Dockerfile
ADDED
@@ -0,0 +1,18 @@
FROM python:3.10-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \
    libsm6 \
    libxext6 \
    libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["python", "social_moderation/app.py"]
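For reference, a hedged sketch of building and serving the image this Dockerfile describes, driving the docker CLI from Python. It assumes docker is installed and a requirements.txt exists at the repository root; the "smart-moderation" tag is a placeholder, not something defined in this commit.

# Build the image and run it, publishing the Gradio port from EXPOSE 7860.
import subprocess

subprocess.run(["docker", "build", "-t", "smart-moderation", "."], check=True)
subprocess.run(
    ["docker", "run", "--rm", "-p", "7860:7860", "smart-moderation"],
    check=True,
)
# The container starts social_moderation/app.py, which serves Gradio on 0.0.0.0:7860.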
app.py
ADDED
@@ -0,0 +1,214 @@
"""
Gradio Web UI for Smart Content Moderation System
Face Blur + Hate Speech + Blood/NSFW Detection
AUTO-LAUNCHES BROWSER + AUTO-SAVES TO OUTPUT FOLDER
"""

import gradio as gr
import tempfile
import os
import webbrowser
import time
import shutil
from pathlib import Path
import cv2
from datetime import datetime
from main import process_image, process_video, validate_blur_strength, get_media_type

# Create output folder
OUTPUT_DIR = Path("social_moderation/data/output")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

def blur_media(input_files, blur_strength, confidence, blur_text, nsfw_blur,
               blood_threshold, nsfw_blur_type, progress=gr.Progress()):
    """Process media files with complete moderation and auto-save."""

    results = []
    total = len(input_files) if input_files else 0

    if total == 0:
        return None, "❌ No files uploaded"

    blur_strength = validate_blur_strength(blur_strength)

    for idx, input_file in enumerate(input_files):
        input_path = input_file.name
        filename = Path(input_path).name

        progress((idx / total), desc=f"Processing {filename}...")

        try:
            media_type = get_media_type(input_path, 'auto')

            # Create output path with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            name_without_ext = Path(filename).stem
            file_ext = Path(filename).suffix

            # Save to appropriate subfolder
            if media_type == 'image':
                output_subdir = OUTPUT_DIR / "images"
            else:
                output_subdir = OUTPUT_DIR / "videos"

            output_subdir.mkdir(parents=True, exist_ok=True)

            # Generate output filename with timestamp
            output_filename = f"{name_without_ext}_{timestamp}{file_ext}"
            output_path = output_subdir / output_filename

            # Process the file
            success = process_image(
                input_path, str(output_path), blur_strength, confidence,
                False, blur_text, nsfw_blur, nsfw_blur_type, False,
                blood_threshold=blood_threshold
            )

            if success:
                results.append({
                    'path': str(output_path),
                    'filename': output_filename,
                    'type': media_type
                })
                progress((idx + 1) / total, desc=f"✅ Saved: {output_filename}")

        except Exception as e:
            progress((idx / total), desc=f"❌ Error: {e}")
            continue

    if not results:
        return None, "❌ Processing failed"

    # Prepare output and status
    output_path_str = results[0]['path']
    status = f"✅ Successfully processed {len(results)} file(s)\n\n"
    status += "📁 **Saved to:**\n"

    for result in results:
        status += f" • {result['filename']}\n"
        if result['type'] == 'image':
            status += f" 📍 `{OUTPUT_DIR / 'images' / result['filename']}`\n"
        else:
            status += f" 📍 `{OUTPUT_DIR / 'videos' / result['filename']}`\n"

    status += "\n**Features Applied:**"
    status += "\n 👤 Face blurring"
    status += "\n 🔤 Hate speech detection"
    status += "\n 🩸 Blood/NSFW detection"

    return output_path_str, status

# Create Gradio Interface
with gr.Blocks(title="Smart Content Moderation", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ Smart Content Moderation System")
    gr.Markdown("**Blur faces • Detect hate speech • Blur blood/NSFW content**")
    gr.Markdown(f"📁 **Output Folder:** `{OUTPUT_DIR}`")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📤 Input")
            input_files = gr.File(label="Upload Image/Video", file_count="multiple", file_types=["image", "video"])

        with gr.Column():
            gr.Markdown("### 📥 Output")
            output_file = gr.File(label="Blurred Output (Download)")
            status_text = gr.Textbox(label="Status & Save Location", interactive=False, lines=6)

    gr.Markdown("---")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### ⚙️ Blur Settings")
            blur_strength = gr.Slider(minimum=3, maximum=151, value=51, step=2, label="👤 Blur Strength")
            confidence = gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.05, label="Detection Confidence")

        with gr.Column():
            gr.Markdown("### 🔤 Hate Speech Settings")
            blur_text = gr.Checkbox(label="Enable Hate Speech Detection & Blur", value=True)
            gr.Markdown("*Detects: 'Hate You', profanity, offensive text*")

        with gr.Column():
            gr.Markdown("### 🩸 Blood/NSFW Settings")
            nsfw_blur = gr.Checkbox(label="Enable Blood/NSFW Detection & Blur", value=True)
            blood_threshold = gr.Slider(
                minimum=0.0, maximum=1.0, value=0.3, step=0.1,
                label="Blood Sensitivity (lower = more sensitive)"
            )
            nsfw_blur_type = gr.Dropdown(
                choices=["gaussian", "pixelate", "mosaic", "black"],
                value="gaussian",
                label="Blur Type"
            )

    gr.Markdown("---")

    with gr.Row():
        process_btn = gr.Button("🎬 Process & Save", variant="primary", size="lg")

    # Connect button
    process_btn.click(
        blur_media,
        inputs=[
            input_files, blur_strength, confidence, blur_text, nsfw_blur,
            blood_threshold, nsfw_blur_type
        ],
        outputs=[output_file, status_text]
    )

    gr.Markdown("""
    ---
    ## ✨ Features:

    ### 👤 Face Blurring
    - YOLOv8 face detection with 99% accuracy
    - Adaptive Gaussian blur based on face size
    - Works on images and videos

    ### 🔤 Hate Speech Detection
    - EasyOCR text detection (45+ languages)
    - Rule-based toxicity detection
    - Detects offensive words, hate patterns

    ### 🩸 Blood/NSFW Content
    - HSV color-based blood detection
    - Sensitive to 5%+ red pixels
    - Multiple blur types available

    ## 🚀 Usage:

    1. Upload image or video
    2. Enable desired features
    3. Adjust sensitivity sliders
    4. Click "Process & Save"
    5. Download or find in output folder

    ## 📁 Auto-Save:

    - Images → `social_moderation/data/output/images/`
    - Videos → `social_moderation/data/output/videos/`
    - Timestamped filenames for organization

    ## 📊 Recommendations:

    - **Blur Strength**: 51 (default) = good balance
    - **Confidence**: 0.5 = balanced detection
    - **Blood Threshold**: 0.3 = very sensitive
    """)

if __name__ == "__main__":
    # Auto-launch browser
    def open_browser():
        time.sleep(2)
        webbrowser.open("http://localhost:7860")

    import threading
    thread = threading.Thread(target=open_browser, daemon=True)
    thread.start()

    # Launch Gradio app
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
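blur_media hands its settings to process_image positionally. Spelled out with the keyword names from the process_image signature in main.py further down, the call is equivalent to the sketch below; the paths are placeholders, and this is a readability restatement rather than code from the commit.

# Keyword-argument restatement of the call blur_media makes above.
from main import process_image

success = process_image(
    input_path="uploaded.jpg",              # placeholder input path
    output_path="social_moderation/data/output/images/uploaded_20251012_120000.jpg",
    blur_strength=51,                       # validated to an odd value in [3, 151]
    confidence=0.5,
    exclude_center=False,                   # the UI always passes False here
    blur_text_p=True,                       # "Enable Hate Speech Detection & Blur" checkbox
    nsfw_blur=True,                         # "Enable Blood/NSFW Detection & Blur" checkbox
    nsfw_blur_type="gaussian",
    selective_nsfw=False,                   # the UI always passes False here
    blood_threshold=0.3,                    # slider default
)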
config.yaml
ADDED
@@ -0,0 +1,27 @@
device: "cuda"        # "cuda" or "cpu"
frame_skip: 3         # detect every N frames
debug: false          # overlay boxes if true

face_detector:
  type: "yolov8_standard"   # options: yolov8_standard, yolov8, opencv
  model_path: "weights/yolov8n-face.pt"
  conf_threshold: 0.35

text_detector:
  ocr_languages: ["en"]
  ocr_conf_threshold: 0.45
  preprocess:
    use_clahe: true
    sharpen: true
    denoise: true

blur:
  method: "gaussian"        # gaussian | mosaic
  gaussian_min_kernel: 15
  gaussian_max_kernel: 121
  mosaic_block_size_min: 6

text_blur:
  padding_x_ratio: 0.10
  padding_y_ratio: 0.15
  sentiment_threshold: -0.3
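config.yaml keeps a single flat blur section (one method plus kernel bounds), unlike the nested per-target layout in config_p.yaml below. A minimal sketch of reading it with yaml.safe_load, the same loader main.py uses for its own config:

# Load the flat config and read a few of the keys defined above.
import yaml

with open("config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

print(cfg["device"])                           # "cuda"
print(cfg["face_detector"]["conf_threshold"])  # 0.35
print(cfg["blur"]["gaussian_min_kernel"])      # 15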
config_p.yaml
ADDED
@@ -0,0 +1,54 @@
# Smart Content Moderation Configuration
device: "cuda"  # "cuda" or "cpu"
frame_skip: 3
debug: false

# Face Detection
face_detector:
  type: "yolov8_standard"
  model_path: "weights/yolov8n-face.pt"
  conf_threshold: 0.35
  bbox_adjustment:
    padding_ratio: 0.08
    neck_extension: 0.35
    forehead_padding: 0.12
  motion_smoothing:
    enabled: true
    iou_threshold: 0.5
    smoothing_window: 5
    max_missing_frames: 30

# Text Detection
text_detector:
  ocr_languages: ["en"]
  ocr_conf_threshold: 0.45
  preprocessing:
    use_clahe: true
    sharpen: true
    denoise: true

# NSFW/Blood/Violence Detection
nsfw_detection:
  nsfw_threshold: 0.7               # NSFW content threshold
  violence_threshold: 0.6           # Violence content threshold
  blood_threshold: 0.5              # Blood/gore threshold
  blood_percentage_threshold: 8.0   # Red pixel percentage for blood detection

# Blur Settings
blur:
  face:
    method: "gaussian"
    adaptive_intensity: true
    gaussian_min_kernel: 51
    gaussian_max_kernel: 121
    mosaic_block_size: 8

  text:
    method: "gaussian"
    gaussian_min_kernel: 51
    gaussian_max_kernel: 121
    mosaic_block_size: 8

# System Settings
system:
  benchmark_logging: true
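config_p.yaml is the file that process_image and process_video in main.py actually load; they then overwrite its blur kernels with the value coming from the UI or CLI. The sketch below is a compact restatement of those assignments (illustrative values, no new behavior):

# Runtime override applied to config_p.yaml by process_image/process_video.
import yaml

with open("config_p.yaml", "r") as f:
    config = yaml.safe_load(f)

blur_strength = 51  # already validated to be odd and within [3, 151]
for target in ("face", "text"):
    config["blur"][target]["gaussian_min_kernel"] = blur_strength
    config["blur"][target]["gaussian_max_kernel"] = blur_strength
    config["blur"][target]["mosaic_block_size"] = max(2, blur_strength // 6)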
demo_app.py
ADDED
@@ -0,0 +1,17 @@
import streamlit as st
from social_moderation.pipeline.processor import Processor
import tempfile

st.title("Social Moderation Demo")
video = st.file_uploader("Upload a video to test", type=["mp4", "mov", "avi"])
if st.button("Run Moderation"):
    proc = Processor(config_path="config.yaml")
    tmp_in = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    with open(tmp_in, "wb") as f:
        f.write(video.getvalue())
    proc.process_video(tmp_in, tmp_out)
    st.video(tmp_out)
    st.success("✅ Done! You can download the processed video below:")
    with open(tmp_out, "rb") as f:
        st.download_button("Download Processed Video", f, file_name="blurred_output.mp4")
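Note that demo_app.py calls video.getvalue() without checking whether a file was actually uploaded, so clicking "Run Moderation" with no upload raises an AttributeError. A guarded variant of the same flow is sketched below; it assumes Processor behaves as in the committed code and is not itself part of the commit.

# Guarded sketch of the same Streamlit flow.
import tempfile
import streamlit as st
from social_moderation.pipeline.processor import Processor

st.title("Social Moderation Demo")
video = st.file_uploader("Upload a video to test", type=["mp4", "mov", "avi"])
if st.button("Run Moderation"):
    if video is None:
        st.warning("Please upload a video first.")
    else:
        proc = Processor(config_path="config.yaml")
        tmp_in = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
        tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
        with open(tmp_in, "wb") as f:
            f.write(video.getvalue())
        proc.process_video(tmp_in, tmp_out)
        st.video(tmp_out)
        with open(tmp_out, "rb") as f:
            st.download_button("Download Processed Video", f, file_name="blurred_output.mp4")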
demo_app_p.py
ADDED
@@ -0,0 +1,132 @@
"""
Streamlit Demo App for HARI Content Moderation System
Interactive UI for testing moderation on uploaded videos/images
"""

import streamlit as st
import cv2
import tempfile
from pathlib import Path
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

st.set_page_config(
    page_title="HARI Content Moderation",
    page_icon="🛡️",
    layout="wide"
)

def main():
    st.title("🛡️ HARI Content Moderation System")
    st.markdown(
        "Upload videos or images to automatically blur faces and toxic text. "
        "Powered by YOLOv8, EasyOCR, and Detoxify."
    )

    # Sidebar configuration
    st.sidebar.header("⚙️ Settings")

    # Moderation toggles
    enable_face_blur = st.sidebar.checkbox("🙂 Blur Faces", value=True)
    enable_text_blur = st.sidebar.checkbox("💬 Blur Toxic Text", value=True)

    # Advanced settings
    with st.sidebar.expander("🔧 Advanced Settings"):
        blur_method = st.selectbox("Blur Method", ["gaussian", "mosaic", "pixelate"])
        frame_skip = st.slider("Frame Skip (process every Nth frame)", 1, 10, 2)
        toxicity_threshold = st.slider("Toxicity Threshold", 0.0, 1.0, 0.7, 0.05)
        motion_smoothing = st.checkbox("Motion Smoothing (video)", value=True)

    # File upload
    uploaded_file = st.file_uploader(
        "📤 Upload Video/Image",
        type=['jpg', 'jpeg', 'png', 'mp4', 'avi', 'mov']
    )

    if uploaded_file is not None:
        # Save uploaded file
        suffix = Path(uploaded_file.name).suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            tmp_in.write(uploaded_file.read())
            input_path = tmp_in.name

        # Display original
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("📥 Original")

            if uploaded_file.type.startswith('image'):
                st.image(input_path, use_container_width=True)
            else:
                st.video(input_path)

        # Process button
        if st.button("🚀 Run Moderation", type="primary"):
            with col2:
                st.subheader("📤 Moderated")

                with st.spinner("Processing... This may take a moment."):
                    try:
                        # Initialize processor
                        from social_moderation.pipeline.processor import Processor

                        processor = Processor(config_path="config.yaml")

                        # Update config with UI settings
                        processor.config["blur"]["face"]["method"] = blur_method
                        processor.config["blur"]["text"]["method"] = blur_method
                        processor.config["system"]["frame_skip"] = frame_skip
                        processor.config["toxicity"]["threshold"] = toxicity_threshold
                        processor.config["face_detector"]["motion_smoothing"]["enabled"] = motion_smoothing

                        # Process
                        output_path = input_path.replace(suffix, f"_moderated{suffix}")

                        if uploaded_file.type.startswith('image'):
                            # Image processing
                            image = cv2.imread(input_path)

                            if enable_face_blur:
                                image = processor.face_blurrer.blur_faces(image)

                            if enable_text_blur:
                                image = processor.text_blurrer.blur_toxic_text(image)

                            cv2.imwrite(output_path, image)
                            st.image(output_path, use_container_width=True)

                        else:
                            # Video processing
                            processor.process_video(input_path, output_path)
                            st.video(output_path)

                        st.success("✅ Processing complete!")

                        # Download button
                        with open(output_path, 'rb') as f:
                            st.download_button(
                                label="💾 Download Moderated File",
                                data=f,
                                file_name=f"moderated_{uploaded_file.name}",
                                mime=uploaded_file.type
                            )

                    except Exception as e:
                        st.error(f"❌ Processing failed: {str(e)}")
                        logger.exception("Processing error")

    # Footer
    st.sidebar.markdown("---")
    st.sidebar.markdown("### 📚 About")
    st.sidebar.info(
        "HARI Content Moderation System uses state-of-the-art AI models to "
        "automatically detect and blur faces and toxic text in media content."
    )

if __name__ == '__main__':
    main()
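demo_app_p.py writes into processor.config under blur.face, blur.text, system.frame_skip, toxicity.threshold and face_detector.motion_smoothing. Processor itself is not included in this 50-file view, and neither config file shown here defines a toxicity section, so those assignments presumably rely on Processor supplying the missing keys. A defensive sketch that creates absent branches before assigning, assuming processor.config is a plain nested dict:

# Defensive nested assignment; processor.config is assumed to be a plain dict.
def set_nested(cfg, keys, value):
    """Walk/create nested dicts along `keys` and set the final key to `value`."""
    node = cfg
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = value

# e.g. set_nested(processor.config, ["toxicity", "threshold"], 0.7)
# e.g. set_nested(processor.config, ["system", "frame_skip"], 2)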
main.py
ADDED
@@ -0,0 +1,335 @@
"""
Smart Content Moderation Pipeline
Face Blur + Hate Speech Detection + Blood/NSFW Detection
"""

import cv2
import argparse
from modules.face_blur_p import FaceBlurrer
from modules.text_blur_p import TextBlurrer
from modules.nsfw_blur import NSFWBlurrer
import os
import sys
from pathlib import Path
import logging
import yaml

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def validate_blur_strength(blur_strength):
    """Ensure blur strength is odd and reasonable"""
    if blur_strength % 2 == 0:
        blur_strength += 1
    return min(max(blur_strength, 3), 151)

def get_media_type(input_path, media_type_arg):
    """Determine media type"""
    if media_type_arg != 'auto':
        return media_type_arg

    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif', '.webp', '.heic'}
    video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.3gp'}

    ext = Path(input_path).suffix.lower()

    if ext in image_extensions:
        return 'image'
    elif ext in video_extensions:
        return 'video'

    return 'image'

def process_image(input_path, output_path, blur_strength, confidence, exclude_center, blur_text_p,
                  nsfw_blur, nsfw_blur_type, selective_nsfw, nsfw_threshold=0.7,
                  violence_threshold=0.6, blood_threshold=0.5):
    """Process image with face blur + hate speech + blood detection"""

    try:
        # Load image
        image = cv2.imread(input_path)
        if image is None:
            raise ValueError(f"Could not load image from {input_path}")

        blur_strength = validate_blur_strength(blur_strength)

        # Load config
        config_path = os.path.join(os.path.dirname(__file__), 'config_p.yaml')
        config_path = os.path.abspath(config_path)

        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)

        # Update blur strength in config
        config['blur']['face']['gaussian_min_kernel'] = blur_strength
        config['blur']['face']['gaussian_max_kernel'] = blur_strength
        config['blur']['face']['mosaic_block_size'] = max(2, blur_strength // 6)
        config['blur']['text']['gaussian_min_kernel'] = blur_strength
        config['blur']['text']['gaussian_max_kernel'] = blur_strength
        config['blur']['text']['mosaic_block_size'] = max(2, blur_strength // 6)

        # Initialize face detector and blurrer
        from detectors.yolov8_face import YOLOv8Face
        face_detector = YOLOv8Face(conf=confidence)
        face_blurrer = FaceBlurrer(face_detector, config)

        logger.info(f"Processing: {Path(input_path).name}")
        logger.info(f"Image dimensions: {image.shape[1]}x{image.shape[0]}")
        logger.info(f"Blur strength: {blur_strength}, Confidence: {confidence}")
        logger.info(f"Text blur: {blur_text_p}, NSFW blur: {nsfw_blur}")

        output = image.copy()

        # ============================================================
        # STEP 1: BLOOD/NSFW/VIOLENCE DETECTION
        # ============================================================
        if nsfw_blur:
            logger.info("=" * 60)
            logger.info("STEP 1: BLOOD/NSFW/VIOLENCE DETECTION & BLURRING")
            logger.info("=" * 60)

            nsfw_blurrer = NSFWBlurrer(
                blur_strength=(blur_strength, blur_strength),
                blur_type=nsfw_blur_type,
                blood_threshold=blood_threshold
            )

            try:
                result = nsfw_blurrer.blur_unsafe_content(output, add_warning=True)
                output = result['image']
                if result['analysis']:
                    logger.info(f"✓ Analysis: {result['analysis']['flags']}")
            except Exception as e:
                logger.warning(f"NSFW blur failed: {e}", exc_info=True)

        # ============================================================
        # STEP 2: HATE SPEECH/OFFENSIVE TEXT DETECTION
        # ============================================================
        if blur_text_p:
            logger.info("=" * 60)
            logger.info("STEP 2: HATE SPEECH/OFFENSIVE TEXT DETECTION & BLURRING")
            logger.info("=" * 60)

            try:
                text_blurrer = TextBlurrer(blur_strength=(blur_strength, blur_strength))
                logger.info("TextBlurrer initialized, attempting to blur text...")

                output = text_blurrer.blur_hate_text(output, confidence)
                logger.info("✓ Text blurred using blur_hate_text")
            except Exception as e:
                logger.warning(f"Text blur failed: {e}", exc_info=True)

        # ============================================================
        # STEP 3: FACE DETECTION & BLURRING
        # ============================================================
        logger.info("=" * 60)
        logger.info("STEP 3: FACE DETECTION & BLURRING")
        logger.info("=" * 60)

        try:
            output = face_blurrer.blur_faces(output)
            logger.info("✓ Faces blurred successfully")
        except Exception as e:
            logger.warning(f"Face blur failed: {e}", exc_info=True)

        # Save result
        cv2.imwrite(output_path, output)
        logger.info("=" * 60)
        logger.info(f"✓ Successfully saved blurred image to: {output_path}")
        logger.info("=" * 60)
        return True

    except Exception as e:
        logger.error(f"✗ Error processing image: {e}", exc_info=True)
        return False

def process_video(input_path, output_path, blur_strength, confidence, exclude_center, blur_text,
                  nsfw_blur, nsfw_blur_type, selective_nsfw, nsfw_threshold=0.7,
                  violence_threshold=0.6, blood_threshold=0.5):
    """Process video frame by frame"""

    cap = None
    out = None

    try:
        # Open video
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {input_path}")

        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if fps <= 0 or total_frames <= 0:
            raise ValueError("Invalid video properties")

        blur_strength = validate_blur_strength(blur_strength)

        # Load config
        config_path = os.path.join(os.path.dirname(__file__), 'config_p.yaml')
        config_path = os.path.abspath(config_path)

        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)

        config['blur']['face']['gaussian_min_kernel'] = blur_strength
        config['blur']['face']['gaussian_max_kernel'] = blur_strength
        config['blur']['face']['mosaic_block_size'] = max(2, blur_strength // 6)
        config['blur']['text']['gaussian_min_kernel'] = blur_strength
        config['blur']['text']['gaussian_max_kernel'] = blur_strength
        config['blur']['text']['mosaic_block_size'] = max(2, blur_strength // 6)

        # Initialize video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise ValueError(f"Could not create output video: {output_path}")

        # Initialize blurrers
        from detectors.yolov8_face import YOLOv8Face
        face_detector = YOLOv8Face(conf=confidence)
        face_blurrer = FaceBlurrer(face_detector, config)

        text_blurrer = None
        if blur_text:
            text_blurrer = TextBlurrer(blur_strength=(blur_strength, blur_strength))

        nsfw_blurrer = None
        if nsfw_blur:
            nsfw_blurrer = NSFWBlurrer(
                blur_strength=(blur_strength, blur_strength),
                blur_type=nsfw_blur_type,
                blood_threshold=blood_threshold
            )

        logger.info("=" * 60)
        logger.info(f"Processing video: {Path(input_path).name}")
        logger.info(f"Video: {width}x{height}, {fps:.2f} FPS, {total_frames} frames")
        logger.info(f"Blur strength: {blur_strength}, Confidence: {confidence}")
        logger.info(f"Text blur: {'enabled' if blur_text else 'disabled'}")
        logger.info(f"NSFW blur: {'enabled' if nsfw_blur else 'disabled'}")
        logger.info("=" * 60)

        # Process frames
        frame_count = 0
        text_blur_interval = 15
        nsfw_check_interval = 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            processed_frame = frame.copy()

            # Apply NSFW/blood blur
            if nsfw_blur and nsfw_blurrer:
                if frame_count % nsfw_check_interval == 0:
                    try:
                        result = nsfw_blurrer.blur_unsafe_content(processed_frame, add_warning=False)
                        processed_frame = result['image']
                    except:
                        pass

            # Apply text blur
            if blur_text and text_blurrer and frame_count % text_blur_interval == 0:
                try:
                    processed_frame = text_blurrer.blur_hate_text(processed_frame, confidence)
                except:
                    pass

            # Apply face blur
            try:
                processed_frame = face_blurrer.blur_faces(processed_frame)
            except:
                pass

            out.write(processed_frame)

            frame_count += 1
            if frame_count % 30 == 0 or frame_count == total_frames:
                progress = (frame_count / total_frames) * 100
                logger.info(f"Progress: {progress:.1f}% ({frame_count}/{total_frames} frames)")

        logger.info("=" * 60)
        logger.info(f"✓ Successfully processed {frame_count} frames")
        logger.info(f"✓ Saved blurred video to: {output_path}")
        logger.info("=" * 60)
        return True

    except Exception as e:
        logger.error(f"✗ Error processing video: {e}", exc_info=True)
        return False

    finally:
        if cap:
            cap.release()
        if out:
            out.release()

def main():
    parser = argparse.ArgumentParser(
        description='Smart Content Moderation - Blur Faces, Hate Speech, Blood/NSFW'
    )

    parser.add_argument('--input', '-i', required=True, help='Input file path')
    parser.add_argument('--output', '-o', help='Output file path')
    parser.add_argument('--media-type', '-t', choices=['image', 'video', 'auto'], default='auto')
    parser.add_argument('--blur-strength', '-b', type=int, default=51, help='Blur strength (3-151)')
    parser.add_argument('--confidence', '-c', type=float, default=0.5, help='Detection confidence (0.1-1.0)')
    parser.add_argument('--exclude-center', action='store_true', help='Exclude center from face blur')
    parser.add_argument('--blur-text', action='store_true', help='Enable hate speech text blurring')
    parser.add_argument('--nsfw-blur', action='store_true', help='Enable blood/NSFW blurring')
    parser.add_argument('--nsfw-blur-type', choices=['gaussian', 'pixelate', 'mosaic', 'black'], default='gaussian')
    parser.add_argument('--nsfw-threshold', type=float, default=0.7, help='NSFW threshold')
    parser.add_argument('--violence-threshold', type=float, default=0.6, help='Violence threshold')
    parser.add_argument('--blood-threshold', type=float, default=0.5, help='Blood detection threshold')
    parser.add_argument('--selective-nsfw', action='store_true', help='Selective NSFW blur')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose logging')

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    try:
        if not os.path.exists(args.input):
            raise FileNotFoundError(f"Input file '{args.input}' does not exist")

        media_type = get_media_type(args.input, args.media_type)
        logger.info(f"Detected media type: {media_type}")

        if not args.output:
            input_path = Path(args.input)
            output_dir = Path("data/output/images" if media_type == 'image' else "data/output/videos")
            output_dir.mkdir(parents=True, exist_ok=True)
            args.output = str(output_dir / f"{input_path.stem}_blurred{input_path.suffix}")

        Path(args.output).parent.mkdir(parents=True, exist_ok=True)

        if media_type == 'image':
            success = process_image(
                args.input, args.output, args.blur_strength,
                args.confidence, args.exclude_center, args.blur_text,
                args.nsfw_blur, args.nsfw_blur_type, args.selective_nsfw,
                args.nsfw_threshold, args.violence_threshold, args.blood_threshold
            )
        else:
            success = process_video(
                args.input, args.output, args.blur_strength,
                args.confidence, args.exclude_center, args.blur_text,
                args.nsfw_blur, args.nsfw_blur_type, args.selective_nsfw,
                args.nsfw_threshold, args.violence_threshold, args.blood_threshold
            )

        sys.exit(0 if success else 1)

    except Exception as e:
        logger.error(f"Fatal error: {e}", exc_info=True)
        sys.exit(1)

if __name__ == "__main__":
    main()
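The argparse block above gives main.py a direct CLI. A hedged example invocation from Python (the sample input path is a placeholder), using only flags defined above and equivalent to running the script from a shell:

# Example CLI invocation of main.py; paths are placeholders.
import subprocess

subprocess.run(
    [
        "python", "main.py",
        "--input", "data/input/sample.jpg",   # placeholder input file
        "--blur-strength", "51",
        "--confidence", "0.5",
        "--blur-text",                        # enable hate-speech text blurring
        "--nsfw-blur",                        # enable blood/NSFW blurring
        "--nsfw-blur-type", "gaussian",
        "--blood-threshold", "0.3",
    ],
    check=True,
)
# With no --output given, the result lands in data/output/images/sample_blurred.jpg.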
social_moderation/__pycache__/main.cpython-310.pyc
ADDED (binary file, 8.68 kB)
social_moderation/__pycache__/main.cpython-311.pyc
ADDED (binary file, 17.7 kB)
social_moderation/__pycache__/main.cpython-313.pyc
ADDED (binary file, 16.2 kB)
social_moderation/app.py
ADDED
@@ -0,0 +1,214 @@
(Contents identical to app.py above.)
social_moderation/config.yaml
ADDED
@@ -0,0 +1,27 @@
(Contents identical to config.yaml above.)
social_moderation/config_p.yaml
ADDED
@@ -0,0 +1,54 @@
(Contents identical to config_p.yaml above.)
social_moderation/data/output/images/1-s2.0-S0957417420305492-gr2_blurred.jpg
ADDED
social_moderation/data/output/images/1000_F_1566391003_PcPMXVvR99sK3Rf9YxJaChFZRhD7XuAx_blurred.jpg
ADDED (Git LFS)
social_moderation/data/output/images/3a8ecb09-8121-4764-bdd6-cb4be203efa7_blurred.jpg
ADDED
social_moderation/data/output/images/Screenshot 2025-10-12 162027_blurred.png
ADDED (Git LFS)
social_moderation/data/output/images/Screenshot 2025-10-12 164637_blurred.png
ADDED (Git LFS)
social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 1.42.43 AM (1)_blurred.jpeg
ADDED (Git LFS)
social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.50.22 PM_blurred.jpeg
ADDED (Git LFS)
social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.50.35 PM_blurred.jpeg
ADDED
social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.50.50 PM_blurred.jpeg
ADDED (Git LFS)
social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.51.33 PM_blurred.jpeg
ADDED (Git LFS)
social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.51.37 PM_blurred.jpeg
ADDED
social_moderation/data/output/images/WhatsApp Image 2025-10-12 at 4.51.40 PM_blurred.jpeg
ADDED
social_moderation/data/output/images/WordItOut-word-cloud-3116414-e1610584584107_blurred.png
ADDED (Git LFS)
social_moderation/data/output/images/WordItOut-word-cloud-3116414-e1610584584107_blurred_blurred.png
ADDED (Git LFS)
social_moderation/data/output/images/stock-photo-hand-of-man-injured-wound-from-accident-and-blood-bleeding-on-white-background-insurance-concept-519504064_blurred.jpg
ADDED (Git LFS)
social_moderation/data/output/images/test_faces_blurred.jpeg
ADDED (Git LFS)
social_moderation/data/output/videos/WhatsApp Video 2025-09-21 at 11.40.51 AM_blurred.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba686b173e055c913b2e491b2b4f4c857d2fb615969e6d7761032289083b665b
size 2637444

social_moderation/data/output/videos/WhatsApp Video 2025-09-21 at 11.40.59 AM_blurred.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c59e6897fa499bd10f7fad8589805283530dfca13d2f2f1a70fa855641f9739
size 1966808

social_moderation/demo_app.py
ADDED
@@ -0,0 +1,17 @@
import streamlit as st
from social_moderation.pipeline.processor import Processor
import tempfile

st.title("Social Moderation Demo")
video = st.file_uploader("Upload a video to test", type=["mp4", "mov", "avi"])
if video is not None and st.button("Run Moderation"):
    proc = Processor(config_path="config.yaml")
    tmp_in = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    with open(tmp_in, "wb") as f:
        f.write(video.getvalue())
    proc.process_video(tmp_in, tmp_out)
    st.video(tmp_out)
    st.success("✅ Done! You can download the processed video below:")
    with open(tmp_out, "rb") as f:
        st.download_button("Download Processed Video", f, file_name="blurred_output.mp4")

social_moderation/demo_app_p.py
ADDED
@@ -0,0 +1,132 @@
"""
Streamlit Demo App for HARI Content Moderation System
Interactive UI for testing moderation on uploaded videos/images
"""

import streamlit as st
import cv2
import tempfile
from pathlib import Path
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

st.set_page_config(
    page_title="HARI Content Moderation",
    page_icon="🛡️",
    layout="wide"
)

def main():
    st.title("🛡️ HARI Content Moderation System")
    st.markdown(
        "Upload videos or images to automatically blur faces and toxic text. "
        "Powered by YOLOv8, EasyOCR, and Detoxify."
    )

    # Sidebar configuration
    st.sidebar.header("⚙️ Settings")

    # Moderation toggles
    enable_face_blur = st.sidebar.checkbox("🙂 Blur Faces", value=True)
    enable_text_blur = st.sidebar.checkbox("💬 Blur Toxic Text", value=True)

    # Advanced settings
    with st.sidebar.expander("🔧 Advanced Settings"):
        blur_method = st.selectbox("Blur Method", ["gaussian", "mosaic", "pixelate"])
        frame_skip = st.slider("Frame Skip (process every Nth frame)", 1, 10, 2)
        toxicity_threshold = st.slider("Toxicity Threshold", 0.0, 1.0, 0.7, 0.05)
        motion_smoothing = st.checkbox("Motion Smoothing (video)", value=True)

    # File upload
    uploaded_file = st.file_uploader(
        "📤 Upload Video/Image",
        type=['jpg', 'jpeg', 'png', 'mp4', 'avi', 'mov']
    )

    if uploaded_file is not None:
        # Save uploaded file
        suffix = Path(uploaded_file.name).suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            tmp_in.write(uploaded_file.read())
            input_path = tmp_in.name

        # Display original
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("📥 Original")

            if uploaded_file.type.startswith('image'):
                st.image(input_path, use_container_width=True)
            else:
                st.video(input_path)

        # Process button
        if st.button("🚀 Run Moderation", type="primary"):
            with col2:
                st.subheader("📤 Moderated")

                with st.spinner("Processing... This may take a moment."):
                    try:
                        # Initialize processor
                        from social_moderation.pipeline.processor import Processor

                        processor = Processor(config_path="config.yaml")

                        # Update config with UI settings
                        processor.config["blur"]["face"]["method"] = blur_method
                        processor.config["blur"]["text"]["method"] = blur_method
                        processor.config["system"]["frame_skip"] = frame_skip
                        processor.config["toxicity"]["threshold"] = toxicity_threshold
                        processor.config["face_detector"]["motion_smoothing"]["enabled"] = motion_smoothing

                        # Process
                        output_path = input_path.replace(suffix, f"_moderated{suffix}")

                        if uploaded_file.type.startswith('image'):
                            # Image processing
                            image = cv2.imread(input_path)

                            if enable_face_blur:
                                image = processor.face_blurrer.blur_faces(image)

                            if enable_text_blur:
                                image = processor.text_blurrer.blur_toxic_text(image)

                            cv2.imwrite(output_path, image)
                            st.image(output_path, use_container_width=True)

                        else:
                            # Video processing
                            processor.process_video(input_path, output_path)
                            st.video(output_path)

                        st.success("✅ Processing complete!")

                        # Download button
                        with open(output_path, 'rb') as f:
                            st.download_button(
                                label="💾 Download Moderated File",
                                data=f,
                                file_name=f"moderated_{uploaded_file.name}",
                                mime=uploaded_file.type
                            )

                    except Exception as e:
                        st.error(f"❌ Processing failed: {str(e)}")
                        logger.exception("Processing error")

    # Footer
    st.sidebar.markdown("---")
    st.sidebar.markdown("### 📚 About")
    st.sidebar.info(
        "HARI Content Moderation System uses state-of-the-art AI models to "
        "automatically detect and blur faces and toxic text in media content."
    )

if __name__ == '__main__':
    main()

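The demo apps are launched with Streamlit's CLI (for example, streamlit run social_moderation/demo_app_p.py from the repository root). For reference, a headless sketch that drives the same Processor with no UI; the input/output paths are placeholders, and the config keys simply mirror the overrides made in the app above.

# illustrative sketch only: run the moderation pipeline without the Streamlit UI
from social_moderation.pipeline.processor import Processor

processor = Processor(config_path="config.yaml")
processor.config["blur"]["face"]["method"] = "mosaic"       # same key the demo app overrides
processor.config["system"]["frame_skip"] = 2                # same key the demo app overrides
processor.process_video("input.mp4", "output_blurred.mp4")  # placeholder paths
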
social_moderation/detectors/__pycache__/hatebert_detector.cpython-311.pyc
ADDED
Binary file (5.03 kB)

social_moderation/detectors/__pycache__/nsfw_detector.cpython-310.pyc
ADDED
Binary file (5.67 kB)

social_moderation/detectors/__pycache__/nsfw_detector.cpython-311.pyc
ADDED
Binary file (16.5 kB)

social_moderation/detectors/__pycache__/nsfw_detector.cpython-313.pyc
ADDED
Binary file (14.8 kB)

social_moderation/detectors/__pycache__/offensive_word_detector.cpython-311.pyc
ADDED
Binary file (7.29 kB)

social_moderation/detectors/__pycache__/text_detector.cpython-310.pyc
ADDED
Binary file (2.52 kB)

social_moderation/detectors/__pycache__/text_detector.cpython-311.pyc
ADDED
Binary file (12.4 kB)

social_moderation/detectors/__pycache__/yolov8_face.cpython-310.pyc
ADDED
Binary file (2.46 kB)

social_moderation/detectors/__pycache__/yolov8_face.cpython-311.pyc
ADDED
Binary file (3.58 kB)

social_moderation/detectors/__pycache__/yolov8_standard.cpython-311.pyc
ADDED
Binary file (2.25 kB)

social_moderation/detectors/__pycache__/yolov8_standard.cpython-313.pyc
ADDED
Binary file (2.07 kB)

social_moderation/detectors/hatebert_detector.py
ADDED
@@ -0,0 +1,91 @@
# detectors/hatebert_detector.py
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import easyocr
import cv2
import numpy as np

class HateBERTDetector:
    def __init__(self, confidence_threshold=0.7):
        """
        Initialize hate speech detection model.
        :param confidence_threshold: minimum confidence for hate speech classification
        """
        self.confidence_threshold = confidence_threshold

        # Load fine-tuned hate speech detection model
        print("Loading hate speech detection model...")
        # Using a properly fine-tuned model for hate speech detection
        self.model_name = "Hate-speech-CNERG/dehatebert-mono-english"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
        self.model.eval()

        # Initialize OCR reader
        print("Loading EasyOCR...")
        self.reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
        print("HateBERT and OCR loaded successfully.")

    def detect_text_regions(self, image):
        """
        Detect text regions in an image using OCR.
        :param image: input image (BGR format)
        :return: list of (bbox, text, confidence) tuples
        """
        # Convert BGR to RGB for EasyOCR
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Detect text
        results = self.reader.readtext(rgb_image)

        text_regions = []
        for (bbox, text, prob) in results:
            # Convert bbox to [x1, y1, x2, y2] format
            bbox_array = np.array(bbox)
            x1 = int(bbox_array[:, 0].min())
            y1 = int(bbox_array[:, 1].min())
            x2 = int(bbox_array[:, 0].max())
            y2 = int(bbox_array[:, 1].max())

            text_regions.append(([x1, y1, x2, y2], text, prob))

        return text_regions

    def is_hate_speech(self, text):
        """
        Check if text contains hate speech using HateBERT.
        :param text: input text
        :return: (is_hate, confidence_score)
        """
        # Tokenize and predict
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = torch.softmax(outputs.logits, dim=-1)

        # Get hate speech probability (assuming class 1 is hate)
        hate_prob = probabilities[0][1].item()

        is_hate = hate_prob >= self.confidence_threshold
        return is_hate, hate_prob

    def detect_hate_regions(self, image):
        """
        Detect regions containing hate speech in an image.
        :param image: input image (BGR format)
        :return: list of bounding boxes containing hate speech
        """
        # Detect all text regions
        text_regions = self.detect_text_regions(image)

        hate_regions = []
        for (bbox, text, ocr_prob) in text_regions:
            # Check if text contains hate speech
            is_hate, hate_prob = self.is_hate_speech(text)

            if is_hate:
                print(f"Detected hate speech: '{text}' (confidence: {hate_prob:.2f})")
                hate_regions.append(bbox)

        return hate_regions

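A short usage sketch for HateBERTDetector; the image path and the Gaussian-blur step are illustrative additions, and the import assumes the package layout shown in this upload.

# illustrative sketch only: flag hate-speech text in one image and blur it
import cv2
from social_moderation.detectors.hatebert_detector import HateBERTDetector

detector = HateBERTDetector(confidence_threshold=0.7)
frame = cv2.imread("example.jpg")  # placeholder path
for (x1, y1, x2, y2) in detector.detect_hate_regions(frame):
    roi = frame[y1:y2, x1:x2]
    frame[y1:y2, x1:x2] = cv2.GaussianBlur(roi, (51, 51), 0)
cv2.imwrite("example_blurred.jpg", frame)
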
social_moderation/detectors/nsfw_detector.py
ADDED
@@ -0,0 +1,193 @@
"""
Enhanced NSFW, Violence, and Blood Detection System
Multi-model approach with improved accuracy
"""

import torch
import cv2
import numpy as np
import logging
from transformers import pipeline, AutoModelForImageClassification, AutoFeatureExtractor
from PIL import Image

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class NSFWDetector:
    """Multi-model NSFW, Violence, and Blood detection."""

    def __init__(self, nsfw_threshold=0.7, violence_threshold=0.6, blood_threshold=0.5, blood_percentage_threshold=8.0):
        """
        Initialize detector with configurable thresholds.

        Args:
            nsfw_threshold: NSFW confidence (0-1)
            violence_threshold: Violence confidence (0-1)
            blood_threshold: Blood/gore confidence (0-1)
            blood_percentage_threshold: Red pixel percentage threshold
        """
        self.nsfw_threshold = nsfw_threshold
        self.violence_threshold = violence_threshold
        self.blood_threshold = blood_threshold
        self.blood_percentage_threshold = blood_percentage_threshold

        self.device = 0 if torch.cuda.is_available() else -1
        logger.info(f"✓ Using device: {'GPU' if self.device == 0 else 'CPU'}")

        # Initialize models
        self.nsfw_model = None
        self.violence_model = None
        self._init_models()

        logger.info(f"✓ NSFW Detector initialized (NSFW:{nsfw_threshold}, Violence:{violence_threshold}, Blood:{blood_threshold})")

    def _init_models(self):
        """Initialize classification models."""
        try:
            self.nsfw_model = pipeline(
                "image-classification",
                model="Falconsai/nsfw_image_detection",
                device=self.device
            )
            logger.info("✓ NSFW model loaded")
        except Exception as e:
            logger.warning(f"⚠️ NSFW model failed: {e}")
            self.nsfw_model = None

        try:
            self.violence_model = pipeline(
                "image-classification",
                model="microbiophoton/Violence_Detection_Using_Deep_Learning",
                device=self.device
            )
            logger.info("✓ Violence model loaded")
        except Exception as e:
            logger.warning(f"⚠️ Violence model failed: {e}")
            self.violence_model = None

    def detect_blood_by_color(self, image):
        """
        Detect blood using HSV color range analysis.
        Returns: (has_blood, blood_percentage, confidence)
        """
        # Convert to HSV
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # Define red color range in HSV
        lower_red1 = np.array([0, 50, 50])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([170, 50, 50])
        upper_red2 = np.array([180, 255, 255])

        # Create masks
        mask1 = cv2.inRange(hsv, lower_red1, upper_red1)
        mask2 = cv2.inRange(hsv, lower_red2, upper_red2)
        red_mask = cv2.bitwise_or(mask1, mask2)

        # Calculate percentage
        red_pixels = cv2.countNonZero(red_mask)
        total_pixels = image.shape[0] * image.shape[1]
        blood_percentage = (red_pixels / total_pixels) * 100

        # Determine if blood is present
        has_blood = blood_percentage >= self.blood_percentage_threshold
        confidence = min(blood_percentage / 20.0, 1.0)  # Normalize to 0-1

        return has_blood, blood_percentage, confidence

    def detect_nsfw(self, image):
        """
        Detect NSFW content using model.
        Returns: (is_nsfw, scores_dict)
        """
        if self.nsfw_model is None:
            return False, {}

        try:
            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            results = self.nsfw_model(pil_image)

            scores = {r['label']: r['score'] for r in results}

            # Check if NSFW score exceeds threshold
            nsfw_score = scores.get('nsfw', 0.0)
            is_nsfw = nsfw_score > self.nsfw_threshold

            return is_nsfw, scores
        except Exception as e:
            logger.warning(f"NSFW detection failed: {e}")
            return False, {}

    def detect_violence(self, image):
        """
        Detect violence using model.
        Returns: (is_violent, scores_dict)
        """
        if self.violence_model is None:
            return False, {}

        try:
            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            results = self.violence_model(pil_image)

            scores = {r['label']: r['score'] for r in results}

            # Check violence score
            violence_score = scores.get('Violence', scores.get('violence', 0.0))
            is_violent = violence_score > self.violence_threshold

            return is_violent, scores
        except Exception as e:
            logger.warning(f"Violence detection failed: {e}")
            return False, {}

    def analyze(self, image):
        """
        Full analysis: NSFW, Violence, and Blood.
        Returns: dict with all detections
        """
        analysis = {
            'is_safe': True,
            'reasons': [],
            'scores': {
                'nsfw': 0.0,
                'violence': 0.0,
                'blood': 0.0
            },
            'flags': []
        }

        # Blood detection (color-based, fastest)
        has_blood, blood_pct, blood_conf = self.detect_blood_by_color(image)
        analysis['scores']['blood'] = blood_conf

        if has_blood:
            analysis['is_safe'] = False
            analysis['reasons'].append(f"Blood detected ({blood_pct:.1f}%)")
            analysis['flags'].append('blood')
            logger.info(f"🔴 BLOOD DETECTED: {blood_pct:.1f}%")

        # NSFW detection
        is_nsfw, nsfw_scores = self.detect_nsfw(image)
        if nsfw_scores:
            analysis['scores']['nsfw'] = nsfw_scores.get('nsfw', 0.0)

        if is_nsfw:
            analysis['is_safe'] = False
            analysis['reasons'].append("NSFW content detected")
            analysis['flags'].append('nsfw')
            logger.info("🔴 NSFW CONTENT DETECTED")

        # Violence detection
        is_violent, violence_scores = self.detect_violence(image)
        if violence_scores:
            violence_score = violence_scores.get('Violence', violence_scores.get('violence', 0.0))
            analysis['scores']['violence'] = violence_score

        if is_violent:
            analysis['is_safe'] = False
            analysis['reasons'].append("Violent content detected")
            analysis['flags'].append('violence')
            logger.info("🔴 VIOLENT CONTENT DETECTED")

        return analysis

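A short usage sketch for NSFWDetector.analyze; the image path is a placeholder and the import assumes the package layout shown in this upload.

# illustrative sketch only: classify one frame and report why it was flagged
import cv2
from social_moderation.detectors.nsfw_detector import NSFWDetector

detector = NSFWDetector(nsfw_threshold=0.7, violence_threshold=0.6)
frame = cv2.imread("example.jpg")  # placeholder path
report = detector.analyze(frame)
if not report["is_safe"]:
    print("flags:", report["flags"])
    print("reasons:", report["reasons"])
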
social_moderation/detectors/offensive_word_detector.py
ADDED
@@ -0,0 +1,160 @@
# detectors/offensive_word_detector.py
import easyocr
import cv2
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

class OffensiveWordDetector:
    def __init__(self, confidence_threshold=0.6):
        """
        Detector that combines keyword matching with an ML model for better detection.
        Works well for both sentences and individual words.
        """
        self.confidence_threshold = confidence_threshold

        # Hate/offensive keyword list (common hate speech terms)
        self.offensive_keywords = {
            # Hate-related terms
            'hate', 'hatred', 'hater', 'haters', 'hating',
            # Violence terms
            'kill', 'murder', 'death', 'die', 'violence', 'violent', 'attack', 'attacks',
            # Slurs and discrimination
            'racist', 'racism', 'sexist', 'sexism', 'bigot', 'bigotry', 'slur', 'slurs',
            # Offensive descriptors
            'stupid', 'idiot', 'moron', 'dumb', 'retard', 'retarded',
            # Threats
            'threat', 'threaten', 'terrorize', 'terror',
            # Derogatory terms
            'scum', 'trash', 'garbage', 'worthless', 'inferior',
            # Extremism
            'supremacy', 'nazi', 'fascist',
            # Additional hate indicators
            'discriminate', 'discrimination', 'prejudice', 'prejudicial',
            'misogyny', 'misogynist', 'homophobe', 'homophobia',
            'xenophobe', 'xenophobia', 'islamophobe', 'islamophobia',
            'antisemite', 'antisemitism',
            # Add common slurs (redacted versions)
            # Note: You can expand this list based on your needs
        }

        # Load ML model for phrase-level detection
        print("Loading offensive content detection model...")
        self.model_name = "Hate-speech-CNERG/dehatebert-mono-english"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
        self.model.eval()

        # Initialize OCR
        print("Loading EasyOCR...")
        self.reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
        print("Model loaded successfully.")

    def detect_text_regions(self, image):
        """
        Detect text regions with improved handling for word clouds.
        """
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Use EasyOCR with adjusted parameters for better word cloud detection
        results = self.reader.readtext(
            rgb_image,
            paragraph=False,     # Don't group into paragraphs
            min_size=10,         # Detect smaller text
            text_threshold=0.6   # Lower threshold for varied fonts
        )

        text_regions = []
        for (bbox, text, prob) in results:
            bbox_array = np.array(bbox)
            x1 = int(bbox_array[:, 0].min())
            y1 = int(bbox_array[:, 1].min())
            x2 = int(bbox_array[:, 0].max())
            y2 = int(bbox_array[:, 1].max())

            text_regions.append(([x1, y1, x2, y2], text, prob))

        return text_regions

    def is_offensive_keyword(self, text):
        """
        Check if text contains offensive keywords.
        """
        text_lower = text.lower().strip()

        # Check exact match
        if text_lower in self.offensive_keywords:
            return True, 1.0

        # Check if any keyword is contained in the text
        for keyword in self.offensive_keywords:
            if keyword in text_lower:
                return True, 0.9

        return False, 0.0

    def is_offensive_ml(self, text):
        """
        Use ML model to detect offensive content in phrases.
        """
        if len(text.strip()) < 3:
            return False, 0.0

        inputs = self.tokenizer(text, return_tensors="pt", truncation=True,
                                max_length=512, padding=True)

        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = torch.softmax(outputs.logits, dim=-1)

        offensive_prob = probabilities[0][1].item()
        is_offensive = offensive_prob >= self.confidence_threshold

        return is_offensive, offensive_prob

    def is_offensive(self, text):
        """
        Combined detection: keyword matching + ML model.
        Returns True if either method detects offensive content.
        """
        # First check keywords (fast)
        is_keyword_match, keyword_score = self.is_offensive_keyword(text)
        if is_keyword_match:
            return True, keyword_score

        # Then use ML model for context-aware detection
        is_ml_offensive, ml_score = self.is_offensive_ml(text)
        if is_ml_offensive:
            return True, ml_score

        return False, max(keyword_score, ml_score)

    def detect_offensive_regions(self, image, verbose=True):
        """
        Detect all regions containing offensive content.
        """
        text_regions = self.detect_text_regions(image)

        if verbose:
            print(f"Detected {len(text_regions)} text region(s)")

        offensive_regions = []

        for (bbox, text, ocr_prob) in text_regions:
            # Clean the text
            text_cleaned = text.strip()

            if len(text_cleaned) < 2:
                continue

            # Check if offensive
            is_offensive, confidence = self.is_offensive(text_cleaned)

            if is_offensive:
                if verbose:
                    print(f"⚠️ Offensive: '{text_cleaned}' (confidence: {confidence:.2f})")
                offensive_regions.append(bbox)
            elif verbose and len(text_cleaned) > 2:
                print(f"✓ Clean: '{text_cleaned}' (score: {confidence:.2f})")

        return offensive_regions

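A short usage sketch for OffensiveWordDetector, again with a placeholder image path and an assumed import path.

# illustrative sketch only: OCR one image and count regions flagged as offensive
import cv2
from social_moderation.detectors.offensive_word_detector import OffensiveWordDetector

detector = OffensiveWordDetector(confidence_threshold=0.6)
frame = cv2.imread("example.jpg")  # placeholder path
boxes = detector.detect_offensive_regions(frame, verbose=True)
print(f"{len(boxes)} region(s) would be blurred")
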
social_moderation/detectors/opencv_face.py
ADDED
@@ -0,0 +1,41 @@
# detectors/opencv_face.py
import cv2
import os

class OpenCVFace:
    def __init__(self, conf=0.5):
        """
        Face detection using OpenCV's built-in Haar cascades.
        """
        self.conf = conf

        # Load pre-trained Haar cascade for face detection
        cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        if not os.path.exists(cascade_path):
            # Download if needed (though it should be included with OpenCV)
            print("Haar cascade not found. Please ensure OpenCV is properly installed.")
            raise FileNotFoundError("Haar cascade not found")

        self.face_cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image, confidence_threshold=None):
        """
        Detect faces using OpenCV's Haar cascades.
        """
        # Convert to grayscale for detection
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Detect faces
        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30)
        )

        # Convert to [x1, y1, x2, y2] format
        boxes = []
        for (x, y, w, h) in faces:
            boxes.append([x, y, x + w, y + h])

        return boxes

social_moderation/detectors/opencv_face_p.py
ADDED
@@ -0,0 +1,69 @@
"""
OpenCV Haar Cascade Face Detector (Fallback)
Lightweight CPU-based face detection
"""

import cv2
import logging
import numpy as np
from typing import List, Tuple

logger = logging.getLogger(__name__)

class OpenCVFace:
    """OpenCV Haar Cascade face detector as lightweight fallback."""

    def __init__(self):
        """Initialize Haar Cascade classifier."""
        try:
            cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            self.face_cascade = cv2.CascadeClassifier(cascade_path)

            if self.face_cascade.empty():
                raise IOError("Failed to load Haar cascade")

            logger.info("✅ OpenCV Haar Cascade face detector loaded")

        except Exception as e:
            logger.error(f"Failed to load OpenCV face detector: {e}")
            raise

    def detect_faces(self, image: np.ndarray,
                     conf_threshold: float = 0.5) -> List[Tuple[int, int, int, int, float]]:
        """
        Detect faces using Haar Cascade.

        Args:
            image: Input image (BGR)
            conf_threshold: Unused (kept for API consistency)

        Returns:
            List of (x1, y1, x2, y2, confidence)
        """
        if image is None or image.size == 0:
            return []

        try:
            # Convert to grayscale
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Detect faces
            faces = self.face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30),
                flags=cv2.CASCADE_SCALE_IMAGE
            )

            # Convert to (x1, y1, x2, y2, conf) format
            detections = []
            for (x, y, w, h) in faces:
                detections.append((x, y, x + w, y + h, 0.85))  # Fixed confidence

            logger.debug(f"OpenCV detected {len(detections)} faces")
            return detections

        except Exception as e:
            logger.error(f"OpenCV face detection failed: {e}")
            return []

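A short usage sketch for the Haar-cascade fallback, pixelating each detected face with a resize-down/resize-up mosaic; the image path is a placeholder and the import assumes the package layout shown in this upload.

# illustrative sketch only: detect faces with the OpenCV fallback and pixelate them
import cv2
from social_moderation.detectors.opencv_face_p import OpenCVFace

detector = OpenCVFace()
frame = cv2.imread("example.jpg")  # placeholder path
for (x1, y1, x2, y2, conf) in detector.detect_faces(frame):
    roi = frame[y1:y2, x1:x2]
    small = cv2.resize(roi, (8, 8), interpolation=cv2.INTER_LINEAR)
    frame[y1:y2, x1:x2] = cv2.resize(small, (x2 - x1, y2 - y1), interpolation=cv2.INTER_NEAREST)
cv2.imwrite("example_faces_blurred.jpg", frame)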