Update src/streamlit_app.py

src/streamlit_app.py  CHANGED  +282 -59
@@ -1,5 +1,6 @@
 """
-Review Validator -
+Review Validator - Advanced Edition
+With explainability graphs + PDF report download
 """
 
 import os
@@ -13,9 +14,20 @@ import matplotlib
 matplotlib.use('Agg')  # use non-GUI backend for Streamlit
 import matplotlib.pyplot as plt
 import requests
-import urllib.parse
 import math
 import warnings
+import re
+from collections import Counter
+from datetime import datetime
+import textwrap
+
+# Try to import ReportLab for PDF generation
+try:
+    from reportlab.lib.pagesizes import A4
+    from reportlab.pdfgen import canvas
+    HAVE_REPORTLAB = True
+except ImportError:
+    HAVE_REPORTLAB = False
 
 # --- Setup: Silence the technical noise ---
 warnings.filterwarnings("ignore")
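
The new try/except import is the usual optional-dependency gate: the app records a single HAVE_REPORTLAB flag at import time and branches on it later, rather than crashing at startup when the library is absent. A minimal sketch of the same pattern in isolation (the export_pdf helper name is illustrative, not from this commit):

    # Degrade gracefully when an optional library is missing.
    try:
        from reportlab.pdfgen import canvas  # optional dependency
        HAVE_REPORTLAB = True
    except ImportError:
        HAVE_REPORTLAB = False

    def export_pdf(data):  # illustrative helper, not in this diff
        if not HAVE_REPORTLAB:
            raise RuntimeError("Install reportlab to enable PDF export.")
        # ... build the PDF with canvas ...
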
@@ -30,7 +42,7 @@ st.set_page_config(
 )
 
 # ==========================================
-# 🧠
+# 🧠 MODELS (Better public detectors)
 # ==========================================
 
 # 1. Text AI Detector: ModernBERT-based detector (0 = human, 1 = AI)
@@ -54,9 +66,9 @@ MODEL_CAPTION = "Salesforce/blip-image-captioning-base"
 def get_token():
     """
     Safely retrieves HF_TOKEN.
-    Priority 1:
+    Priority 1: Env var (Spaces)
     Priority 2: Streamlit Secrets (Local)
-    Optional
+    Optional - app still runs if missing.
     """
     token = os.environ.get("HF_TOKEN")
     if token:
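
Only the docstring changes here, but it now pins down the lookup order the helper implements. A sketch of a body consistent with that order (the st.secrets branch is an assumption, since the rest of the function sits outside this hunk):

    def get_token():
        token = os.environ.get("HF_TOKEN")   # Priority 1: env var (Spaces)
        if token:
            return token
        try:
            return st.secrets["HF_TOKEN"]    # Priority 2: Streamlit secrets (local)
        except Exception:
            return None                      # Optional: app still runs without it
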
@@ -164,7 +176,6 @@ def load_ai_squad():
     squad = {}
     errors = []
 
-    # token is optional for public models - only pass if available
     token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
 
     # TEXT MODELS
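
token_arg is built once and splatted into each loader call, so the token keyword is only passed when a token actually exists. In isolation (the pipeline call and model id are assumptions; the diff only shows the **token_arg splat):

    from transformers import pipeline  # assumed loader behind the squad
    token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
    # With a token: pipeline(..., token=HF_TOKEN); without one: a plain call.
    detector = pipeline("text-classification", model="some/model-id", **token_arg)
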
@@ -213,20 +224,113 @@ def load_ai_squad():
             **token_arg
         )
     except Exception as e:
-        # Caption is optional - not critical
         errors.append(f"Caption model: {e}")
 
-    # If literally nothing loaded, return None so main() can show a clean error
     if not squad:
         return None, "No models could be loaded. Check internet / HF token / requirements."
 
-    # If some models failed, join messages (optional debug info)
     err_msg = "\n".join(errors) if errors else None
     return squad, err_msg
 
+# --- Utility: Basic text stats for explainability ---
+STOPWORDS = set([
+    "the","a","an","is","are","am","and","or","in","on","at","of","to","for",
+    "this","that","it","was","with","as","by","be","from","has","have","had",
+    "i","you","we","they","he","she","my","our","their","your"
+])
+
+def split_sentences(text: str):
+    # simple sentence splitter
+    parts = re.split(r'[.!?]+', text)
+    return [s.strip() for s in parts if s.strip()]
+
+def tokenize_words(text: str):
+    tokens = re.findall(r"[A-Za-z']+", text.lower())
+    return tokens
+
+def analyze_text_structure(text: str):
+    sentences = split_sentences(text)
+    words = tokenize_words(text)
+
+    num_sentences = max(len(sentences), 1)
+    num_words = len(words)
+
+    sent_lengths = [len(tokenize_words(s)) for s in sentences] or [0]
+    avg_sent_len = sum(sent_lengths) / len(sent_lengths)
+    var_sent_len = float(np.var(sent_lengths)) if len(sent_lengths) > 1 else 0.0
+
+    # vocabulary diversity
+    vocab = set(w for w in words if w not in STOPWORDS)
+    vocab_size = len(vocab)
+    ttr = (vocab_size / num_words) if num_words > 0 else 0.0  # type-token ratio
+
+    # top words
+    filtered = [w for w in words if w not in STOPWORDS]
+    counter = Counter(filtered)
+    top_words = counter.most_common(10)
+
+    return {
+        "num_sentences": num_sentences,
+        "num_words": num_words,
+        "avg_sentence_len": avg_sent_len,
+        "var_sentence_len": var_sent_len,
+        "ttr": ttr,
+        "top_words": top_words,
+        "sentence_lengths": sent_lengths,
+    }
+
+def explain_text(res, stats, strict_mode: bool):
+    """
+    Heuristic explanation based on AI score + grammar + structure.
+    Returns a list of bullet strings.
+    """
+    bot = res["bot_score"]
+    gram = res["grammar_score"]
+    mood = res["mood_label"]
+    avg_len = stats["avg_sentence_len"]
+    var_len = stats["var_sentence_len"]
+    ttr = stats["ttr"]
+
+    reasons = []
+
+    # AI-likeness
+    if bot >= 85:
+        reasons.append("High AI-likeness score - model strongly associates this style with AI text.")
+    elif bot >= 65:
+        reasons.append("Moderate AI-likeness score - some patterns resemble AI-generated writing.")
+    else:
+        reasons.append("Low AI-likeness score - style leans closer to typical human-written reviews.")
+
+    # Grammar
+    if gram >= 85 and bot >= 70:
+        reasons.append("Grammar is near-perfect and very consistent, which is common in AI text.")
+    elif gram >= 85 and bot < 50:
+        reasons.append("Grammar is very clean but the AI score is low - this could be a careful human reviewer.")
+    elif gram < 60:
+        reasons.append("Grammar has noticeable imperfections, more typical of casual human writing.")
+
+    # Sentence structure
+    if var_len < 5 and avg_len > 12 and bot >= 70:
+        reasons.append("Sentence length is very uniform and long, which often appears in AI outputs.")
+    elif var_len > 15:
+        reasons.append("Sentence length varies a lot, which is more natural for human writing.")
+
+    # Vocabulary diversity
+    if ttr < 0.3 and bot >= 70:
+        reasons.append("Vocabulary diversity is low despite longer text, hinting at a templated or generated style.")
+    elif ttr > 0.45:
+        reasons.append("Vocabulary diversity is relatively high, which often indicates a human author.")
+
+    # Mood-based explanation
+    reasons.append(f"Overall sentiment detected: **{mood}**.")
+
+    if strict_mode:
+        reasons.append("Strict mode: thresholds are higher, so AI flags are more conservative but precise.")
+
+    return reasons
+
 # --- Logic: Analyze Text ---
 def check_text(text, squad):
-    # If fake detector missing, no point pretending
     if 'fake' not in squad:
         return {
             "bot_score": 0,
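
To make the new stats concrete, a quick usage sketch of analyze_text_structure (the sample review is invented here):

    sample = "Great phone. Battery lasts two days! Camera is sharp."
    stats = analyze_text_structure(sample)
    # 3 sentences, 9 word tokens; "is" is the only stopword, so the
    # type-token ratio is 8 unique content words / 9 tokens ≈ 0.89
    print(stats["num_sentences"], stats["avg_sentence_len"], round(stats["ttr"], 2))
    # -> 3 3.0 0.89
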
@@ -237,15 +341,14 @@ def check_text(text, squad):
             "error_msg": "AI text detector not loaded."
         }
 
-    # 1. Bot / AI Check
-    res_fake = squad['fake'](text[:512])[0]
+    # 1. Bot / AI Check
+    res_fake = squad['fake'](text[:512])[0]
     raw_label = res_fake.get('label', '1')
     raw_score = float(res_fake.get('score', 0.5))
 
     try:
         label_id = int(raw_label)
     except ValueError:
-        # in case model changes label format later
         label_id = 1 if "1" in str(raw_label) else 0
 
     if label_id == 1:
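
The parsing here assumes the detector emits {'label': '0'|'1', 'score': p} with 1 meaning AI, and the ValueError fallback catches string labels such as 'LABEL_1'. Traced by hand (the example output is invented; the ai_prob branch bodies sit just below this hunk):

    res_fake = {"label": "LABEL_1", "score": 0.92}
    try:
        label_id = int(res_fake["label"])                  # fails: not a bare digit
    except ValueError:
        label_id = 1 if "1" in res_fake["label"] else 0    # -> 1
    # label_id == 1 -> ai_prob = 0.92 -> bot_score = 92.0
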
@@ -255,7 +358,7 @@ def check_text(text, squad):
 
     bot_score = ai_prob * 100.0
 
-    # 2. Mood
+    # 2. Mood
     mood_label = "Unknown"
     if 'mood' in squad:
         try:
@@ -264,7 +367,7 @@ def check_text(text, squad):
         except Exception:
             mood_label = "Unknown"
 
-    # 3. Grammar
+    # 3. Grammar (CoLA)
     grammar_score = 50.0
     if 'grammar' in squad:
         try:
@@ -286,14 +389,9 @@ def check_text(text, squad):
 
 # --- Logic: Analyze Image ---
 def check_image(img, squad):
-    """
-    Returns AI probability and debug scores.
-    Uses a single main detector to avoid conflicts.
-    """
     caption_text = "Caption unavailable"
     ai_chance = 0.0
 
-    # 1. Image AI Detector
     if 'img_main' in squad:
         try:
             preds = squad['img_main'](img)
@@ -307,14 +405,12 @@ def check_image(img, squad):
             elif "real" in label:
                 ai_prob = 1 - score
             else:
-                # unknown label - assume score ~ AI probability
                 ai_prob = score
 
             ai_chance = ai_prob * 100.0
         except Exception:
             ai_chance = 0.0
 
-    # 2. Captioning (optional, does not affect AI score)
     if 'caption' in squad:
         try:
             cap_res = squad['caption'](img)
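
check_image normalizes the detector's label vocabulary by keyword: a "real" label inverts the confidence, and unknown labels are taken at face value. A compact sketch of the mapping (the AI-side keywords in the first branch sit above this hunk, so they are assumed here):

    def normalize_ai_prob(label: str, score: float) -> float:
        label = label.lower()
        if "artificial" in label or "fake" in label:   # assumed AI-side keywords
            return score           # score already means "probability of AI"
        if "real" in label:
            return 1 - score       # confidence in "real" -> AI probability
        return score               # unknown label: assume score ~ AI probability
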
@@ -325,17 +421,13 @@ def check_image(img, squad):
 
     return {
         "ai_chance": ai_chance,
-        "match": 1.0,
+        "match": 1.0,
         "score_a": ai_chance,
         "score_b": ai_chance,
         "caption": caption_text
     }
 
 def get_image_from_url(url):
-    """
-    Safely fetch image from URL.
-    If anything goes wrong, return None instead of crashing (prevents 402-style front-end errors).
-    """
     try:
         headers = {
             'User-Agent': (
@@ -352,12 +444,12 @@ def get_image_from_url(url):
         return None
 
 # --- Plotting ---
+
 def breakdown_chart(stats):
-    """Horizontal bar chart for text analysis breakdown"""
     labels = ['AI-Likeness', 'Grammar Quality']
     values = [stats['bot_score'], stats['grammar_score']]
 
-    fig, ax = plt.subplots(figsize=(
+    fig, ax = plt.subplots(figsize=(4, 2))
     y_pos = np.arange(len(labels))
 
     ax.barh(y_pos, values, align='center', height=0.6)
@@ -375,6 +467,103 @@ def breakdown_chart(stats):
     plt.tight_layout()
     return fig
 
+def sentence_length_chart(stats):
+    lens = stats["sentence_lengths"]
+    fig, ax = plt.subplots(figsize=(4, 2))
+    ax.hist(lens, bins=min(len(lens), 8) or 1, edgecolor='black')
+    ax.set_xlabel("Sentence length (words)")
+    ax.set_ylabel("Count")
+    ax.set_title("Sentence Length Distribution")
+    plt.tight_layout()
+    return fig
+
+def word_freq_chart(stats):
+    top_words = stats["top_words"]
+    if not top_words:
+        fig, ax = plt.subplots(figsize=(4, 2))
+        ax.text(0.5, 0.5, "Not enough text", ha='center', va='center')
+        ax.axis('off')
+        return fig
+
+    words, freqs = zip(*top_words)
+    fig, ax = plt.subplots(figsize=(4, 2))
+    x = np.arange(len(words))
+    ax.bar(x, freqs)
+    ax.set_xticks(x)
+    ax.set_xticklabels(words, rotation=45, ha='right')
+    ax.set_ylabel("Frequency")
+    ax.set_title("Top Words (excluding stopwords)")
+    plt.tight_layout()
+    return fig
+
+# --- PDF REPORT GENERATION ---
+
+def generate_pdf_report(platform, review_text, text_res, text_stats, image_info):
+    """
+    Returns PDF bytes. Requires ReportLab.
+    image_info: dict or None
+    """
+    buffer = io.BytesIO()
+    c = canvas.Canvas(buffer, pagesize=A4)
+    width, height = A4
+    y = height - 50
+
+    def write_line(text, font="Helvetica", size=10, leading=14):
+        nonlocal y
+        c.setFont(font, size)
+        wrapped = textwrap.wrap(text, width=90)
+        for line in wrapped:
+            if y < 50:
+                c.showPage()
+                y = height - 50
+                c.setFont(font, size)
+            c.drawString(50, y, line)
+            y -= leading
+
+    # Header
+    c.setFont("Helvetica-Bold", 16)
+    c.drawString(50, y, "Review Validator Report")
+    y -= 25
+    c.setFont("Helvetica", 10)
+    c.drawString(50, y, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    y -= 15
+    c.drawString(50, y, f"Platform: {platform}")
+    y -= 25
+
+    # Scores
+    write_line("=== Text Analysis ===", font="Helvetica-Bold", size=12)
+    write_line(f"AI-Likeness Score: {text_res['bot_score']:.1f}%")
+    write_line(f"Grammar Quality: {text_res['grammar_score']:.1f}%")
+    write_line(f"Sentiment: {text_res['mood_label']}")
+    y -= 10
+
+    # Structure stats
+    write_line("Text Structure:", font="Helvetica-Bold", size=11)
+    write_line(f"- Sentences: {text_stats['num_sentences']}")
+    write_line(f"- Words: {text_stats['num_words']}")
+    write_line(f"- Average sentence length: {text_stats['avg_sentence_len']:.1f} words")
+    write_line(f"- Sentence length variance: {text_stats['var_sentence_len']:.1f}")
+    write_line(f"- Vocabulary diversity (TTR): {text_stats['ttr']:.2f}")
+    y -= 10
+
+    # Review text
+    write_line("Original Review:", font="Helvetica-Bold", size=11)
+    write_line(review_text or "[empty review]")
+    y -= 10
+
+    # Image analysis
+    if image_info is not None:
+        write_line("=== Image Analysis ===", font="Helvetica-Bold", size=12)
+        write_line(f"AI Probability: {image_info['ai_chance']:.1f}%")
+        write_line(f"Caption (approx): {image_info['caption']}")
+        y -= 10
+
+    c.showPage()
+    c.save()
+    pdf_bytes = buffer.getvalue()
+    buffer.close()
+    return pdf_bytes
+
 # --- PAGES ---
 
 def landing_page():
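
Because generate_pdf_report returns raw bytes, the result can feed st.download_button directly or be written straight to disk. A usage sketch with made-up inputs (requires reportlab installed, i.e. HAVE_REPORTLAB is True):

    pdf_bytes = generate_pdf_report(
        "Amazon",                                   # platform (example value)
        "Great phone, fast delivery.",              # review_text
        {"bot_score": 42.0, "grammar_score": 88.0, "mood_label": "Positive"},
        analyze_text_structure("Great phone, fast delivery."),
        None,                                       # image_info: no image analyzed
    )
    with open("review_validator_report.pdf", "wb") as f:
        f.write(pdf_bytes)
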
@@ -382,7 +571,7 @@ def landing_page():
    <div class="hero-box">
        <div class="hero-title">🛡️ Review Validator</div>
        <div class="hero-subtitle">
-
+            Advanced AI-powered review and image analysis with graphs, explainability, and exportable reports.
        </div>
    </div>
    """, unsafe_allow_html=True)
@@ -392,24 +581,24 @@ def landing_page():
         st.markdown("""
         <div class="feature-card">
             <span class="emoji-icon">🤖</span>
-            <h3>AI Text
-            <p>
+            <h3>AI Text Detector</h3>
+            <p>Modern models estimate whether a review looks AI-generated or human-written.</p>
         </div>
         """, unsafe_allow_html=True)
     with c2:
         st.markdown("""
         <div class="feature-card">
             <span class="emoji-icon">📸</span>
-            <h3>
-            <p>
+            <h3>Image Authenticity</h3>
+            <p>Checks if product photos look real or AI-generated, with approximate captions.</p>
         </div>
         """, unsafe_allow_html=True)
     with c3:
         st.markdown("""
         <div class="feature-card">
-            <span class="emoji-icon"
-            <h3>
-            <p>
+            <span class="emoji-icon">📊</span>
+            <h3>Explainable Reports</h3>
+            <p>Graphs, breakdowns, explanations, and PDF report downloads for sharing.</p>
         </div>
         """, unsafe_allow_html=True)
 
@@ -465,12 +654,13 @@ def detector_page(squad, warnings_text=None):
         if txt_input.strip():
             with st.spinner("Analyzing text..."):
                 res = check_text(txt_input, squad)
-
+                stats = analyze_text_structure(txt_input)
+                st.session_state['text_res'] = (res, stats, strict_mode, platform, txt_input)
         else:
             st.warning("Please paste a review first.")
 
     if 'text_res' in st.session_state:
-        res,
+        res, stats, strict_mode_saved, platform_saved, review_text_saved = st.session_state['text_res']
 
         if res.get("error"):
             st.error(res.get("error_msg", "Text models failed to load."))
@@ -481,8 +671,8 @@ def detector_page(squad, warnings_text=None):
         grammar_score = res['grammar_score']
         mood_label = res['mood_label']
 
-        # Thresholds
-        if
+        # Thresholds
+        if strict_mode_saved:
             t_high = 90
             t_mid = 70
         else:
@@ -524,25 +714,57 @@ def detector_page(squad, warnings_text=None):
         )
 
         st.write("")
-
-        with
-            st.markdown("#### 📊
+        g1, g2, g3 = st.columns(3)
+        with g1:
+            st.markdown("#### 📊 Scores")
             fig = breakdown_chart(res)
-            st.pyplot(fig)
-        with
-            st.markdown("####
-
-
-
-
-
-
+            st.pyplot(fig, use_container_width=True)
+        with g2:
+            st.markdown("#### 📈 Sentence Lengths")
+            fig2 = sentence_length_chart(stats)
+            st.pyplot(fig2, use_container_width=True)
+        with g3:
+            st.markdown("#### 🔤 Top Words")
+            fig3 = word_freq_chart(stats)
+            st.pyplot(fig3, use_container_width=True)
+
+        st.markdown("#### 💡 Verdict & Explanation")
+        if verdict_type == "error":
+            st.error(verdict_text)
+        elif verdict_type == "warning":
+            st.warning(verdict_text)
+        else:
+            st.success(verdict_text)
+
+        reasons = explain_text(res, stats, strict_mode_saved)
+        for r in reasons:
+            st.markdown(f"- {r}")
 
-
-
-
-
+        st.markdown(
+            "<small>Note: These scores and explanations are signals, not absolute proof. "
+            "Always combine them with your own judgement.</small>",
+            unsafe_allow_html=True
+        )
+
+        # PDF report button
+        st.write("")
+        if HAVE_REPORTLAB:
+            img_info_for_pdf = st.session_state.get("img_res_for_pdf", None)
+            pdf_bytes = generate_pdf_report(
+                platform_saved,
+                review_text_saved,
+                res,
+                stats,
+                img_info_for_pdf
             )
+            st.download_button(
+                "📄 Download PDF Report",
+                data=pdf_bytes,
+                file_name="review_validator_report.pdf",
+                mime="application/pdf",
+            )
+        else:
+            st.info("PDF report requires reportlab. Add `reportlab` to requirements.txt to enable export.")
 
     # --- IMAGE TAB ---
     with tab2:
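
Storing the whole (res, stats, strict_mode, platform, txt_input) tuple in st.session_state is what keeps the verdict, charts, and PDF button alive across Streamlit's script rerun on every widget interaction. The pattern, stripped to its core:

    if st.button("Analyze"):  # compute once, on the click's rerun
        st.session_state["text_res"] = (
            check_text(txt_input, squad),
            analyze_text_structure(txt_input),
            strict_mode, platform, txt_input,
        )

    if "text_res" in st.session_state:  # render from state on every rerun
        res, stats, strict_saved, platform_saved, text_saved = st.session_state["text_res"]
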
@@ -586,6 +808,8 @@ def detector_page(squad, warnings_text=None):
                 data = check_image(target_img, squad)
                 st.session_state['img_res'] = (data, strict_img)
                 st.session_state['current_img'] = target_img
+                # store a simplified version for PDF report
+                st.session_state['img_res_for_pdf'] = data
 
         with col_view:
             if 'current_img' in st.session_state:
@@ -647,8 +871,7 @@ def main():
 
     warnings_text = None
     if err:
-
-        warnings_text = "Some features may be limited:\n" + err.replace("\n", "<br>")
+        warnings_text = "Some features may be limited:<br>" + err.replace("\n", "<br>")
 
     if st.session_state['page'] == 'landing':
         landing_page()