| """ | |
| Review Validator - Final Version with SerpAPI Integration | |
| """ | |
| import os | |
| import io | |
| import warnings | |
| from collections import Counter | |
| import numpy as np | |
| import streamlit as st | |
| from transformers import pipeline, logging as hf_logging | |
| from PIL import Image | |
| import matplotlib | |
| import matplotlib.pyplot as plt | |
| import requests | |
| from reportlab.lib.pagesizes import A4 | |
| from reportlab.platypus import ( | |
| SimpleDocTemplate, | |
| Paragraph, | |
| Spacer, | |
| Table, | |
| TableStyle, | |
| ) | |
| from reportlab.lib.styles import getSampleStyleSheet | |
| from reportlab.lib import colors | |

# ------------------- SILENCE NOISE -------------------
warnings.filterwarnings("ignore")
hf_logging.set_verbosity_error()
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
matplotlib.use("Agg")

st.set_page_config(
    page_title="Review Validator",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# ------------------- MODEL NAMES -------------------
MODEL_FAKE = "openai-community/roberta-base-openai-detector"
MODEL_MOOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
MODEL_GRAMMAR = "textattack/roberta-base-CoLA"
MODEL_IMG_A = "dima806/ai_generated_image_detection"
MODEL_IMG_B = "umm-maybe/AI-image-detector"
MODEL_CAPTION = "Salesforce/blip-image-captioning-base"
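# Model roles: MODEL_FAKE scores AI-vs-human text, MODEL_MOOD gives sentiment,
# MODEL_GRAMMAR scores grammatical acceptability (CoLA), MODEL_IMG_A/B are two
# independent AI-image detectors, and MODEL_CAPTION (BLIP) describes the image.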

# ------------------- TOKENS / SECRETS -------------------
def get_hf_token():
    token = os.environ.get("HF_TOKEN")
    if token:
        return token
    try:
        if hasattr(st, "secrets") and "HF_TOKEN" in st.secrets:
            return st.secrets["HF_TOKEN"]
    except Exception:
        pass
    return None


def get_serpapi_key():
    key = os.environ.get("SERPAPI_KEY")
    if key:
        return key
    try:
        if hasattr(st, "secrets") and "SERPAPI_KEY" in st.secrets:
            return st.secrets["SERPAPI_KEY"]
    except Exception:
        pass
    return None


HF_TOKEN = get_hf_token()
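
# Both lookups try the environment first, then Streamlit secrets. A minimal
# secrets file (.streamlit/secrets.toml) would look like:
#   HF_TOKEN = "hf_..."
#   SERPAPI_KEY = "..."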

# ------------------- CSS -------------------
def inject_custom_css():
    st.markdown(
        """
        <style>
        .stApp {
            background-color: #ffffff;
            color: #333333;
            font-family: "Helvetica Neue", sans-serif;
        }
        h1 { color:#2C3E50; font-weight:800; }
        h2 { color:#34495E; font-weight:600; }
        .hero-box {
            padding:40px;
            background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);
            border-radius:20px;
            color:white;
            text-align:center;
            margin-bottom:30px;
        }
        .hero-title{font-size:3rem;font-weight:bold;margin-bottom:10px;}
        .hero-subtitle{font-size:1.2rem;opacity:0.9;}
        .feature-card{
            background:#F8F9FA;
            padding:20px;
            border-radius:15px;
            border:1px solid #EEEEEE;
            text-align:center;
            transition:transform 0.2s;
        }
        .feature-card:hover{transform:translateY(-5px);border-color:#764ba2;}
        .emoji-icon{font-size:3rem;margin-bottom:10px;display:block;}
        .stat-box{
            text-align:center;
            padding:15px;
            border-radius:12px;
            background:white;
            box-shadow:0 4px 6px rgba(0,0,0,0.05);
            border:1px solid #EEE;
        }
        .stat-num{font-size:24px;font-weight:900;color:#333;}
        .stat-txt{font-size:12px;text-transform:uppercase;color:#777;letter-spacing:1px;}
        .analysis-box{
            background:#f0f7ff;
            border-left:5px solid #4285F4;
            padding:15px;
            border-radius:5px;
            margin-top:15px;
        }
        .stButton>button{
            border-radius:30px;
            font-weight:bold;
            border:none;
            padding:0.5rem 2rem;
            transition:all 0.3s;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

# ------------------- LOAD MODELS -------------------
@st.cache_resource(show_spinner=False)  # load the heavy pipelines once per session, not on every rerun
def load_ai_squad():
    squad = {}
    if not HF_TOKEN:
        return None, "HF_TOKEN missing. Set it in env or Streamlit secrets."
    try:
        try:
            squad["fake"] = pipeline(
                "text-classification", model=MODEL_FAKE, token=HF_TOKEN
            )
        except Exception as e:
            print("Fake model error:", e)
        try:
            squad["mood"] = pipeline(
                "sentiment-analysis",
                model=MODEL_MOOD,
                tokenizer=MODEL_MOOD,
                token=HF_TOKEN,
            )
        except Exception as e:
            print("Mood model error:", e)
        try:
            squad["grammar"] = pipeline(
                "text-classification", model=MODEL_GRAMMAR, token=HF_TOKEN
            )
        except Exception as e:
            print("Grammar model error:", e)
        try:
            squad["img_a"] = pipeline(
                "image-classification", model=MODEL_IMG_A, token=HF_TOKEN
            )
            squad["img_b"] = pipeline(
                "image-classification", model=MODEL_IMG_B, token=HF_TOKEN
            )
            squad["caption"] = pipeline(
                "image-to-text", model=MODEL_CAPTION, token=HF_TOKEN
            )
        except Exception as e:
            print("Image model error:", e)
    except Exception as e:
        return None, str(e)
    return squad, None
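
# Each pipeline loads inside its own try/except, so one failed download only
# disables that feature: check_text falls back to a neutral grammar score and
# an "Unknown" mood, and the image tab simply reports what it can.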

# ------------------- TEXT HELPERS -------------------
def compute_text_stats(text: str):
    sentences = [
        s.strip()
        for s in text.replace("!", ".").replace("?", ".").split(".")
        if s.strip()
    ]
    words = text.split()
    word_count = len(words)
    sent_lengths = [len(s.split()) for s in sentences] if sentences else []
    avg_sent_len = np.mean(sent_lengths) if sent_lengths else 0.0
    vocab = {w.lower().strip(".,!?\"'") for w in words if w.strip()}
    vocab_size = len(vocab)
    ttr = (vocab_size / word_count * 100) if word_count > 0 else 0.0
    cleaned = [w.lower().strip(".,!?\"'") for w in words if w.strip()]
    common = Counter(cleaned).most_common(8)
    return {
        "sentence_count": len(sentences),
        "word_count": word_count,
        "avg_sentence_length": avg_sent_len,
        "vocab_size": vocab_size,
        "type_token_ratio": ttr,
        "sentence_lengths": sent_lengths,
        "top_words": common,
    }
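
# Worked example: compute_text_stats("good good good bad") gives 1 sentence,
# 4 words, vocab_size 2, and a type-token ratio of 50% (2 unique / 4 total);
# lower ratios flag repetitive, template-like language.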

def explain_text(res, stats):
    lines = []
    bot = res["bot_score"]
    gram = res["grammar_score"]
    mood = res["mood_label"]
    if bot > 70:
        lines.append(
            "The AI-likeness score is high, indicating that the review strongly resembles machine-generated text."
        )
    elif bot > 40:
        lines.append(
            "The AI-likeness score is in a borderline range, so the review should be treated with caution."
        )
    else:
        lines.append(
            "The AI-likeness score is low, suggesting the review is likely human-written."
        )
    if gram > 80:
        lines.append(
            "Grammar quality is unusually clean and consistent, which sometimes correlates with AI-written or heavily edited content."
        )
    elif gram < 40:
        lines.append(
            "Grammar quality is weak, which can indicate spammy content but usually not advanced AI writing."
        )
    else:
        lines.append(
            "Grammar quality is moderate and falls within a typical human writing range."
        )
    lines.append(
        f"The sentiment model detects a {mood.lower()} tone, which can be cross-checked with the context of the review."
    )
    lines.append(
        f"The review contains {stats['sentence_count']} sentences and {stats['word_count']} words, with an average of {stats['avg_sentence_length']:.1f} words per sentence."
    )
    lines.append(
        f"The vocabulary richness (type-token ratio) is approximately {stats['type_token_ratio']:.1f}%, indicating how repetitive or diverse the language is."
    )
    return "\n\n".join(lines)

def check_text(text, squad):
    if "fake" not in squad:
        return {"error": True}
    res_fake = squad["fake"](text[:512])[0]
    bot = res_fake["score"] if res_fake["label"] == "Fake" else 1 - res_fake["score"]
    mood_label = "Unknown"
    if "mood" in squad:
        res_m = squad["mood"](text[:512])[0]
        mood_label = res_m["label"]
    grammar_score = 0.5
    if "grammar" in squad:
        res_g = squad["grammar"](text[:512])[0]
        grammar_score = (
            res_g["score"] if res_g["label"] == "LABEL_1" else 1 - res_g["score"]
        )
    stats = compute_text_stats(text)
    return {
        "bot_score": bot * 100,
        "mood_label": mood_label,
        "grammar_score": grammar_score * 100,
        "stats": stats,
        "error": False,
    }
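
# Label conventions assumed here, per the model cards: the detector reports
# "Real"/"Fake" and the CoLA model reports "LABEL_0" (unacceptable) /
# "LABEL_1" (acceptable), so each branch folds the complementary label into a
# single 0-1 probability before scaling to a percentage. Inputs are truncated
# to the first 512 characters as a rough guard against the models' token limits.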

# ------------------- IMAGE HELPERS -------------------
def get_image_from_url(url: str):
    """
    Returns (PIL.Image or None, error_message or None).
    Handles 403 cleanly instead of throwing exceptions.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0 Safari/537.36"
        }
        r = requests.get(url, headers=headers, timeout=10)
        if r.status_code == 403:
            return None, (
                "The image host returned HTTP 403 (Forbidden). "
                "This usually means the server is blocking automated downloads. "
                "Download the image manually and upload it as a file instead."
            )
        if r.status_code != 200:
            return None, f"Image host returned HTTP {r.status_code}."
        img = Image.open(io.BytesIO(r.content)).convert("RGB")
        return img, None
    except Exception as e:
        return None, f"Error fetching image: {e}"

def check_image(img, squad):
    score_a = 0.0
    score_b = 0.0
    caption = "Analysis unavailable."
    ai_words = ["fake", "artificial", "ai", "generated"]
    if "img_a" in squad:
        try:
            for r in squad["img_a"](img):
                if any(w in r["label"].lower() for w in ai_words):
                    score_a = max(score_a, r["score"])
        except Exception as e:
            print("img_a error:", e)
    if "img_b" in squad:
        try:
            for r in squad["img_b"](img):
                if any(w in r["label"].lower() for w in ai_words):
                    score_b = max(score_b, r["score"])
        except Exception as e:
            print("img_b error:", e)
    else:
        score_b = score_a
    if "caption" in squad:
        try:
            cap_res = squad["caption"](img)
            caption = cap_res[0]["generated_text"]
        except Exception:
            pass
    avg_ai = (score_a + score_b) / 2
    match = 1.0 - abs(score_a - score_b)
    return {
        "ai_chance": avg_ai * 100,
        "match": match,
        "score_a": score_a * 100,
        "score_b": score_b * 100,
        "caption": caption,
    }
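
# The two detectors act as a minimal ensemble: ai_chance is their mean, and
# match = 1 - |score_a - score_b| measures agreement (1.0 = identical
# verdicts). The UI treats match < 0.6 as significant disagreement and warns
# instead of trusting either score.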

# ------------------- SERPAPI REVERSE IMAGE -------------------
def serpapi_reverse_image_search(image_url: str, api_key: str):
    """
    Google Reverse Image Search using SerpAPI.
    Returns (dict or None, error_message or None).
    """
    if not api_key:
        return None, "SerpAPI key not configured."
    if not image_url:
        return None, "No image URL provided."
    try:
        params = {
            "engine": "google_reverse_image",
            "image_url": image_url,
            "api_key": api_key,
            "output": "json",
        }
        resp = requests.get("https://serpapi.com/search", params=params, timeout=25)
        if resp.status_code == 403:
            return None, (
                "SerpAPI returned HTTP 403 (Forbidden). "
                "Check that the API key is valid and you have enough quota."
            )
        if resp.status_code != 200:
            return None, f"SerpAPI HTTP {resp.status_code}: {resp.text[:180]}"
        data = resp.json()
        result = {
            "best_guess": data.get("image_guess"),
            "visual_matches": data.get("visual_matches", []),
        }
        return result, None
    except Exception as e:
        return None, f"Error calling SerpAPI: {e}"

# ------------------- PLOTS -------------------
def breakdown_chart(res):
    labels = ["Bot Probability", "Grammar Quality"]
    vals = [res["bot_score"], res["grammar_score"]]
    fig, ax = plt.subplots(figsize=(4, 2.2))
    y = np.arange(len(labels))
    ax.barh(y, vals)
    ax.set_yticks(y)
    ax.set_yticklabels(labels)
    ax.invert_yaxis()
    ax.set_xlim(0, 100)
    for i, v in enumerate(vals):
        ax.text(v + 1, i, f"{v:.0f}%", va="center", fontsize=8)
    plt.tight_layout()
    return fig


def sentence_length_hist(stats):
    fig, ax = plt.subplots(figsize=(4, 2.2))
    if stats["sentence_lengths"]:
        ax.hist(
            stats["sentence_lengths"],
            bins=min(8, len(stats["sentence_lengths"])),
        )
    ax.set_xlabel("Words per sentence")
    ax.set_ylabel("Frequency")
    ax.set_title("Sentence Length Distribution")
    plt.tight_layout()
    return fig


def word_frequency_chart(stats):
    fig, ax = plt.subplots(figsize=(4, 2.2))
    top = stats["top_words"]
    if top:
        words = [w for w, _ in top]
        counts = [c for _, c in top]
        ax.bar(words, counts)
        # Pin tick positions before relabeling; recent Matplotlib warns if
        # set_xticklabels is called without a fixed locator.
        ax.set_xticks(range(len(words)))
        ax.set_xticklabels(words, rotation=45, ha="right", fontsize=8)
    ax.set_title("Top Word Frequency")
    plt.tight_layout()
    return fig

# ------------------- PDF REPORT -------------------
def generate_pdf(text_input, text_res, image_res, reverse_res, platform):
    buf = io.BytesIO()
    doc = SimpleDocTemplate(buf, pagesize=A4, leftMargin=30, rightMargin=30)
    styles = getSampleStyleSheet()
    elems = []
    elems.append(Paragraph("Review Validator Report", styles["Title"]))
    elems.append(Spacer(1, 6))
    elems.append(Paragraph(f"Platform: {platform}", styles["Normal"]))
    elems.append(Spacer(1, 10))
    if text_input:
        elems.append(Paragraph("Input Review Text", styles["Heading2"]))
        elems.append(Spacer(1, 4))
        # Escape XML-significant characters first; Paragraph parses its input
        # as markup and would raise on a raw "<" or "&" in the review text.
        safe = (
            text_input.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("\n", "<br/>")
        )
        elems.append(Paragraph(safe, styles["Normal"]))
        elems.append(Spacer(1, 8))
    if text_res and not text_res.get("error", False):
        stats = text_res["stats"]
        elems.append(Paragraph("Text Authenticity Analysis", styles["Heading2"]))
        data = [
            ["Bot-likeness", f"{text_res['bot_score']:.1f}%"],
            ["Grammar Quality", f"{text_res['grammar_score']:.1f}%"],
            ["Sentiment", text_res["mood_label"]],
            ["Sentence Count", str(stats["sentence_count"])],
            ["Word Count", str(stats["word_count"])],
            ["Avg. Sentence Length", f"{stats['avg_sentence_length']:.1f}"],
            ["Type-Token Ratio", f"{stats['type_token_ratio']:.1f}%"],
        ]
        tbl = Table(data, hAlign="LEFT")
        tbl.setStyle(
            TableStyle(
                [
                    ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
                    ("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
                    ("BOX", (0, 0), (-1, -1), 0.25, colors.black),
                ]
            )
        )
        elems.append(tbl)
        elems.append(Spacer(1, 8))
        explanation = explain_text(text_res, stats)
        elems.append(Paragraph("Interpretation", styles["Heading3"]))
        for para in explanation.split("\n\n"):
            elems.append(Paragraph(para, styles["Normal"]))
            elems.append(Spacer(1, 3))
    if image_res:
        elems.append(Spacer(1, 8))
        elems.append(Paragraph("Image Authenticity Analysis", styles["Heading2"]))
        data2 = [
            ["AI-likeness (avg)", f"{image_res['ai_chance']:.1f}%"],
            ["Model A Score", f"{image_res['score_a']:.1f}%"],
            ["Model B Score", f"{image_res['score_b']:.1f}%"],
            ["Model Agreement", f"{image_res['match']*100:.1f}%"],
        ]
        t2 = Table(data2, hAlign="LEFT")
        t2.setStyle(
            TableStyle(
                [
                    ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
                    ("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
                    ("BOX", (0, 0), (-1, -1), 0.25, colors.black),
                ]
            )
        )
        elems.append(t2)
        elems.append(Spacer(1, 4))
        elems.append(Paragraph(f"Caption: {image_res['caption']}", styles["Normal"]))
    if reverse_res:
        elems.append(Spacer(1, 8))
        elems.append(Paragraph("Reverse Image Search (SerpAPI)", styles["Heading2"]))
        best = reverse_res.get("best_guess")
        count = reverse_res.get("count", 0)
        elems.append(Paragraph(f"Visual matches found: {count}", styles["Normal"]))
        if best:
            elems.append(Paragraph(f"Google best guess: {best}", styles["Normal"]))
        links = reverse_res.get("top_links", [])
        if links:
            elems.append(Spacer(1, 4))
            elems.append(Paragraph("Top Matching Sources:", styles["Heading3"]))
            for item in links:
                line = f"{item.get('title') or item.get('link')} (source: {item.get('source')})"
                elems.append(Paragraph(line, styles["Normal"]))
                elems.append(Spacer(1, 2))
    doc.build(elems)
    pdf_bytes = buf.getvalue()
    buf.close()
    return pdf_bytes
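
# generate_pdf returns raw bytes, so the result can be handed directly to
# st.download_button(data=...) without writing a temporary file.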

# ------------------- UI: LANDING -------------------
def landing_page():
    st.markdown(
        """
        <div class="hero-box">
            <div class="hero-title">🛡️ Review Validator</div>
            <div class="hero-subtitle">
                Detect AI-written reviews, AI-generated product images, and reused images via Google Reverse Image Search.
            </div>
        </div>
        """,
        unsafe_allow_html=True,
    )
    c1, c2, c3 = st.columns(3)
    with c1:
        st.markdown(
            """
            <div class="feature-card">
                <span class="emoji-icon">🤖</span>
                <h3>Text Authenticity</h3>
                <p>Transformer-based models estimate how likely a review is written by AI.</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with c2:
        st.markdown(
            """
            <div class="feature-card">
                <span class="emoji-icon">📸</span>
                <h3>Image Authenticity</h3>
                <p>Dual detectors and captioning analyze whether an image is real or AI-generated.</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with c3:
        st.markdown(
            """
            <div class="feature-card">
                <span class="emoji-icon">🔎</span>
                <h3>Reverse Search</h3>
                <p>SerpAPI + Google Reverse Image API to see where else the image appears online.</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
    _, mid, _ = st.columns([1, 2, 1])
    with mid:
        if st.button("🚀 START CHECKING REVIEWS", type="primary", use_container_width=True):
            st.session_state["page"] = "detector"
            st.rerun()

# ------------------- UI: DETECTOR -------------------
def detector_page(squad):
    c1, c2 = st.columns([3, 1])
    with c1:
        st.markdown("### 🛒 Select Platform")
        platform = st.selectbox(
            "Platform",
            ["Amazon", "Flipkart", "Zomato", "Swiggy", "Myntra", "Other"],
            label_visibility="collapsed",
        )
    with c2:
        if st.button("⬅️ Back Home"):
            st.session_state["page"] = "landing"
            st.rerun()
    st.divider()
    tab_text, tab_img = st.tabs(["📝 Text Review", "📸 Product Image"])

    # -------- TEXT TAB --------
    with tab_text:
        col_left, col_right = st.columns([2, 1])
        with col_left:
            txt = st.text_area(
                "Paste Review Here:",
                height=180,
                placeholder="Example: I ordered this yesterday and it exceeded expectations...",
            )
        with col_right:
            st.info("Tip: Paste full review text for more accurate analysis.")
            if st.button("Analyze Text", type="primary", use_container_width=True):
                if not txt.strip():
                    st.error("Please paste a review first.")
                else:
                    with st.spinner("Analyzing review..."):
                        res = check_text(txt.strip(), squad)
                    st.session_state["text_res"] = res
                    st.session_state["text_raw"] = txt.strip()
                    st.session_state["platform"] = platform
        if "text_res" in st.session_state:
            res = st.session_state["text_res"]
            if res.get("error"):
                st.error("Text models failed to load. Check HF_TOKEN.")
            else:
                stats = res["stats"]
                st.markdown("---")
                k1, k2, k3 = st.columns(3)
                color = "red" if res["bot_score"] > 50 else "green"
                k1.markdown(
                    f'<div class="stat-box"><div class="stat-num" style="color:{color}">{res["bot_score"]:.0f}%</div><div class="stat-txt">Bot Chance</div></div>',
                    unsafe_allow_html=True,
                )
                k2.markdown(
                    f'<div class="stat-box"><div class="stat-num">{res["grammar_score"]:.0f}%</div><div class="stat-txt">Grammar</div></div>',
                    unsafe_allow_html=True,
                )
                k3.markdown(
                    f'<div class="stat-box"><div class="stat-num">{stats["word_count"]}</div><div class="stat-txt">Total Words</div></div>',
                    unsafe_allow_html=True,
                )
                g1, g2, g3 = st.columns(3)
                with g1:
                    st.pyplot(breakdown_chart(res))
                with g2:
                    st.pyplot(sentence_length_hist(stats))
                with g3:
                    st.pyplot(word_frequency_chart(stats))
                st.markdown("#### Explanation")
                st.markdown(explain_text(res, stats))
                st.markdown("---")
                if st.button("Generate PDF (Text Only)", use_container_width=False):
                    pdf = generate_pdf(
                        st.session_state.get("text_raw", ""),
                        res,
                        st.session_state.get("img_res"),
                        st.session_state.get("reverse_search_results"),
                        st.session_state.get("platform", platform),
                    )
                    st.session_state["pdf_text"] = pdf
                if "pdf_text" in st.session_state:
                    st.download_button(
                        "⬇️ Download Text Analysis PDF",
                        data=st.session_state["pdf_text"],
                        file_name="review_validator_text.pdf",
                        mime="application/pdf",
                    )

    # -------- IMAGE TAB --------
    with tab_img:
        col_in, col_out = st.columns([1, 1])
        with col_in:
            st.markdown("#### Step 1: Provide Image")
            method = st.radio(
                "Input type",
                ["Paste URL", "Upload File"],
                horizontal=True,
                label_visibility="collapsed",
            )
            with st.form("image_form"):
                img_file = None
                url = ""
                auto_reverse = False
                if method == "Paste URL":
                    url = st.text_input("Image URL")
                    auto_reverse = st.checkbox(
                        "Also perform Google Reverse Image Search on this URL",
                        value=True,
                    )
                else:
                    img_file = st.file_uploader(
                        "Upload Image", type=["jpg", "jpeg", "png"]
                    )
                submitted = st.form_submit_button(
                    "Analyze Image", type="primary", use_container_width=True
                )
            if submitted:
                target = None
                if method == "Paste URL":
                    if not url.strip():
                        st.error("Please enter a valid image URL.")
                    else:
                        img, err = get_image_from_url(url.strip())
                        if err:
                            st.error(err)
                        else:
                            target = img
                            st.session_state["last_image_url"] = url.strip()
                else:
                    if not img_file:
                        st.error("Please upload an image file.")
                    else:
                        try:
                            target = Image.open(img_file).convert("RGB")
                            st.session_state["last_image_url"] = None
                        except Exception as e:
                            st.error(f"Error reading image: {e}")
                if target is not None:
                    with st.spinner("Running image authenticity checks..."):
                        img_res = check_image(target, squad)
                    st.session_state["current_img"] = target
                    st.session_state["img_res"] = img_res
                    # Auto reverse search if URL + checkbox + key available
                    if method == "Paste URL" and auto_reverse:
                        serp_key = get_serpapi_key()
                        if not serp_key:
                            st.warning(
                                "SerpAPI key not configured. Skipping reverse image search."
                            )
                        else:
                            with st.spinner("Performing reverse image search via SerpAPI..."):
                                rev, err = serpapi_reverse_image_search(
                                    url.strip(), serp_key
                                )
                            if err:
                                st.error(err)
                            elif rev:
                                matches = rev.get("visual_matches", [])
                                st.session_state["reverse_search_results"] = {
                                    "best_guess": rev.get("best_guess"),
                                    "count": len(matches),
                                    "top_links": [
                                        {
                                            "title": m.get("title"),
                                            "link": m.get("link"),
                                            "source": m.get("source"),
                                        }
                                        for m in matches[:5]
                                    ],
                                }
        with col_out:
            if "current_img" in st.session_state:
                st.image(
                    st.session_state["current_img"],
                    use_container_width=True,  # use_column_width is deprecated in recent Streamlit
                    caption="Analyzed Image",
                )
            if "img_res" in st.session_state:
                data = st.session_state["img_res"]
                ai = data["ai_chance"]
                st.markdown("#### Step 2: Image Analysis Result")
                st.markdown(
                    f"""
                    <div class="analysis-box">
                        <strong>Visual Caption:</strong><br/>
                        {data['caption']}
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
                if data["match"] < 0.6:
                    st.warning(
                        "Detectors disagree significantly. Image may be heavily edited or ambiguous."
                    )
                elif ai > 60:
                    st.error(f"Likely AI-generated image ({ai:.0f}% probability).")
                else:
                    st.success(
                        f"Likely real photograph ({100 - ai:.0f}% probability)."
                    )
                st.progress(ai / 100.0, text=f"AI-likeness: {ai:.1f}%")
                with st.expander("Detector Breakdown"):
                    st.write(f"Model A: {data['score_a']:.1f}%")
                    st.write(f"Model B: {data['score_b']:.1f}%")
                    st.write(f"Agreement: {data['match']*100:.1f}%")
| st.markdown("---") | |
| st.markdown("### 🔎 Reverse Image Search (Manual Call)") | |
| r_col1, r_col2 = st.columns([2, 1]) | |
| with r_col1: | |
| manual_url = st.text_input( | |
| "Public image URL (optional, for manual reverse search):", | |
| value=st.session_state.get("last_image_url", "") or "", | |
| ) | |
| with r_col2: | |
| if st.button("Run Reverse Search", use_container_width=True): | |
| key = get_serpapi_key() | |
| if not key: | |
| st.error("SerpAPI key not configured.") | |
| elif not manual_url.strip(): | |
| st.error("Please enter an image URL.") | |
| else: | |
| with st.spinner("Calling SerpAPI Google Reverse Image API..."): | |
| rev, err = serpapi_reverse_image_search( | |
| manual_url.strip(), key | |
| ) | |
| if err: | |
| st.error(err) | |
| elif rev: | |
| matches = rev.get("visual_matches", []) | |
| st.success("Reverse image search completed.") | |
| if rev.get("best_guess"): | |
| st.write(f"Google best guess: {rev['best_guess']}") | |
| st.write(f"Total visual matches: {len(matches)}") | |
| if matches: | |
| st.markdown("**Top sources:**") | |
| for m in matches[:5]: | |
| st.markdown( | |
| f"- [{m.get('title') or m.get('link')}]({m.get('link')}) _(source: {m.get('source')})_" | |
| ) | |
| st.session_state["reverse_search_results"] = { | |
| "best_guess": rev.get("best_guess"), | |
| "count": len(matches), | |
| "top_links": [ | |
| { | |
| "title": m.get("title"), | |
| "link": m.get("link"), | |
| "source": m.get("source"), | |
| } | |
| for m in matches[:5] | |
| ], | |
| } | |
| st.markdown("---") | |
| if st.button("Generate Full PDF (Text + Image + Reverse)", use_container_width=False): | |
| pdf_full = generate_pdf( | |
| st.session_state.get("text_raw", ""), | |
| st.session_state.get("text_res"), | |
| st.session_state.get("img_res"), | |
| st.session_state.get("reverse_search_results"), | |
| st.session_state.get("platform", "Unknown"), | |
| ) | |
| st.session_state["pdf_full"] = pdf_full | |
| if "pdf_full" in st.session_state: | |
| st.download_button( | |
| "⬇️ Download Full Analysis PDF", | |
| data=st.session_state["pdf_full"], | |
| file_name="review_validator_full.pdf", | |
| mime="application/pdf", | |
| ) | |

# ------------------- MAIN -------------------
def main():
    inject_custom_css()
    if "page" not in st.session_state:
        st.session_state["page"] = "landing"
    with st.spinner("Loading AI models..."):
        squad, err = load_ai_squad()
    if not squad:
        st.error(err)
        return
    if st.session_state["page"] == "landing":
        landing_page()
    else:
        detector_page(squad)


if __name__ == "__main__":
    main()
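
# To run locally (assuming this file is saved as app.py):
#   pip install streamlit transformers torch pillow matplotlib requests reportlab
#   streamlit run app.py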