"""
Review Validator - Final Version with SerpAPI Integration
"""
import os
import io
import warnings
from collections import Counter
import numpy as np
import streamlit as st
from transformers import pipeline, logging as hf_logging
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import requests
from reportlab.lib.pagesizes import A4
from reportlab.platypus import (
SimpleDocTemplate,
Paragraph,
Spacer,
Table,
TableStyle,
)
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors
# ------------------- SILENCE NOISE -------------------
warnings.filterwarnings("ignore")
hf_logging.set_verbosity_error()
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
matplotlib.use("Agg")
st.set_page_config(
page_title="Review Validator",
page_icon="🛡️",
layout="wide",
initial_sidebar_state="collapsed",
)
# ------------------- MODEL NAMES -------------------
MODEL_FAKE = "openai-community/roberta-base-openai-detector"
MODEL_MOOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
MODEL_GRAMMAR = "textattack/roberta-base-CoLA"
MODEL_IMG_A = "dima806/ai_generated_image_detection"
MODEL_IMG_B = "umm-maybe/AI-image-detector"
MODEL_CAPTION = "Salesforce/blip-image-captioning-base"
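# Label conventions assumed by the code below: the detector model emits
# "Real"/"Fake", the sentiment model emits lowercase "positive"/"neutral"/
# "negative", and the CoLA grammar model emits "LABEL_1" for acceptable text.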
# ------------------- TOKENS / SECRETS -------------------
def get_hf_token():
token = os.environ.get("HF_TOKEN")
if token:
return token
try:
if hasattr(st, "secrets") and "HF_TOKEN" in st.secrets:
return st.secrets["HF_TOKEN"]
except Exception:
pass
return None
def get_serpapi_key():
key = os.environ.get("SERPAPI_KEY")
if key:
return key
try:
if hasattr(st, "secrets") and "SERPAPI_KEY" in st.secrets:
return st.secrets["SERPAPI_KEY"]
except Exception:
pass
return None
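# Example .streamlit/secrets.toml (values are placeholders):
#   HF_TOKEN = "hf_xxxxxxxxxxxxxxxx"
#   SERPAPI_KEY = "your-serpapi-key"
# Environment variables with the same names take precedence over secrets.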
HF_TOKEN = get_hf_token()
# ------------------- CSS -------------------
def inject_custom_css():
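    # NOTE: the CSS payload below is empty in this version; any custom styles
    # for the app would go inside the triple-quoted string.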
st.markdown(
"""
""",
unsafe_allow_html=True,
)
# ------------------- LOAD MODELS -------------------
@st.cache_resource(show_spinner=False)
def load_ai_squad():
squad = {}
if not HF_TOKEN:
return None, "HF_TOKEN missing. Set it in env or Streamlit secrets."
try:
try:
squad["fake"] = pipeline(
"text-classification", model=MODEL_FAKE, token=HF_TOKEN
)
except Exception as e:
print("Fake model error:", e)
try:
squad["mood"] = pipeline(
"sentiment-analysis",
model=MODEL_MOOD,
tokenizer=MODEL_MOOD,
token=HF_TOKEN,
)
except Exception as e:
print("Mood model error:", e)
try:
squad["grammar"] = pipeline(
"text-classification", model=MODEL_GRAMMAR, token=HF_TOKEN
)
except Exception as e:
print("Grammar model error:", e)
try:
squad["img_a"] = pipeline(
"image-classification", model=MODEL_IMG_A, token=HF_TOKEN
)
squad["img_b"] = pipeline(
"image-classification", model=MODEL_IMG_B, token=HF_TOKEN
)
squad["caption"] = pipeline(
"image-to-text", model=MODEL_CAPTION, token=HF_TOKEN
)
except Exception as e:
print("Image model error:", e)
except Exception as e:
return None, str(e)
return squad, None
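# Usage sketch: loaded once per session thanks to the cache decorator.
#   squad, err = load_ai_squad()
#   if err:
#       st.error(err)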
# ------------------- TEXT HELPERS -------------------
def compute_text_stats(text: str):
sentences = [
s.strip()
for s in text.replace("!", ".").replace("?", ".").split(".")
if s.strip()
]
words = text.split()
word_count = len(words)
sent_lengths = [len(s.split()) for s in sentences] if sentences else []
avg_sent_len = np.mean(sent_lengths) if sent_lengths else 0.0
vocab = {w.lower().strip(".,!?\"'") for w in words if w.strip()}
vocab_size = len(vocab)
ttr = (vocab_size / word_count * 100) if word_count > 0 else 0.0
cleaned = [w.lower().strip(".,!?\"'") for w in words if w.strip()]
common = Counter(cleaned).most_common(8)
return {
"sentence_count": len(sentences),
"word_count": word_count,
"avg_sentence_length": avg_sent_len,
"vocab_size": vocab_size,
"type_token_ratio": ttr,
"sentence_lengths": sent_lengths,
"top_words": common,
}
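# Illustrative example: compute_text_stats("Great phone. Battery lasts long!")
# yields sentence_count=2, word_count=5, and a type-token ratio of 100%
# because no word repeats.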
def explain_text(res, stats):
lines = []
bot = res["bot_score"]
gram = res["grammar_score"]
mood = res["mood_label"]
if bot > 70:
lines.append(
"The AI-likeness score is high, indicating that the review strongly resembles machine-generated text."
)
elif bot > 40:
lines.append(
"The AI-likeness score is in a borderline range, so the review should be treated with caution."
)
else:
lines.append(
"The AI-likeness score is low, suggesting the review is likely human-written."
)
if gram > 80:
lines.append(
"Grammar quality is unusually clean and consistent, which sometimes correlates with AI-written or heavily edited content."
)
elif gram < 40:
lines.append(
"Grammar quality is weak, which can indicate spammy content but usually not advanced AI writing."
)
else:
lines.append(
"Grammar quality is moderate and falls within a typical human writing range."
)
lines.append(
f"The sentiment model detects a {mood.lower()} tone, which can be cross-checked with the context of the review."
)
lines.append(
f"The review contains {stats['sentence_count']} sentences and {stats['word_count']} words, with an average of {stats['avg_sentence_length']:.1f} words per sentence."
)
lines.append(
f"The vocabulary richness (type-token ratio) is approximately {stats['type_token_ratio']:.1f}%, indicating how repetitive or diverse the language is."
)
return "\n\n".join(lines)
def check_text(text, squad):
if "fake" not in squad:
return {"error": True}
res_fake = squad["fake"](text[:512])[0]
bot = res_fake["score"] if res_fake["label"] == "Fake" else 1 - res_fake["score"]
mood_label = "Unknown"
if "mood" in squad:
res_m = squad["mood"](text[:512])[0]
mood_label = res_m["label"]
grammar_score = 0.5
if "grammar" in squad:
res_g = squad["grammar"](text[:512])[0]
grammar_score = (
res_g["score"] if res_g["label"] == "LABEL_1" else 1 - res_g["score"]
)
stats = compute_text_stats(text)
return {
"bot_score": bot * 100,
"mood_label": mood_label,
"grammar_score": grammar_score * 100,
"stats": stats,
"error": False,
}
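# Example return shape (values illustrative):
#   {"bot_score": 12.3, "mood_label": "positive", "grammar_score": 71.0,
#    "stats": {...}, "error": False}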
# ------------------- IMAGE HELPERS -------------------
def get_image_from_url(url: str):
"""
Returns (PIL.Image or None, error_message or None)
Handles 403 cleanly instead of throwing exceptions.
"""
try:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120.0 Safari/537.36"
}
r = requests.get(url, headers=headers, timeout=10)
if r.status_code == 403:
return None, (
"The image host returned HTTP 403 (Forbidden). "
"This usually means the server is blocking automated downloads. "
"Download the image manually and upload it as a file instead."
)
if r.status_code != 200:
return None, f"Image host returned HTTP {r.status_code}."
img = Image.open(io.BytesIO(r.content)).convert("RGB")
return img, None
except Exception as e:
return None, f"Error fetching image: {e}"
def check_image(img, squad):
score_a = 0.0
score_b = 0.0
caption = "Analysis unavailable."
ai_words = ["fake", "artificial", "ai", "generated"]
if "img_a" in squad:
try:
for r in squad["img_a"](img):
if any(w in r["label"].lower() for w in ai_words):
score_a = max(score_a, r["score"])
except Exception as e:
print("img_a error:", e)
if "img_b" in squad:
try:
for r in squad["img_b"](img):
if any(w in r["label"].lower() for w in ai_words):
score_b = max(score_b, r["score"])
except Exception as e:
print("img_b error:", e)
else:
score_b = score_a
if "caption" in squad:
try:
cap_res = squad["caption"](img)
caption = cap_res[0]["generated_text"]
except Exception:
pass
avg_ai = (score_a + score_b) / 2
match = 1.0 - abs(score_a - score_b)
return {
"ai_chance": avg_ai * 100,
"match": match,
"score_a": score_a * 100,
"score_b": score_b * 100,
"caption": caption,
}
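# Interpretation sketch: ai_chance averages the two detectors, and match
# (0..1) rises as they agree; e.g. score_a=0.9 and score_b=0.8 give
# ai_chance=85.0 and match=0.9.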
# ------------------- SERPAPI REVERSE IMAGE -------------------
def serpapi_reverse_image_search(image_url: str, api_key: str):
"""
Google Reverse Image Search using SerpAPI.
Returns dict or None, and error_message if any.
"""
if not api_key:
return None, "SerpAPI key not configured."
if not image_url:
return None, "No image URL provided."
try:
params = {
"engine": "google_reverse_image",
"image_url": image_url,
"api_key": api_key,
"output": "json",
}
resp = requests.get("https://serpapi.com/search", params=params, timeout=25)
if resp.status_code == 403:
return None, (
"SerpAPI returned HTTP 403 (Forbidden). "
"Check that the API key is valid and you have enough quota."
)
if resp.status_code != 200:
return None, f"SerpAPI HTTP {resp.status_code}: {resp.text[:180]}"
data = resp.json()
        # generate_pdf reads "count" and "top_links", so derive them here.
        # SerpAPI may expose matches as "visual_matches" (Google Lens) or
        # "image_results" (Google Reverse Image); fall back between the two.
        matches = data.get("visual_matches") or data.get("image_results") or []
        result = {
            "best_guess": data.get("image_guess"),
            "visual_matches": matches,
            "count": len(matches),
            "top_links": matches[:5],
        }
return result, None
except Exception as e:
return None, f"Error calling SerpAPI: {e}"
# ------------------- PLOTS -------------------
def breakdown_chart(res):
labels = ["Bot Probability", "Grammar Quality"]
vals = [res["bot_score"], res["grammar_score"]]
fig, ax = plt.subplots(figsize=(4, 2.2))
y = np.arange(len(labels))
ax.barh(y, vals)
ax.set_yticks(y)
ax.set_yticklabels(labels)
ax.invert_yaxis()
ax.set_xlim(0, 100)
for i, v in enumerate(vals):
ax.text(v + 1, i, f"{v:.0f}%", va="center", fontsize=8)
plt.tight_layout()
return fig
def sentence_length_hist(stats):
fig, ax = plt.subplots(figsize=(4, 2.2))
if stats["sentence_lengths"]:
ax.hist(
stats["sentence_lengths"],
bins=min(8, len(stats["sentence_lengths"])),
)
ax.set_xlabel("Words per sentence")
ax.set_ylabel("Frequency")
ax.set_title("Sentence Length Distribution")
plt.tight_layout()
return fig
def word_frequency_chart(stats):
fig, ax = plt.subplots(figsize=(4, 2.2))
top = stats["top_words"]
if top:
words = [w for w, _ in top]
counts = [c for _, c in top]
ax.bar(words, counts)
        # Pin tick positions before relabeling to avoid a matplotlib warning.
        ax.set_xticks(range(len(words)))
        ax.set_xticklabels(words, rotation=45, ha="right", fontsize=8)
ax.set_title("Top Word Frequency")
plt.tight_layout()
return fig
# ------------------- PDF REPORT -------------------
def generate_pdf(text_input, text_res, image_res, reverse_res, platform):
buf = io.BytesIO()
doc = SimpleDocTemplate(buf, pagesize=A4, leftMargin=30, rightMargin=30)
styles = getSampleStyleSheet()
elems = []
elems.append(Paragraph("Review Validator Report", styles["Title"]))
elems.append(Spacer(1, 6))
elems.append(Paragraph(f"Platform: {platform}", styles["Normal"]))
elems.append(Spacer(1, 10))
if text_input:
elems.append(Paragraph("Input Review Text", styles["Heading2"]))
elems.append(Spacer(1, 4))
        # ReportLab Paragraphs use <br/> tags, not literal newlines.
        safe = text_input.replace("\n", "<br/>")
elems.append(Paragraph(safe, styles["Normal"]))
elems.append(Spacer(1, 8))
if text_res and not text_res.get("error", False):
stats = text_res["stats"]
elems.append(Paragraph("Text Authenticity Analysis", styles["Heading2"]))
data = [
["Bot-likeness", f"{text_res['bot_score']:.1f}%"],
["Grammar Quality", f"{text_res['grammar_score']:.1f}%"],
["Sentiment", text_res["mood_label"]],
["Sentence Count", str(stats["sentence_count"])],
["Word Count", str(stats["word_count"])],
["Avg. Sentence Length", f"{stats['avg_sentence_length']:.1f}"],
["Type-Token Ratio", f"{stats['type_token_ratio']:.1f}%"],
]
tbl = Table(data, hAlign="LEFT")
tbl.setStyle(
TableStyle(
[
("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
("BOX", (0, 0), (-1, -1), 0.25, colors.black),
]
)
)
elems.append(tbl)
elems.append(Spacer(1, 8))
explanation = explain_text(text_res, stats)
elems.append(Paragraph("Interpretation", styles["Heading3"]))
for para in explanation.split("\n\n"):
elems.append(Paragraph(para, styles["Normal"]))
elems.append(Spacer(1, 3))
if image_res:
elems.append(Spacer(1, 8))
elems.append(Paragraph("Image Authenticity Analysis", styles["Heading2"]))
data2 = [
["AI-likeness (avg)", f"{image_res['ai_chance']:.1f}%"],
["Model A Score", f"{image_res['score_a']:.1f}%"],
["Model B Score", f"{image_res['score_b']:.1f}%"],
["Model Agreement", f"{image_res['match']*100:.1f}%"],
]
t2 = Table(data2, hAlign="LEFT")
t2.setStyle(
TableStyle(
[
("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
("BOX", (0, 0), (-1, -1), 0.25, colors.black),
]
)
)
elems.append(t2)
elems.append(Spacer(1, 4))
elems.append(Paragraph(f"Caption: {image_res['caption']}", styles["Normal"]))
if reverse_res:
elems.append(Spacer(1, 8))
elems.append(Paragraph("Reverse Image Search (SerpAPI)", styles["Heading2"]))
best = reverse_res.get("best_guess")
count = reverse_res.get("count", 0)
elems.append(Paragraph(f"Visual matches found: {count}", styles["Normal"]))
if best:
elems.append(Paragraph(f"Google best guess: {best}", styles["Normal"]))
links = reverse_res.get("top_links", [])
if links:
elems.append(Spacer(1, 4))
elems.append(Paragraph("Top Matching Sources:", styles["Heading3"]))
for item in links:
line = f"{item.get('title') or item.get('link')} (source: {item.get('source')})"
elems.append(Paragraph(line, styles["Normal"]))
elems.append(Spacer(1, 2))
doc.build(elems)
pdf_bytes = buf.getvalue()
buf.close()
return pdf_bytes
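# Usage sketch (widget label and file name are illustrative):
#   pdf = generate_pdf(review_text, text_res, image_res, reverse_res, "Amazon")
#   st.download_button("Download PDF report", pdf,
#                      file_name="review_report.pdf", mime="application/pdf")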
# ------------------- UI: LANDING -------------------
def landing_page():
st.markdown(
"""
- Transformer-based models estimate how likely a review was written by AI.
- Dual detectors plus image captioning assess whether a photo is real or AI-generated.
- SerpAPI's Google Reverse Image search shows where else the image appears online.