Duv

Sleeping

App Files Files Community

akshit4857 committed on Nov 20

Commit

fa0f465

verified ·

1 Parent(s): b8a2bdc

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +315 -629

src/streamlit_app.py CHANGED Viewed

@@ -5,14 +5,31 @@ Optimized for Hugging Face Spaces deployment
 import os
 import io
 from collections import Counter
-from typing import Dict, Optional, List
 import streamlit as st
-from transformers import pipeline
 from PIL import Image
 import matplotlib.pyplot as plt
 import matplotlib
 import requests
 # Set matplotlib backend for server environments
 matplotlib.use('Agg')
@@ -21,689 +38,358 @@ matplotlib.use('Agg')
 # Configuration
 # -------------------------
 st.set_page_config(
-    page_title="Fake Review Detector 🧠",
-    page_icon="🧠",
-    layout="centered",
     initial_sidebar_state="collapsed"
 )
 # Constants
-MODEL_NAME = "akshit4857/autotrain-razz4-h7crd"
-MAX_TEXT_LENGTH = 5000
-MAX_IMAGE_SIZE = 10 * 1024 * 1024  # 10MB
-# Feature detection keywords
-POSITIVE_KEYWORDS = ["amazing", "best", "superb", "excellent", "love", "perfect", "awesome"]
-NEGATIVE_KEYWORDS = ["worst", "terrible", "bad", "awful", "hate", "horrible", "useless"]
-ALL_KEYWORDS = POSITIVE_KEYWORDS + NEGATIVE_KEYWORDS
 # -------------------------
 # Secrets & Environment Management
 # -------------------------
 def get_secret(key: str, default: str = None) -> Optional[str]:
-    """Safely retrieve secrets from Streamlit secrets or environment variables"""
     try:
-        # Try Streamlit secrets first
         if hasattr(st, 'secrets') and key in st.secrets:
             return st.secrets[key]
     except Exception:
         pass
-    # Fall back to environment variables
     return os.environ.get(key, default)
-# Retrieve tokens
 HF_TOKEN = get_secret("HF_TOKEN")
 OPENAI_API_KEY = get_secret("OPENAI_API_KEY")
 # -------------------------
-# Model Loading
 # -------------------------
 @st.cache_resource(show_spinner=False)
-def load_classifier(model_name: str = MODEL_NAME, hf_token: Optional[str] = None):
     """
-    Load the text classification model with proper error handling
-    Uses caching to avoid reloading on every interaction
     """
-    try:
-        kwargs = {"model": model_name}
-        # Use 'token' parameter (updated API, not deprecated 'use_auth_token')
-        if hf_token:
-            kwargs["token"] = hf_token
-        return pipeline("text-classification", **kwargs)
     except Exception as e:
-        error_msg = str(e)
-        st.error(f"❌ **Failed to load model:** {error_msg}")
-        # Provide helpful error messages
-        if "404" in error_msg:
-            st.info("💡 Model not found. Please verify the model name is correct.")
-        elif "401" in error_msg or "403" in error_msg:
-            st.info("💡 Authentication failed. Set HF_TOKEN in Hugging Face Spaces secrets.")
-        else:
-            st.info("💡 Check your internet connection and model availability.")
-        st.stop()
-# Initialize classifier
-with st.spinner("🔄 Loading AI model..."):
-    classifier = load_classifier(MODEL_NAME, HF_TOKEN)
-# -------------------------
-# Feature Extraction Functions
-# -------------------------
-def extract_text_features(text: str) -> Dict:
-    """
-    Extract explainable features from review text
-    Returns a dictionary of features for analysis
-    """
-    text = text.strip()
-    tokens = text.split()
-    text_lower = text.lower()
-    # Basic structural features
-    features = {
-        "length_chars": len(text),
-        "length_words": len(tokens),
-        "avg_word_length": sum(len(w) for w in tokens) / len(tokens) if tokens else 0,
-        "exclamations": text.count("!"),
-        "questions": text.count("?"),
-        "caps_words": sum(1 for w in tokens if w.isupper() and len(w) > 1),
-        "sentences": max(1, text.count(".") + text.count("!") + text.count("?")),
-    }
-    # Keyword analysis
-    features["keyword_counts"] = {
-        k: text_lower.count(k) for k in ALL_KEYWORDS
-    }
-    # Sentiment scores
-    features["pos_score"] = sum(text_lower.count(k) for k in POSITIVE_KEYWORDS)
-    features["neg_score"] = sum(text_lower.count(k) for k in NEGATIVE_KEYWORDS)
-    features["sentiment_balance"] = features["pos_score"] - features["neg_score"]
-    # Token importance (heuristic: length × frequency)
-    cleaned_tokens = [
-        w.strip(".,!?;:\"'").lower()
-        for w in tokens
-        if len(w.strip(".,!?;:\"'")) > 2  # Filter very short words
-    ]
-    word_counts = Counter(cleaned_tokens)
-    importance = {
-        word: len(word) * count
-        for word, count in word_counts.items()
-    }
-    # Top tokens by importance
-    features["top_tokens"] = dict(
-        sorted(importance.items(), key=lambda x: x[1], reverse=True)[:10]
-    )
-    return features
-# -------------------------
-# AI-Powered Explanation (Optional)
-# -------------------------
-def generate_ai_explanation(
-    review_text: str,
-    label: str,
-    confidence: float,
-    timeout: int = 15
-) -> Optional[str]:
-    """
-    Generate AI-powered explanation using OpenAI API
-    Returns None if API key is not available
-    """
-    if not OPENAI_API_KEY:
-        return None
-    url = "https://api.openai.com/v1/chat/completions"
-    headers = {
-        "Authorization": f"Bearer {OPENAI_API_KEY}",
-        "Content-Type": "application/json"
-    }
-    # Truncate review for API call
-    review_snippet = review_text[:500] + ("..." if len(review_text) > 500 else "")
-    prompt = f"""You are an AI explainability assistant for a fake review detection system.
-**Prediction:** {label}
-**Confidence:** {confidence:.1f}%
-**Review Text:**
-{review_snippet}
-Provide a concise, bullet-point explanation covering:
-1. Key linguistic features that influenced this prediction
-2. Common patterns found in {label.lower()} reviews
-3. One specific observation about this review
-Format: 3-4 bullet points, max 120 words total."""
-    payload = {
-        "model": "gpt-4o-mini",
-        "messages": [{"role": "user", "content": prompt}],
-        "max_tokens": 180,
-        "temperature": 0.3
-    }
-    try:
-        response = requests.post(
-            url,
-            headers=headers,
-            json=payload,
-            timeout=timeout
-        )
-        response.raise_for_status()
-        content = response.json()["choices"][0]["message"]["content"]
-        return content.strip()
-    except requests.exceptions.Timeout:
-        return "⏱️ AI explanation timed out. Using local analysis instead."
-    except requests.exceptions.RequestException as e:
-        return f"⚠️ AI explanation unavailable: {type(e).__name__}"
-    except Exception as e:
-        return f"⚠️ Error generating AI explanation: {str(e)}"
 # -------------------------
-# Local Rule-Based Explanation
 # -------------------------
-def generate_local_explanation(features: Dict, label: str) -> List[str]:
-    """
-    Generate rule-based explanation from extracted features
-    Returns a list of explanation bullet points
-    """
-    explanations = []
-    # Keyword analysis
-    found_keywords = [k for k, v in features["keyword_counts"].items() if v > 0]
-    if found_keywords:
-        keyword_str = ", ".join(found_keywords[:5])
-        if len(found_keywords) > 5:
-            keyword_str += f" (+{len(found_keywords)-5} more)"
-        explanations.append(
-            f"**Emotional keywords detected:** {keyword_str} — "
-            f"may indicate promotional or exaggerated language"
-        )
-    # Punctuation patterns
-    if features["exclamations"] >= 3:
-        explanations.append(
-            f"**Excessive exclamation marks** ({features['exclamations']}) — "
-            f"common in fake reviews trying to appear enthusiastic"
-        )
-    # Length analysis
-    words = features["length_words"]
-    if words < 10:
-        explanations.append(
-            f"**Very brief review** ({words} words) — "
-            f"may lack authentic detail or personal experience"
-        )
-    elif words > 250:
-        explanations.append(
-            f"**Unusually long review** ({words} words) — "
-            f"atypical for casual customers"
-        )
-    # Sentiment analysis
-    sentiment = features["sentiment_balance"]
-    if sentiment >= 5:
-        explanations.append(
-            f"**Heavily positive sentiment** (+{sentiment}) — "
-            f"may indicate promotional intent"
-        )
-    elif sentiment <= -5:
-        explanations.append(
-            f"**Heavily negative sentiment** ({sentiment}) — "
-            f"could be competitor sabotage or genuine dissatisfaction"
-        )
-    # ALL CAPS usage
-    if features["caps_words"] >= 3:
-        explanations.append(
-            f"**Multiple ALL-CAPS words** ({features['caps_words']}) — "
-            f"aggressive emphasis uncommon in genuine reviews"
-        )
-    # Average word length
-    if features["avg_word_length"] > 7:
-        explanations.append(
-            f"**Complex vocabulary** (avg {features['avg_word_length']:.1f} chars/word) — "
-            f"may indicate professional/paid writing"
-        )
-    # Fallback if no strong signals
-    if not explanations:
-        explanations.append(
-            "**No strong manipulation signals detected** — "
-            "review appears relatively natural based on heuristic analysis"
-        )
-    return explanations
 # -------------------------
-# Visualization Functions
 # -------------------------
-def create_confidence_chart(confidence: float, label: str) -> plt.Figure:
-    """Create a horizontal bar chart showing model confidence"""
-    fig, ax = plt.subplots(figsize=(8, 1.8))
-    # Color coding based on prediction and confidence
-    if "FAKE" in label.upper():
-        color = '#ff4b4b' if confidence > 70 else '#ff8c42'
     else:
-        color = '#4CAF50' if confidence > 70 else '#8bc34a'
-    ax.barh([0], [confidence], height=0.5, color=color, alpha=0.8)
-    ax.set_xlim(0, 100)
-    ax.set_xlabel("Confidence (%)", fontsize=11, fontweight='bold')
-    ax.set_yticks([])
-    ax.set_title(f"Model Confidence: {confidence:.1f}%", fontsize=12, fontweight='bold')
-    ax.grid(axis='x', alpha=0.3, linestyle='--')
-    # Add confidence threshold lines
-    ax.axvline(x=50, color='gray', linestyle='--', alpha=0.5, linewidth=1)
-    ax.axvline(x=75, color='gray', linestyle='--', alpha=0.3, linewidth=1)
-    plt.tight_layout()
-    return fig
-def create_keyword_chart(keyword_counts: Dict) -> Optional[plt.Figure]:
-    """Create bar chart of keyword occurrences"""
-    nonzero = {k: v for k, v in keyword_counts.items() if v > 0}
-    if not nonzero:
-        return None
-    # Sort by count
-    sorted_items = sorted(nonzero.items(), key=lambda x: x[1], reverse=True)[:10]
-    keywords, counts = zip(*sorted_items)
-    fig, ax = plt.subplots(figsize=(8, 4))
-    # Color positive vs negative keywords
-    colors = [
-        '#4CAF50' if k in POSITIVE_KEYWORDS else '#ff4b4b'
-        for k in keywords
     ]
-    ax.bar(keywords, counts, color=colors, alpha=0.7, edgecolor='black', linewidth=0.5)
-    ax.set_title("Keyword Frequency Analysis", fontsize=12, fontweight='bold')
-    ax.set_ylabel("Occurrences", fontsize=10)
-    ax.set_xlabel("Keywords", fontsize=10)
-    ax.grid(axis='y', alpha=0.3, linestyle='--')
-    plt.xticks(rotation=45, ha='right')
-    plt.tight_layout()
     return fig
-def create_token_importance_chart(top_tokens: Dict) -> Optional[plt.Figure]:
-    """Create horizontal bar chart of most important tokens"""
-    if not top_tokens or len(top_tokens) == 0:
-        return None
-    # Get top 8 tokens
-    items = list(top_tokens.items())[:8]
-    if not items:
         return None
-    tokens, scores = zip(*items)
-    fig, ax = plt.subplots(figsize=(8, 4))
-    y_pos = range(len(tokens))
-    ax.barh(y_pos, scores, color='coral', alpha=0.7, edgecolor='black', linewidth=0.5)
-    ax.set_yticks(y_pos)
-    ax.set_yticklabels(tokens)
-    ax.invert_yaxis()
-    ax.set_title("Top Tokens by Importance", fontsize=12, fontweight='bold')
-    ax.set_xlabel("Heuristic Score (length × frequency)", fontsize=10)
-    ax.grid(axis='x', alpha=0.3, linestyle='--')
-    plt.tight_layout()
-    return fig
-def create_feature_summary_chart(features: Dict) -> plt.Figure:
-    """Create a summary dashboard of key metrics"""
-    fig, axes = plt.subplots(2, 2, figsize=(10, 6))
-    fig.suptitle("Review Feature Summary", fontsize=14, fontweight='bold')
-    # 1. Length metrics
-    ax1 = axes[0, 0]
-    metrics = ['Words', 'Chars', 'Sentences']
-    values = [features['length_words'], features['length_chars']/10, features['sentences']]
-    ax1.bar(metrics, values, color=['#3498db', '#2ecc71', '#9b59b6'], alpha=0.7)
-    ax1.set_title("Text Structure")
-    ax1.set_ylabel("Count")
-    ax1.grid(axis='y', alpha=0.3)
-    # 2. Punctuation
-    ax2 = axes[0, 1]
-    punct = ['Exclamations', 'Questions', 'CAPS Words']
-    punct_values = [features['exclamations'], features['questions'], features['caps_words']]
-    ax2.bar(punct, punct_values, color=['#e74c3c', '#f39c12', '#e67e22'], alpha=0.7)
-    ax2.set_title("Emphasis Indicators")
-    ax2.set_ylabel("Count")
-    ax2.grid(axis='y', alpha=0.3)
-    # 3. Sentiment balance
-    ax3 = axes[1, 0]
-    sentiment = ['Positive', 'Negative']
-    sent_values = [features['pos_score'], features['neg_score']]
-    colors = ['#2ecc71', '#e74c3c']
-    ax3.bar(sentiment, sent_values, color=colors, alpha=0.7)
-    ax3.set_title("Sentiment Score")
-    ax3.set_ylabel("Keyword Count")
-    ax3.grid(axis='y', alpha=0.3)
-    # 4. Overall stats
-    ax4 = axes[1, 1]
-    ax4.axis('off')
-    stats_text = f"""
-    Avg Word Length: {features['avg_word_length']:.1f}
-    Sentiment Balance: {features['sentiment_balance']:+d}
-    Total Keywords: {sum(features['keyword_counts'].values())}
-    Unique Tokens: {len(features['top_tokens'])}
-    """
-    ax4.text(0.1, 0.5, stats_text, fontsize=11, verticalalignment='center',
-             bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))
-    ax4.set_title("Key Statistics")
-    plt.tight_layout()
-    return fig
 # -------------------------
-# Main Application UI
 # -------------------------
 def main():
-    """Main application function"""
-    # Header
-    st.markdown(
-        "<h1 style='text-align:center'>🧠 Fake Review Detector</h1>",
-        unsafe_allow_html=True
-    )
-    st.markdown(
-        "<p style='text-align:center;color:#666;font-size:16px'>"
-        "AI-powered analysis to identify potentially fake product reviews"
-        "</p>",
-        unsafe_allow_html=True
-    )
     st.divider()
-    # Sidebar information
-    with st.sidebar:
-        st.header("ℹ️ About This Tool")
-        st.markdown(f"""
-        **Model:** `{MODEL_NAME.split('/')[-1]}`
-        **Status:**
-        - Model: {'🔒 Private' if HF_TOKEN else '🔓 Public'}
-        - AI Explanations: {'✅ Enabled' if OPENAI_API_KEY else '❌ Disabled'}
-        """)
-        st.divider()
-        st.header("📊 Analysis Features")
-        st.markdown("""
-        - **Text Classification:** Deep learning model
-        - **Feature Extraction:** 10+ linguistic signals
-        - **Keyword Analysis:** Sentiment patterns
-        - **Writing Style:** Structure & emphasis
-        - **Visual Insights:** Multiple charts
-        """)
-        st.divider()
-        st.header("🎯 How It Works")
-        st.markdown("""
-        1. Paste a product review
-        2. AI analyzes text patterns
-        3. Get prediction + confidence score
-        4. Review detailed explanations
-        5. See visual feature breakdown
-        """)
-        st.divider()
-        st.caption("⚠️ Use as a decision-support tool, not sole arbiter")
-    # Main input section
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        platform = st.selectbox(
-            "Platform",
-            ["Amazon", "Flipkart", "Zomato", "Yelp", "TripAdvisor", "Generic"],
-            help="Select the review platform (for context)"
-        )
-    with col2:
-        st.metric(
-            "Max Length",
-            f"{MAX_TEXT_LENGTH}",
-            delta="characters",
-            help="Maximum review length"
-        )
-    # Text input
-    review_text = st.text_area(
-        "📝 Review Text",
-        placeholder="Example: This product is amazing! Best purchase ever!!! Highly recommend to everyone!!!",
-        height=200,
-        max_chars=MAX_TEXT_LENGTH,
-        help=f"Paste a review (max {MAX_TEXT_LENGTH} characters)"
-    )
-    # Character counter
-    if review_text:
-        char_count = len(review_text)
-        st.caption(f"Characters: {char_count}/{MAX_TEXT_LENGTH}")
-    # Optional image upload - DISABLED due to HF Spaces restrictions
-    st.markdown("### 🖼️ Product Image")
-    # Image upload is disabled due to Hugging Face Spaces CORS/403 restrictions
-    # This is a known limitation and doesn't affect the core functionality
-    st.info("📷 **Image upload temporarily disabled** due to Hugging Face Spaces security restrictions. Text analysis is fully functional!")
-    with st.expander("ℹ️ Why is image upload disabled?"):
-        st.markdown("""
-        Hugging Face Spaces has CORS (Cross-Origin Resource Sharing) restrictions that prevent
-        client-side file uploads via Streamlit's file_uploader component.
-        **Workaround options:**
-        1. Run the app locally (no restrictions)
-        2. Use Docker deployment
-        3. Deploy on Streamlit Cloud instead
-        4. Wait for HF Spaces to update their security policies
-        **Good news:** The AI model only needs text to detect fake reviews, so this doesn't
-        affect accuracy!
-        """)
-    uploaded_image = None  # Disabled for now
-    st.divider()
-    # Analyze button
-    col1, col2, col3 = st.columns([1, 2, 1])
-    with col2:
-        analyze_button = st.button(
-            "🔍 Analyze Review",
-            type="primary",
-            use_container_width=True
-        )
-    # Analysis logic
-    if analyze_button:
-        # Input validation
-        if not review_text or not review_text.strip():
-            st.warning("⚠️ Please enter a review text first.")
-            st.stop()
-        if len(review_text.strip()) < 10:
-            st.warning("⚠️ Review too short. Please enter at least 10 characters.")
-            st.stop()
-        # Run classification
-        with st.spinner("🤖 Analyzing review with AI model..."):
-            try:
-                result = classifier(review_text)[0]
-                label = result.get("label", "Unknown")
-                score = float(result.get("score", 0.0))
-                confidence = round(score * 100, 2)
-            except Exception as e:
-                st.error(f"❌ Classification failed: {str(e)}")
-                st.info("💡 Try refreshing the page or simplifying the review text.")
-                st.stop()
-        # Extract features
-        features = extract_text_features(review_text)
-        # Display results
-        st.markdown("---")
-        st.markdown("## 📊 Analysis Results")
-        # Result metrics
-        col1, col2, col3, col4 = st.columns(4)
-        with col1:
-            st.metric("Platform", platform)
-        with col2:
-            st.metric("Prediction", label.upper())
-        with col3:
-            st.metric("Confidence", f"{confidence}%")
-        with col4:
-            reliability = "High" if confidence > 75 else "Medium" if confidence > 50 else "Low"
-            st.metric("Reliability", reliability)
-        # Visual indicator
-        if "FAKE" in label.upper():
-            st.error(f"⚠️ **Likely FAKE Review** (Confidence: {confidence}%)")
-        else:
-            st.success(f"✅ **Likely REAL Review** (Confidence: {confidence}%)")
-        # Image display (REMOVED - not functional on HF Spaces)
-        # Image upload is disabled due to platform restrictions
-        # This section is kept for reference but won't execute
-        pass
-        # Explanation section
-        st.markdown("---")
-        st.markdown("## 💡 Detailed Explanation")
-        # Try AI explanation first
-        ai_explanation = None
-        if OPENAI_API_KEY:
-            with st.spinner("Generating AI-powered explanation..."):
-                ai_explanation = generate_ai_explanation(review_text, label, confidence)
-        # Display explanation
-        if ai_explanation and not ai_explanation.startswith(("⏱️", "⚠️")):
-            st.markdown("### 🤖 AI-Powered Analysis")
-            st.info(ai_explanation)
-            with st.expander("📋 See Rule-Based Analysis"):
-                local_explanations = generate_local_explanation(features, label)
-                for exp in local_explanations:
-                    st.markdown(f"• {exp}")
-        else:
-            if ai_explanation:
-                st.warning(ai_explanation)
-            st.markdown("### 📋 Rule-Based Analysis")
-            local_explanations = generate_local_explanation(features, label)
-            for exp in local_explanations:
-                st.markdown(f"• {exp}")
-        # Visualizations
-        st.markdown("---")
-        st.markdown("## 📈 Visual Analysis")
-        # Confidence chart
-        fig1 = create_confidence_chart(confidence, label)
-        st.pyplot(fig1)
-        plt.close(fig1)
-        st.markdown("---")
-        # Keyword chart
-        fig2 = create_keyword_chart(features["keyword_counts"])
-        if fig2:
-            st.pyplot(fig2)
-            plt.close(fig2)
-        else:
-            st.info("ℹ️ No tracked emotional keywords found in this review")
-        st.markdown("---")
-        # Token importance
-        fig3 = create_token_importance_chart(features["top_tokens"])
-        if fig3:
-            st.pyplot(fig3)
-            plt.close(fig3)
-        st.markdown("---")
-        # Feature summary dashboard
-        st.markdown("### 📊 Complete Feature Dashboard")
-        fig4 = create_feature_summary_chart(features)
-        st.pyplot(fig4)
-        plt.close(fig4)
-        # Raw data
-        st.markdown("---")
-        with st.expander("🔍 View Raw Feature Data (Advanced)"):
-            st.json(features)
-        # Download option
-        st.markdown("---")
-        st.markdown("### 💾 Export Results")
-        export_data = {
-            "platform": platform,
-            "review_text": review_text,
-            "prediction": label,
-            "confidence": confidence,
-            "features": features
-        }
-        st.download_button(
-            label="📥 Download Analysis (JSON)",
-            data=str(export_data),
-            file_name="review_analysis.json",
-            mime="application/json"
-        )
-    # Footer
-    st.markdown("---")
-    st.markdown(
-        "<p style='text-align:center;color:#888;font-size:12px'>"
-        "⚠️ <b>Disclaimer:</b> This tool provides AI-assisted analysis for educational and research purposes. "
-        "Always apply human judgment and verify findings independently."
-        "</p>",
-        unsafe_allow_html=True
-    )
-    st.markdown(
-        "<p style='text-align:center;color:#888;font-size:11px'>"
-        "Powered by Transformers 🤗 | Streamlit | Hugging Face Spaces"
-        "</p>",
-        unsafe_allow_html=True
-    )
-# -------------------------
-# Run Application
-# -------------------------
 if __name__ == "__main__":
     main()

 import os
 import io
+import numpy as np
 from collections import Counter
+from typing import Dict, Optional, List, Tuple
 import streamlit as st
+from transformers import pipeline, logging as hf_logging
 from PIL import Image
 import matplotlib.pyplot as plt
 import matplotlib
 import requests
+import urllib.parse
+import math
+import warnings
+# -------------------------
+# Log Suppression
+# -------------------------
+# 1. Suppress Python Warnings (Deprecation, UserWarning)
+warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
+warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")
+# 2. Suppress Hugging Face Informational Logs (Weights initialization, CPU usage)
+hf_logging.set_verbosity_error()
+# 3. Suppress TensorFlow/PyTorch logs if backend triggers them
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 # Set matplotlib backend for server environments
 matplotlib.use('Agg')
 # Configuration
 # -------------------------
 st.set_page_config(
+    page_title="Deep Forensic Review Detector 🕵️",
+    page_icon="🕵️",
+    layout="wide",
     initial_sidebar_state="collapsed"
 )
 # Constants
+FAKE_MODEL_NAME = "akshit4857/autotrain-razz4-h7crd"
+SENTIMENT_MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
+EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
+# Primary Image Model (High Precision)
+IMAGE_MODEL_PRIMARY = "dima806/ai_generated_image_detection"
+# Backup Image Model (High Reliability)
+IMAGE_MODEL_BACKUP = "umm-maybe/AI-image-detector"
+MAX_TEXT_LENGTH = 5000
 # -------------------------
 # Secrets & Environment Management
 # -------------------------
 def get_secret(key: str, default: str = None) -> Optional[str]:
     try:
         if hasattr(st, 'secrets') and key in st.secrets:
             return st.secrets[key]
     except Exception:
         pass
     return os.environ.get(key, default)
 HF_TOKEN = get_secret("HF_TOKEN")
 OPENAI_API_KEY = get_secret("OPENAI_API_KEY")
 # -------------------------
+# Model Loading (Ensemble)
 # -------------------------
 @st.cache_resource(show_spinner=False)
def load_models() -> Tuple[Dict, List[str]]:
    """Load every pipeline of the ensemble, isolating each model's failure.

    Each model is loaded inside its own ``try`` so that one failing download
    does not prevent the others from loading. Nothing is rendered with
    Streamlit here; problems are collected and returned to the caller.

    Returns:
        ``(models, errors)`` where ``models`` maps ``'fake'``, ``'sentiment'``,
        ``'emotion'`` and ``'image'`` to the pipelines that loaded (a key is
        absent when its model failed), and ``errors`` is a list of
        human-readable messages about failures and fallbacks.
    """
    models = {}
    errors = []

    # 1. Fake detector -- the caller treats a missing 'fake' entry as fatal.
    try:
        models['fake'] = pipeline("text-classification", model=FAKE_MODEL_NAME, token=HF_TOKEN)
    except Exception as e:
        errors.append(f"Fake Detector: {str(e)}")

    # 2. Sentiment
    try:
        models['sentiment'] = pipeline(
            "sentiment-analysis",
            model=SENTIMENT_MODEL_NAME,
            tokenizer=SENTIMENT_MODEL_NAME,
            token=HF_TOKEN,
        )
    except Exception as e:
        errors.append(f"Sentiment Model: {str(e)}")

    # 3. Emotion
    try:
        # top_k=None replaces deprecated return_all_scores=True
        models['emotion'] = pipeline(
            "text-classification",
            model=EMOTION_MODEL_NAME,
            top_k=None,
            token=HF_TOKEN,
        )
    except Exception as e:
        errors.append(f"Emotion Model: {str(e)}")

    # 4. Image: try the primary model, then fall back to the backup model.
    try:
        models['image'] = pipeline("image-classification", model=IMAGE_MODEL_PRIMARY, token=HF_TOKEN)
    except Exception as e:
        print(f"Primary image model failed: {e}")
        try:
            models['image'] = pipeline("image-classification", model=IMAGE_MODEL_BACKUP, token=HF_TOKEN)
            # Note: We cannot use st.toast here inside a cached function
            errors.append(
                f"Note: Switched to backup image model ({IMAGE_MODEL_BACKUP}) due to primary failure."
            )
        except Exception as e2:
            # Report both causes; the primary error is usually the root cause
            # and was previously only printed to stdout.
            errors.append(
                f"Image Model (Both Primary & Backup failed): primary: {e}; backup: {e2}"
            )

    return models, errors
# Initialize models (load_models is wrapped in st.cache_resource, so the
# spinner is only shown for long when the cache is cold).
with st.spinner("🔄 Initializing Forensic AI Ensemble (This may take a minute)..."):
    ensemble, load_errors = load_models()

# Handle errors here, outside the cached function.
if 'fake' not in ensemble:
    # The fake detector is the one model the app cannot run without.
    st.error("❌ Critical Error: Failed to load the core Fake Detection model.")
    if load_errors:
        # Join the messages instead of showing the raw Python list repr.
        st.error("Details: " + "; ".join(load_errors))
    st.stop()

if load_errors:
    # Display non-critical errors/warnings
    with st.expander("⚠️ System Warnings (Non-Critical)", expanded=False):
        for err in load_errors:
            st.warning(err)
 # -------------------------
+# Advanced Feature Extraction
 # -------------------------
def calculate_complexity_score(text: str) -> float:
    """Return a heuristic linguistic-complexity score clamped to [0, 100].

    The score is ``5 * mean word length + 50 * type-token ratio``. Words are
    whitespace-separated tokens, compared exactly as written (case and
    attached punctuation are not normalised).

    Args:
        text: Text to score.

    Returns:
        A float in ``[0.0, 100.0]``; ``0.0`` when *text* contains no words.
    """
    words = text.split()
    if not words:
        return 0.0
    avg_len = sum(len(w) for w in words) / len(words)
    ttr = len(set(words)) / len(words)  # unique words / total words
    score = (avg_len * 5) + (ttr * 50)
    # Float bounds keep the return type a float even when the clamp applies.
    return min(100.0, max(0.0, score))
def extract_deep_features(text: str, models: dict) -> Dict:
    """Run the multi-model analysis on *text* and collect the results.

    Args:
        text: Review text. Only the first 512 characters are passed to the
            models; the complexity score uses the full text.
        models: Mapping of model key to callable pipeline. ``'fake'`` is
            required; ``'sentiment'`` and ``'emotion'`` are optional.

    Returns:
        A dict with the keys ``fake_probability``, ``sentiment_label``,
        ``sentiment_confidence``, ``primary_emotion``, ``emotion_confidence``,
        ``complexity_score`` and ``raw_emotion_scores``. Probabilities and
        confidences are scaled to 0-100. When an optional model is missing or
        fails, its label is ``'Unknown'`` and its confidence is ``0.0``.

    Raises:
        KeyError: If ``models`` has no ``'fake'`` entry.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)

    # Safe defaults if auxiliary models failed
    sent_label = "Unknown"
    sent_score = 0.0
    top_emo = {'label': 'Unknown', 'score': 0.0}
    emo_res = []

    snippet = text[:512]  # same character cap for every model call

    # 1. Fake detection (the caller guarantees this model is loaded).
    fake_res = models['fake'](snippet)[0]
    # Compare the label case-insensitively so "FAKE"/"fake" are not silently
    # treated as the genuine class, which would invert the probability.
    label_is_fake = str(fake_res['label']).strip().lower() == 'fake'
    is_fake_prob = fake_res['score'] if label_is_fake else (1 - fake_res['score'])

    # 2. Sentiment (optional, best-effort)
    if 'sentiment' in models:
        try:
            sent_res = models['sentiment'](snippet)[0]
            sent_label, sent_score = sent_res['label'], sent_res['score']
        except Exception:
            log.warning("Sentiment model failed; using defaults", exc_info=True)

    # 3. Emotion (optional, best-effort)
    if 'emotion' in models:
        try:
            emo_out = models['emotion'](snippet)
            # The pipeline is built with top_k=None. The result is expected to
            # be nested ([[{'label': ..., 'score': ...}, ...]]); a flat list of
            # dicts is accepted too in case the output shape differs.
            if emo_out and isinstance(emo_out[0], list):
                emo_out = emo_out[0]
            emo_res = emo_out
            top_emo = max(emo_res, key=lambda x: x['score'])
        except Exception:
            log.warning("Emotion model failed; using defaults", exc_info=True)

    # 4. Complexity
    complexity = calculate_complexity_score(text)

    return {
        "fake_probability": is_fake_prob * 100,
        "sentiment_label": sent_label,
        "sentiment_confidence": sent_score * 100,
        "primary_emotion": top_emo['label'],
        "emotion_confidence": top_emo['score'] * 100,
        "complexity_score": complexity,
        "raw_emotion_scores": emo_res
    }
 # -------------------------
+# AI-Powered Dynamic Explanation
 # -------------------------
def generate_forensic_report(text: str, features: Dict) -> str:
    """Generate a detailed explanation via OpenAI, or a rule-based fallback.

    Args:
        text: Review text; at most the first 600 characters are sent.
        features: Feature dict providing ``fake_probability``,
            ``sentiment_label``, ``sentiment_confidence``,
            ``primary_emotion`` and ``complexity_score``.

    Returns:
        The model's report text. ``generate_fallback_report(features)`` is
        returned instead when no API key is configured, the request fails, or
        the response has no usable content.
    """
    if not OPENAI_API_KEY:
        return generate_fallback_report(features)

    # Only mark the snippet as truncated when something was actually cut.
    snippet = text[:600] + ("..." if len(text) > 600 else "")

    prompt = (
        f"Act as a Forensic Data Scientist. Analyze this review.\n\n"
        f"DATA:\n"
        f"- Fake Probability Model: {features['fake_probability']:.1f}%\n"
        f"- Sentiment: {features['sentiment_label']} ({features['sentiment_confidence']:.1f}%)\n"
        f"- Primary Emotion: {features['primary_emotion']}\n"
        f"- Linguistic Complexity: {features['complexity_score']:.1f}/100\n"
        f"- Review Snippet: {snippet}\n\n"
        f"TASK:\n"
        f"Provide a 'Forensic Verdict' explaining WHY it looks real or fake based on the combination of these factors. "
        f"For example, if sentiment is extreme and emotion is purely 'joy' but complexity is low, suggest bot behavior. "
        f"If complexity is high and emotion is nuanced, suggest human.\n\n"
        f"FORMAT:\n"
        f"Return 3 distinct paragraphs with headers: '1. Linguistic Analysis', '2. Emotional Consistency', '3. Final Verdict'."
    )
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.4
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=15,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network/HTTP failure, invalid JSON, or an unexpected payload shape.
        # Unlike a bare ``except:``, KeyboardInterrupt/SystemExit propagate.
        return generate_fallback_report(features)

    if not isinstance(content, str) or not content.strip():
        # No usable text in the response: fall back to the rule-based report.
        return generate_fallback_report(features)
    return content
def generate_fallback_report(features: Dict) -> str:
    """Build the rule-based Markdown report used when the AI report is unavailable.

    Args:
        features: Feature dict providing ``fake_probability`` (0-100),
            ``primary_emotion`` (label, or ``'Unknown'`` when no emotion
            result is available) and ``complexity_score`` (0-100).

    Returns:
        Markdown text with three sections: linguistic analysis, emotional
        consistency and final verdict.
    """
    f_prob = features['fake_probability']
    emo = features['primary_emotion']
    comp = features['complexity_score']

    parts = ["### 1. Linguistic Analysis\n"]
    if comp < 40:
        parts.append(
            "The vocabulary is repetitive and simple. This low lexical density often correlates with generated content or bulk-written reviews.\n\n"
        )
    else:
        parts.append(
            "The sentence structure is complex and varied, which is a strong indicator of human authorship.\n\n"
        )

    parts.append("### 2. Emotional Consistency\n")
    if emo == 'Unknown':
        # No emotion result: do not claim the emotion is "appropriate".
        parts.append(
            "No emotion signal is available for this review, so emotional consistency could not be assessed.\n\n"
        )
    elif f_prob > 70 and emo in ['joy', 'surprise']:
        parts.append(
            f"The review displays extreme levels of '{emo}'. Fake reviews often exaggerate positive emotions to boost ratings artificially.\n\n"
        )
    elif f_prob > 70 and emo in ['anger', 'disgust']:
        parts.append(
            f"The review is heavily driven by '{emo}'. Competitor sabotage reviews often utilize aggressive negative emotions.\n\n"
        )
    else:
        parts.append(
            f"The detected emotion is '{emo}', which appears contextually appropriate for the sentiment expressed.\n\n"
        )

    parts.append("### 3. Final Verdict\n")
    if f_prob > 50:
        parts.append(
            f"Based on the ensemble analysis, there is a {f_prob:.1f}% probability this is inauthentic."
        )
    else:
        parts.append("Multiple data points suggest this review represents a genuine user experience.")

    return "".join(parts)
+# -------------------------
+# Visualization: Radar Chart
+# -------------------------
def create_radar_chart(features: Dict) -> plt.Figure:
    """Draw a four-axis polar ("radar") chart of the forensic scores.

    Axes, in order: fake probability, sentiment confidence, emotion
    confidence and inverted complexity (``100 - complexity_score``).
    The radial axis is fixed to 0-100 with ticks every 20.

    Returns the matplotlib figure; the caller is responsible for closing it.
    """
    axis_labels = ['Fake Probability', 'Sentiment Intensity', 'Emotional Intensity', 'Complexity (Inv)']

    # Complexity is inverted so that a larger value means simpler text.
    inverted_complexity = 100 - features['complexity_score']
    scores = [
        features['fake_probability'],
        features['sentiment_confidence'],
        features['emotion_confidence'],
        inverted_complexity,
    ]

    axis_count = len(axis_labels)
    spoke_angles = [idx / float(axis_count) * 2 * math.pi for idx in range(axis_count)]

    # Repeat the first point at the end so the outline closes into a polygon.
    closed_scores = scores + scores[:1]
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    ax.plot(closed_angles, closed_scores, linewidth=2, linestyle='solid', color='#FF4B4B')
    ax.fill(closed_angles, closed_scores, '#FF4B4B', alpha=0.25)

    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(axis_labels, size=10, weight='bold')

    radial_ticks = [20, 40, 60, 80, 100]
    ax.set_yticks(radial_ticks)
    ax.set_yticklabels([str(tick) for tick in radial_ticks], color="grey", size=7)
    ax.set_ylim(0, 100)
    return fig
+# -------------------------
+# Image Functions
+# -------------------------
def get_image_from_url(url: str, *, max_bytes: int = 10 * 1024 * 1024) -> Optional[Image.Image]:
    """Download *url* and return it as an RGB PIL image, or ``None`` on failure.

    The URL comes straight from user input, so the body is read in chunks
    and the download is abandoned once it exceeds *max_bytes*, instead of
    buffering an arbitrarily large response in memory.

    Args:
        url: Address of the image to fetch.
        max_bytes: Largest response body accepted, in bytes (default 10 MiB).

    Returns:
        The decoded image converted to ``"RGB"``, or ``None`` if the request
        fails, the body is too large, or the data cannot be decoded.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        # The context manager releases the connection even on early return.
        with requests.get(url, headers=headers, timeout=10, stream=True) as response:
            response.raise_for_status()

            # Cheap early rejection when the server declares the size.
            declared = response.headers.get('Content-Length')
            if declared is not None and declared.isdigit() and int(declared) > max_bytes:
                return None

            # Content-Length may be absent or wrong, so enforce the cap
            # while streaming as well.
            body = bytearray()
            for chunk in response.iter_content(chunk_size=64 * 1024):
                body.extend(chunk)
                if len(body) > max_bytes:
                    return None

        return Image.open(io.BytesIO(bytes(body))).convert("RGB")
    except Exception:
        # Deliberately best-effort: any network or decoding problem is
        # reported to the caller as "no image".
        return None
def get_google_lens_url(image_url: str) -> str:
    """Return the Google Lens "upload by URL" link for *image_url*.

    The image address is percent-encoded with ``urllib.parse.quote`` using
    its default safe set, so ``/`` is kept while characters such as ``:``,
    ``?``, ``&`` and ``=`` are escaped.
    """
    encoded_target = urllib.parse.quote(image_url)
    return "https://lens.google.com/uploadbyurl?url=" + encoded_target
 # -------------------------
+# Main UI
 # -------------------------
 def main():
+    st.markdown("<h1 style='text-align:center'>🕵️ Deep Forensic Review Investigator</h1>", unsafe_allow_html=True)
+    st.markdown("<p style='text-align:center;color:#666;'>Multi-Aspect Ensemble Analysis | Text & Image Forensics</p>", unsafe_allow_html=True)
     st.divider()
+    tab1, tab2 = st.tabs(["📝 Multi-Aspect Text Forensics", "🖼️ Image Forensics"])
+    # --- TAB 1: TEXT ---
+    with tab1:
+        col_in1, col_in2 = st.columns([3, 1])
+        with col_in1:
+            review_text = st.text_area("Paste Review for Forensic Analysis", height=150)
+        with col_in2:
+            st.info("ℹ️ This tool combines 3 AI models (Fake Detection, Sentiment, Emotion) to achieve high precision.")
+        if st.button("🔍 Run Deep Analysis", type="primary"):
+            if not review_text:
+                st.warning("Input required.")
+                st.stop()
+            with st.spinner("⚙️ Running Ensemble Models..."):
+                features = extract_deep_features(review_text, ensemble)
+                report = generate_forensic_report(review_text, features)
+                st.markdown("---")
+                m1, m2, m3, m4 = st.columns(4)
+                m1.metric("Fake Probability", f"{features['fake_probability']:.1f}%",
+                          delta="High Risk" if features['fake_probability'] > 70 else "Low Risk",
+                          delta_color="inverse")
+                m2.metric("Sentiment", features['sentiment_label'], f"{features['sentiment_confidence']:.1f}% conf")
+                m3.metric("Primary Emotion", features['primary_emotion'].title(), f"{features['emotion_confidence']:.1f}% intensity")
+                m4.metric("Linguistic Complexity", f"{features['complexity_score']:.0f}/100")
+                c1, c2 = st.columns([1, 1])
+                with c1:
+                    st.subheader("📊 Forensic Radar")
+                    fig = create_radar_chart(features)
+                    st.pyplot(fig)
+                    plt.close(fig)
+                with c2:
+                    st.subheader("📋 Forensic Analyst Report")
+                    st.markdown(f"""<div style="background-color:#f0f2f6;padding:20px;border-radius:10px;border-left:5px solid #ff4b4b;">
+                    {report}</div>""", unsafe_allow_html=True)
+                with st.expander("See Raw Emotion Breakdown"):
+                    if features['raw_emotion_scores']:
+                        emotions = {x['label']: x['score'] for x in features['raw_emotion_scores']}
+                        st.bar_chart(emotions)
+                    else:
+                        st.write("Emotion data unavailable.")
+    # --- TAB 2: IMAGE ---
+    with tab2:
+        st.markdown("### 🖼️ AI Image Verification")
+        img_url = st.text_input("Image URL")
+        if st.button("Analyze Image"):
+            if not img_url: st.stop()
+            # Check if image model loaded successfully
+            if 'image' not in ensemble:
+                st.error("The Image Detection model failed to load. Please refresh or check logs.")
+                st.stop()
+            with st.spinner("Scanning pixels..."):
+                img = get_image_from_url(img_url)
+                if img:
+                    col_img, col_data = st.columns([1, 2])
+                    with col_img:
+                        st.image(img, width=300)
+                    with col_data:
+                        # Safe access now guaranteed by check above
+                        res = ensemble['image'](img)
+                        top = max(res, key=lambda x: x['score'])
+                        is_ai = top['label'].lower() in ['fake', 'artificial', 'ai', 'generated']
+                        conf = top['score'] * 100
+                        if is_ai:
+                            st.error(f"🚨 **AI GENERATED** ({conf:.1f}%)")
+                        else:
+                            st.success(f"✅ **REAL PHOTOGRAPH** ({conf:.1f}%)")
+                        st.progress(top['score'])
+                        lens = get_google_lens_url(img_url)
+                        st.markdown(f"[🔎 Verify on Google Lens]({lens})")
+                else:
+                    st.error("Failed to download image. Check the URL.")
 if __name__ == "__main__":
     main()