akshit4857 committed on
Commit
fa0f465
·
verified ·
1 Parent(s): b8a2bdc

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +315 -629
src/streamlit_app.py CHANGED
@@ -5,14 +5,31 @@ Optimized for Hugging Face Spaces deployment
5
 
6
  import os
7
  import io
 
8
  from collections import Counter
9
- from typing import Dict, Optional, List
10
  import streamlit as st
11
- from transformers import pipeline
12
  from PIL import Image
13
  import matplotlib.pyplot as plt
14
  import matplotlib
15
  import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Set matplotlib backend for server environments
18
  matplotlib.use('Agg')
@@ -21,689 +38,358 @@ matplotlib.use('Agg')
21
  # Configuration
22
  # -------------------------
23
  st.set_page_config(
24
- page_title="Fake Review Detector 🧠",
25
- page_icon="🧠",
26
- layout="centered",
27
  initial_sidebar_state="collapsed"
28
  )
29
 
30
  # Constants
31
- MODEL_NAME = "akshit4857/autotrain-razz4-h7crd"
32
- MAX_TEXT_LENGTH = 5000
33
- MAX_IMAGE_SIZE = 10 * 1024 * 1024 # 10MB
 
 
 
 
34
 
35
- # Feature detection keywords
36
- POSITIVE_KEYWORDS = ["amazing", "best", "superb", "excellent", "love", "perfect", "awesome"]
37
- NEGATIVE_KEYWORDS = ["worst", "terrible", "bad", "awful", "hate", "horrible", "useless"]
38
- ALL_KEYWORDS = POSITIVE_KEYWORDS + NEGATIVE_KEYWORDS
39
 
40
  # -------------------------
41
  # Secrets & Environment Management
42
  # -------------------------
43
  def get_secret(key: str, default: str = None) -> Optional[str]:
44
- """Safely retrieve secrets from Streamlit secrets or environment variables"""
45
  try:
46
- # Try Streamlit secrets first
47
  if hasattr(st, 'secrets') and key in st.secrets:
48
  return st.secrets[key]
49
  except Exception:
50
  pass
51
-
52
- # Fall back to environment variables
53
  return os.environ.get(key, default)
54
 
55
- # Retrieve tokens
56
  HF_TOKEN = get_secret("HF_TOKEN")
57
  OPENAI_API_KEY = get_secret("OPENAI_API_KEY")
58
 
59
  # -------------------------
60
- # Model Loading
61
  # -------------------------
62
  @st.cache_resource(show_spinner=False)
63
- def load_classifier(model_name: str = MODEL_NAME, hf_token: Optional[str] = None):
64
  """
65
- Load the text classification model with proper error handling
66
- Uses caching to avoid reloading on every interaction
67
  """
68
- try:
69
- kwargs = {"model": model_name}
70
-
71
- # Use 'token' parameter (updated API, not deprecated 'use_auth_token')
72
- if hf_token:
73
- kwargs["token"] = hf_token
74
-
75
- return pipeline("text-classification", **kwargs)
76
 
 
 
 
77
  except Exception as e:
78
- error_msg = str(e)
79
- st.error(f"❌ **Failed to load model:** {error_msg}")
80
-
81
- # Provide helpful error messages
82
- if "404" in error_msg:
83
- st.info("πŸ’‘ Model not found. Please verify the model name is correct.")
84
- elif "401" in error_msg or "403" in error_msg:
85
- st.info("πŸ’‘ Authentication failed. Set HF_TOKEN in Hugging Face Spaces secrets.")
86
- else:
87
- st.info("πŸ’‘ Check your internet connection and model availability.")
88
-
89
- st.stop()
90
 
91
- # Initialize classifier
92
- with st.spinner("πŸ”„ Loading AI model..."):
93
- classifier = load_classifier(MODEL_NAME, HF_TOKEN)
94
-
95
- # -------------------------
96
- # Feature Extraction Functions
97
- # -------------------------
98
- def extract_text_features(text: str) -> Dict:
99
- """
100
- Extract explainable features from review text
101
- Returns a dictionary of features for analysis
102
- """
103
- text = text.strip()
104
- tokens = text.split()
105
- text_lower = text.lower()
106
-
107
- # Basic structural features
108
- features = {
109
- "length_chars": len(text),
110
- "length_words": len(tokens),
111
- "avg_word_length": sum(len(w) for w in tokens) / len(tokens) if tokens else 0,
112
- "exclamations": text.count("!"),
113
- "questions": text.count("?"),
114
- "caps_words": sum(1 for w in tokens if w.isupper() and len(w) > 1),
115
- "sentences": max(1, text.count(".") + text.count("!") + text.count("?")),
116
- }
117
-
118
- # Keyword analysis
119
- features["keyword_counts"] = {
120
- k: text_lower.count(k) for k in ALL_KEYWORDS
121
- }
122
-
123
- # Sentiment scores
124
- features["pos_score"] = sum(text_lower.count(k) for k in POSITIVE_KEYWORDS)
125
- features["neg_score"] = sum(text_lower.count(k) for k in NEGATIVE_KEYWORDS)
126
- features["sentiment_balance"] = features["pos_score"] - features["neg_score"]
127
-
128
- # Token importance (heuristic: length Γ— frequency)
129
- cleaned_tokens = [
130
- w.strip(".,!?;:\"'").lower()
131
- for w in tokens
132
- if len(w.strip(".,!?;:\"'")) > 2 # Filter very short words
133
- ]
134
-
135
- word_counts = Counter(cleaned_tokens)
136
- importance = {
137
- word: len(word) * count
138
- for word, count in word_counts.items()
139
- }
140
-
141
- # Top tokens by importance
142
- features["top_tokens"] = dict(
143
- sorted(importance.items(), key=lambda x: x[1], reverse=True)[:10]
144
- )
145
-
146
- return features
147
 
148
- # -------------------------
149
- # AI-Powered Explanation (Optional)
150
- # -------------------------
151
- def generate_ai_explanation(
152
- review_text: str,
153
- label: str,
154
- confidence: float,
155
- timeout: int = 15
156
- ) -> Optional[str]:
157
- """
158
- Generate AI-powered explanation using OpenAI API
159
- Returns None if API key is not available
160
- """
161
- if not OPENAI_API_KEY:
162
- return None
163
-
164
- url = "https://api.openai.com/v1/chat/completions"
165
- headers = {
166
- "Authorization": f"Bearer {OPENAI_API_KEY}",
167
- "Content-Type": "application/json"
168
- }
169
-
170
- # Truncate review for API call
171
- review_snippet = review_text[:500] + ("..." if len(review_text) > 500 else "")
172
-
173
- prompt = f"""You are an AI explainability assistant for a fake review detection system.
174
 
175
- **Prediction:** {label}
176
- **Confidence:** {confidence:.1f}%
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- **Review Text:**
179
- {review_snippet}
180
 
181
- Provide a concise, bullet-point explanation covering:
182
- 1. Key linguistic features that influenced this prediction
183
- 2. Common patterns found in {label.lower()} reviews
184
- 3. One specific observation about this review
185
 
186
- Format: 3-4 bullet points, max 120 words total."""
 
 
 
 
 
187
 
188
- payload = {
189
- "model": "gpt-4o-mini",
190
- "messages": [{"role": "user", "content": prompt}],
191
- "max_tokens": 180,
192
- "temperature": 0.3
193
- }
194
-
195
- try:
196
- response = requests.post(
197
- url,
198
- headers=headers,
199
- json=payload,
200
- timeout=timeout
201
- )
202
- response.raise_for_status()
203
-
204
- content = response.json()["choices"][0]["message"]["content"]
205
- return content.strip()
206
-
207
- except requests.exceptions.Timeout:
208
- return "⏱️ AI explanation timed out. Using local analysis instead."
209
- except requests.exceptions.RequestException as e:
210
- return f"⚠️ AI explanation unavailable: {type(e).__name__}"
211
- except Exception as e:
212
- return f"⚠️ Error generating AI explanation: {str(e)}"
213
 
214
  # -------------------------
215
- # Local Rule-Based Explanation
216
  # -------------------------
217
- def generate_local_explanation(features: Dict, label: str) -> List[str]:
218
- """
219
- Generate rule-based explanation from extracted features
220
- Returns a list of explanation bullet points
221
- """
222
- explanations = []
223
-
224
- # Keyword analysis
225
- found_keywords = [k for k, v in features["keyword_counts"].items() if v > 0]
226
- if found_keywords:
227
- keyword_str = ", ".join(found_keywords[:5])
228
- if len(found_keywords) > 5:
229
- keyword_str += f" (+{len(found_keywords)-5} more)"
230
- explanations.append(
231
- f"**Emotional keywords detected:** {keyword_str} β€” "
232
- f"may indicate promotional or exaggerated language"
233
- )
234
-
235
- # Punctuation patterns
236
- if features["exclamations"] >= 3:
237
- explanations.append(
238
- f"**Excessive exclamation marks** ({features['exclamations']}) β€” "
239
- f"common in fake reviews trying to appear enthusiastic"
240
- )
241
-
242
- # Length analysis
243
- words = features["length_words"]
244
- if words < 10:
245
- explanations.append(
246
- f"**Very brief review** ({words} words) β€” "
247
- f"may lack authentic detail or personal experience"
248
- )
249
- elif words > 250:
250
- explanations.append(
251
- f"**Unusually long review** ({words} words) β€” "
252
- f"atypical for casual customers"
253
- )
254
-
255
- # Sentiment analysis
256
- sentiment = features["sentiment_balance"]
257
- if sentiment >= 5:
258
- explanations.append(
259
- f"**Heavily positive sentiment** (+{sentiment}) β€” "
260
- f"may indicate promotional intent"
261
- )
262
- elif sentiment <= -5:
263
- explanations.append(
264
- f"**Heavily negative sentiment** ({sentiment}) β€” "
265
- f"could be competitor sabotage or genuine dissatisfaction"
266
- )
267
-
268
- # ALL CAPS usage
269
- if features["caps_words"] >= 3:
270
- explanations.append(
271
- f"**Multiple ALL-CAPS words** ({features['caps_words']}) β€” "
272
- f"aggressive emphasis uncommon in genuine reviews"
273
- )
274
-
275
- # Average word length
276
- if features["avg_word_length"] > 7:
277
- explanations.append(
278
- f"**Complex vocabulary** (avg {features['avg_word_length']:.1f} chars/word) β€” "
279
- f"may indicate professional/paid writing"
280
- )
281
-
282
- # Fallback if no strong signals
283
- if not explanations:
284
- explanations.append(
285
- "**No strong manipulation signals detected** β€” "
286
- "review appears relatively natural based on heuristic analysis"
287
- )
288
-
289
- return explanations
290
 
291
  # -------------------------
292
- # Visualization Functions
293
  # -------------------------
294
- def create_confidence_chart(confidence: float, label: str) -> plt.Figure:
295
- """Create a horizontal bar chart showing model confidence"""
296
- fig, ax = plt.subplots(figsize=(8, 1.8))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- # Color coding based on prediction and confidence
299
- if "FAKE" in label.upper():
300
- color = '#ff4b4b' if confidence > 70 else '#ff8c42'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  else:
302
- color = '#4CAF50' if confidence > 70 else '#8bc34a'
303
-
304
- ax.barh([0], [confidence], height=0.5, color=color, alpha=0.8)
305
- ax.set_xlim(0, 100)
306
- ax.set_xlabel("Confidence (%)", fontsize=11, fontweight='bold')
307
- ax.set_yticks([])
308
- ax.set_title(f"Model Confidence: {confidence:.1f}%", fontsize=12, fontweight='bold')
309
- ax.grid(axis='x', alpha=0.3, linestyle='--')
310
-
311
- # Add confidence threshold lines
312
- ax.axvline(x=50, color='gray', linestyle='--', alpha=0.5, linewidth=1)
313
- ax.axvline(x=75, color='gray', linestyle='--', alpha=0.3, linewidth=1)
314
-
315
- plt.tight_layout()
316
- return fig
 
 
317
 
318
- def create_keyword_chart(keyword_counts: Dict) -> Optional[plt.Figure]:
319
- """Create bar chart of keyword occurrences"""
320
- nonzero = {k: v for k, v in keyword_counts.items() if v > 0}
321
-
322
- if not nonzero:
323
- return None
324
-
325
- # Sort by count
326
- sorted_items = sorted(nonzero.items(), key=lambda x: x[1], reverse=True)[:10]
327
- keywords, counts = zip(*sorted_items)
328
-
329
- fig, ax = plt.subplots(figsize=(8, 4))
330
-
331
- # Color positive vs negative keywords
332
- colors = [
333
- '#4CAF50' if k in POSITIVE_KEYWORDS else '#ff4b4b'
334
- for k in keywords
335
  ]
336
 
337
- ax.bar(keywords, counts, color=colors, alpha=0.7, edgecolor='black', linewidth=0.5)
338
- ax.set_title("Keyword Frequency Analysis", fontsize=12, fontweight='bold')
339
- ax.set_ylabel("Occurrences", fontsize=10)
340
- ax.set_xlabel("Keywords", fontsize=10)
341
- ax.grid(axis='y', alpha=0.3, linestyle='--')
342
-
343
- plt.xticks(rotation=45, ha='right')
344
- plt.tight_layout()
 
 
 
 
 
345
  return fig
346
 
347
- def create_token_importance_chart(top_tokens: Dict) -> Optional[plt.Figure]:
348
- """Create horizontal bar chart of most important tokens"""
349
- if not top_tokens or len(top_tokens) == 0:
350
- return None
351
-
352
- # Get top 8 tokens
353
- items = list(top_tokens.items())[:8]
354
- if not items:
 
 
355
  return None
356
-
357
- tokens, scores = zip(*items)
358
-
359
- fig, ax = plt.subplots(figsize=(8, 4))
360
-
361
- y_pos = range(len(tokens))
362
- ax.barh(y_pos, scores, color='coral', alpha=0.7, edgecolor='black', linewidth=0.5)
363
- ax.set_yticks(y_pos)
364
- ax.set_yticklabels(tokens)
365
- ax.invert_yaxis()
366
- ax.set_title("Top Tokens by Importance", fontsize=12, fontweight='bold')
367
- ax.set_xlabel("Heuristic Score (length Γ— frequency)", fontsize=10)
368
- ax.grid(axis='x', alpha=0.3, linestyle='--')
369
-
370
- plt.tight_layout()
371
- return fig
372
 
373
- def create_feature_summary_chart(features: Dict) -> plt.Figure:
374
- """Create a summary dashboard of key metrics"""
375
- fig, axes = plt.subplots(2, 2, figsize=(10, 6))
376
- fig.suptitle("Review Feature Summary", fontsize=14, fontweight='bold')
377
-
378
- # 1. Length metrics
379
- ax1 = axes[0, 0]
380
- metrics = ['Words', 'Chars', 'Sentences']
381
- values = [features['length_words'], features['length_chars']/10, features['sentences']]
382
- ax1.bar(metrics, values, color=['#3498db', '#2ecc71', '#9b59b6'], alpha=0.7)
383
- ax1.set_title("Text Structure")
384
- ax1.set_ylabel("Count")
385
- ax1.grid(axis='y', alpha=0.3)
386
-
387
- # 2. Punctuation
388
- ax2 = axes[0, 1]
389
- punct = ['Exclamations', 'Questions', 'CAPS Words']
390
- punct_values = [features['exclamations'], features['questions'], features['caps_words']]
391
- ax2.bar(punct, punct_values, color=['#e74c3c', '#f39c12', '#e67e22'], alpha=0.7)
392
- ax2.set_title("Emphasis Indicators")
393
- ax2.set_ylabel("Count")
394
- ax2.grid(axis='y', alpha=0.3)
395
-
396
- # 3. Sentiment balance
397
- ax3 = axes[1, 0]
398
- sentiment = ['Positive', 'Negative']
399
- sent_values = [features['pos_score'], features['neg_score']]
400
- colors = ['#2ecc71', '#e74c3c']
401
- ax3.bar(sentiment, sent_values, color=colors, alpha=0.7)
402
- ax3.set_title("Sentiment Score")
403
- ax3.set_ylabel("Keyword Count")
404
- ax3.grid(axis='y', alpha=0.3)
405
-
406
- # 4. Overall stats
407
- ax4 = axes[1, 1]
408
- ax4.axis('off')
409
- stats_text = f"""
410
- Avg Word Length: {features['avg_word_length']:.1f}
411
- Sentiment Balance: {features['sentiment_balance']:+d}
412
- Total Keywords: {sum(features['keyword_counts'].values())}
413
- Unique Tokens: {len(features['top_tokens'])}
414
- """
415
- ax4.text(0.1, 0.5, stats_text, fontsize=11, verticalalignment='center',
416
- bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))
417
- ax4.set_title("Key Statistics")
418
-
419
- plt.tight_layout()
420
- return fig
421
 
422
  # -------------------------
423
- # Main Application UI
424
  # -------------------------
425
  def main():
426
- """Main application function"""
427
-
428
- # Header
429
- st.markdown(
430
- "<h1 style='text-align:center'>🧠 Fake Review Detector</h1>",
431
- unsafe_allow_html=True
432
- )
433
- st.markdown(
434
- "<p style='text-align:center;color:#666;font-size:16px'>"
435
- "AI-powered analysis to identify potentially fake product reviews"
436
- "</p>",
437
- unsafe_allow_html=True
438
- )
439
  st.divider()
440
-
441
- # Sidebar information
442
- with st.sidebar:
443
- st.header("ℹ️ About This Tool")
444
-
445
- st.markdown(f"""
446
- **Model:** `{MODEL_NAME.split('/')[-1]}`
447
-
448
- **Status:**
449
- - Model: {'πŸ”’ Private' if HF_TOKEN else 'πŸ”“ Public'}
450
- - AI Explanations: {'βœ… Enabled' if OPENAI_API_KEY else '❌ Disabled'}
451
- """)
452
-
453
- st.divider()
454
-
455
- st.header("πŸ“Š Analysis Features")
456
- st.markdown("""
457
- - **Text Classification:** Deep learning model
458
- - **Feature Extraction:** 10+ linguistic signals
459
- - **Keyword Analysis:** Sentiment patterns
460
- - **Writing Style:** Structure & emphasis
461
- - **Visual Insights:** Multiple charts
462
- """)
463
-
464
- st.divider()
465
-
466
- st.header("🎯 How It Works")
467
- st.markdown("""
468
- 1. Paste a product review
469
- 2. AI analyzes text patterns
470
- 3. Get prediction + confidence score
471
- 4. Review detailed explanations
472
- 5. See visual feature breakdown
473
- """)
474
-
475
- st.divider()
476
- st.caption("⚠️ Use as a decision-support tool, not sole arbiter")
477
-
478
- # Main input section
479
- col1, col2 = st.columns([2, 1])
480
-
481
- with col1:
482
- platform = st.selectbox(
483
- "Platform",
484
- ["Amazon", "Flipkart", "Zomato", "Yelp", "TripAdvisor", "Generic"],
485
- help="Select the review platform (for context)"
486
- )
487
-
488
- with col2:
489
- st.metric(
490
- "Max Length",
491
- f"{MAX_TEXT_LENGTH}",
492
- delta="characters",
493
- help="Maximum review length"
494
- )
495
-
496
- # Text input
497
- review_text = st.text_area(
498
- "πŸ“ Review Text",
499
- placeholder="Example: This product is amazing! Best purchase ever!!! Highly recommend to everyone!!!",
500
- height=200,
501
- max_chars=MAX_TEXT_LENGTH,
502
- help=f"Paste a review (max {MAX_TEXT_LENGTH} characters)"
503
- )
504
-
505
- # Character counter
506
- if review_text:
507
- char_count = len(review_text)
508
- st.caption(f"Characters: {char_count}/{MAX_TEXT_LENGTH}")
509
-
510
- # Optional image upload - DISABLED due to HF Spaces restrictions
511
- st.markdown("### πŸ–ΌοΈ Product Image")
512
-
513
- # Image upload is disabled due to Hugging Face Spaces CORS/403 restrictions
514
- # This is a known limitation and doesn't affect the core functionality
515
- st.info("πŸ“· **Image upload temporarily disabled** due to Hugging Face Spaces security restrictions. Text analysis is fully functional!")
516
-
517
- with st.expander("ℹ️ Why is image upload disabled?"):
518
- st.markdown("""
519
- Hugging Face Spaces has CORS (Cross-Origin Resource Sharing) restrictions that prevent
520
- client-side file uploads via Streamlit's file_uploader component.
521
-
522
- **Workaround options:**
523
- 1. Run the app locally (no restrictions)
524
- 2. Use Docker deployment
525
- 3. Deploy on Streamlit Cloud instead
526
- 4. Wait for HF Spaces to update their security policies
527
-
528
- **Good news:** The AI model only needs text to detect fake reviews, so this doesn't
529
- affect accuracy!
530
- """)
531
-
532
- uploaded_image = None # Disabled for now
533
-
534
- st.divider()
535
-
536
- # Analyze button
537
- col1, col2, col3 = st.columns([1, 2, 1])
538
- with col2:
539
- analyze_button = st.button(
540
- "πŸ” Analyze Review",
541
- type="primary",
542
- use_container_width=True
543
- )
544
-
545
- # Analysis logic
546
- if analyze_button:
547
- # Input validation
548
- if not review_text or not review_text.strip():
549
- st.warning("⚠️ Please enter a review text first.")
550
- st.stop()
551
-
552
- if len(review_text.strip()) < 10:
553
- st.warning("⚠️ Review too short. Please enter at least 10 characters.")
554
- st.stop()
555
-
556
- # Run classification
557
- with st.spinner("πŸ€– Analyzing review with AI model..."):
558
- try:
559
- result = classifier(review_text)[0]
560
- label = result.get("label", "Unknown")
561
- score = float(result.get("score", 0.0))
562
- confidence = round(score * 100, 2)
563
- except Exception as e:
564
- st.error(f"❌ Classification failed: {str(e)}")
565
- st.info("πŸ’‘ Try refreshing the page or simplifying the review text.")
566
- st.stop()
567
-
568
- # Extract features
569
- features = extract_text_features(review_text)
570
-
571
- # Display results
572
- st.markdown("---")
573
- st.markdown("## πŸ“Š Analysis Results")
574
-
575
- # Result metrics
576
- col1, col2, col3, col4 = st.columns(4)
577
-
578
- with col1:
579
- st.metric("Platform", platform)
580
-
581
- with col2:
582
- st.metric("Prediction", label.upper())
583
-
584
- with col3:
585
- st.metric("Confidence", f"{confidence}%")
586
-
587
- with col4:
588
- reliability = "High" if confidence > 75 else "Medium" if confidence > 50 else "Low"
589
- st.metric("Reliability", reliability)
590
-
591
- # Visual indicator
592
- if "FAKE" in label.upper():
593
- st.error(f"⚠️ **Likely FAKE Review** (Confidence: {confidence}%)")
594
- else:
595
- st.success(f"βœ… **Likely REAL Review** (Confidence: {confidence}%)")
596
-
597
- # Image display (REMOVED - not functional on HF Spaces)
598
- # Image upload is disabled due to platform restrictions
599
- # This section is kept for reference but won't execute
600
- pass
601
-
602
- # Explanation section
603
- st.markdown("---")
604
- st.markdown("## πŸ’‘ Detailed Explanation")
605
-
606
- # Try AI explanation first
607
- ai_explanation = None
608
- if OPENAI_API_KEY:
609
- with st.spinner("Generating AI-powered explanation..."):
610
- ai_explanation = generate_ai_explanation(review_text, label, confidence)
611
-
612
- # Display explanation
613
- if ai_explanation and not ai_explanation.startswith(("⏱️", "⚠️")):
614
- st.markdown("### πŸ€– AI-Powered Analysis")
615
- st.info(ai_explanation)
616
 
617
- with st.expander("πŸ“‹ See Rule-Based Analysis"):
618
- local_explanations = generate_local_explanation(features, label)
619
- for exp in local_explanations:
620
- st.markdown(f"β€’ {exp}")
621
- else:
622
- if ai_explanation:
623
- st.warning(ai_explanation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
 
625
- st.markdown("### πŸ“‹ Rule-Based Analysis")
626
- local_explanations = generate_local_explanation(features, label)
627
- for exp in local_explanations:
628
- st.markdown(f"β€’ {exp}")
629
-
630
- # Visualizations
631
- st.markdown("---")
632
- st.markdown("## πŸ“ˆ Visual Analysis")
633
-
634
- # Confidence chart
635
- fig1 = create_confidence_chart(confidence, label)
636
- st.pyplot(fig1)
637
- plt.close(fig1)
638
-
639
- st.markdown("---")
640
-
641
- # Keyword chart
642
- fig2 = create_keyword_chart(features["keyword_counts"])
643
- if fig2:
644
- st.pyplot(fig2)
645
- plt.close(fig2)
646
- else:
647
- st.info("ℹ️ No tracked emotional keywords found in this review")
648
-
649
- st.markdown("---")
650
-
651
- # Token importance
652
- fig3 = create_token_importance_chart(features["top_tokens"])
653
- if fig3:
654
- st.pyplot(fig3)
655
- plt.close(fig3)
656
-
657
- st.markdown("---")
658
-
659
- # Feature summary dashboard
660
- st.markdown("### πŸ“Š Complete Feature Dashboard")
661
- fig4 = create_feature_summary_chart(features)
662
- st.pyplot(fig4)
663
- plt.close(fig4)
664
-
665
- # Raw data
666
- st.markdown("---")
667
- with st.expander("πŸ” View Raw Feature Data (Advanced)"):
668
- st.json(features)
669
-
670
- # Download option
671
- st.markdown("---")
672
- st.markdown("### πŸ’Ύ Export Results")
673
-
674
- export_data = {
675
- "platform": platform,
676
- "review_text": review_text,
677
- "prediction": label,
678
- "confidence": confidence,
679
- "features": features
680
- }
681
-
682
- st.download_button(
683
- label="πŸ“₯ Download Analysis (JSON)",
684
- data=str(export_data),
685
- file_name="review_analysis.json",
686
- mime="application/json"
687
- )
688
-
689
- # Footer
690
- st.markdown("---")
691
- st.markdown(
692
- "<p style='text-align:center;color:#888;font-size:12px'>"
693
- "⚠️ <b>Disclaimer:</b> This tool provides AI-assisted analysis for educational and research purposes. "
694
- "Always apply human judgment and verify findings independently."
695
- "</p>",
696
- unsafe_allow_html=True
697
- )
698
- st.markdown(
699
- "<p style='text-align:center;color:#888;font-size:11px'>"
700
- "Powered by Transformers πŸ€— | Streamlit | Hugging Face Spaces"
701
- "</p>",
702
- unsafe_allow_html=True
703
- )
704
 
705
- # -------------------------
706
- # Run Application
707
- # -------------------------
708
  if __name__ == "__main__":
709
  main()
 
5
 
6
  import os
7
  import io
8
+ import numpy as np
9
  from collections import Counter
10
+ from typing import Dict, Optional, List, Tuple
11
  import streamlit as st
12
+ from transformers import pipeline, logging as hf_logging
13
  from PIL import Image
14
  import matplotlib.pyplot as plt
15
  import matplotlib
16
  import requests
17
+ import urllib.parse
18
+ import math
19
+ import warnings
20
+
# -------------------------
# Log Suppression
# -------------------------
# 1. Suppress Python warnings attributed to the transformers package.
#    Only the UserWarning and FutureWarning categories are filtered here.
warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")

# 2. Suppress Hugging Face informational logs (weights initialization, CPU usage)
#    by raising the transformers logger threshold to errors only.
hf_logging.set_verbosity_error()

# 3. Suppress TensorFlow/PyTorch logs if the backend triggers them.
#    NOTE(review): this variable is presumably only honoured if it is set before
#    TensorFlow itself is imported -- confirm against the import order above.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Set matplotlib backend for server environments (no display available)
matplotlib.use('Agg')
 
38
  # Configuration
39
  # -------------------------
# Page-level Streamlit settings: wide layout with the sidebar collapsed.
# The title/icon emoji is written as real UTF-8 (it was mis-decoded text before).
st.set_page_config(
    page_title="Deep Forensic Review Detector 🕵️",
    page_icon="🕵️",
    layout="wide",
    initial_sidebar_state="collapsed",
)
46
 
# Constants
# Hugging Face Hub identifiers of the models used by the ensemble.
FAKE_MODEL_NAME = "akshit4857/autotrain-razz4-h7crd"
SENTIMENT_MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
# Primary Image Model (High Precision)
IMAGE_MODEL_PRIMARY = "dima806/ai_generated_image_detection"
# Backup Image Model (High Reliability) -- tried only when the primary fails to load
IMAGE_MODEL_BACKUP = "umm-maybe/AI-image-detector"

# Upper bound on review length.
# NOTE(review): presumably measured in characters -- confirm at the point of use.
MAX_TEXT_LENGTH = 5000
 
 
 
57
 
58
  # -------------------------
59
  # Secrets & Environment Management
60
  # -------------------------
def get_secret(key: str, default: Optional[str] = None) -> Optional[str]:
    """Return a secret from Streamlit secrets, falling back to the environment.

    Args:
        key: Name of the secret / environment variable to look up.
        default: Value returned when the key is found in neither place.

    Returns:
        The value stored in ``st.secrets`` if the key is present there,
        otherwise ``os.environ[key]``, otherwise ``default``.
    """
    try:
        if hasattr(st, "secrets") and key in st.secrets:
            return st.secrets[key]
    except Exception:
        # Deliberate best-effort: any failure while consulting st.secrets
        # (presumably e.g. no secrets file configured -- confirm against the
        # Streamlit docs) must not prevent the environment-variable fallback.
        pass
    return os.environ.get(key, default)
68
 
 
# Credentials resolved once at import time; each is None when the key is
# found neither in Streamlit secrets nor in the environment.
HF_TOKEN = get_secret("HF_TOKEN")
OPENAI_API_KEY = get_secret("OPENAI_API_KEY")
71
 
72
  # -------------------------
73
+ # Model Loading (Ensemble)
74
  # -------------------------
@st.cache_resource(show_spinner=False)
def load_models() -> Tuple[Dict, List[str]]:
    """Load every model of the ensemble, isolating failures per model.

    No Streamlit UI calls are made here; problems are collected and returned
    so the caller can display them outside the cached function.

    Returns:
        A ``(models, errors)`` tuple. ``models`` maps the keys ``'fake'``,
        ``'sentiment'``, ``'emotion'`` and ``'image'`` to loaded pipelines;
        a key is absent when that model could not be loaded. ``errors`` is a
        list of human-readable messages, one per failure or fallback.
    """
    models: Dict = {}
    errors: List[str] = []

    # (result key, name used in error messages, pipeline task, pipeline kwargs)
    text_specs = [
        # 1. Fake Detector (Critical)
        ("fake", "Fake Detector", "text-classification",
         {"model": FAKE_MODEL_NAME}),
        # 2. Sentiment
        ("sentiment", "Sentiment Model", "sentiment-analysis",
         {"model": SENTIMENT_MODEL_NAME, "tokenizer": SENTIMENT_MODEL_NAME}),
        # 3. Emotion -- top_k=None replaces deprecated return_all_scores=True
        ("emotion", "Emotion Model", "text-classification",
         {"model": EMOTION_MODEL_NAME, "top_k": None}),
    ]
    for key, display_name, task, kwargs in text_specs:
        try:
            models[key] = pipeline(task, token=HF_TOKEN, **kwargs)
        except Exception as exc:
            # One broken model must not prevent the others from loading.
            errors.append(f"{display_name}: {exc}")

    # 4. Image (With Failover Strategy): try the primary model, then the backup.
    try:
        models["image"] = pipeline(
            "image-classification", model=IMAGE_MODEL_PRIMARY, token=HF_TOKEN
        )
    except Exception as primary_exc:
        print(f"Primary image model failed: {primary_exc}")
        try:
            models["image"] = pipeline(
                "image-classification", model=IMAGE_MODEL_BACKUP, token=HF_TOKEN
            )
        except Exception as backup_exc:
            errors.append(
                "Image Model (Both Primary & Backup failed): "
                f"primary: {primary_exc}; backup: {backup_exc}"
            )
        else:
            # Note: We cannot use st.toast here inside a cached function, so the
            # fallback (and the reason for it) is reported through `errors`.
            errors.append(
                f"Note: Switched to backup image model ({IMAGE_MODEL_BACKUP}) "
                f"due to primary failure: {primary_exc}"
            )

    return models, errors
 
118
 
# Initialize models (cached by load_models, so this is slow only on first run)
with st.spinner("🔄 Initializing Forensic AI Ensemble (This may take a minute)..."):
    ensemble, load_errors = load_models()

# Handle Errors (Outside the cached function)
# The fake-review detector is the only mandatory model: without it, stop the app.
if 'fake' not in ensemble:
    st.error("❌ Critical Error: Failed to load the core Fake Detection model.")
    if load_errors:
        # Show the messages themselves rather than the repr of the list.
        st.error("Details: " + "; ".join(load_errors))
    st.stop()

if load_errors:
    # Display non-critical errors/warnings
    with st.expander("⚠️ System Warnings (Non-Critical)", expanded=False):
        for err in load_errors:
            st.warning(err)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  # -------------------------
137
+ # Advanced Feature Extraction
138
  # -------------------------
def calculate_complexity_score(text: str) -> float:
    """Calculate a heuristic linguistic complexity score in the range 0-100.

    The score is ``5 * average word length + 50 * type-token ratio`` over the
    whitespace-separated words of ``text``, clamped to ``[0, 100]``. Words are
    compared exactly as written (case and punctuation are not normalised).

    Args:
        text: The review text to score.

    Returns:
        The clamped score as a float; ``0.0`` when ``text`` contains no words.
    """
    words = text.split()
    if not words:
        return 0.0

    word_count = len(words)
    avg_len = sum(len(w) for w in words) / word_count
    # Type-token ratio: share of distinct words among all words.
    ttr = len(set(words)) / word_count
    score = (avg_len * 5) + (ttr * 50)
    return float(min(100.0, max(0.0, score)))
147
+
def extract_deep_features(text: str, models: dict) -> Dict:
    """Run the multi-model analysis on a review and collect the results.

    Args:
        text: The review text. Only its first 512 characters are passed to
            the models; the complexity score uses the full text.
        models: Mapping of model key to callable pipeline. ``'fake'`` is
            required; ``'sentiment'`` and ``'emotion'`` are optional.

    Returns:
        A dict with the keys ``fake_probability``, ``sentiment_label``,
        ``sentiment_confidence``, ``primary_emotion``, ``emotion_confidence``,
        ``complexity_score`` and ``raw_emotion_scores``. Probabilities and
        confidences are percentages (0-100). When an optional model is missing
        or fails, its label is ``'Unknown'`` and its confidence ``0.0``.

    Raises:
        KeyError: If ``models`` has no ``'fake'`` entry. Errors raised by the
            fake-detection model itself also propagate.
    """
    import logging  # local import keeps this block self-contained

    log = logging.getLogger(__name__)

    # Character-based cut (not tokens) to keep model inputs short.
    snippet = text[:512]

    # 1. Fake Detection (the caller guarantees this model exists)
    fake_res = models['fake'](snippet)[0]
    # Compare case-insensitively: an exact match on 'Fake' would silently
    # invert the probability for labels such as 'FAKE' or 'fake'.
    label_says_fake = "fake" in str(fake_res['label']).strip().lower()
    is_fake_prob = fake_res['score'] if label_says_fake else (1 - fake_res['score'])

    # 2. Sentiment (optional; safe defaults if the model is missing or fails)
    sent_label = "Unknown"
    sent_score = 0.0
    if 'sentiment' in models:
        try:
            sent_res = models['sentiment'](snippet)[0]
            sent_label, sent_score = sent_res['label'], sent_res['score']
        except Exception:
            # Best-effort: keep the defaults, but leave a trace in the logs.
            log.warning("Sentiment analysis failed; using defaults", exc_info=True)

    # 3. Emotion (optional; safe defaults if the model is missing or fails)
    top_emo = {'label': 'Unknown', 'score': 0.0}
    emo_res = []
    if 'emotion' in models:
        try:
            raw = models['emotion'](snippet)
            # With top_k=None the pipeline is expected to return a list of
            # lists ([[{'label': 'joy', 'score': 0.9}, ...]]); also accept a
            # flat list of score dicts in case the output is not nested.
            scores = raw[0] if raw and isinstance(raw[0], list) else raw
            best = max(scores, key=lambda item: item['score'])
        except Exception:
            log.warning("Emotion analysis failed; using defaults", exc_info=True)
        else:
            emo_res, top_emo = scores, best

    # 4. Complexity (computed on the full, untruncated text)
    complexity = calculate_complexity_score(text)

    return {
        "fake_probability": is_fake_prob * 100,
        "sentiment_label": sent_label,
        "sentiment_confidence": sent_score * 100,
        "primary_emotion": top_emo['label'],
        "emotion_confidence": top_emo['score'] * 100,
        "complexity_score": complexity,
        "raw_emotion_scores": emo_res,
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  # -------------------------
193
+ # AI-Powered Dynamic Explanation
194
  # -------------------------
def generate_forensic_report(text: str, features: Dict) -> str:
    """Generate a detailed explanation of the analysis, using OpenAI if available.

    Args:
        text: The review text; at most its first 600 characters are sent.
        features: Feature dict providing ``fake_probability``,
            ``sentiment_label``, ``sentiment_confidence``, ``primary_emotion``
            and ``complexity_score``.

    Returns:
        The model-written report, or the rule-based report from
        ``generate_fallback_report`` when no API key is configured or the
        request fails / returns an unexpected payload.
    """
    if not OPENAI_API_KEY:
        return generate_fallback_report(features)

    # Only add an ellipsis when the review was actually cut.
    snippet = text[:600] + ("..." if len(text) > 600 else "")

    prompt = (
        "Act as a Forensic Data Scientist. Analyze this review.\n\n"
        "DATA:\n"
        f"- Fake Probability Model: {features['fake_probability']:.1f}%\n"
        f"- Sentiment: {features['sentiment_label']} ({features['sentiment_confidence']:.1f}%)\n"
        f"- Primary Emotion: {features['primary_emotion']}\n"
        f"- Linguistic Complexity: {features['complexity_score']:.1f}/100\n"
        f"- Review Snippet: {snippet}\n\n"
        "TASK:\n"
        "Provide a 'Forensic Verdict' explaining WHY it looks real or fake based on the combination of these factors. "
        "For example, if sentiment is extreme and emotion is purely 'joy' but complexity is low, suggest bot behavior. "
        "If complexity is high and emotion is nuanced, suggest human.\n\n"
        "FORMAT:\n"
        "Return 3 distinct paragraphs with headers: '1. Linguistic Analysis', '2. Emotional Consistency', '3. Final Verdict'."
    )

    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.4,
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=15,
        )
        # Turn HTTP error statuses into exceptions instead of parsing error bodies.
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except (
        requests.exceptions.RequestException,  # network, timeout, HTTP status
        KeyError,
        IndexError,
        TypeError,
        ValueError,  # malformed or unexpected JSON payload
    ):
        # Narrow catch: unlike a bare `except:`, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return generate_fallback_report(features)
227
+
228
def generate_fallback_report(features: Dict) -> str:
    """Build the rule-based three-section report used when the AI is offline.

    Reads ``fake_probability``, ``primary_emotion`` and ``complexity_score``
    from ``features`` and returns Markdown with the sections
    "1. Linguistic Analysis", "2. Emotional Consistency" and
    "3. Final Verdict".
    """
    fake_pct = features['fake_probability']
    emotion = features['primary_emotion']
    complexity = features['complexity_score']

    # Section 1: complexity below 40 is described as simple/repetitive.
    if complexity < 40:
        linguistic = "The vocabulary is repetitive and simple. This low lexical density often correlates with generated content or bulk-written reviews.\n\n"
    else:
        linguistic = "The sentence structure is complex and varied, which is a strong indicator of human authorship.\n\n"

    # Section 2: the emotion is only called out as suspicious when the fake
    # probability is above 70.
    strongly_fake = fake_pct > 70
    if strongly_fake and emotion in ('joy', 'surprise'):
        emotional = f"The review displays extreme levels of '{emotion}'. Fake reviews often exaggerate positive emotions to boost ratings artificially.\n\n"
    elif strongly_fake and emotion in ('anger', 'disgust'):
        emotional = f"The review is heavily driven by '{emotion}'. Competitor sabotage reviews often utilize aggressive negative emotions.\n\n"
    else:
        emotional = f"The detected emotion is '{emotion}', which appears contextually appropriate for the sentiment expressed.\n\n"

    # Section 3: the verdict flips at a fake probability of 50.
    if fake_pct > 50:
        verdict = f"Based on the ensemble analysis, there is a {fake_pct:.1f}% probability this is inauthentic."
    else:
        verdict = "Multiple data points suggest this review represents a genuine user experience."

    return "".join((
        "### 1. Linguistic Analysis\n",
        linguistic,
        "### 2. Emotional Consistency\n",
        emotional,
        "### 3. Final Verdict\n",
        verdict,
    ))
255
 
256
+ # -------------------------
257
+ # Visualization: Radar Chart
258
+ # -------------------------
259
def create_radar_chart(features: Dict) -> plt.Figure:
    """Draw the four analysis metrics on a polar (radar) chart.

    The plotted axes are fake probability, sentiment confidence, emotion
    confidence and inverted complexity (``100 - complexity_score``), on a
    radial scale fixed to 0-100. The figure is returned to the caller.
    """
    inverted_complexity = 100 - features['complexity_score']
    axis_labels = ['Fake Probability', 'Sentiment Intensity', 'Emotional Intensity', 'Complexity (Inv)']
    scores = [
        features['fake_probability'],
        features['sentiment_confidence'],
        features['emotion_confidence'],
        inverted_complexity,
    ]

    # One evenly spaced spoke per metric around the full circle.
    axis_count = len(axis_labels)
    spokes = [idx / float(axis_count) * 2 * math.pi for idx in range(axis_count)]

    # Repeat the first point at the end so the outline closes on itself.
    closed_scores = scores + scores[:1]
    closed_spokes = spokes + spokes[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"polar": True})
    ax.plot(closed_spokes, closed_scores, linewidth=2, linestyle='solid', color='#FF4B4B')
    ax.fill(closed_spokes, closed_scores, '#FF4B4B', alpha=0.25)

    # Label only the real spokes, not the duplicated closing point.
    ax.set_xticks(spokes)
    ax.set_xticklabels(axis_labels, size=10, weight='bold')

    ring_levels = [20, 40, 60, 80, 100]
    ax.set_yticks(ring_levels)
    ax.set_yticklabels([str(level) for level in ring_levels], color="grey", size=7)
    ax.set_ylim(0, 100)
    return fig
285
 
286
+ # -------------------------
287
+ # Image Functions
288
+ # -------------------------
289
def get_image_from_url(url: str, max_bytes: int = 10 * 1024 * 1024) -> Optional[Image.Image]:
    """Download an image over HTTP(S) and return it as an RGB PIL image.

    Args:
        url: Address of the image; surrounding whitespace is ignored.
        max_bytes: Upper bound on the number of bytes read from the response
            body (default 10 MiB). Larger downloads are abandoned.

    Returns:
        The decoded image converted to RGB, or ``None`` when the URL is empty
        or not http/https, the request fails, the body exceeds ``max_bytes``,
        or the payload cannot be decoded as an image.
    """
    url = url.strip() if url else ""
    if not url.lower().startswith(("http://", "https://")):
        return None

    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        # The context manager releases the connection even on early return.
        with requests.get(url, headers=headers, timeout=10, stream=True) as response:
            response.raise_for_status()
            buffer = io.BytesIO()
            # Read in chunks so an oversized body is abandoned at the cap
            # instead of being pulled into memory in full.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                buffer.write(chunk)
                if buffer.tell() > max_bytes:
                    return None
        buffer.seek(0)
        return Image.open(buffer).convert("RGB")
    except Exception:
        # Deliberate best-effort boundary: any network or decoding problem is
        # reported to the caller as "no image" rather than raised.
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
def get_google_lens_url(image_url: str) -> str:
    """Return a Google Lens "upload by URL" link for ``image_url``.

    The image URL is percent-encoded with ``urllib.parse.quote`` (default
    settings, so ``/`` is left as-is) and appended as the ``url`` parameter.
    """
    encoded_target = urllib.parse.quote(image_url)
    return "https://lens.google.com/uploadbyurl?url=" + encoded_target
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
  # -------------------------
302
+ # Main UI
303
  # -------------------------
304
def _render_text_tab(models: Dict) -> None:
    """Render the review input and, when requested, the text analysis results.

    Args:
        models: The loaded model ensemble, passed through to
            ``extract_deep_features``.
    """
    col_in1, col_in2 = st.columns([3, 1])
    with col_in1:
        review_text = st.text_area("Paste Review for Forensic Analysis", height=150)
    with col_in2:
        st.info("ℹ️ This tool combines 3 AI models (Fake Detection, Sentiment, Emotion) to achieve high precision.")

    if not st.button("🔍 Run Deep Analysis", type="primary"):
        return

    # Whitespace-only input carries nothing to analyse. Returning (instead of
    # st.stop()) lets the rest of the page keep rendering.
    if not review_text or not review_text.strip():
        st.warning("Input required.")
        return

    with st.spinner("⚙️ Running Ensemble Models..."):
        features = extract_deep_features(review_text, models)
        report = generate_forensic_report(review_text, features)

    st.markdown("---")
    high_risk = features['fake_probability'] > 70
    m1, m2, m3, m4 = st.columns(4)
    # A text delta without a leading "-" counts as positive, so "inverse"
    # would colour both labels red; only invert for the high-risk case.
    m1.metric(
        "Fake Probability",
        f"{features['fake_probability']:.1f}%",
        delta="High Risk" if high_risk else "Low Risk",
        delta_color="inverse" if high_risk else "normal",
    )
    m2.metric("Sentiment", features['sentiment_label'], f"{features['sentiment_confidence']:.1f}% conf")
    m3.metric("Primary Emotion", features['primary_emotion'].title(), f"{features['emotion_confidence']:.1f}% intensity")
    m4.metric("Linguistic Complexity", f"{features['complexity_score']:.0f}/100")

    c1, c2 = st.columns([1, 1])
    with c1:
        st.subheader("📊 Forensic Radar")
        fig = create_radar_chart(features)
        st.pyplot(fig)
        # Release the figure once Streamlit has rendered it.
        plt.close(fig)
    with c2:
        st.subheader("📋 Forensic Analyst Report")
        # The blank lines around the report end the raw-HTML block so the
        # report's Markdown (e.g. its "###" headings) is parsed, not shown
        # literally.
        st.markdown(
            '<div style="background-color:#f0f2f6;padding:20px;border-radius:10px;'
            'border-left:5px solid #ff4b4b;">\n\n'
            f"{report}\n\n</div>",
            unsafe_allow_html=True,
        )

    with st.expander("See Raw Emotion Breakdown"):
        if features['raw_emotion_scores']:
            emotions = {x['label']: x['score'] for x in features['raw_emotion_scores']}
            st.bar_chart(emotions)
        else:
            st.write("Emotion data unavailable.")


def _render_image_tab(models: Dict) -> None:
    """Render the image-URL input and, when requested, the AI-image verdict.

    Args:
        models: The loaded model ensemble; its ``'image'`` entry is called
            with the downloaded image.
    """
    st.markdown("### 🖼️ AI Image Verification")
    img_url = st.text_input("Image URL")
    if not st.button("Analyze Image"):
        return

    if not img_url or not img_url.strip():
        # Tell the user why nothing happened instead of stopping silently.
        st.warning("Please enter an image URL.")
        return

    # Check if image model loaded successfully
    if 'image' not in models:
        st.error("The Image Detection model failed to load. Please refresh or check logs.")
        return

    with st.spinner("Scanning pixels..."):
        img = get_image_from_url(img_url)
        if img is None:
            st.error("Failed to download image. Check the URL.")
            return

        col_img, col_data = st.columns([1, 2])
        with col_img:
            st.image(img, width=300)

        with col_data:
            res = models['image'](img)
            top = max(res, key=lambda x: x['score'])

            is_ai = top['label'].lower() in ['fake', 'artificial', 'ai', 'generated']
            conf = top['score'] * 100

            if is_ai:
                st.error(f"🚨 **AI GENERATED** ({conf:.1f}%)")
            else:
                st.success(f"✅ **REAL PHOTOGRAPH** ({conf:.1f}%)")

            # st.progress only accepts floats within [0.0, 1.0].
            st.progress(min(max(float(top['score']), 0.0), 1.0))

            lens = get_google_lens_url(img_url)
            st.markdown(f"[🔎 Verify on Google Lens]({lens})")


def main():
    """Render the app: page header plus the text and image forensics tabs."""
    st.markdown("<h1 style='text-align:center'>🕵️ Deep Forensic Review Investigator</h1>", unsafe_allow_html=True)
    st.markdown("<p style='text-align:center;color:#666;'>Multi-Aspect Ensemble Analysis | Text & Image Forensics</p>", unsafe_allow_html=True)
    st.divider()

    tab1, tab2 = st.tabs(["📝 Multi-Aspect Text Forensics", "🖼️ Image Forensics"])

    # Each tab has its own helper so an early exit in one tab is a plain
    # return; st.stop() would halt the script before the other tab rendered.
    # --- TAB 1: TEXT ---
    with tab1:
        _render_text_tab(ensemble)

    # --- TAB 2: IMAGE ---
    with tab2:
        _render_image_tab(ensemble)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
 
 
 
394
  if __name__ == "__main__":
395
  main()