Vedag812 committed on
Commit
4d007d4
·
verified ·
1 Parent(s): e10bc93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -59
app.py CHANGED
@@ -64,13 +64,16 @@ vectorizer_path = "tfidf_vectorizer.joblib"
64
  model = joblib.load(model_path)
65
  vectorizer = joblib.load(vectorizer_path)
66
 
67
- # Session statistics
68
- session_stats = {"total": 0, "spam": 0, "not_spam": 0, "history": []}
69
-
70
  # Spam indicators
71
  SPAM_KEYWORDS = ['win', 'winner', 'congratulations', 'free', 'urgent', 'click', 'verify',
72
  'account', 'suspended', 'prize', 'lottery', 'cash', 'credit', 'loan']
73
 
 
 
 
 
 
 
74
  def analyze_email(message):
75
  """Detailed email analysis"""
76
  analysis = {}
@@ -78,7 +81,10 @@ def analyze_email(message):
78
  # Basic stats
79
  analysis['word_count'] = len(message.split())
80
  analysis['char_count'] = len(message)
81
- analysis['has_urls'] = bool(re.search(r'http\S+|www\S+', message))
 
 
 
82
  analysis['has_email'] = bool(re.search(r'\S+@\S+', message))
83
 
84
  # Suspicious patterns
@@ -102,7 +108,7 @@ def highlight_spam_words(message, keywords):
102
 
103
  def classify_email(message):
104
  if not message.strip():
105
- return "<div style='color:gray;'>Empty message</div>", "", ""
106
 
107
  try:
108
  # Get analysis
@@ -113,19 +119,7 @@ def classify_email(message):
113
  vec = vectorizer.transform([cleaned])
114
  pred = model.predict(vec)[0]
115
 
116
- # Update stats
117
- session_stats['total'] += 1
118
- if pred == 1:
119
- session_stats['spam'] += 1
120
- result_type = "Spam"
121
- else:
122
- session_stats['not_spam'] += 1
123
- result_type = "Not Spam"
124
-
125
- session_stats['history'].append({
126
- 'message': message[:50] + '...' if len(message) > 50 else message,
127
- 'result': result_type
128
- })
129
 
130
  # Result card
131
  if pred == 1:
@@ -150,7 +144,7 @@ def classify_email(message):
150
  <table style='width:100%; border-collapse: collapse;'>
151
  <tr><td style='padding:5px;'><b>Word Count:</b></td><td>{analysis['word_count']}</td></tr>
152
  <tr><td style='padding:5px;'><b>Character Count:</b></td><td>{analysis['char_count']}</td></tr>
153
- <tr><td style='padding:5px;'><b>Contains URLs:</b></td><td>{'⚠️ Yes' if analysis['has_urls'] else 'βœ“ No'}</td></tr>
154
  <tr><td style='padding:5px;'><b>Contains Emails:</b></td><td>{'Yes' if analysis['has_email'] else 'No'}</td></tr>
155
  <tr><td style='padding:5px;'><b>ALL CAPS Words:</b></td><td>{analysis['all_caps_words']}</td></tr>
156
  <tr><td style='padding:5px;'><b>Exclamation Marks:</b></td><td>{analysis['exclamation_marks']}</td></tr>
@@ -158,10 +152,23 @@ def classify_email(message):
158
  </div>
159
  """
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  # Highlighted message with spam keywords
162
  if analysis['spam_keywords']:
163
  keywords_html = f"""
164
- <div style='background-color:#fff3cd; padding:15px; border-radius:8px; margin-top:10px; border-left:4px solid #ff9800;'>
165
  <h3 style='margin-top:0; color:#333;'>⚠️ Suspicious Keywords Found</h3>
166
  <p style='margin:5px 0;'><b>Keywords:</b> {', '.join(analysis['spam_keywords'])}</p>
167
  <div style='background-color:white; padding:10px; border-radius:5px; margin-top:10px; font-size:14px; line-height:1.6;'>
@@ -172,40 +179,15 @@ def classify_email(message):
172
  else:
173
  keywords_html = ""
174
 
175
- return result_html, details_html, keywords_html
176
 
177
  except Exception as e:
178
  print(f"Prediction error: {e}")
179
- return "<div style='color:gray;'>Error during classification</div>", "", ""
180
 
181
  def get_statistics():
182
  """Generate statistics dashboard"""
183
- if session_stats['total'] == 0:
184
- return "<div style='text-align:center; color:gray; padding:20px;'>No emails checked yet</div>"
185
-
186
- spam_pct = (session_stats['spam'] / session_stats['total']) * 100
187
- not_spam_pct = (session_stats['not_spam'] / session_stats['total']) * 100
188
-
189
- stats_html = f"""
190
- <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding:20px; border-radius:10px; color:white;'>
191
- <h2 style='margin-top:0; text-align:center;'>πŸ“ˆ Session Statistics</h2>
192
- <div style='display:grid; grid-template-columns: repeat(3, 1fr); gap:15px; margin-top:15px;'>
193
- <div style='background-color:rgba(255,255,255,0.2); padding:15px; border-radius:8px; text-align:center;'>
194
- <div style='font-size:32px; font-weight:bold;'>{session_stats['total']}</div>
195
- <div style='font-size:14px;'>Total Checked</div>
196
- </div>
197
- <div style='background-color:rgba(255,77,77,0.3); padding:15px; border-radius:8px; text-align:center;'>
198
- <div style='font-size:32px; font-weight:bold;'>{session_stats['spam']}</div>
199
- <div style='font-size:14px;'>Spam ({spam_pct:.1f}%)</div>
200
- </div>
201
- <div style='background-color:rgba(77,255,77,0.3); padding:15px; border-radius:8px; text-align:center;'>
202
- <div style='font-size:32px; font-weight:bold;'>{session_stats['not_spam']}</div>
203
- <div style='font-size:14px;'>Legitimate ({not_spam_pct:.1f}%)</div>
204
- </div>
205
- </div>
206
- </div>
207
- """
208
- return stats_html
209
 
210
  def process_bulk_emails(file):
211
  """Process bulk emails from file"""
@@ -298,24 +280,25 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(), title="Enhanced Email Spam Class
298
  output_label = gr.HTML(label="πŸ“Š Result")
299
 
300
  analysis_output = gr.HTML(label="πŸ“‹ Analysis Details")
 
301
  keywords_output = gr.HTML(label="πŸ”Ž Keyword Highlights")
302
 
303
  gr.Examples(
304
  examples=examples,
305
  inputs=input_text,
306
- outputs=[output_label, analysis_output, keywords_output],
307
  fn=classify_email
308
  )
309
 
310
  submit_btn.click(
311
  fn=classify_email,
312
  inputs=input_text,
313
- outputs=[output_label, analysis_output, keywords_output]
314
  )
315
  input_text.submit(
316
  fn=classify_email,
317
  inputs=input_text,
318
- outputs=[output_label, analysis_output, keywords_output]
319
  )
320
 
321
  # Bulk Processing Tab
@@ -334,14 +317,6 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(), title="Enhanced Email Spam Class
334
  inputs=file_input,
335
  outputs=[bulk_output, download_output]
336
  )
337
-
338
- # Statistics Tab
339
- with gr.Tab("πŸ“ˆ Statistics"):
340
- stats_display = gr.HTML()
341
- refresh_btn = gr.Button("πŸ”„ Refresh Statistics", variant="primary")
342
-
343
- refresh_btn.click(fn=get_statistics, outputs=stats_display)
344
- demo.load(fn=get_statistics, outputs=stats_display)
345
 
346
  if __name__ == "__main__":
347
  demo.launch()
 
64
  model = joblib.load(model_path)
65
  vectorizer = joblib.load(vectorizer_path)
66
 
 
 
 
67
  # Spam indicators
68
  SPAM_KEYWORDS = ['win', 'winner', 'congratulations', 'free', 'urgent', 'click', 'verify',
69
  'account', 'suspended', 'prize', 'lottery', 'cash', 'credit', 'loan']
70
 
71
def extract_urls(message):
    """Extract all http(s) URLs from the message.

    Args:
        message: Raw email text to scan.

    Returns:
        A list of URL strings in order of appearance (duplicates are kept).
        Trailing sentence punctuation and unbalanced closing parentheses are
        trimmed, and a returned URL never contains whitespace, quotes,
        backticks, or angle brackets.
    """
    # Stop at whitespace and at HTML-significant characters. A permissive
    # class range such as `$-_` also matches < > ' = and would let markup
    # ride along inside a "URL" that is later rendered as a link.
    url_pattern = r'https?://[^\s<>"\'`]+'
    trailing_punctuation = '.,;:!?'

    urls = []
    for candidate in re.findall(url_pattern, message, flags=re.IGNORECASE):
        url = candidate.rstrip(trailing_punctuation)
        # "(see http://a.com/x)" -> drop the ")" that closes the sentence, but
        # keep balanced ones such as ".../wiki/Foo_(bar)".
        while url.endswith(')') and url.count(')') > url.count('('):
            url = url[:-1].rstrip(trailing_punctuation)
        # Skip matches that were nothing but a scheme, e.g. "http://...".
        if url.partition('://')[2]:
            urls.append(url)
    return urls
76
+
77
  def analyze_email(message):
78
  """Detailed email analysis"""
79
  analysis = {}
 
81
  # Basic stats
82
  analysis['word_count'] = len(message.split())
83
  analysis['char_count'] = len(message)
84
+
85
+ # Extract URLs
86
+ analysis['urls'] = extract_urls(message)
87
+ analysis['has_urls'] = len(analysis['urls']) > 0
88
  analysis['has_email'] = bool(re.search(r'\S+@\S+', message))
89
 
90
  # Suspicious patterns
 
108
 
109
  def classify_email(message):
110
  if not message.strip():
111
+ return "<div style='color:gray;'>Empty message</div>", "", "", ""
112
 
113
  try:
114
  # Get analysis
 
119
  vec = vectorizer.transform([cleaned])
120
  pred = model.predict(vec)[0]
121
 
122
+ result_type = "Spam" if pred == 1 else "Not Spam"
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  # Result card
125
  if pred == 1:
 
144
  <table style='width:100%; border-collapse: collapse;'>
145
  <tr><td style='padding:5px;'><b>Word Count:</b></td><td>{analysis['word_count']}</td></tr>
146
  <tr><td style='padding:5px;'><b>Character Count:</b></td><td>{analysis['char_count']}</td></tr>
147
+ <tr><td style='padding:5px;'><b>Contains URLs:</b></td><td>{'⚠️ Yes (' + str(len(analysis['urls'])) + ')' if analysis['has_urls'] else 'βœ“ No'}</td></tr>
148
  <tr><td style='padding:5px;'><b>Contains Emails:</b></td><td>{'Yes' if analysis['has_email'] else 'No'}</td></tr>
149
  <tr><td style='padding:5px;'><b>ALL CAPS Words:</b></td><td>{analysis['all_caps_words']}</td></tr>
150
  <tr><td style='padding:5px;'><b>Exclamation Marks:</b></td><td>{analysis['exclamation_marks']}</td></tr>
 
152
  </div>
153
  """
154
 
155
+ # URLs detected
156
+ if analysis['urls']:
157
+ urls_html = f"""
158
+ <div style='background-color:#fff3cd; padding:15px; border-radius:8px; margin-top:10px; border-left:4px solid #ff9800;'>
159
+ <h3 style='margin-top:0; color:#333;'>πŸ”— URLs Detected</h3>
160
+ <div style='background-color:white; padding:10px; border-radius:5px; font-size:14px;'>
161
+ {'<br>'.join(['<a href="' + url + '" target="_blank" style="color:#d32f2f; word-break:break-all;">' + url + '</a>' for url in analysis['urls']])}
162
+ </div>
163
+ </div>
164
+ """
165
+ else:
166
+ urls_html = ""
167
+
168
  # Highlighted message with spam keywords
169
  if analysis['spam_keywords']:
170
  keywords_html = f"""
171
+ <div style='background-color:#ffebee; padding:15px; border-radius:8px; margin-top:10px; border-left:4px solid #f44336;'>
172
  <h3 style='margin-top:0; color:#333;'>⚠️ Suspicious Keywords Found</h3>
173
  <p style='margin:5px 0;'><b>Keywords:</b> {', '.join(analysis['spam_keywords'])}</p>
174
  <div style='background-color:white; padding:10px; border-radius:5px; margin-top:10px; font-size:14px; line-height:1.6;'>
 
179
  else:
180
  keywords_html = ""
181
 
182
+ return result_html, details_html, urls_html, keywords_html
183
 
184
  except Exception as e:
185
  print(f"Prediction error: {e}")
186
+ return "<div style='color:gray;'>Error during classification</div>", "", "", ""
187
 
188
  def get_statistics():
189
  """Return an empty string; stub left in place after the session-statistics dashboard code was removed."""
190
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  def process_bulk_emails(file):
193
  """Process bulk emails from file"""
 
280
  output_label = gr.HTML(label="πŸ“Š Result")
281
 
282
  analysis_output = gr.HTML(label="πŸ“‹ Analysis Details")
283
+ urls_output = gr.HTML(label="πŸ”— URLs Found")
284
  keywords_output = gr.HTML(label="πŸ”Ž Keyword Highlights")
285
 
286
  gr.Examples(
287
  examples=examples,
288
  inputs=input_text,
289
+ outputs=[output_label, analysis_output, urls_output, keywords_output],
290
  fn=classify_email
291
  )
292
 
293
  submit_btn.click(
294
  fn=classify_email,
295
  inputs=input_text,
296
+ outputs=[output_label, analysis_output, urls_output, keywords_output]
297
  )
298
  input_text.submit(
299
  fn=classify_email,
300
  inputs=input_text,
301
+ outputs=[output_label, analysis_output, urls_output, keywords_output]
302
  )
303
 
304
  # Bulk Processing Tab
 
317
  inputs=file_input,
318
  outputs=[bulk_output, download_output]
319
  )
 
 
 
 
 
 
 
 
320
 
321
  if __name__ == "__main__":
322
  demo.launch()