Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -66,19 +66,19 @@ with open('label_encoder.pkl', 'rb') as file:
|
|
| 66 |
def predict_phishing(url, html):
    """Classify a URL / HTML pair with the loaded model.

    Args:
        url: URL text entered by the user.
        html: HTML snippet entered by the user.

    Returns:
        A ``(category, probability)`` tuple of strings. ``category`` is the
        label decoded by ``label_encoder``, capitalized. ``probability`` is
        the model's first output value for this sample formatted to four
        decimals (presumably the positive-class score; confirm against the
        model definition). Both are empty strings when neither input
        contains any non-whitespace text.
    """
    # Guard against None as well as blank text so the preprocessing helpers
    # always receive a string.
    url = url or ""
    html = html or ""
    if not url.strip() and not html.strip():
        # Nothing to classify: skip the model instead of scoring pure padding.
        return "", ""

    cleaned_url = preprocess_url(url)
    cleaned_html = preprocess_html(html)

    new_url_sequences = url_tokenizer.texts_to_sequences([cleaned_url])
    new_url_padded = pad_sequences(
        new_url_sequences,
        maxlen=max_url_length,
        padding='post',
        truncating='post',
    )

    new_html_sequences = html_tokenizer.texts_to_sequences([cleaned_html])
    new_html_padded = pad_sequences(
        new_html_sequences,
        maxlen=max_html_length,
        padding='post',
        truncating='post',
    )

    new_predictions_prob = model.predict([new_url_padded, new_html_padded])
    # Adjust threshold if needed. The result is flattened because the label
    # encoder expects a 1-D array of class indices, not a column vector.
    new_predictions = (new_predictions_prob > 0.6).astype(int).ravel()

    predicted_category = label_encoder.inverse_transform(new_predictions)[0]
    predicted_probability = f"{new_predictions_prob[0][0]:.4f}"

    return predicted_category.capitalize(), predicted_probability
|
| 83 |
|
| 84 |
# Create Gradio Interface
|
|
@@ -86,14 +86,17 @@ interface = gr.Interface(
|
|
| 86 |
fn=predict_phishing,
|
| 87 |
inputs=[
|
| 88 |
gr.components.Textbox(label="URL"),
|
| 89 |
-
gr.components.Textbox(label="HTML Snippet")
|
| 90 |
],
|
| 91 |
outputs=[
|
| 92 |
gr.components.Textbox(label="Predicted Category"),
|
| 93 |
gr.components.Textbox(label="Predicted Probability")
|
| 94 |
],
|
| 95 |
title="Phishing Detection Model",
|
| 96 |
-
description="Enter a URL and its HTML content to predict if it's spam or legitimate."
|
|
|
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
|
| 99 |
# Launch the Gradio interface
|
|
|
|
| 66 |
def predict_phishing(url, html):
    """Classify a URL / HTML pair with the loaded model.

    Args:
        url: URL text entered by the user.
        html: HTML snippet entered by the user.

    Returns:
        A ``(category, probability)`` tuple of strings. ``category`` is the
        label decoded by ``label_encoder``, capitalized. ``probability`` is
        the model's first output value for this sample formatted to four
        decimals (presumably the positive-class score; confirm against the
        model definition). Both are empty strings when neither input
        contains any non-whitespace text.
    """
    # Guard against None as well as blank text so the preprocessing helpers
    # always receive a string.
    url = url or ""
    html = html or ""
    if not url.strip() and not html.strip():
        # Nothing to classify: skip the model instead of scoring pure padding.
        return "", ""

    cleaned_url = preprocess_url(url)
    cleaned_html = preprocess_html(html)

    new_url_sequences = url_tokenizer.texts_to_sequences([cleaned_url])
    new_url_padded = pad_sequences(
        new_url_sequences,
        maxlen=max_url_length,
        padding='post',
        truncating='post',
    )

    new_html_sequences = html_tokenizer.texts_to_sequences([cleaned_html])
    new_html_padded = pad_sequences(
        new_html_sequences,
        maxlen=max_html_length,
        padding='post',
        truncating='post',
    )

    new_predictions_prob = model.predict([new_url_padded, new_html_padded])
    # Adjust threshold if needed. The result is flattened because the label
    # encoder expects a 1-D array of class indices, not a column vector.
    new_predictions = (new_predictions_prob > 0.6).astype(int).ravel()

    predicted_category = label_encoder.inverse_transform(new_predictions)[0]
    predicted_probability = f"{new_predictions_prob[0][0]:.4f}"

    return predicted_category.capitalize(), predicted_probability
|
| 83 |
|
| 84 |
# Create Gradio Interface
|
|
|
|
| 86 |
fn=predict_phishing,
|
| 87 |
inputs=[
|
| 88 |
gr.components.Textbox(label="URL"),
|
| 89 |
+
gr.components.Textbox(label="HTML Snippet", lines=10, placeholder="Paste HTML content here")
|
| 90 |
],
|
| 91 |
outputs=[
|
| 92 |
gr.components.Textbox(label="Predicted Category"),
|
| 93 |
gr.components.Textbox(label="Predicted Probability")
|
| 94 |
],
|
| 95 |
title="Phishing Detection Model",
|
| 96 |
+
description="Enter a URL and its HTML content to predict if it's spam or legitimate. It's recommended to provide both for accurate results.",
|
| 97 |
+
theme="huggingface",
|
| 98 |
+
live=True,
|
| 99 |
+
css=".interface-container { border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; }"
|
| 100 |
)
|
| 101 |
|
| 102 |
# Launch the Gradio interface
|