Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,35 +1,34 @@
|
|
| 1 |
-
import
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import torch
|
| 4 |
-
import datetime
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
| 7 |
MODEL_ID_AI = "lvulpecula/ai-detector-ai"
|
| 8 |
MODEL_ID_FAKE = "lvulpecula/ai-detector-fake"
|
| 9 |
-
|
| 10 |
-
# Thresholds
|
| 11 |
-
THRESHOLD_LOGIT_DIFF_AI = 1.5
|
| 12 |
-
THRESHOLD_LOGIT_DIFF_FAKE = 1.5
|
| 13 |
-
UNCERTAIN_MARGIN = 0.5
|
| 14 |
-
|
| 15 |
-
# Load models and tokenizers
|
| 16 |
tokenizer_ai = AutoTokenizer.from_pretrained(MODEL_ID_AI)
|
| 17 |
model_ai = AutoModelForSequenceClassification.from_pretrained(MODEL_ID_AI)
|
| 18 |
-
|
| 19 |
tokenizer_fake = AutoTokenizer.from_pretrained(MODEL_ID_FAKE)
|
| 20 |
model_fake = AutoModelForSequenceClassification.from_pretrained(MODEL_ID_FAKE)
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def get_confidence(logits, positive_class=1):
    """Return the softmax probability of *positive_class* from a 1-D logits tensor."""
    return torch.softmax(logits, dim=0)[positive_class].item()
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
if not text:
|
| 30 |
-
return {"error": "Empty input text."}
|
| 31 |
if len(text) < 30:
|
| 32 |
-
return {"error": "Text too short to analyze reliably."}
|
| 33 |
|
| 34 |
inputs_ai = tokenizer_ai(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
|
| 35 |
inputs_fake = tokenizer_fake(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
|
|
@@ -37,55 +36,34 @@ def predict(text):
|
|
| 37 |
with torch.no_grad():
|
| 38 |
ai_logits = model_ai(**inputs_ai).logits[0]
|
| 39 |
ai_diff = ai_logits[1] - ai_logits[0]
|
| 40 |
-
|
| 41 |
-
if ai_diff > THRESHOLD_LOGIT_DIFF_AI:
|
| 42 |
ai_label = "π€ AI-generated"
|
| 43 |
confidence_ai = get_confidence(ai_logits, 1)
|
| 44 |
-
|
| 45 |
-
elif abs(ai_diff) < UNCERTAIN_MARGIN:
|
| 46 |
ai_label = "β Uncertain"
|
| 47 |
confidence_ai = None
|
| 48 |
-
is_ai = False
|
| 49 |
else:
|
| 50 |
ai_label = "π€ Human-written"
|
| 51 |
confidence_ai = get_confidence(ai_logits, 0)
|
| 52 |
-
is_ai = False
|
| 53 |
|
| 54 |
fake_logits = model_fake(**inputs_fake).logits[0]
|
| 55 |
fake_diff = fake_logits[1] - fake_logits[0]
|
| 56 |
-
|
| 57 |
-
if fake_diff > THRESHOLD_LOGIT_DIFF_FAKE:
|
| 58 |
fake_label = "β οΈ Fake news"
|
| 59 |
confidence_fake = get_confidence(fake_logits, 1)
|
| 60 |
-
|
| 61 |
-
elif abs(fake_diff) < UNCERTAIN_MARGIN:
|
| 62 |
fake_label = "β Uncertain"
|
| 63 |
confidence_fake = None
|
| 64 |
-
is_fake = False
|
| 65 |
else:
|
| 66 |
fake_label = "β
True information"
|
| 67 |
confidence_fake = get_confidence(fake_logits, 0)
|
| 68 |
-
is_fake = False
|
| 69 |
|
| 70 |
-
return {
|
| 71 |
"ai_label": ai_label,
|
| 72 |
"confidence_ai": round(confidence_ai * 100, 2) if confidence_ai is not None else "N/A",
|
| 73 |
-
"ai_logit_margin": round(ai_diff.item(), 3),
|
| 74 |
-
"is_ai_generated": is_ai,
|
| 75 |
"fake_label": fake_label,
|
| 76 |
-
"confidence_fake": round(confidence_fake * 100, 2) if confidence_fake is not None else "N/A"
|
| 77 |
-
|
| 78 |
-
"is_fake": is_fake
|
| 79 |
-
}
|
| 80 |
-
|
| 81 |
-
# Gradio interface: wraps predict(text) in a simple textbox-to-JSON web UI.
# (This is the removed side of the diff — the commit replaces it with a Flask API.)
interface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Article Text", lines=10, placeholder="Paste article text here..."),
    outputs="json",
    title="AI News Detector",
    description="This tool predicts whether a news article is AI-generated and/or fake.",
    allow_flagging="never"
)
|
| 90 |
|
| 91 |
-
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify
from flask_cors import CORS  # CORS so the browser extension can call this API cross-origin
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

app = Flask(__name__)
CORS(app)

# Hugging Face Hub model IDs for the two binary classifiers.
MODEL_ID_AI = "lvulpecula/ai-detector-ai"
MODEL_ID_FAKE = "lvulpecula/ai-detector-fake"

# Load tokenizers and models once at startup so every request reuses them.
tokenizer_ai = AutoTokenizer.from_pretrained(MODEL_ID_AI)
model_ai = AutoModelForSequenceClassification.from_pretrained(MODEL_ID_AI)
tokenizer_fake = AutoTokenizer.from_pretrained(MODEL_ID_FAKE)
model_fake = AutoModelForSequenceClassification.from_pretrained(MODEL_ID_FAKE)
| 19 |
def get_confidence(logits, positive_class=1):
    """Softmax probability assigned to *positive_class* (``logits``: 1-D tensor)."""
    distribution = logits.softmax(dim=0)
    return float(distribution[positive_class])
|
| 22 |
|
| 23 |
+
@app.route("/predict", methods=["POST"])
def predict():
    """Classify posted article text as AI/human-written and fake/true news.

    Expects a JSON body ``{"text": "<article text>"}``.
    Returns JSON with a label and confidence percentage per classifier,
    or ``{"error": ...}`` with HTTP 400 on missing/too-short input.
    """
    # Decision thresholds on the logit margin (class 1 minus class 0),
    # restored as named values from the pre-Flask version of this file.
    threshold_logit_diff = 1.5
    uncertain_margin = 0.5

    # get_json() raises/returns None for a missing or non-JSON body;
    # silent=True plus the `or {}` fallback turns that into a clean 400
    # below instead of an unhandled 500. `or ""` also guards "text": null.
    data = request.get_json(silent=True) or {}
    text = (data.get("text") or "").strip()

    if not text:
        return jsonify({"error": "Empty input text."}), 400
    if len(text) < 30:
        return jsonify({"error": "Text too short to analyze reliably."}), 400

    inputs_ai = tokenizer_ai(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs_fake = tokenizer_fake(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Inference only — no gradients needed.
    with torch.no_grad():
        # AI-vs-human classifier (class 1 = AI-generated).
        ai_logits = model_ai(**inputs_ai).logits[0]
        ai_diff = ai_logits[1] - ai_logits[0]
        if ai_diff > threshold_logit_diff:
            # NOTE(review): emoji labels reconstructed from mojibake in the
            # scraped source — confirm exact label strings against the repo.
            ai_label = "🤖 AI-generated"
            confidence_ai = get_confidence(ai_logits, 1)
        elif abs(ai_diff) < uncertain_margin:
            ai_label = "❓ Uncertain"
            confidence_ai = None
        else:
            ai_label = "🧑 Human-written"
            confidence_ai = get_confidence(ai_logits, 0)

        # Fake-vs-true classifier (class 1 = fake).
        fake_logits = model_fake(**inputs_fake).logits[0]
        fake_diff = fake_logits[1] - fake_logits[0]
        if fake_diff > threshold_logit_diff:
            fake_label = "⚠️ Fake news"
            confidence_fake = get_confidence(fake_logits, 1)
        elif abs(fake_diff) < uncertain_margin:
            fake_label = "❓ Uncertain"
            confidence_fake = None
        else:
            fake_label = "✅ True information"
            confidence_fake = get_confidence(fake_logits, 0)

    return jsonify({
        "ai_label": ai_label,
        "confidence_ai": round(confidence_ai * 100, 2) if confidence_ai is not None else "N/A",
        "fake_label": fake_label,
        "confidence_fake": round(confidence_fake * 100, 2) if confidence_fake is not None else "N/A"
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
+
# Entry point: bind on all interfaces at port 7860 (the Hugging Face Spaces default).
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)