Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,107 +1,110 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import
|
3 |
-
RobertaTokenizer, RobertaForSequenceClassification,
|
4 |
-
AutoTokenizer, AutoModelForSequenceClassification
|
5 |
-
)
|
6 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
model_options = {
|
10 |
-
"GoalZero/aidetection-ada-v0.2": "GoalZero/aidetection-ada-v0.2",
|
11 |
-
"GoalZero/aidetection-ada-v0.1": "GoalZero/aidetection-ada-v0.1",
|
12 |
-
"GoalZero/babbage-mini-v0.1": "GoalZero/babbage-mini-v0.1",
|
13 |
"GoalZero/ada-2534": "GoalZero/ada-2534"
|
14 |
}
|
15 |
|
16 |
-
# Initialize
|
17 |
model = None
|
18 |
tokenizer = None
|
19 |
current_model_name = None
|
20 |
|
21 |
def load_model(model_name):
|
22 |
-
"""Load model and tokenizer, handling both RoBERTa and DeBERTa"""
|
23 |
try:
|
24 |
-
|
25 |
-
|
26 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
27 |
-
else:
|
28 |
-
model = RobertaForSequenceClassification.from_pretrained(model_name)
|
29 |
-
tokenizer = RobertaTokenizer.from_pretrained(model_name)
|
30 |
return model, tokenizer
|
31 |
except Exception as e:
|
32 |
raise Exception(f"Failed to load model {model_name}: {str(e)}")
|
33 |
|
34 |
# Load default model
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
current_model_name = default_model
|
39 |
-
except Exception as e:
|
40 |
-
print(f"Error loading default model: {str(e)}")
|
41 |
|
|
|
|
|
|
|
42 |
def classify_text(text, model_choice):
|
43 |
global model, tokenizer, current_model_name
|
44 |
-
|
45 |
try:
|
46 |
# Reload model if needed
|
47 |
if model is None or model_choice != current_model_name:
|
48 |
model, tokenizer = load_model(model_choice)
|
49 |
current_model_name = model_choice
|
50 |
-
|
51 |
-
#
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
55 |
inputs = tokenizer(
|
56 |
-
|
57 |
return_tensors='pt',
|
58 |
padding=True,
|
59 |
truncation=True,
|
60 |
max_length=128
|
61 |
-
)
|
62 |
-
|
63 |
-
# Predict
|
64 |
with torch.no_grad():
|
65 |
outputs = model(**inputs)
|
66 |
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
|
67 |
prob_ai = probabilities[0][1].item()
|
68 |
-
|
69 |
-
return {
|
70 |
-
"AI Probability": round(prob_ai * 100, 10),
|
71 |
-
"Model used": model_choice
|
72 |
-
}
|
73 |
-
except Exception as e:
|
74 |
return {
|
75 |
-
"
|
|
|
76 |
"Model used": model_choice
|
77 |
}
|
|
|
|
|
78 |
|
79 |
-
#
|
|
|
|
|
80 |
iface = gr.Interface(
|
81 |
fn=classify_text,
|
82 |
inputs=[
|
83 |
-
gr.Textbox(
|
84 |
-
lines=2,
|
85 |
-
placeholder="Enter text here...",
|
86 |
-
label="Input Text"
|
87 |
-
),
|
88 |
gr.Dropdown(
|
89 |
choices=list(model_options.keys()),
|
90 |
-
value=
|
91 |
label="Select Model Version"
|
92 |
)
|
93 |
],
|
94 |
outputs=gr.JSON(label="Results"),
|
95 |
-
title="GoalZero Ada AI Detection",
|
96 |
-
description="Enter text to get the probability of it being AI-written.
|
97 |
-
examples=[
|
98 |
-
["Waymo is an American autonomous driving technology company that originated as the Google Self-Driving Car Project in 2009. It is now a subsidiary of Alphabet Inc., headquartered in Mountain View, California. The name \"Waymo\" was adopted in December 2016 when the project was rebranded and spun out of Google to focus on developing fully autonomous vehicles aimed at improving transportation safety and convenience", "GoalZero/babbage-mini-v0.1"],
|
99 |
-
["WWII demonstrated the importance of alliances in global conflicts. The Axis and Allied powers were formed as countries sought to protect their interests and expand their influence. This lesson underscores the potential for future global conflicts to involve complex alliances, similar to the Cold War era’s NATO and Warsaw Pact alignments.", "GoalZero/aidetection-ada-v0.2"],
|
100 |
-
["Eustace was a thorough gentleman. There was candor in his quack, and affability in his waddle; and underneath his snowy down beat a pure and sympathetic heart. In short, he was a most exemplary duck.", "GoalZero/aidetection-ada-v0.1"],
|
101 |
-
["This is an example of AI-written text using the DeBERTa model for testing purposes.", "GoalZero/ada-2534"]
|
102 |
-
]
|
103 |
)
|
104 |
|
105 |
-
# Launch the app
|
106 |
if __name__ == "__main__":
|
107 |
iface.launch(share=True)
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
|
|
|
|
|
|
|
3 |
import torch
|
4 |
+
import math
|
5 |
+
|
6 |
+
# -----------------------------
|
7 |
+
# Device setup
|
8 |
+
# -----------------------------
|
9 |
+
# Pick the compute device once: use CUDA when PyTorch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# -----------------------------
# GPT-2 for perplexity
# -----------------------------
# Tokenizer and language model are both loaded from the "gpt2" checkpoint.
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_model = gpt2_model.to(device)
# Evaluation mode: this model is only used to score text.
gpt2_model.eval()
|
18 |
|
19 |
+
def compute_perplexity(sentence):
    """Score *sentence* with GPT-2 and return its scaled perplexity.

    The text is tokenized with ``gpt2_tokenizer`` and passed through
    ``gpt2_model`` with the input ids reused as labels, so ``outputs.loss``
    is the model's language-modeling loss on the text. Perplexity is
    ``exp(loss)``.

    Args:
        sentence: Text to score.

    Returns:
        ``exp(loss) / 100`` rounded to two decimals. The division by 100 is
        the original "normalize" step; the reason for that constant is not
        shown in this file.

    Raises:
        ValueError: If the text tokenizes to fewer than two tokens, in which
            case there is no next-token target to score.
    """
    # Truncate to the model's position limit; longer inputs would otherwise
    # fail inside the model's position embeddings.
    max_tokens = gpt2_model.config.n_positions
    tokens = gpt2_tokenizer(
        sentence,
        return_tensors='pt',
        truncation=True,
        max_length=max_tokens,
    ).to(device)

    # With labels == input_ids the loss is taken over shifted targets, so at
    # least two tokens are needed; fewer gives an error or a NaN loss.
    if tokens["input_ids"].shape[-1] < 2:
        raise ValueError("Text is too short to compute perplexity.")

    with torch.no_grad():
        outputs = gpt2_model(**tokens, labels=tokens["input_ids"])
    loss = outputs.loss.item()
    ppl = math.exp(loss)
    # Normalize and round
    return round(ppl / 100, 2)
|
27 |
+
|
28 |
+
# -----------------------------
|
29 |
+
# AI detection models
|
30 |
+
# -----------------------------
|
31 |
# Detection checkpoints offered in the UI. The keys populate the dropdown;
# each value currently mirrors its key.
model_options = {
    "GoalZero/ada-2534": "GoalZero/ada-2534",
}

# Module-level slots for the active classifier, its tokenizer, and the name
# it was loaded under. All start empty.
model = tokenizer = current_model_name = None
|
39 |
|
40 |
def load_model(model_name):
    """Load a sequence-classification model and its tokenizer.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has been moved to
        ``device``.

    Raises:
        Exception: If anything goes wrong while loading. The message names
            the model, and the underlying error is attached as
            ``__cause__``.
    """
    try:
        # Local names differ from the module-level `model` / `tokenizer`
        # globals so this function cannot be mistaken for mutating them.
        loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    except Exception as e:
        # Chain explicitly so the traceback shows the real failure as the
        # direct cause rather than as an error raised while handling another.
        raise Exception(f"Failed to load model {model_name}: {str(e)}") from e
    return loaded_model, loaded_tokenizer
|
47 |
|
48 |
# Load default model
|
49 |
+
default_model = "GoalZero/ada-2534"
try:
    model, tokenizer = load_model(default_model)
    # Record the name only after a successful load so classify_text's
    # "model is None or name changed" check stays accurate.
    current_model_name = default_model
except Exception as e:
    # Do not abort startup when the default checkpoint cannot be loaded:
    # the globals keep their earlier None values and classify_text retries
    # the load on the first request.
    print(f"Error loading default model: {str(e)}")
|
|
|
|
|
|
|
52 |
|
53 |
+
# -----------------------------
|
54 |
+
# Classification function
|
55 |
+
# -----------------------------
|
56 |
def classify_text(text, model_choice):
    """Estimate how likely *text* is AI-written with the selected model.

    Steps:
      1. Reload the classifier via ``load_model`` when none is loaded or
         ``model_choice`` differs from ``current_model_name``.
      2. Score the text with ``compute_perplexity`` and prepend that score
         to the text.
      3. Tokenize the combined string (truncated to 128 tokens), run the
         classifier, and softmax the logits.

    Args:
        text: The text to classify.
        model_choice: Model name passed to ``load_model``; echoed back in
            the result.

    Returns:
        On success, a dict with ``"AI Probability"`` (a percentage rounded
        to 10 decimals), ``"Perplexity"`` and ``"Model used"``. On any
        failure, including blank input, ``{"error": <message>,
        "Model used": model_choice}``.
    """
    global model, tokenizer, current_model_name

    # Reject blank input before any model work; otherwise it surfaces as a
    # raw library error from the perplexity scorer.
    if not text or not text.strip():
        return {"error": "Input text is empty.", "Model used": model_choice}

    try:
        # Reload model if needed
        if model is None or model_choice != current_model_name:
            model, tokenizer = load_model(model_choice)
            current_model_name = model_choice

        # Compute perplexity
        ppl_score = compute_perplexity(text)
        # Prepend perplexity to text
        text_with_ppl = f"{ppl_score} {text}"

        # Tokenize and predict. Only the first 128 tokens of the combined
        # string reach the classifier.
        inputs = tokenizer(
            text_with_ppl,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128,
        ).to(device)

        with torch.no_grad():
            outputs = model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # NOTE(review): assumes class index 1 is the "AI" label --
            # confirm against the checkpoint's label mapping.
            prob_ai = probabilities[0][1].item()

        return {
            "AI Probability": round(prob_ai * 100, 10),
            "Perplexity": ppl_score,
            "Model used": model_choice,
        }
    except Exception as e:
        # UI boundary: report the failure in the JSON output instead of
        # raising into Gradio.
        return {"error": str(e), "Model used": model_choice}
|
90 |
|
91 |
+
# -----------------------------
|
92 |
+
# Gradio interface
|
93 |
+
# -----------------------------
|
94 |
# Input widgets, listed in the positional order of classify_text(text, model_choice).
text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text")
model_dropdown = gr.Dropdown(
    choices=list(model_options),
    value=default_model,
    label="Select Model Version",
)

# classify_text returns a dict, shown through a JSON output component.
iface = gr.Interface(
    fn=classify_text,
    inputs=[text_input, model_dropdown],
    outputs=gr.JSON(label="Results"),
    title="GoalZero Ada AI Detection with Perplexity",
    description="Enter text to get the probability of it being AI-written. The perplexity score is also computed and prepended.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)
|