saneowl's picture
Update app.py
2fe403a verified
raw
history blame
3.44 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
import torch
import math
# -----------------------------
# Device selection
# -----------------------------
# Use the GPU when PyTorch reports CUDA as available; otherwise stay on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")
# -----------------------------
# GPT-2 language model (used for perplexity scoring)
# -----------------------------
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_model = gpt2_model.to(device)
# Inference only: switch the module to evaluation mode.
gpt2_model.eval()
def compute_perplexity(sentence):
    """Return the GPT-2 perplexity of ``sentence``, scaled down by 100.

    The text is tokenized with the GPT-2 tokenizer and scored by the GPT-2
    language model with the input ids reused as labels; the resulting loss
    is exponentiated to obtain the perplexity.

    Args:
        sentence: Text to score.

    Returns:
        The perplexity divided by 100, rounded to two decimals.

    Raises:
        ValueError: If ``sentence`` produces fewer than two tokens. The
            language-modeling loss needs at least one next-token target,
            so no meaningful perplexity exists for such input.
    """
    # GPT-2 has a fixed number of position embeddings (``config.n_positions``).
    # Clip longer inputs so the forward pass does not fail on them.
    tokens = gpt2_tokenizer(
        sentence,
        return_tensors='pt',
        truncation=True,
        max_length=gpt2_model.config.n_positions,
    ).to(device)

    if tokens["input_ids"].shape[-1] < 2:
        raise ValueError(
            "Text is too short to compute a perplexity (need at least two tokens)."
        )

    with torch.no_grad():
        outputs = gpt2_model(**tokens, labels=tokens["input_ids"])
    loss = outputs.loss.item()
    ppl = math.exp(loss)
    # Normalize and round
    return round(ppl / 100, 2)
# -----------------------------
# AI detection models
# -----------------------------
# Keys are the model identifiers offered in the UI dropdown and handed to
# load_model(); the values are not read anywhere in this file.
model_options = {"GoalZero/ada-2534-beta-0-0-2": "GoalZero/ada-2534"}

# Module-level state for the currently loaded classifier; filled in once the
# default model has been loaded.
model = tokenizer = current_model_name = None
def load_model(model_name):
    """Load a sequence-classification model and its tokenizer.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been moved to the
        module-level ``device``.

    Raises:
        RuntimeError: If loading the model or the tokenizer fails. The
            original error is attached as ``__cause__``.
    """
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    except Exception as e:
        # Chain explicitly so the underlying failure stays visible in tracebacks.
        raise RuntimeError(f"Failed to load model {model_name}: {e}") from e
    return model, tokenizer
# Load the default classifier at import time so the first request does not
# have to wait for it.
default_model = "GoalZero/ada-2534-beta-0-0-2"
model, tokenizer = load_model(model_name=default_model)
current_model_name = default_model
# -----------------------------
# Classification function
# -----------------------------
def classify_text(text, model_choice):
    """Estimate how likely ``text`` is to be AI-written.

    The perplexity score from ``compute_perplexity`` is prepended to the text
    before it is passed to the selected classifier. The classifier is
    (re)loaded when none is loaded yet or when ``model_choice`` differs from
    the currently loaded one.

    Args:
        text: Text to classify.
        model_choice: Model identifier to classify with.

    Returns:
        On success, a dict with ``"AI Probability"`` (percentage),
        ``"Perplexity"`` and ``"Model used"``. On failure, a dict with
        ``"error"`` and ``"Model used"``; exceptions are not propagated.
    """
    global model, tokenizer, current_model_name
    try:
        # Reject blank input before doing any model work; there is nothing
        # to score and the downstream failure would be hard to interpret.
        if not text or not text.strip():
            return {"error": "Input text is empty.", "Model used": model_choice}

        # Reload model if needed
        if model is None or model_choice != current_model_name:
            model, tokenizer = load_model(model_choice)
            current_model_name = model_choice

        # Compute perplexity
        ppl_score = compute_perplexity(text)
        # A NaN/inf score would be fed to the classifier as the literal text
        # "nan"/"inf" and would not be a valid JSON number in the result.
        if not math.isfinite(ppl_score):
            return {
                "error": "Could not compute a finite perplexity for this text (it may be too short).",
                "Model used": model_choice,
            }

        # Prepend perplexity to text
        text_with_ppl = f"{ppl_score} {text}"

        # Tokenize and predict
        inputs = tokenizer(
            text_with_ppl,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        ).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        # Class index 1 is treated as the "AI-written" label.
        prob_ai = probabilities[0][1].item()

        return {
            "AI Probability": round(prob_ai * 100, 10),
            "Perplexity": ppl_score,
            "Model used": model_choice
        }
    except Exception as e:
        # UI boundary: report the failure in the result instead of raising.
        return {"error": str(e), "Model used": model_choice}
# -----------------------------
# Gradio interface
# -----------------------------
# Build the widgets first so the Interface definition stays compact.
_text_box = gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text")
_model_picker = gr.Dropdown(
    choices=list(model_options),
    value=default_model,
    label="Select Model Version",
)
_results_view = gr.JSON(label="Results")

iface = gr.Interface(
    fn=classify_text,
    inputs=[_text_box, _model_picker],
    outputs=_results_view,
    title="GoalZero Ada AI Detection with Perplexity",
    description="Enter text to get the probability of it being AI-written. The perplexity score is also computed and prepended.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)