import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
import torch
import math
# -----------------------------
# Device setup
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# -----------------------------
# GPT-2 for perplexity
# -----------------------------
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
gpt2_model.eval()
def compute_perplexity(sentence):
    """Return the GPT-2 perplexity of `sentence`, scaled by 1/100 and rounded."""
    tokens = gpt2_tokenizer(sentence, return_tensors='pt').to(device)
    with torch.no_grad():
        outputs = gpt2_model(**tokens, labels=tokens["input_ids"])
    loss = outputs.loss.item()
    # Perplexity is the exponential of the mean token cross-entropy
    ppl = math.exp(loss)
    # Scale down and round before prepending to the classifier input
    return round(ppl / 100, 2)
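# Illustrative call (the actual value depends on the GPT-2 weights):
#   compute_perplexity("The quick brown fox jumps over the lazy dog.")
# Fluent English text typically yields a raw GPT-2 perplexity in the tens to
# low hundreds, so the /100 scaling keeps the prepended number compact.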
# -----------------------------
# AI detection models
# -----------------------------
# Dropdown label -> Hugging Face model repo id
model_options = {
    "GoalZero/ada-2534-beta-0-0-2": "GoalZero/ada-2534"
}
# Initialize globals
model = None
tokenizer = None
current_model_name = None
def load_model(model_name):
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return model, tokenizer
    except Exception as e:
        raise RuntimeError(f"Failed to load model {model_name}: {e}") from e
# Load default model (resolve the dropdown label to its repo id)
default_model = "GoalZero/ada-2534-beta-0-0-2"
model, tokenizer = load_model(model_options[default_model])
current_model_name = default_model
# -----------------------------
# Classification function
# -----------------------------
def classify_text(text, model_choice):
    global model, tokenizer, current_model_name
    try:
        # Reload the classifier if the dropdown selection changed
        if model is None or model_choice != current_model_name:
            model, tokenizer = load_model(model_options[model_choice])
            current_model_name = model_choice

        # Compute perplexity and prepend it to the input text
        ppl_score = compute_perplexity(text)
        text_with_ppl = f"{ppl_score} {text}"

        # Tokenize and predict
        inputs = tokenizer(
            text_with_ppl,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        ).to(device)
        with torch.no_grad():
            outputs = model(**inputs)

        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        prob_ai = probabilities[0][1].item()  # class index 1 = AI-written

        return {
            "AI Probability": round(prob_ai * 100, 2),
            "Perplexity": ppl_score,
            "Model used": model_choice
        }
    except Exception as e:
        return {"error": str(e), "Model used": model_choice}
# -----------------------------
# Gradio interface
# -----------------------------
iface = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text"),
        gr.Dropdown(
            choices=list(model_options.keys()),
            value=default_model,
            label="Select Model Version"
        )
    ],
    outputs=gr.JSON(label="Results"),
    title="GoalZero Ada AI Detection with Perplexity",
    description="Enter text to get the probability that it was AI-written. A GPT-2 perplexity score is computed and prepended to the input before classification."
)
if __name__ == "__main__":
iface.launch(share=True)