import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
import torch
import math

# -----------------------------
# Device setup
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# -----------------------------
# GPT-2 for perplexity
# -----------------------------
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
gpt2_model.eval()


def compute_perplexity(sentence):
    """Return the GPT-2 perplexity of `sentence`, scaled down by 100."""
    tokens = gpt2_tokenizer(sentence, return_tensors="pt").to(device)
    with torch.no_grad():
        # Passing labels makes the model return the mean cross-entropy loss
        outputs = gpt2_model(**tokens, labels=tokens["input_ids"])
    loss = outputs.loss.item()
    ppl = math.exp(loss)
    # Normalize (divide by 100) and round to two decimal places
    return round(ppl / 100, 2)
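

# The value above is exp(mean token-level cross-entropy), scaled by 1/100.
# As a sanity-check sketch, the same quantity can be computed by hand from
# the raw logits; this mirrors the one-position label shift that
# `outputs.loss` applies internally. `perplexity_by_hand` is illustrative
# only and is not used by the app:
def perplexity_by_hand(sentence):
    tokens = gpt2_tokenizer(sentence, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = gpt2_model(**tokens).logits
    # Predict token t+1 from position t: drop the last logit and first label
    shift_logits = logits[:, :-1, :]
    shift_labels = tokens["input_ids"][:, 1:]
    nll = torch.nn.functional.cross_entropy(
        shift_logits.reshape(-1, shift_logits.size(-1)),
        shift_labels.reshape(-1),
    )
    return math.exp(nll.item())  # unscaled perplexity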


# -----------------------------
# AI detection models
# -----------------------------
# Model versions offered in the dropdown; the keys are the HF repo ids that
# get passed to load_model() below
model_options = {
    "GoalZero/ada-2534-beta-0-0-2": "GoalZero/ada-2534"
}

# Initialize globals
model = None
tokenizer = None
current_model_name = None


def load_model(model_name):
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return model, tokenizer
    except Exception as e:
        raise RuntimeError(f"Failed to load model {model_name}: {e}") from e


# Load default model
default_model = "GoalZero/ada-2534-beta-0-0-2"
model, tokenizer = load_model(default_model)
current_model_name = default_model


# -----------------------------
# Classification function
# -----------------------------
def classify_text(text, model_choice):
    global model, tokenizer, current_model_name
    try:
        # Reload the model only when the dropdown selection has changed
        if model is None or model_choice != current_model_name:
            model, tokenizer = load_model(model_choice)
            current_model_name = model_choice
        # Compute perplexity
        ppl_score = compute_perplexity(text)
        # Prepend perplexity to the text
        text_with_ppl = f"{ppl_score} {text}"

        # Tokenize and predict
        inputs = tokenizer(
            text_with_ppl,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=128,
        ).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        # Index 1 is treated as the "AI-written" class
        prob_ai = probabilities[0][1].item()

        return {
            "AI Probability": round(prob_ai * 100, 10),
            "Perplexity": ppl_score,
            "Model used": model_choice,
        }
    except Exception as e:
        return {"error": str(e), "Model used": model_choice}


# -----------------------------
# Gradio interface
# -----------------------------
iface = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text"),
        gr.Dropdown(
            choices=list(model_options.keys()),
            value=default_model,
            label="Select Model Version"
        )
    ],
    outputs=gr.JSON(label="Results"),
    title="GoalZero Ada AI Detection with Perplexity",
    description="Enter text to get the probability that it was AI-written. The GPT-2 perplexity score is also computed and prepended to the input before classification."
)

if __name__ == "__main__":
    iface.launch(share=True)
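
# To run this app locally (a sketch; exact package versions are an
# assumption, and on a hosted Space the launch above runs automatically):
#
#     pip install gradio torch transformers
#     python app.py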