import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
import torch
import math

# -----------------------------
# Device setup
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# -----------------------------
# GPT-2 for perplexity
# -----------------------------
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
gpt2_model.eval()


def compute_perplexity(sentence):
    """Return the GPT-2 perplexity of `sentence`, scaled down by 100."""
    tokens = gpt2_tokenizer(sentence, return_tensors="pt").to(device)
    with torch.no_grad():
        # Passing labels makes the model return the mean cross-entropy loss
        outputs = gpt2_model(**tokens, labels=tokens["input_ids"])
    loss = outputs.loss.item()
    ppl = math.exp(loss)
    # Normalize (divide by 100) and round to two decimal places
    return round(ppl / 100, 2)
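

# The value above is exp(mean token-level cross-entropy), scaled by 1/100.
# As a sanity-check sketch, the same quantity can be computed by hand from
# the raw logits; this mirrors the one-position label shift that
# `outputs.loss` applies internally. `perplexity_by_hand` is illustrative
# only and is not used by the app:
def perplexity_by_hand(sentence):
    tokens = gpt2_tokenizer(sentence, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = gpt2_model(**tokens).logits
    # Predict token t+1 from position t: drop the last logit and first label
    shift_logits = logits[:, :-1, :]
    shift_labels = tokens["input_ids"][:, 1:]
    nll = torch.nn.functional.cross_entropy(
        shift_logits.reshape(-1, shift_logits.size(-1)),
        shift_labels.reshape(-1),
    )
    return math.exp(nll.item())  # unscaled perplexity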


# -----------------------------
# AI detection models
# -----------------------------
# Model versions offered in the dropdown; the keys are the HF repo ids that
# get passed to load_model() below
model_options = {
    "GoalZero/ada-2534-beta-0-0-2": "GoalZero/ada-2534"
}

# Initialize globals
model = None
tokenizer = None
current_model_name = None


def load_model(model_name):
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return model, tokenizer
    except Exception as e:
        raise RuntimeError(f"Failed to load model {model_name}: {e}") from e


# Load default model
default_model = "GoalZero/ada-2534-beta-0-0-2"
model, tokenizer = load_model(default_model)
current_model_name = default_model


# -----------------------------
# Classification function
# -----------------------------
def classify_text(text, model_choice):
    global model, tokenizer, current_model_name
    try:
        # Reload the model only when the dropdown selection has changed
        if model is None or model_choice != current_model_name:
            model, tokenizer = load_model(model_choice)
            current_model_name = model_choice
        # Compute perplexity
        ppl_score = compute_perplexity(text)
        # Prepend perplexity to the text
        text_with_ppl = f"{ppl_score} {text}"

        # Tokenize and predict
        inputs = tokenizer(
            text_with_ppl,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=128,
        ).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        # Index 1 is treated as the "AI-written" class
        prob_ai = probabilities[0][1].item()

        return {
            "AI Probability": round(prob_ai * 100, 10),
            "Perplexity": ppl_score,
            "Model used": model_choice,
        }
    except Exception as e:
        return {"error": str(e), "Model used": model_choice}


# -----------------------------
# Gradio interface
# -----------------------------
iface = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text"),
        gr.Dropdown(
            choices=list(model_options.keys()),
            value=default_model,
            label="Select Model Version"
        )
    ],
    outputs=gr.JSON(label="Results"),
    title="GoalZero Ada AI Detection with Perplexity",
    description="Enter text to get the probability that it was AI-written. The GPT-2 perplexity score is also computed and prepended to the input before classification."
)

if __name__ == "__main__":
    iface.launch(share=True)
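
# To run this app locally (a sketch; exact package versions are an
# assumption, and on a hosted Space the launch above runs automatically):
#
#     pip install gradio torch transformers
#     python app.py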