Spaces:

GoalZero
/

GoalZero-Ada-Demo

Running

File size: 3,439 Bytes

36e5180
1102ccb
ca12785
1102ccb
 
 
 
 
 
 
 
 
 
 
 
 
 
36e5180
1102ccb
 
 
 
 
 
 
 
 
 
 
 
34531ec
2fe403a
34531ec
 
1102ccb
f0ef3c4
 
ef92b60
f0ef3c4
 
 
1102ccb
 
ef92b60
f0ef3c4
 
 
 
36a716f
1102ccb
 
bb5b784
1102ccb
 
 
34531ec
ef92b60
f0ef3c4
ef92b60
 
f0ef3c4
ef92b60
1102ccb
 
 
 
 
 
 
f0ef3c4
1102ccb
f0ef3c4
 
 
 
1102ccb
 
f0ef3c4
 
 
ef92b60
1102ccb
f0ef3c4
1102ccb
 
f0ef3c4
 
1102ccb
 
398fb47
1102ccb
 
 
398fb47
 
34531ec
1102ccb
f0ef3c4
 
1102ccb
f0ef3c4
 
34531ec
f0ef3c4
1102ccb
 
398fb47
ca12785
 
ef92b60

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
import torch
import math

# -----------------------------
# Device setup
# -----------------------------
# Run on the GPU when PyTorch reports one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# -----------------------------
# GPT-2 for perplexity
# -----------------------------
# Pretrained "gpt2" tokenizer and language model, used by compute_perplexity.
# The model is moved to the selected device and put in evaluation mode.
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device).eval()

def compute_perplexity(sentence):
    """Return the GPT-2 perplexity of *sentence*, scaled down by 100.

    Args:
        sentence: Text to score. Input longer than the GPT-2 context window
            is truncated to the window before scoring.

    Returns:
        ``exp(loss) / 100`` rounded to two decimal places, where ``loss`` is
        the language-model loss GPT-2 reports for the tokenized text.

    Raises:
        ValueError: If *sentence* tokenizes to fewer than two tokens. The
            loss is computed over next-token predictions, so such input has
            nothing to predict and would otherwise fail inside the model or
            produce a NaN score.
    """
    # GPT-2 has a fixed number of position embeddings; feeding more tokens
    # than that fails inside the model, so cap the input at the window size.
    tokens = gpt2_tokenizer(
        sentence,
        return_tensors='pt',
        truncation=True,
        max_length=gpt2_model.config.n_positions,
    ).to(device)
    input_ids = tokens["input_ids"]

    if input_ids.shape[-1] < 2:
        raise ValueError(
            "Perplexity requires text that tokenizes to at least two tokens."
        )

    with torch.no_grad():
        outputs = gpt2_model(**tokens, labels=input_ids)

    ppl = math.exp(outputs.loss.item())
    # Normalize and round
    return round(ppl / 100, 2)

# -----------------------------
# AI detection models
# -----------------------------
# Selectable detector versions. The keys are offered as the dropdown choices
# in the Gradio interface; the values are not read anywhere in this file
# (presumably a short/base model name -- confirm before relying on them).
model_options = {
    "GoalZero/ada-2534-beta-0-0-2": "GoalZero/ada-2534",
}

# Module-level state shared with classify_text: the currently loaded
# classifier, its tokenizer, and the name it was loaded under.
model = tokenizer = current_model_name = None

def load_model(model_name):
    """Load a sequence-classification model and its tokenizer by name.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been moved to
        ``device``.

    Raises:
        RuntimeError: If either the model or the tokenizer fails to load.
            The underlying exception is chained as ``__cause__`` so the
            original traceback is preserved.
    """
    try:
        classifier = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        classifier_tokenizer = AutoTokenizer.from_pretrained(model_name)
    except Exception as e:
        # RuntimeError is still caught by callers using ``except Exception``;
        # ``from e`` keeps the real cause attached to the wrapped error.
        raise RuntimeError(f"Failed to load model {model_name}: {e}") from e
    return classifier, classifier_tokenizer

# Load the default model at import time and record the name it was loaded
# under; classify_text compares the requested model against this name to
# decide whether a reload is needed.
default_model = "GoalZero/ada-2534-beta-0-0-2"
model, tokenizer = load_model(model_name=default_model)
current_model_name = default_model

# -----------------------------
# Classification function
# -----------------------------
def classify_text(text, model_choice):
    """Estimate the probability that *text* is AI-written.

    The perplexity score of the text is computed first and prepended to the
    text (separated by a space) before it is passed to the classifier.

    Args:
        text: Text to classify.
        model_choice: Name of the classifier to use. If it differs from the
            currently loaded model, that model is loaded via ``load_model``
            and replaces the module-level ``model``/``tokenizer``.

    Returns:
        On success, a dict with ``"AI Probability"`` (a percentage),
        ``"Perplexity"`` and ``"Model used"``. On any failure -- including
        blank input or an undefined perplexity -- a dict with ``"error"``
        and ``"Model used"``. Exceptions are not propagated to the caller.
    """
    global model, tokenizer, current_model_name
    try:
        # Reject blank input before doing any work: there is nothing to
        # score, and it avoids a pointless model reload.
        if not text or not text.strip():
            return {"error": "Input text is empty.", "Model used": model_choice}

        # Reload model if needed
        if model is None or model_choice != current_model_name:
            model, tokenizer = load_model(model_choice)
            current_model_name = model_choice

        # Compute perplexity
        ppl_score = compute_perplexity(text)
        # A NaN/inf score would be fed to the classifier as literal text and
        # returned in the JSON payload, so report it instead.
        if not math.isfinite(ppl_score):
            return {
                "error": "Perplexity is undefined for this input; please enter a longer text.",
                "Model used": model_choice
            }

        # Prepend perplexity to text
        text_with_ppl = f"{ppl_score} {text}"

        # Tokenize and predict
        inputs = tokenizer(
            text_with_ppl,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        ).to(device)

        with torch.no_grad():
            outputs = model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # Class index 1 is reported as the AI class.
            # NOTE(review): assumes the model's label order -- confirm.
            prob_ai = probabilities[0][1].item()

        return {
            "AI Probability": round(prob_ai * 100, 10),
            "Perplexity": ppl_score,
            "Model used": model_choice
        }
    except Exception as e:
        # UI boundary: surface the failure in the result payload.
        return {"error": str(e), "Model used": model_choice}

# -----------------------------
# Gradio interface
# -----------------------------
# Web UI: a text box and a model dropdown feed classify_text, and the
# returned dict is shown as JSON. The dropdown lists the keys of
# model_options and starts on the default model.
iface = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Enter text here...",
            label="Input Text",
        ),
        gr.Dropdown(
            choices=list(model_options),
            value=default_model,
            label="Select Model Version",
        ),
    ],
    outputs=gr.JSON(label="Results"),
    title="GoalZero Ada AI Detection with Perplexity",
    description="Enter text to get the probability of it being AI-written. The perplexity score is also computed and prepended.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)