saneowl committed on
Commit
1102ccb
·
verified ·
1 Parent(s): ef92b60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -55
app.py CHANGED
@@ -1,107 +1,110 @@
1
  import gradio as gr
2
- from transformers import (
3
- RobertaTokenizer, RobertaForSequenceClassification,
4
- AutoTokenizer, AutoModelForSequenceClassification
5
- )
6
  import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- # Define available models including DeBERTa
 
 
 
 
 
 
 
 
 
 
 
9
  model_options = {
10
- "GoalZero/aidetection-ada-v0.2": "GoalZero/aidetection-ada-v0.2",
11
- "GoalZero/aidetection-ada-v0.1": "GoalZero/aidetection-ada-v0.1",
12
- "GoalZero/babbage-mini-v0.1": "GoalZero/babbage-mini-v0.1",
13
  "GoalZero/ada-2534": "GoalZero/ada-2534"
14
  }
15
 
16
- # Initialize global variables
17
  model = None
18
  tokenizer = None
19
  current_model_name = None
20
 
21
  def load_model(model_name):
22
- """Load model and tokenizer, handling both RoBERTa and DeBERTa"""
23
  try:
24
- if "deberta" in model_name.lower() or "ada-2534" in model_name.lower():
25
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
26
- tokenizer = AutoTokenizer.from_pretrained(model_name)
27
- else:
28
- model = RobertaForSequenceClassification.from_pretrained(model_name)
29
- tokenizer = RobertaTokenizer.from_pretrained(model_name)
30
  return model, tokenizer
31
  except Exception as e:
32
  raise Exception(f"Failed to load model {model_name}: {str(e)}")
33
 
34
  # Load default model
35
- try:
36
- default_model = "GoalZero/aidetection-ada-v0.2"
37
- model, tokenizer = load_model(default_model)
38
- current_model_name = default_model
39
- except Exception as e:
40
- print(f"Error loading default model: {str(e)}")
41
 
 
 
 
42
  def classify_text(text, model_choice):
43
  global model, tokenizer, current_model_name
44
-
45
  try:
46
  # Reload model if needed
47
  if model is None or model_choice != current_model_name:
48
  model, tokenizer = load_model(model_choice)
49
  current_model_name = model_choice
50
-
51
- # Clean input
52
- cleaned_text = text.replace('.', '').replace('\n', ' ')
53
-
54
- # Tokenize
 
 
55
  inputs = tokenizer(
56
- cleaned_text,
57
  return_tensors='pt',
58
  padding=True,
59
  truncation=True,
60
  max_length=128
61
- )
62
-
63
- # Predict
64
  with torch.no_grad():
65
  outputs = model(**inputs)
66
  probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
67
  prob_ai = probabilities[0][1].item()
68
-
69
- return {
70
- "AI Probability": round(prob_ai * 100, 10),
71
- "Model used": model_choice
72
- }
73
- except Exception as e:
74
  return {
75
- "error": f"An error occurred: {str(e)}",
 
76
  "Model used": model_choice
77
  }
 
 
78
 
79
- # Create the Gradio interface
 
 
80
  iface = gr.Interface(
81
  fn=classify_text,
82
  inputs=[
83
- gr.Textbox(
84
- lines=2,
85
- placeholder="Enter text here...",
86
- label="Input Text"
87
- ),
88
  gr.Dropdown(
89
  choices=list(model_options.keys()),
90
- value="GoalZero/aidetection-ada-v0.2",
91
  label="Select Model Version"
92
  )
93
  ],
94
  outputs=gr.JSON(label="Results"),
95
- title="GoalZero Ada AI Detection",
96
- description="Enter text to get the probability of it being AI-written. Select a model version to use.",
97
- examples=[
98
- ["Waymo is an American autonomous driving technology company that originated as the Google Self-Driving Car Project in 2009. It is now a subsidiary of Alphabet Inc., headquartered in Mountain View, California. The name \"Waymo\" was adopted in December 2016 when the project was rebranded and spun out of Google to focus on developing fully autonomous vehicles aimed at improving transportation safety and convenience", "GoalZero/babbage-mini-v0.1"],
99
- ["WWII demonstrated the importance of alliances in global conflicts. The Axis and Allied powers were formed as countries sought to protect their interests and expand their influence. This lesson underscores the potential for future global conflicts to involve complex alliances, similar to the Cold War era’s NATO and Warsaw Pact alignments.", "GoalZero/aidetection-ada-v0.2"],
100
- ["Eustace was a thorough gentleman. There was candor in his quack, and affability in his waddle; and underneath his snowy down beat a pure and sympathetic heart. In short, he was a most exemplary duck.", "GoalZero/aidetection-ada-v0.1"],
101
- ["This is an example of AI-written text using the DeBERTa model for testing purposes.", "GoalZero/ada-2534"]
102
- ]
103
  )
104
 
105
- # Launch the app
106
  if __name__ == "__main__":
107
  iface.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
 
 
 
3
  import torch
4
+ import math
5
+
6
+ # -----------------------------
7
+ # Device setup
8
+ # -----------------------------
9
+ device = "cuda" if torch.cuda.is_available() else "cpu"
10
+ print(f"Using device: {device}")
11
+
12
+ # -----------------------------
13
+ # GPT-2 for perplexity
14
+ # -----------------------------
15
+ gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
16
+ gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
17
+ gpt2_model.eval()
18
 
19
def compute_perplexity(sentence):
    """Return the GPT-2 perplexity of ``sentence``, scaled down by 100.

    The text is tokenized with the module-level ``gpt2_tokenizer`` and scored
    by ``gpt2_model`` using the input ids as labels. The resulting loss is
    exponentiated, divided by 100 and rounded to two decimals.

    Args:
        sentence: Text to score.

    Returns:
        ``round(exp(loss) / 100, 2)`` as a float.

    Raises:
        ValueError: If ``sentence`` is empty.
    """
    if not sentence:
        # Nothing to score; fail with a clear message instead of an opaque
        # tensor error from the model.
        raise ValueError("Cannot compute perplexity of empty text")

    # GPT-2 has a fixed number of position embeddings, so inputs longer than
    # that limit must be truncated before the forward pass.
    tokens = gpt2_tokenizer(
        sentence,
        return_tensors='pt',
        truncation=True,
        max_length=gpt2_model.config.n_positions,
    ).to(device)

    with torch.no_grad():
        outputs = gpt2_model(**tokens, labels=tokens["input_ids"])

    loss = outputs.loss.item()
    ppl = math.exp(loss)
    # Normalize and round
    return round(ppl / 100, 2)
27
+
28
+ # -----------------------------
29
+ # AI detection models
30
+ # -----------------------------
31
  model_options = {
 
 
 
32
  "GoalZero/ada-2534": "GoalZero/ada-2534"
33
  }
34
 
35
+ # Initialize globals
36
  model = None
37
  tokenizer = None
38
  current_model_name = None
39
 
40
def load_model(model_name):
    """Load the classification model and tokenizer for ``model_name``.

    The model is loaded with ``AutoModelForSequenceClassification`` and moved
    to the module-level ``device``; the tokenizer is loaded with
    ``AutoTokenizer``.

    Args:
        model_name: Identifier passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        RuntimeError: If either load fails. The original exception is kept
            as ``__cause__`` so the full traceback is not lost.
    """
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    except Exception as e:
        # RuntimeError is still caught by callers using ``except Exception``.
        raise RuntimeError(f"Failed to load model {model_name}: {str(e)}") from e
    return model, tokenizer
47
 
48
  # Load default model
49
+ default_model = "GoalZero/ada-2534"
50
+ model, tokenizer = load_model(default_model)
51
+ current_model_name = default_model
 
 
 
52
 
53
+ # -----------------------------
54
+ # Classification function
55
+ # -----------------------------
56
def classify_text(text, model_choice):
    """Score ``text`` with the selected detection model.

    The perplexity from ``compute_perplexity`` is prepended to the text
    before it is tokenized (truncated to 128 tokens) and classified.

    Args:
        text: Input text to classify.
        model_choice: Model identifier; the cached model is reloaded when it
            differs from ``current_model_name``.

    Returns:
        On success, a dict with ``"AI Probability"`` (percentage),
        ``"Perplexity"`` and ``"Model used"``. On failure or empty input, a
        dict with ``"error"`` and ``"Model used"``.
    """
    global model, tokenizer, current_model_name

    # Empty or whitespace-only input has no meaningful perplexity; return a
    # clear message instead of a tensor error or a NaN score.
    if not text or not text.strip():
        return {"error": "Input text is empty", "Model used": model_choice}

    try:
        # Reload model if needed
        if model is None or model_choice != current_model_name:
            model, tokenizer = load_model(model_choice)
            current_model_name = model_choice

        # Compute perplexity
        ppl_score = compute_perplexity(text)
        # Prepend perplexity to text
        text_with_ppl = f"{ppl_score} {text}"

        # Tokenize and predict
        inputs = tokenizer(
            text_with_ppl,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        ).to(device)

        with torch.no_grad():
            outputs = model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # NOTE(review): class index 1 is treated as the "AI" label --
            # confirm against the model's label mapping.
            prob_ai = probabilities[0][1].item()

        return {
            "AI Probability": round(prob_ai * 100, 10),
            "Perplexity": ppl_score,
            "Model used": model_choice
        }
    except Exception as e:
        # UI boundary: report the failure in the JSON output, do not raise.
        return {"error": str(e), "Model used": model_choice}
90
 
91
+ # -----------------------------
92
+ # Gradio interface
93
+ # -----------------------------
94
# Gradio UI: a text box and a model selector feed classify_text, and the
# returned dict is rendered as JSON.
iface = gr.Interface(
    title="GoalZero Ada AI Detection with Perplexity",
    description="Enter text to get the probability of it being AI-written. The perplexity score is also computed and prepended.",
    fn=classify_text,
    inputs=[
        gr.Textbox(
            label="Input Text",
            placeholder="Enter text here...",
            lines=2,
        ),
        gr.Dropdown(
            label="Select Model Version",
            choices=list(model_options),
            value=default_model,
        ),
    ],
    outputs=gr.JSON(label="Results"),
)
108
 
 
109
  if __name__ == "__main__":
110
  iface.launch(share=True)