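"""A custom "quizbowl-bonus" Hugging Face pipeline.

Prompts a causal LM (Llama 3.2 3B Instruct by default) to answer quizbowl
bonus parts and parses each reply into an answer, an explanation, and a
confidence score.
"""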
import json_repair
import torch
from datasets import Dataset
from loguru import logger
from torch.nn import functional as F
from tqdm.auto import tqdm
from transformers import Pipeline, pipeline
from transformers.models.llama.modeling_llama import LlamaForCausalLM
from transformers.pipelines import PIPELINE_REGISTRY
|
|
def format_part(number: int, text: str, guess: str) -> str:
    """Render one previously answered part and the model's guess for the prompt."""
    return f"\t * Part {number}: {text}\n\t * Model Guess: {guess}"
|
|
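# System prompt: fixes the model's role and the JSON schema of its replies.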
system_prompt = """
You are a quizbowl player. Given a leadin and your responses to the previous related parts, provide an answer to the question, a brief (1-2 sentence) explanation, and your confidence in the guess.
The answer should be a single word or short phrase, and the explanation should be concise and relevant to the question.
The answer should be formatted in the JSON format below:

{
    "answer": str,
    "explanation": str,
    "confidence": float (0-1, in steps of 0.01),
    "justification": str (optional justification for the confidence score)
}
The confidence should be a float between 0 and 1, representing your confidence in the answer.
"""
|
|
user_prompt_template = """
Leadin: {leadin}
Question: {part}
What is being asked in the question? Provide a concise answer, a brief explanation, and your confidence in the guess along with a justification."""
|
|
def prepare_conversation(leadin, part, previous_parts=None):
    """Build the chat messages for one bonus part.

    When guesses for earlier parts are available, they are prepended to the
    user prompt via format_part, matching the system prompt's reference to
    responses to previous related parts.
    """
    user_prompt = user_prompt_template.format(leadin=leadin, part=part)
    if previous_parts:
        history = "\n".join(
            format_part(i + 1, p["text"], p["guess"])
            for i, p in enumerate(previous_parts)
        )
        user_prompt = f"Previous parts:\n{history}\n{user_prompt}"
    return [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": user_prompt,
        },
    ]
|
|
def parse_output_text(output_text: str):
    """Extract the answer/explanation/confidence JSON from raw model output.

    Uses json_repair to tolerate malformed JSON and falls back to empty
    values if no object can be recovered.
    """
    try:
        # The model may emit text before the JSON object; skip to the first "{".
        start_index = output_text.find("{")
        if start_index == -1:
            raise ValueError("No JSON object found in the output text.")
        output_text = output_text[start_index:]
        json_data = json_repair.loads(output_text)
        if isinstance(json_data, list):
            json_data = json_data[0]
        answer = str(json_data.get("answer") or "").strip()
        explanation = str(json_data.get("explanation") or "").strip()
        confidence = json_data.get("confidence", 0.0)
    except Exception as e:
        logger.warning(
            f"Error parsing JSON: {e.__class__.__name__} - {e}. Got:\n{output_text}"
        )
        answer, explanation, confidence = "", "", 0.0

    try:
        # Clamp the self-reported confidence to [0, 1]; a non-numeric value
        # (e.g. None or a string) raises and falls back to 0.0.
        confidence = float(confidence)
        confidence = max(0.0, min(1.0, confidence))
    except (TypeError, ValueError):
        logger.warning(f"Invalid confidence value: {confidence}. Defaulting to 0.0.")
        confidence = 0.0
    return {
        "answer": answer,
        "explanation": explanation,
        "confidence": confidence,
    }
|
|
def postprocess_response(output_text, scores=None):
    """Parse one generated text into a record, optionally blending confidences.

    scores, if given, is a (num_generated_tokens, vocab_size) tensor of logits
    for this sequence; the mean per-token top probability is averaged with the
    model's self-reported confidence as a rough calibration heuristic.
    """
    model_response = parse_output_text(output_text)

    if scores is not None and len(scores) > 0:
        # Probability of the most likely token at each step, averaged over steps.
        probs = [F.softmax(score, dim=-1).max().item() for score in scores]
        logit_confidence = float(sum(probs) / len(probs)) if probs else 0.0
        model_response["confidence"] = (
            model_response["confidence"] + logit_confidence
        ) / 2
    return model_response
|
|
class QBBonusPipeline(Pipeline):
    """Pipeline that answers quizbowl bonus parts with a causal LM."""

    def __init__(self, model, tokenizer, **kwargs):
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            **kwargs,
        )
        # Decoder-only models need left padding for batched generation, and
        # Llama defines no pad token, so fall back to the EOS token.
        self.tokenizer.padding_side = "left"
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
|
    def _sanitize_parameters(self, **kwargs):
        # No runtime parameters are supported; use defaults for every stage.
        return {}, {}, {}
|
    def preprocess(self, inputs):
        batch_size = len(inputs["leadin"])
        previous_parts = inputs.get("previous_parts") or [None] * batch_size
        conversations = []
        for i in range(batch_size):
            conversations.append(
                prepare_conversation(
                    inputs["leadin"][i], inputs["part"][i], previous_parts[i]
                )
            )

        # Tokenize all conversations in one left-padded batch using the
        # model's chat template.
        model_inputs = self.tokenizer.apply_chat_template(
            conversations,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            padding=True,
            return_tensors="pt",
        )
        return model_inputs
|
    def _forward(self, model_inputs):
        with torch.no_grad():
            outputs = self.model.generate(
                **model_inputs,
                max_new_tokens=256,
                return_dict_in_generate=True,
                output_scores=True,
            )

        # Drop the prompt tokens so only the generated completion remains.
        input_length = model_inputs["input_ids"].shape[1]
        outputs.sequences = outputs.sequences[:, input_length:]
        # generate() returns one (batch, vocab) score tensor per step; stack
        # them into a single (batch, steps, vocab) tensor.
        outputs.scores = torch.stack(outputs.scores, dim=1)
        return outputs
|
    def postprocess(self, model_outputs):
        output_texts = self.tokenizer.batch_decode(
            model_outputs.sequences, skip_special_tokens=True
        )
        records = []
        # Pair each decoded text with its (steps, vocab) slice of the stacked
        # scores so postprocess_response can blend in a logit-based confidence.
        for output_text, scores in zip(output_texts, model_outputs.scores):
            records.append(postprocess_response(output_text, scores))
        return records
|
|
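# Register the custom task so that pipeline("quizbowl-bonus") resolves to
# QBBonusPipeline, defaulting to the Llama 3.2 3B Instruct checkpoint.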
PIPELINE_REGISTRY.register_pipeline(
    "quizbowl-bonus",
    pipeline_class=QBBonusPipeline,
    pt_model=LlamaForCausalLM,
    default={
        "pt": ("meta-llama/Llama-3.2-3B-Instruct", "main"),
    },
    type="text",
)
|
|
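# Smoke test: run the pipeline over a few toy examples in batches.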
if __name__ == "__main__":
    pipe = pipeline("quizbowl-bonus", device_map="auto", trust_remote_code=True)

    examples = [
        {
            "leadin": "This is a leadin.",
            "part": "What is the capital of France?",
            "previous_parts": [],
        },
        {
            "leadin": "This is another leadin.",
            "part": "What is the largest planet in our solar system?",
            "previous_parts": [
                {"text": "What is the smallest planet?", "guess": "Mercury"},
                {"text": "What is the second smallest planet?", "guess": "Mars"},
            ],
        },
        {
            "leadin": "This is a leadin with no previous parts.",
            "part": "What is the chemical symbol for water?",
            "previous_parts": [],
        },
    ] * 5

    dataset = Dataset.from_list(examples)

    print("Dataset size:", len(dataset))
    outputs = []
    batch_size = 5
    for batch in tqdm(dataset.batch(batch_size), desc="Processing batches"):
        output = pipe(batch, batch_size=batch_size)
        outputs.extend(output)
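    # Each record is a dict with "answer", "explanation", and "confidence".
    print("First record:", outputs[0])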
|
|