Update README.md

README.md (changed)
@@ -79,30 +79,57 @@ Python
 
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from peft import PeftModel
+from huggingface_hub import snapshot_download  # Import the downloader tool
 
-# Reward model repository and subfolder
-model_id = "nabeelshan/rlhf-gpt2-pipeline"
-subfolder = "reward_model_final"
+# --- CONFIGURATION ---
+BASE_MODEL_ID = "openai-community/gpt2"
+HF_MODEL_ID = "nabeelshan/rlhf-gpt2-pipeline"
+SUBFOLDER = "reward_model_final"
+
+print(f"Downloading model files from '{HF_MODEL_ID}'...")
+local_model_path = snapshot_download(
+    repo_id=HF_MODEL_ID,
+    allow_patterns=f"{SUBFOLDER}/*"
+)
+local_adapter_path = f"{local_model_path}/{SUBFOLDER}"
+print(f"Successfully downloaded to: {local_adapter_path}")
+
+
+print("Loading model from local path...")
+tokenizer = AutoTokenizer.from_pretrained(local_adapter_path)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+
+base_model = AutoModelForSequenceClassification.from_pretrained(
+    BASE_MODEL_ID,
+    num_labels=1,
+    pad_token_id=tokenizer.pad_token_id
+)
+
+model = PeftModel.from_pretrained(base_model, local_adapter_path)
+model.eval()
+print("Model loaded successfully!")
 
-# Load the tokenizer and reward model
-tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder=subfolder)
-model = AutoModelForSequenceClassification.from_pretrained(model_id, subfolder=subfolder)
 
 prompt = "What diet should I follow to lose weight healthily?"
 good_response = "A balanced, nutritious plan based on eating whole foods is best. Limit processed and sugary foods."
 bad_response = "Just eat less lol."
 
-# Tokenize each prompt-response pair
-inputs_good = tokenizer(prompt, good_response, return_tensors="pt")
-inputs_bad = tokenizer(prompt, bad_response, return_tensors="pt")
+def get_reward_score(prompt_text: str, response_text: str) -> float:
+    """Tokenizes and calculates the reward score for a given prompt and response."""
+    inputs = tokenizer(prompt_text, response_text, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        result = model(**inputs)
+    return result.logits[0].item()
+
+score_good = get_reward_score(prompt, good_response)
+score_bad = get_reward_score(prompt, bad_response)
+
+print(f"\nScore for good response: {score_good:.2f}")
+print(f"Score for bad response: {score_bad:.2f}")
 
-# Get the reward scores (logits)
-with torch.no_grad():
-    reward_good = model(**inputs_good).logits[0].item()
-    reward_bad = model(**inputs_bad).logits[0].item()
 
-print(f"Score for good response: {reward_good:.2f}")
-print(f"Score for bad response: {reward_bad:.2f}")
 
 # The model should give a higher score to the better response.
 # Expected: Score for good response: 2.15