nabeelshan committed (verified)
Commit 577cf9f · 1 Parent(s): f35cb12

Update README.md

Files changed (1)
  1. README.md +42 -15
README.md CHANGED
@@ -79,30 +79,57 @@ Python

  import torch
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ from peft import PeftModel
+ from huggingface_hub import snapshot_download  # Import the downloader tool

- # Define the model ID and the reward model subfolder
- model_id = "nabeelshan/rlhf-gpt2-pipeline"
- subfolder = "reward_model_final"
+ # --- CONFIGURATION ---
+ BASE_MODEL_ID = "openai-community/gpt2"
+ HF_MODEL_ID = "nabeelshan/rlhf-gpt2-pipeline"
+ SUBFOLDER = "reward_model_final"
+
+ print(f"Downloading model files from '{HF_MODEL_ID}'...")
+ local_model_path = snapshot_download(
+     repo_id=HF_MODEL_ID,
+     allow_patterns=f"{SUBFOLDER}/*"
+ )
+ local_adapter_path = f"{local_model_path}/{SUBFOLDER}"
+ print(f"Successfully downloaded to: {local_adapter_path}")
+
+
+ print("Loading model from local path...")
+ tokenizer = AutoTokenizer.from_pretrained(local_adapter_path)
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token
+
+ base_model = AutoModelForSequenceClassification.from_pretrained(
+     BASE_MODEL_ID,
+     num_labels=1,
+     pad_token_id=tokenizer.pad_token_id
+ )
+
+ model = PeftModel.from_pretrained(base_model, local_adapter_path)
+ model.eval()
+ print("Model loaded successfully!")

- # Load the tokenizer and reward model
- tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder=subfolder)
- model = AutoModelForSequenceClassification.from_pretrained(model_id, subfolder=subfolder)

  prompt = "What diet should I follow to lose weight healthily?"
  good_response = "A balanced, nutritious plan based on eating whole foods is best. Limit processed and sugary foods."
  bad_response = "Just eat less lol."

- # Tokenize the inputs (prompt + response)
- inputs_good = tokenizer(prompt, good_response, return_tensors="pt")
- inputs_bad = tokenizer(prompt, bad_response, return_tensors="pt")
+ def get_reward_score(prompt_text: str, response_text: str) -> float:
+     """Tokenizes and calculates the reward score for a given prompt and response."""
+     inputs = tokenizer(prompt_text, response_text, return_tensors="pt", padding=True, truncation=True)
+     with torch.no_grad():
+         result = model(**inputs)
+     return result.logits[0].item()
+
+ score_good = get_reward_score(prompt, good_response)
+ score_bad = get_reward_score(prompt, bad_response)
+
+ print(f"\nScore for good response: {score_good:.2f}")
+ print(f"Score for bad response: {score_bad:.2f}")

- # Get the reward scores (logits)
- with torch.no_grad():
-     reward_good = model(**inputs_good).logits[0].item()
-     reward_bad = model(**inputs_bad).logits[0].item()

- print(f"Score for good response: {reward_good:.2f}")
- print(f"Score for bad response: {reward_bad:.2f}")

  # The model should give a higher score to the better response.
  # Expected: Score for good response: 2.15
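For readers who want to compare more than two answers, the sketch below reuses the `get_reward_score` helper, `tokenizer`, `model`, and `prompt` defined in the updated README snippet to rank a list of candidates; the extra candidate strings are illustrative placeholders, not outputs from this pipeline.

```python
# Rank several candidate responses for the same prompt with the reward model.
# Assumes tokenizer, model, prompt, and get_reward_score() from the README
# snippet above are already defined; the candidate texts are made-up examples.
candidates = [
    "A balanced, nutritious plan based on eating whole foods is best. Limit processed and sugary foods.",
    "Just eat less lol.",
    "Crash diets work fastest, so cut out entire food groups.",
]

# Score each candidate, then sort from highest to lowest reward.
ranked = sorted(
    ((get_reward_score(prompt, text), text) for text in candidates),
    key=lambda pair: pair[0],
    reverse=True,
)

for rank, (score, text) in enumerate(ranked, start=1):
    print(f"{rank}. score={score:+.2f} | {text}")
```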