### Training Code
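This script fine-tunes `stanfordnlp/SteamSHP-flan-t5-xl` on `CarperAI/openai_summarize_comparisons` as a pairwise reward model: each comparison is rendered as a single prompt showing both summaries in random order, and the model learns to generate the letter ("A" or "B") of the human-preferred one.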
```python
from torch.utils.data import Dataset
from datasets import load_dataset
from transformers import (
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
)
import evaluate
import numpy as np
import wandb

MAX_LENGTH_INPUT = 512 + 128
MAX_LENGTH_OUTPUT = 2


class Seq2SeqDataset(Dataset):
    """openai_summarize_comparisons pairs formatted as an A/B preference task."""

    def __init__(self, tokenizer, type_data="train"):
        # Set up the dataset: each comparison becomes one prompt that shows both
        # summaries in random order; the target is the letter of the chosen one.
        data_path = "CarperAI/openai_summarize_comparisons"
        if type_data == "train":
            dataset = load_dataset(data_path, split="train")
        else:
            dataset = load_dataset(data_path, split="test").select(range(20000))
        self.prompts = []
        self.outputs = []
        inputs = dataset["prompt"]
        chosen = dataset["chosen"]
        rejected = dataset["rejected"]
        for inp, ch, rej in zip(inputs, chosen, rejected):
            # Randomize which response is shown first so the model cannot learn
            # a positional shortcut.
            choice_first = np.random.choice([ch, rej])
            res = "A" if choice_first == ch else "B"
            choice_second = ch if choice_first == rej else rej
            prompt = (
                f"POST: {inp}\n\nRESPONSE A: {choice_first}\n\n"
                f"RESPONSE B: {choice_second}\n\nWhich response is better? RESPONSE"
            )
            self.prompts.append(prompt)
            self.outputs.append(res)
        print("Example prompt: ", self.prompts[0])
        print("Example output: ", self.outputs[0])
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.prompts)

    def __getitem__(self, idx):
        input_text = self.prompts[idx]
        output_text = self.outputs[idx]

        model_input = self.tokenizer(
            input_text,
            max_length=MAX_LENGTH_INPUT,
            padding="max_length",
            truncation=True,
        )
        with self.tokenizer.as_target_tokenizer():
            labels = self.tokenizer(
                output_text,
                max_length=MAX_LENGTH_OUTPUT,
                padding="max_length",
                truncation=True,
            )["input_ids"]
        # Mask label padding with -100 so it is ignored by the loss.
        model_input["labels"] = [
            -100 if token == self.tokenizer.pad_token_id else token for token in labels
        ]
        return model_input


wandb.init(name="stanfordnlp/SteamSHP-flan-t5-xl", project="trlx", entity="pvduy")


if __name__ == "__main__":
    config = {
        "logging_steps": 100,
        "eval_steps": 100,
        "save_steps": 500,
        "batch_size": 4,
        "batch_size_val": 4,
        "warmup_steps": 100,
        "accum_steps": 2,
        "num_beams": 3,
        "output_dir": "flan-t5-rm",
    }

    accuracy_metric = evaluate.load("accuracy")

    def compute_metrics(pred):
        labels_ids = pred.label_ids
        pred_ids = pred.predictions
        # Restore the pad token where labels were masked with -100 before decoding.
        labels_ids[labels_ids == -100] = tokenizer.pad_token_id
        pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
        labels_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)
        acc = sum(np.array(labels_str) == np.array(pred_str)) / len(labels_str)
        return {"accuracy": acc}

    training_args = Seq2SeqTrainingArguments(
        output_dir=config["output_dir"],
        do_train=True,
        num_train_epochs=5,
        do_eval=False,
        predict_with_generate=True,
        adam_beta1=0.9,
        adam_beta2=0.999,
        learning_rate=5e-5,
        bf16=True,
        per_device_train_batch_size=config["batch_size"],
        per_device_eval_batch_size=config["batch_size_val"],
        logging_steps=config["logging_steps"],
        evaluation_strategy="epoch",
        warmup_steps=config["warmup_steps"],
        eval_accumulation_steps=1,
        lr_scheduler_type="linear",
        save_strategy="epoch",
        gradient_accumulation_steps=config["accum_steps"],
        deepspeed="configs/ds_configs/ds_config_gpt_2.json",
    )

    tokenizer = AutoTokenizer.from_pretrained("stanfordnlp/SteamSHP-flan-t5-xl")
    model = AutoModelForSeq2SeqLM.from_pretrained("stanfordnlp/SteamSHP-flan-t5-xl")

    train_dataset = Seq2SeqDataset(tokenizer, type_data="train")
    val_dataset = Seq2SeqDataset(tokenizer, type_data="val")
    print("Train dataset size: ", len(train_dataset))
    print("Val dataset size: ", len(val_dataset))

    params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Number of trainable parameters: {params}")

    trainer = Seq2SeqTrainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()
```
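The `deepspeed` argument points at a ZeRO config file (the `configs/ds_configs/ds_config_gpt_2.json` path is kept from the original script), so the script is intended to be run through the DeepSpeed launcher, e.g. `deepspeed train_rm.py`, where the script filename here is only a placeholder.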
### Inference Code
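The evaluation below always places the chosen summary as RESPONSE A and the rejected one as RESPONSE B, so accuracy is simply the fraction of test examples for which the model generates "A".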
```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
import numpy as np
import torch
from tqdm import tqdm

dataset = load_dataset("CarperAI/openai_summarize_comparisons", split="test")

tokenizer = AutoTokenizer.from_pretrained("flan-t5-rm/checkpoint-4338/")
model = AutoModelForSeq2SeqLM.from_pretrained("flan-t5-rm/checkpoint-4338/")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

df = dataset.to_pandas()
predictions = []
for _, row in tqdm(df.iterrows(), total=len(df)):
    # The chosen summary is always RESPONSE A, so "A" is the correct answer.
    prompt = (
        f"POST: {row['prompt']}\n\nRESPONSE A: {row['chosen']}\n\n"
        f"RESPONSE B: {row['rejected']}\n\nWhich response is better? RESPONSE"
    )
    x = tokenizer([prompt], return_tensors="pt").input_ids.to(device)
    y = model.generate(x, max_new_tokens=1)
    predictions.append(tokenizer.batch_decode(y, skip_special_tokens=True)[0])

print("Accuracy: ", sum(np.array(predictions) == "A") / len(predictions))
```
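
Because the model emits a single letter, it can also be used as a scalar reward by comparing the probabilities of "A" and "B" at the first generated position instead of taking a hard greedy decision. The snippet below is not part of the original script; it is a minimal sketch that reuses the checkpoint path from the inference code above, and the `preference_score` function name is just for illustration.

```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint path reused from the inference snippet above; substitute your own.
ckpt = "flan-t5-rm/checkpoint-4338/"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSeq2SeqLM.from_pretrained(ckpt).eval()

# Vocabulary ids of the single target token for each label ("A" / "B").
ID_A = tokenizer("A", add_special_tokens=False).input_ids[0]
ID_B = tokenizer("B", add_special_tokens=False).input_ids[0]

def preference_score(post: str, response_a: str, response_b: str) -> float:
    """Probability that the model prefers response A over response B."""
    prompt = (
        f"POST: {post}\n\nRESPONSE A: {response_a}\n\n"
        f"RESPONSE B: {response_b}\n\nWhich response is better? RESPONSE"
    )
    x = tokenizer([prompt], return_tensors="pt").input_ids
    with torch.no_grad():
        out = model.generate(
            x, max_new_tokens=1, return_dict_in_generate=True, output_scores=True
        )
    # Logits over the vocabulary for the single generated token.
    logits = out.scores[0][0]
    probs = torch.softmax(logits[[ID_A, ID_B]], dim=-1)
    return probs[0].item()
```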