Pull request #98: "accelerate tokenizer" — opened by lugim.
Files changed: tokenization_chatglm.py (+3, −0).
tokenization_chatglm.py
CHANGED
|
@@ -439,5 +439,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 439 |
encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
|
| 440 |
pad_width=[(0, 0), (difference, 0)])
|
| 441 |
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
|
|
|
|
|
|
|
|
|
|
| 442 |
|
| 443 |
return encoded_inputs
|
|
|
|
| 439 |
encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
|
| 440 |
pad_width=[(0, 0), (difference, 0)])
|
| 441 |
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
|
| 442 |
+
encoded_inputs["attention_mask"] = encoded_inputs["attention_mask"].tolist()
|
| 443 |
+
encoded_inputs["position_ids"] = encoded_inputs["position_ids"].tolist()
|
| 444 |
+
|
| 445 |
|
| 446 |
return encoded_inputs
|