Upload folder using huggingface_hub
Browse files
- modeling_molmo.py +3 -0
- preprocessing_molmo.py +6 -0
- tokenizer_config.json +1 -1
modeling_molmo.py
CHANGED
|
@@ -2013,6 +2013,7 @@ class MolmoForCausalLM(PreTrainedModel):
|
|
| 2013 |
config_class = MolmoConfig
|
| 2014 |
base_model_prefix = "model"
|
| 2015 |
_no_split_modules = ["MolmoBlock"]
|
|
|
|
| 2016 |
|
| 2017 |
def __init__(self, config: MolmoConfig, model: Optional[Molmo] = None, init_params: bool = False):
|
| 2018 |
super().__init__(config)
|
|
@@ -2150,6 +2151,8 @@ class MolmoForCausalLM(PreTrainedModel):
|
|
| 2150 |
# Shift so that tokens < n predict n
|
| 2151 |
shift_logits = logits[..., :-1, :].contiguous()
|
| 2152 |
shift_labels = labels[..., 1:].contiguous()
|
|
|
|
|
|
|
| 2153 |
# Flatten the tokens
|
| 2154 |
loss_fct = torch.nn.CrossEntropyLoss()
|
| 2155 |
shift_logits = shift_logits.view(-1, self.config.embedding_size)
|
|
|
|
| 2013 |
config_class = MolmoConfig
|
| 2014 |
base_model_prefix = "model"
|
| 2015 |
_no_split_modules = ["MolmoBlock"]
|
| 2016 |
+
_tp_plan = {}
|
| 2017 |
|
| 2018 |
def __init__(self, config: MolmoConfig, model: Optional[Molmo] = None, init_params: bool = False):
|
| 2019 |
super().__init__(config)
|
|
|
|
| 2151 |
# Shift so that tokens < n predict n
|
| 2152 |
shift_logits = logits[..., :-1, :].contiguous()
|
| 2153 |
shift_labels = labels[..., 1:].contiguous()
|
| 2154 |
+
# Ignore image tokens
|
| 2155 |
+
shift_labels = torch.where(shift_labels >= 152064, torch.tensor(-100, device=shift_labels.device), shift_labels)
|
| 2156 |
# Flatten the tokens
|
| 2157 |
loss_fct = torch.nn.CrossEntropyLoss()
|
| 2158 |
shift_logits = shift_logits.view(-1, self.config.embedding_size)
|
preprocessing_molmo.py
CHANGED
|
@@ -183,10 +183,16 @@ class MolmoProcessor(ProcessorMixin):
|
|
| 183 |
image_input_idx = out["image_input_idx"]
|
| 184 |
out["image_input_idx"] = np.where(image_input_idx < 0, image_input_idx, image_input_idx + 1)
|
| 185 |
|
|
|
|
|
|
|
|
|
|
| 186 |
for k, v in out.items():
|
| 187 |
out[k] = torch.from_numpy(v)
|
| 188 |
|
| 189 |
return out
|
| 190 |
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
MolmoProcessor.register_for_auto_class()
|
|
|
|
| 183 |
image_input_idx = out["image_input_idx"]
|
| 184 |
out["image_input_idx"] = np.where(image_input_idx < 0, image_input_idx, image_input_idx + 1)
|
| 185 |
|
| 186 |
+
# Add attention mask for training
|
| 187 |
+
out["attention_mask"] = np.ones_like(decoder_input_tokens)
|
| 188 |
+
|
| 189 |
for k, v in out.items():
|
| 190 |
out[k] = torch.from_numpy(v)
|
| 191 |
|
| 192 |
return out
|
| 193 |
|
| 194 |
+
def __call__(self, *args, **kwargs):
|
| 195 |
+
return self.process(*args, **kwargs)
|
| 196 |
+
|
| 197 |
|
| 198 |
MolmoProcessor.register_for_auto_class()
|
tokenizer_config.json
CHANGED
|
@@ -229,7 +229,7 @@
|
|
| 229 |
"AutoProcessor": "preprocessing_molmo.MolmoProcessor"
|
| 230 |
},
|
| 231 |
"bos_token": "<|endoftext|>",
|
| 232 |
-
"chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
|
| 233 |
"clean_up_tokenization_spaces": false,
|
| 234 |
"eos_token": "<|endoftext|>",
|
| 235 |
"model_max_length": 8192,
|
|
|
|
| 229 |
"AutoProcessor": "preprocessing_molmo.MolmoProcessor"
|
| 230 |
},
|
| 231 |
"bos_token": "<|endoftext|>",
|
| 232 |
+
"chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'].lower() != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
|
| 233 |
"clean_up_tokenization_spaces": false,
|
| 234 |
"eos_token": "<|endoftext|>",
|
| 235 |
"model_max_length": 8192,
|