oelachqar committed f19f7fc (verified) · 1 parent: bae94b8

Upload folder using huggingface_hub

modeling_molmo.py CHANGED
@@ -2013,6 +2013,7 @@ class MolmoForCausalLM(PreTrainedModel):
     config_class = MolmoConfig
     base_model_prefix = "model"
     _no_split_modules = ["MolmoBlock"]
+    _tp_plan = {}
 
     def __init__(self, config: MolmoConfig, model: Optional[Molmo] = None, init_params: bool = False):
         super().__init__(config)
@@ -2150,6 +2151,8 @@ class MolmoForCausalLM(PreTrainedModel):
         # Shift so that tokens < n predict n
         shift_logits = logits[..., :-1, :].contiguous()
         shift_labels = labels[..., 1:].contiguous()
+        # Ignore image tokens
+        shift_labels = torch.where(shift_labels >= 152064, torch.tensor(-100, device=shift_labels.device), shift_labels)
         # Flatten the tokens
         loss_fct = torch.nn.CrossEntropyLoss()
         shift_logits = shift_logits.view(-1, self.config.embedding_size)
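Context for the second hunk: `torch.nn.CrossEntropyLoss` skips targets equal to `-100` by default (its `ignore_index`), so remapping label ids at or above 152064 drops the image tokens from the training loss. A minimal runnable sketch; the vocabulary size of 152128 and the sample token ids are illustrative stand-ins, with 152064 taken from the diff as the start of the image-token range above the text vocabulary:

```python
import torch

VOCAB = 152128  # stand-in for config.embedding_size (an assumption)
logits = torch.randn(2, 5, VOCAB)
labels = torch.tensor([[1, 2, 152100, 3, 4],      # 152100: stand-in image token id
                       [5, 152070, 6, 7, 8]])

shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()

# Remap image-token labels to -100, the default ignore_index of
# CrossEntropyLoss, so they contribute nothing to the loss.
shift_labels = torch.where(shift_labels >= 152064,
                           torch.tensor(-100, device=shift_labels.device),
                           shift_labels)

loss = torch.nn.CrossEntropyLoss()(shift_logits.view(-1, VOCAB),
                                    shift_labels.view(-1))
```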
preprocessing_molmo.py CHANGED
@@ -183,10 +183,16 @@ class MolmoProcessor(ProcessorMixin):
         image_input_idx = out["image_input_idx"]
         out["image_input_idx"] = np.where(image_input_idx < 0, image_input_idx, image_input_idx + 1)
 
+        # Add attention mask for training
+        out["attention_mask"] = np.ones_like(decoder_input_tokens)
+
         for k, v in out.items():
             out[k] = torch.from_numpy(v)
 
         return out
 
+    def __call__(self, *args, **kwargs):
+        return self.process(*args, **kwargs)
+
 
 MolmoProcessor.register_for_auto_class()
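With `__call__` delegating to `process`, the processor can now be invoked like a standard Hugging Face processor, and the new `attention_mask` rides along in the outputs. A usage sketch; the repo id and image path are placeholders, and `process` is assumed to accept `images`/`text` keywords as in the upstream Molmo processors:

```python
from PIL import Image
from transformers import AutoProcessor

# Placeholder repo id; substitute the actual checkpoint.
processor = AutoProcessor.from_pretrained("allenai/Molmo-7B-D-0924",
                                          trust_remote_code=True)

# Previously only processor.process(...) worked; __call__ now forwards to it.
inputs = processor(images=[Image.open("example.jpg")],
                   text="Describe this image.")
print(inputs["attention_mask"])  # all-ones mask added for training
```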
tokenizer_config.json CHANGED
@@ -229,7 +229,7 @@
     "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
   },
   "bos_token": "<|endoftext|>",
-  "chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
+  "chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'].lower() != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "model_max_length": 8192,