bug: cannot run inference: Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model
#1 · opened by nekomeowww
2025-07-09 14:14:44.231707 [E:onnxruntime:, sequential_executor.cc:572 ExecuteKernel] Non-zero status code returned while running 3843266348432971732_CoreML_3843266348432971732_0 node. Name:'CoreMLExecutionProvider_3843266348432971732_CoreML_3843266348432971732_0_0' Status Message: Error executing model: Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model (error code: -1).
Error: Non-zero status code returned while running 3360655929800718712_CoreML_3360655929800718712_0 node. Name:'CoreMLExecutionProvider_3360655929800718712_CoreML_3360655929800718712_0_0' Status Message: Error executing model: Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model (error code: -1).
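To check whether the failure is specific to the CoreML execution provider, the same decoder can be built with no execution providers registered, so ORT falls back to the default CPU provider. A minimal sketch, using the `decoder_model_path` resolved in the loading code below (not verified here):

```rust
use ort::session::Session;

// Sketch: no execution providers are registered, so ORT uses the default CPU
// execution provider. If this session runs, the failure above is likely
// CoreML-specific rather than an invalid input or a broken export.
let cpu_only_decoder = Session::builder()?
    .commit_from_file(&decoder_model_path)?;
```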
Cannot run inference on this ONNX-exported model with ORT:
- Loading:
// Resolve model/config files from the local Hugging Face cache first,
// falling back to a download when a file is not cached yet.
let cache_api = hf_hub::Cache::from_env();
let cache_repo = cache_api.repo(hf_hub::Repo::with_revision(model_id.into(), hf_hub::RepoType::Model, revision.into()));
let api = hf_hub::api::sync::ApiBuilder::new().build()?;
let repo = api.repo(hf_hub::Repo::with_revision(model_id.into(), hf_hub::RepoType::Model, revision.into()));

let encoder_model_path_sub_name = "onnx/encoder_model.onnx";
let encoder_model_path = match cache_repo.get(encoder_model_path_sub_name) {
    Some(path) => path,
    None => repo.download(encoder_model_path_sub_name)?,
};

let decoder_model_path_sub_name = "onnx/decoder_model_merged.onnx";
let decoder_model_path = match cache_repo.get(decoder_model_path_sub_name) {
    Some(path) => path,
    None => repo.download(decoder_model_path_sub_name)?,
};

let config_path_sub_name = "config.json";
let config_path = match cache_repo.get(config_path_sub_name) {
    Some(path) => path,
    None => repo.download(config_path_sub_name)?,
};

let tokenizer_config_path_sub_name = "tokenizer.json";
let tokenizer_config_path = match cache_repo.get(tokenizer_config_path_sub_name) {
    Some(path) => path,
    None => repo.download(tokenizer_config_path_sub_name)?,
};

let encoder_session = Self::create_optimized_session(encoder_model_path)?;
let decoder_session = Self::create_optimized_session(decoder_model_path)?;
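`create_optimized_session` is not shown above; it presumably registers the CoreML execution provider, which is where the `CoreMLExecutionProvider_*` nodes named in the error come from. A rough sketch of what it does, assuming ort 2.x (the builder options and CoreML settings are placeholders, not the verbatim implementation):

```rust
use ort::execution_providers::CoreMLExecutionProvider;
use ort::session::{builder::GraphOptimizationLevel, Session};

// Rough sketch, assuming ort 2.x; exact module paths and options may differ
// between release candidates and from the actual implementation.
fn create_optimized_session(model_path: std::path::PathBuf) -> ort::Result<Session> {
    Session::builder()?
        // Registering CoreML lets ORT compile supported subgraphs into
        // CoreMLExecutionProvider_* nodes like the ones named in the error log.
        .with_execution_providers([CoreMLExecutionProvider::default().build()])?
        .with_optimization_level(GraphOptimizationLevel::Level3)?
        .commit_from_file(model_path)
}
```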
- Inference:
let mut decoder_input_ids = self.retrieve_init_tokens(gen_config)?;

let owned_input = input_features.to_owned();
let inputs = vec![("input_features", Value::from_array(owned_input)?)];
let encoder_outputs = self.encoder_session.run(inputs)?;
let encoder_hidden_states = encoder_outputs.get("last_hidden_state").unwrap();

let mut generated_tokens = Vec::new();

// KV Cache
// let num_decoder_layers = self.config.decoder_layers as usize;
// let head_dim = self.config.d_model / self.config.decoder_attention_heads;
// let mut past_key_values: Vec<Array4<f32>> = (0..num_decoder_layers * 2)
//     .map(|_| Array4::<f32>::zeros((1, self.config.decoder_attention_heads as usize, 0, head_dim as usize)))
//     .collect();

for _step in 0..gen_config.max_new_tokens {
    let decoder_input_ids_array = Array2::from_shape_vec((1, decoder_input_ids.len()), decoder_input_ids.clone())?.mapv(|x| x);

    // KV Cache
    // let mut decoder_inputs: Vec<(Cow<'_, str>, SessionInputValue<'_>)> = Vec::with_capacity(2 + past_key_values.len());
    let mut decoder_inputs: Vec<(Cow<'_, str>, SessionInputValue<'_>)> = Vec::with_capacity(2);

    // name = encoder_hidden_states, type = tensor: float32[batch_size,encoder_sequence_length / 2,1280]
    decoder_inputs.push(("encoder_hidden_states".into(), encoder_hidden_states.into()));
    // name = input_ids, type = tensor: int64[batch_size,decoder_sequence_length]
    decoder_inputs.push(("input_ids".into(), Value::from_array(decoder_input_ids_array)?.into()));
    // name = use_cache_branch, type = tensor: boolean[1]
    let use_cache_branch_tensor = ndarray::Array1::from(vec![false]);
    decoder_inputs.push(("use_cache_branch".into(), Value::from_array(use_cache_branch_tensor)?.into()));