bug: cannot run inference: Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model
#1 · opened by nekomeowww
2025-07-09 14:14:44.231707 [E:onnxruntime:, sequential_executor.cc:572 ExecuteKernel] Non-zero status code returned while running 3843266348432971732_CoreML_3843266348432971732_0 node. Name:'CoreMLExecutionProvider_3843266348432971732_CoreML_3843266348432971732_0_0' Status Message: Error executing model: Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model (error code: -1).
Error: Non-zero status code returned while running 3360655929800718712_CoreML_3360655929800718712_0 node. Name:'CoreMLExecutionProvider_3360655929800718712_CoreML_3360655929800718712_0_0' Status Message: Error executing model: Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model (error code: -1).
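To check whether the failure is specific to the CoreML execution provider, the same decoder can be built with no execution providers registered, so ORT falls back to the default CPU provider. A minimal sketch, using the `decoder_model_path` resolved in the loading code below (not verified here):

```rust
use ort::session::Session;

// Sketch: no execution providers are registered, so ORT uses the default CPU
// execution provider. If this session runs, the failure above is likely
// CoreML-specific rather than an invalid input or a broken export.
let cpu_only_decoder = Session::builder()?
    .commit_from_file(&decoder_model_path)?;
```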
Cannot run inference on this ONNX-exported model with ORT:
- Loading:
// Resolve model/config files from the local Hugging Face cache first,
// falling back to a download when a file is not cached yet.
let cache_api = hf_hub::Cache::from_env();
let cache_repo = cache_api.repo(hf_hub::Repo::with_revision(model_id.into(), hf_hub::RepoType::Model, revision.into()));
let api = hf_hub::api::sync::ApiBuilder::new().build()?;
let repo = api.repo(hf_hub::Repo::with_revision(model_id.into(), hf_hub::RepoType::Model, revision.into()));

let encoder_model_path_sub_name = "onnx/encoder_model.onnx";
let encoder_model_path = match cache_repo.get(encoder_model_path_sub_name) {
    Some(path) => path,
    None => repo.download(encoder_model_path_sub_name)?,
};

let decoder_model_path_sub_name = "onnx/decoder_model_merged.onnx";
let decoder_model_path = match cache_repo.get(decoder_model_path_sub_name) {
    Some(path) => path,
    None => repo.download(decoder_model_path_sub_name)?,
};

let config_path_sub_name = "config.json";
let config_path = match cache_repo.get(config_path_sub_name) {
    Some(path) => path,
    None => repo.download(config_path_sub_name)?,
};

let tokenizer_config_path_sub_name = "tokenizer.json";
let tokenizer_config_path = match cache_repo.get(tokenizer_config_path_sub_name) {
    Some(path) => path,
    None => repo.download(tokenizer_config_path_sub_name)?,
};

let encoder_session = Self::create_optimized_session(encoder_model_path)?;
let decoder_session = Self::create_optimized_session(decoder_model_path)?;
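`create_optimized_session` is not shown above; it presumably registers the CoreML execution provider, which is where the `CoreMLExecutionProvider_*` nodes named in the error come from. A rough sketch of what it does, assuming ort 2.x (the builder options and CoreML settings are placeholders, not the verbatim implementation):

```rust
use ort::execution_providers::CoreMLExecutionProvider;
use ort::session::{builder::GraphOptimizationLevel, Session};

// Rough sketch, assuming ort 2.x; exact module paths and options may differ
// between release candidates and from the actual implementation.
fn create_optimized_session(model_path: std::path::PathBuf) -> ort::Result<Session> {
    Session::builder()?
        // Registering CoreML lets ORT compile supported subgraphs into
        // CoreMLExecutionProvider_* nodes like the ones named in the error log.
        .with_execution_providers([CoreMLExecutionProvider::default().build()])?
        .with_optimization_level(GraphOptimizationLevel::Level3)?
        .commit_from_file(model_path)
}
```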
- Inference:
let mut decoder_input_ids = self.retrieve_init_tokens(gen_config)?;

let owned_input = input_features.to_owned();
let inputs = vec![("input_features", Value::from_array(owned_input)?)];
let encoder_outputs = self.encoder_session.run(inputs)?;
let encoder_hidden_states = encoder_outputs.get("last_hidden_state").unwrap();

let mut generated_tokens = Vec::new();

// KV Cache
// let num_decoder_layers = self.config.decoder_layers as usize;
// let head_dim = self.config.d_model / self.config.decoder_attention_heads;
// let mut past_key_values: Vec<Array4<f32>> = (0..num_decoder_layers * 2)
//     .map(|_| Array4::<f32>::zeros((1, self.config.decoder_attention_heads as usize, 0, head_dim as usize)))
//     .collect();

for _step in 0..gen_config.max_new_tokens {
    let decoder_input_ids_array = Array2::from_shape_vec((1, decoder_input_ids.len()), decoder_input_ids.clone())?.mapv(|x| x);

    // KV Cache
    // let mut decoder_inputs: Vec<(Cow<'_, str>, SessionInputValue<'_>)> = Vec::with_capacity(2 + past_key_values.len());
    let mut decoder_inputs: Vec<(Cow<'_, str>, SessionInputValue<'_>)> = Vec::with_capacity(2);

    // name = encoder_hidden_states, type = tensor: float32[batch_size,encoder_sequence_length / 2,1280]
    decoder_inputs.push(("encoder_hidden_states".into(), encoder_hidden_states.into()));
    // name = input_ids, type = tensor: int64[batch_size,decoder_sequence_length]
    decoder_inputs.push(("input_ids".into(), Value::from_array(decoder_input_ids_array)?.into()));
    // name = use_cache_branch, type = tensor: boolean[1]
    let use_cache_branch_tensor = ndarray::Array1::from(vec![false]);
    decoder_inputs.push(("use_cache_branch".into(), Value::from_array(use_cache_branch_tensor)?.into()));