Quick Start

Note

Before reading this tutorial, make sure the Ascend environment and ONNX Runtime have been set up by following the Installation Guide.

This tutorial uses a simple resnet50 model as an example to show how to perform model inference with ONNX Runtime on an Ascend NPU.

Environment Setup

Install the additional libraries required by this tutorial.

pip install numpy Pillow onnx

Model Preparation

ONNX Runtime takes a model in ONNX format as input. Currently, the main ways to obtain an ONNX model are:

  1. Download a model from the ONNX Model Zoo.

  2. Export an ONNX model from a framework such as torch or TensorFlow (a short export sketch follows this list).

  3. Use a conversion tool to convert a model of another format to ONNX.
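
As an illustration of the second route, the following is a minimal sketch of exporting resnet50 from torchvision to ONNX. The output file name and opset version here are illustrative assumptions; they are not the model used in this tutorial, which is downloaded below.

import torch
import torchvision

# Load a pretrained resnet50 and switch it to inference mode.
model = torchvision.models.resnet50(weights="IMAGENET1K_V1")
model.eval()

# Export with a dummy NCHW input; the file name and opset version are illustrative.
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model,
    dummy_input,
    "resnet50.onnx",
    input_names=["input"],
    output_names=["output"],
    opset_version=13,
)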

The resnet50 model used in this tutorial was downloaded directly from the ONNX Model Zoo; see the download link for details.

Class Labels

Class labels are used to convert the model's output scores into human-readable class information; see the download link for details.

Model Inference

Python inference example

import os

import numpy as np
import onnxruntime as ort
from PIL import Image

def preprocess(image_path):
    # Resize to 224x224, normalize with the ImageNet mean/std, and convert to NCHW float32.
    img = Image.open(image_path).convert('RGB')
    img = img.resize((224, 224))
    img = np.array(img).astype(np.float32)

    img = np.transpose(img, (2, 0, 1))
    img = img / 255.0
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)
    img = (img - mean) / std
    img = np.expand_dims(img, axis=0)
    return img

def inference(model_path, img):
    options = ort.SessionOptions()
    # Prefer the CANN execution provider (Ascend NPU) and fall back to CPU
    # for operators it does not support.
    providers = [
        (
            "CANNExecutionProvider",
            {
                "device_id": 0,
                "arena_extend_strategy": "kNextPowerOfTwo",
                "npu_mem_limit": 2 * 1024 * 1024 * 1024,
                "op_select_impl_mode": "high_performance",
                "optypelist_for_implmode": "Gelu",
                "enable_cann_graph": True
            },
        ),
        "CPUExecutionProvider",
    ]

    session = ort.InferenceSession(model_path, sess_options=options, providers=providers)
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name

    result = session.run([output_name], {input_name: img})
    return result

def display(classes_path, result):
    with open(classes_path) as f:
        labels = [line.strip() for line in f.readlines()]

    pred_idx = np.argmax(result)
    print(f'Predicted class: {labels[pred_idx]} ({result[0][0][pred_idx]:.4f})')

if __name__ == '__main__':
    # expanduser resolves the leading "~" to the user's home directory.
    model_path = os.path.expanduser('~/model/resnet/resnet50.onnx')
    image_path = os.path.expanduser('~/model/resnet/cat.jpg')
    classes_path = os.path.expanduser('~/model/resnet/imagenet_classes.txt')

    img = preprocess(image_path)
    result = inference(model_path, img)
    display(classes_path, result)
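
If CANNExecutionProvider is not present in the installed onnxruntime build, the session above silently falls back to CPUExecutionProvider. A minimal sketch for checking which providers the current installation offers:

import onnxruntime as ort

# Lists the execution providers compiled into the installed onnxruntime package;
# "CANNExecutionProvider" should appear on a correctly installed Ascend build.
print(ort.get_available_providers())

After creating an InferenceSession, session.get_providers() shows which providers were actually applied to that session.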

C++ inference example

#include <iostream>
#include <vector>

#include "onnxruntime_cxx_api.h"

// Path of the model. Change this to the user's own model path.
const char* model_path = "./onnx/resnet50_Opset16.onnx";

/**
 * @brief Input data preparation provided by the user.
 *
 * @param num_input_nodes The number of model input nodes.
 * @return A collection of input data.
 */
std::vector<std::vector<float>> input_prepare(size_t num_input_nodes) {
    std::vector<std::vector<float>> input_datas;
    input_datas.reserve(num_input_nodes);

    constexpr size_t input_data_size = 3 * 224 * 224;
    std::vector<float> input_data(input_data_size);
    // Initialize input data with values in [0.0, 1.0].
    for (unsigned int i = 0; i < input_data_size; i++)
        input_data[i] = (float)i / (input_data_size + 1);
    input_datas.push_back(input_data);

    return input_datas;
}

/**
 * @brief Model output data processing logic (to be adapted by the user).
 *
 * @param output_tensors The results of the model output.
 */
void output_postprocess(std::vector<Ort::Value>& output_tensors) {
    auto floatarr = output_tensors.front().GetTensorMutableData<float>();

    for (int i = 0; i < 5; i++) {
        std::cout << "Score for class [" << i << "] =  " << floatarr[i] << '\n';
    }

    std::cout << "Done!" << std::endl;
}

/**
 * @brief The main function for model inference.
 *
 * The complete model inference process, which generally does not need to be
 * changed here.
 */
void inference() {
    const auto& api = Ort::GetApi();
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING);

    // Enable cann graph in the cann provider option.
    OrtCANNProviderOptions* cann_options = nullptr;
    api.CreateCANNProviderOptions(&cann_options);

    // Configuration of the CANN execution provider.
    std::vector<const char*> keys{
        "device_id",
        "npu_mem_limit",
        "arena_extend_strategy",
        "enable_cann_graph"};
    std::vector<const char*> values{"0", "4294967296", "kNextPowerOfTwo", "1"};
    api.UpdateCANNProviderOptions(
        cann_options, keys.data(), values.data(), keys.size());

    // Append the CANN provider to the general session options.
    Ort::SessionOptions session_options;
    api.SessionOptionsAppendExecutionProvider_CANN(
        static_cast<OrtSessionOptions*>(session_options), cann_options);

    Ort::Session session(env, model_path, session_options);

    Ort::AllocatorWithDefaultOptions allocator;

    // Input processing: collect input names and shapes.
    const size_t num_input_nodes = session.GetInputCount();
    std::vector<const char*> input_node_names;
    std::vector<Ort::AllocatedStringPtr> input_names_ptr;
    input_node_names.reserve(num_input_nodes);
    input_names_ptr.reserve(num_input_nodes);
    std::vector<std::vector<int64_t>> input_node_shapes;
    std::cout << "Number of model inputs: " << num_input_nodes << std::endl;
    for (size_t i = 0; i < num_input_nodes; i++) {
        auto input_name = session.GetInputNameAllocated(i, allocator);
        input_node_names.push_back(input_name.get());
        input_names_ptr.push_back(std::move(input_name));
        auto type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        input_node_shapes.push_back(tensor_info.GetShape());
    }

    // Output processing: collect output names.
    const size_t num_output_nodes = session.GetOutputCount();
    std::vector<const char*> output_node_names;
    std::vector<Ort::AllocatedStringPtr> output_names_ptr;
    output_names_ptr.reserve(num_output_nodes);
    output_node_names.reserve(num_output_nodes);
    for (size_t i = 0; i < num_output_nodes; i++) {
        auto output_name = session.GetOutputNameAllocated(i, allocator);
        output_node_names.push_back(output_name.get());
        output_names_ptr.push_back(std::move(output_name));
    }

    // The user needs to generate input data according to the real situation.
    std::vector<std::vector<float>> input_datas = input_prepare(num_input_nodes);

    auto memory_info = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault);

    // Wrap the prepared host buffers as ORT input tensors.
    std::vector<Ort::Value> input_tensors;
    input_tensors.reserve(num_input_nodes);
    for (size_t i = 0; i < input_node_shapes.size(); i++) {
        auto input_tensor = Ort::Value::CreateTensor<float>(
            memory_info,
            input_datas[i].data(),
            input_datas[i].size(),
            input_node_shapes[i].data(),
            input_node_shapes[i].size());
        input_tensors.push_back(std::move(input_tensor));
    }

    auto output_tensors = session.Run(
        Ort::RunOptions{nullptr},
        input_node_names.data(),
        input_tensors.data(),
        num_input_nodes,
        output_node_names.data(),
        output_node_names.size());

    // Processing of the output tensors.
    output_postprocess(output_tensors);
}

int main(int argc, char* argv[]) {
    inference();
    return 0;
}