Quick Start
Note
Before reading this tutorial, make sure you have prepared the Ascend environment and ONNX Runtime by following the Installation Guide.
This tutorial takes a simple resnet50 model as an example to show how to run model inference with ONNX Runtime on an Ascend NPU.
Environment Setup
Install the additional libraries required by this tutorial.
pip install numpy Pillow onnx
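To quickly verify that the Ascend build of ONNX Runtime is usable, you can check whether the CANN execution provider is registered. This is an optional sketch; the exact provider list depends on how ONNX Runtime was installed.

import onnxruntime as ort

# 'CANNExecutionProvider' should appear in this list if the Ascend-enabled
# build of ONNX Runtime is installed; otherwise only CPU will be listed.
print(ort.get_available_providers())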
Model Preparation
ONNX Runtime takes a model in the ONNX format as input. There are currently several common ways to obtain an ONNX model:
Download a model from the ONNX Model Zoo.
Export an ONNX model from a framework such as torch or TensorFlow (a minimal export sketch is shown below).
Convert a model in another format to ONNX with a conversion tool.
The resnet50 model used in this tutorial is downloaded directly from the ONNX Model Zoo; see the download link.
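If you would rather export the model yourself than download it, the sketch below shows one way to export resnet50 from torchvision to ONNX. It assumes torch and torchvision are installed; the weights name, output file name, and opset version here are illustrative and can be adjusted.

import torch
import torchvision

# Load a pretrained resnet50 and switch it to inference mode
model = torchvision.models.resnet50(weights="IMAGENET1K_V1")
model.eval()

# Export with a fixed 1x3x224x224 input; adjust names and opset as needed
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy_input, "resnet50.onnx",
                  input_names=["input"], output_names=["output"],
                  opset_version=13)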
Class Labels
The class label file is used to convert the output scores into human-readable class names; see the download link.
Model Inference
Python inference example
import os

import onnxruntime as ort
import numpy as np
import onnx
from PIL import Image

def preprocess(image_path):
    # Load the image, force RGB and resize to the 224x224 input expected by resnet50
    img = Image.open(image_path).convert('RGB')
    img = img.resize((224, 224))
    img = np.array(img).astype(np.float32)

    # HWC -> CHW, scale to [0, 1] and normalize with the ImageNet mean/std
    img = np.transpose(img, (2, 0, 1))
    img = img / 255.0
    mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
    img = (img - mean) / std
    # Add the batch dimension and cast back to float32, the dtype the model expects
    img = np.expand_dims(img, axis=0).astype(np.float32)
    return img

def inference(model_path, img):
    options = ort.SessionOptions()
    # Prefer the CANN execution provider (Ascend NPU) and fall back to CPU if needed
    providers = [
        (
            "CANNExecutionProvider",
            {
                "device_id": 0,
                "arena_extend_strategy": "kNextPowerOfTwo",
                "npu_mem_limit": 2 * 1024 * 1024 * 1024,  # NPU memory limit in bytes
                "op_select_impl_mode": "high_performance",
                "optypelist_for_implmode": "Gelu",
                "enable_cann_graph": True
            },
        ),
        "CPUExecutionProvider",
    ]

    session = ort.InferenceSession(model_path, sess_options=options, providers=providers)
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name

    result = session.run([output_name], {input_name: img})
    return result

def display(classes_path, result):
    with open(classes_path) as f:
        labels = [line.strip() for line in f.readlines()]

    # result is a list holding one array of shape (1, num_classes)
    pred_idx = np.argmax(result[0][0])
    print(f'Predicted class: {labels[pred_idx]} ({result[0][0][pred_idx]:.4f})')

if __name__ == '__main__':
    # Replace these paths with your own model, image and label files
    model_path = os.path.expanduser('~/model/resnet/resnet50.onnx')
    image_path = os.path.expanduser('~/model/resnet/cat.jpg')
    classes_path = os.path.expanduser('~/model/resnet/imagenet_classes.txt')

    img = preprocess(image_path)
    result = inference(model_path, img)
    display(classes_path, result)
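If you want to confirm that the session created above actually runs on the NPU instead of silently falling back to CPU, you can inspect the providers bound to the session. This is a small optional check using the session object from the inference function above.

# Inside inference(), after the InferenceSession is created:
print(session.get_providers())
# 'CANNExecutionProvider' should be listed first; if only
# 'CPUExecutionProvider' appears, the CANN provider was not loaded.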
C++ inference example
#include <iostream>
#include <vector>

#include "onnxruntime_cxx_api.h"

// Path of the model; change this to the user's own model path
const char* model_path = "./onnx/resnet50_Opset16.onnx";

/**
 * @brief Input data preparation provided by the user.
 *
 * @param num_input_nodes The number of model input nodes.
 * @return A collection of input data.
 */
std::vector<std::vector<float>> input_prepare(size_t num_input_nodes) {
  std::vector<std::vector<float>> input_datas;
  input_datas.reserve(num_input_nodes);

  constexpr size_t input_data_size = 3 * 224 * 224;
  std::vector<float> input_data(input_data_size);
  // Initialize the input data with values in [0.0, 1.0]
  for (unsigned int i = 0; i < input_data_size; i++)
    input_data[i] = (float)i / (input_data_size + 1);
  input_datas.push_back(input_data);

  return input_datas;
}

/**
 * @brief Model output data processing logic (to be updated by the user).
 *
 * @param output_tensors The results of the model output.
 */
void output_postprocess(std::vector<Ort::Value>& output_tensors) {
  auto floatarr = output_tensors.front().GetTensorMutableData<float>();

  for (int i = 0; i < 5; i++) {
    std::cout << "Score for class [" << i << "] = " << floatarr[i] << '\n';
  }

  std::cout << "Done!" << std::endl;
}

/**
 * @brief The main function for model inference.
 *
 * The complete model inference flow, which generally does not need to be
 * changed here.
 */
void inference() {
  const auto& api = Ort::GetApi();
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING);

  // Enable cann graph in the cann provider option.
  OrtCANNProviderOptions* cann_options = nullptr;
  Ort::ThrowOnError(api.CreateCANNProviderOptions(&cann_options));

  // Configurations of the CANN execution provider
  std::vector<const char*> keys{
      "device_id",
      "npu_mem_limit",
      "arena_extend_strategy",
      "enable_cann_graph"};
  std::vector<const char*> values{"0", "4294967296", "kNextPowerOfTwo", "1"};
  Ort::ThrowOnError(api.UpdateCANNProviderOptions(
      cann_options, keys.data(), values.data(), keys.size()));

  // Convert to general session options
  Ort::SessionOptions session_options;
  Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_CANN(
      static_cast<OrtSessionOptions*>(session_options), cann_options));

  Ort::Session session(env, model_path, session_options);

  Ort::AllocatorWithDefaultOptions allocator;

  // Input Process
  const size_t num_input_nodes = session.GetInputCount();
  std::vector<const char*> input_node_names;
  std::vector<Ort::AllocatedStringPtr> input_names_ptr;
  input_node_names.reserve(num_input_nodes);
  input_names_ptr.reserve(num_input_nodes);
  std::vector<std::vector<int64_t>> input_node_shapes;
  std::cout << "Number of input nodes: " << num_input_nodes << std::endl;
  for (size_t i = 0; i < num_input_nodes; i++) {
    auto input_name = session.GetInputNameAllocated(i, allocator);
    input_node_names.push_back(input_name.get());
    input_names_ptr.push_back(std::move(input_name));
    auto type_info = session.GetInputTypeInfo(i);
    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
    input_node_shapes.push_back(tensor_info.GetShape());
  }

  // Output Process
  const size_t num_output_nodes = session.GetOutputCount();
  std::vector<const char*> output_node_names;
  std::vector<Ort::AllocatedStringPtr> output_names_ptr;
  output_names_ptr.reserve(num_output_nodes);
  output_node_names.reserve(num_output_nodes);
  for (size_t i = 0; i < num_output_nodes; i++) {
    auto output_name = session.GetOutputNameAllocated(i, allocator);
    output_node_names.push_back(output_name.get());
    output_names_ptr.push_back(std::move(output_name));
  }

  // The user needs to generate input data according to the real situation.
  std::vector<std::vector<float>> input_datas = input_prepare(num_input_nodes);

  auto memory_info = Ort::MemoryInfo::CreateCpu(
      OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault);

  std::vector<Ort::Value> input_tensors;
  input_tensors.reserve(num_input_nodes);
  for (size_t i = 0; i < input_node_shapes.size(); i++) {
    auto input_tensor = Ort::Value::CreateTensor<float>(
        memory_info,
        input_datas[i].data(),
        input_datas[i].size(),
        input_node_shapes[i].data(),
        input_node_shapes[i].size());
    input_tensors.push_back(std::move(input_tensor));
  }

  auto output_tensors = session.Run(
      Ort::RunOptions{nullptr},
      input_node_names.data(),
      input_tensors.data(),
      num_input_nodes,
      output_node_names.data(),
      output_node_names.size());

  // Processing of the output tensors
  output_postprocess(output_tensors);

  // Release the CANN provider options created above
  api.ReleaseCANNProviderOptions(cann_options);
}

int main(int argc, char* argv[]) {
  inference();
  return 0;
}