Add YOLOv8 pose estimation support to ONNXRuntime C++ example (#23786)

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
chendao12138 2026-03-05 23:27:10 +08:00 committed by GitHub
parent 7bd3b2ce19
commit 1ab234ed14
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 244 additions and 20 deletions

View file

@ -59,7 +59,7 @@ char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oIm
switch (modelType)
{
case YOLO_DETECT_V8:
case YOLO_POSE:
case YOLO_POSE_V8:
case YOLO_DETECT_V8_HALF:
case YOLO_POSE_V8_HALF://LetterBox
{
@ -82,7 +82,7 @@ char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oIm
int m = min(h, w);
int top = (h - m) / 2;
int left = (w - m) / 2;
cv::resize(oImg(cv::Rect(left, top, m, m)), oImg, cv::Size(iImgSize.at(1), iImgSize.at(0)));
cv::resize(oImg(cv::Rect(left, top, m, m)), oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
break;
}
}
@ -90,8 +90,8 @@ char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oIm
}
char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
char* Ret = RET_OK;
const char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
const char* Ret = RET_OK;
std::regex pattern("[\u4e00-\u9fa5]");
bool result = std::regex_search(iParams.modelPath, pattern);
if (result)
@ -103,6 +103,7 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
try
{
rectConfidenceThreshold = iParams.rectConfidenceThreshold;
pointScoresThreshold = iParams.pointScoresThreshold;//change
iouThreshold = iParams.iouThreshold;
imgSize = iParams.imgSize;
modelType = iParams.modelType;
@ -136,15 +137,15 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
{
Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
char* temp_buf = new char[50];
strcpy(temp_buf, input_node_name.get());
strcpy_s(temp_buf, 50, input_node_name.get());
inputNodeNames.push_back(temp_buf);
}
size_t OutputNodesNum = session->GetOutputCount();
for (size_t i = 0; i < OutputNodesNum; i++)
{
Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
char* temp_buf = new char[10];
strcpy(temp_buf, output_node_name.get());
char* temp_buf = new char[50];
strcpy_s(temp_buf, 50, output_node_name.get());
outputNodeNames.push_back(temp_buf);
}
options = Ort::RunOptions{ nullptr };
@ -157,10 +158,11 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
const char* str2 = e.what();
std::string result = std::string(str1) + std::string(str2);
char* merged = new char[result.length() + 1];
std::strcpy(merged, result.c_str());
strcpy_s(merged, result.length() + 1, result.c_str());
std::cout << merged << std::endl;
delete[] merged;
return "[YOLO_V8]:Create session failed.";
Ret = "[YOLO_V8]:Create session failed.";
return Ret;
}
}
@ -322,6 +324,104 @@ char* YOLO_V8::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::
}
break;
}
case YOLO_POSE_V8://change
case YOLO_POSE_V8_HALF:
{
int signalResultNum = outputNodeDims[1];//56: "4+1+3*17"
int strideNum = outputNodeDims[2];//8400
//std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<cv::Point2f> keyPoints;
std::vector<float> keyPointsScores;
cv::Mat rawData;
if (modelType == YOLO_POSE_V8)
{
// FP32
rawData = cv::Mat(signalResultNum, strideNum, CV_32F, output);
}
else
{
// FP16
rawData = cv::Mat(signalResultNum, strideNum, CV_16F, output);
rawData.convertTo(rawData, CV_32F);
}
// Note:
// ultralytics adds a transpose operator to the output of the yolov8 model, which gives yolov8/v5/v7 the same output shape
// https://github.com/ultralytics/assets/releases/download/v8.4.0/yolov8n-pose.pt
rawData = rawData.t();
float* pdata = (float*)rawData.data;
for (int i = 0; i < strideNum; ++i)
{
float* classesScores = pdata + 4;
if (pdata[4] > rectConfidenceThreshold)
{
cv::Mat kpts = cv::Mat(kpts_num, 3, CV_32FC1, pdata + 5);//keypoints & scores
confidences.push_back(pdata[4]);
float x = pdata[0];
float y = pdata[1];
float w = pdata[2];
float h = pdata[3];
int left = int((x - 0.5 * w) * resizeScales);
int top = int((y - 0.5 * h) * resizeScales);
int width = int(w * resizeScales);
int height = int(h * resizeScales);
boxes.push_back(cv::Rect(left, top, width, height));
for (int kp = 0; kp < kpts.rows; ++kp) {
float x = kpts.at<float>(kp, 0) * resizeScales; // x
float y = kpts.at<float>(kp, 1) * resizeScales; // y
float score = kpts.at<float>(kp, 2); // score
keyPoints.push_back(cv::Point2f(x, y));
keyPointsScores.push_back(kpts.at<float>(kp, 2));
}
}
pdata += signalResultNum;
}
std::vector<int> nmsResult;
cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);
for (int i = 0; i < nmsResult.size(); i++)
{
int idx = nmsResult[i];
DL_RESULT result;
result.classId = 0;
result.confidence = confidences[idx];
result.box = boxes[idx];
result.keyPoints.clear();
result.keyPointsScore.clear();
for (int j = 0; j < this->classes.size() - 1; j++)
{
if (keyPointsScores[idx * (this->classes.size() - 1) + j] > pointScoresThreshold)
{
result.keyPoints.push_back(keyPoints[idx * (this->classes.size() - 1) + j]);
result.keyPointsScore.push_back(keyPointsScores[idx * (this->classes.size() - 1) + j]);
}
}
oResult.push_back(result);
}
#ifdef benchmark
clock_t starttime_4 = clock();
double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
if (cudaEnable)
{
std::cout << "[YOLO_V8(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
}
else
{
std::cout << "[YOLO_V8(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
}
#endif // benchmark
break;
}
default:
std::cout << "[YOLO_V8]: " << "Not support model type." << std::endl;
}
@ -332,7 +432,7 @@ char* YOLO_V8::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::
char* YOLO_V8::WarmUpSession() {
clock_t starttime_1 = clock();
cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(1), imgSize.at(0)), CV_8UC3);
cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
cv::Mat processedImg;
PreProcess(iImg, imgSize, processedImg);
if (modelType < 4)
@ -371,4 +471,4 @@ char* YOLO_V8::WarmUpSession() {
#endif
}
return RET_OK;
}
}

View file

@ -3,6 +3,7 @@
#pragma once
#define RET_OK nullptr
//#define USE_CUDA
#ifdef _WIN32
#include <Windows.h>
@ -25,7 +26,7 @@ enum MODEL_TYPE
{
//FLOAT32 MODEL
YOLO_DETECT_V8 = 1,
YOLO_POSE = 2,
YOLO_POSE_V8 = 2,
YOLO_CLS = 3,
//FLOAT16 MODEL
@ -41,6 +42,7 @@ typedef struct _DL_INIT_PARAM
MODEL_TYPE modelType = YOLO_DETECT_V8;
std::vector<int> imgSize = { 640, 640 };
float rectConfidenceThreshold = 0.6;
float pointScoresThreshold = 0.6;
float iouThreshold = 0.5;
int keyPointsNum = 2;//Note:kpt number for pose
bool cudaEnable = false;
@ -54,6 +56,7 @@ typedef struct _DL_RESULT
int classId;
float confidence;
cv::Rect box;
std::vector<float> keyPointsScore;
std::vector<cv::Point2f> keyPoints;
} DL_RESULT;
@ -66,7 +69,7 @@ public:
~YOLO_V8();
public:
char* CreateSession(DL_INIT_PARAM& iParams);
const char* CreateSession(DL_INIT_PARAM& iParams);
char* RunSession(cv::Mat& iImg, std::vector<DL_RESULT>& oResult);
@ -79,6 +82,7 @@ public:
char* PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg);
std::vector<std::string> classes{};
int kpts_num;
private:
Ort::Env env;
@ -91,6 +95,7 @@ private:
MODEL_TYPE modelType;
std::vector<int> imgSize;
float rectConfidenceThreshold;
float pointScoresThreshold;
float iouThreshold;
float resizeScales;//letterbox scale
};

View file

@ -9,7 +9,7 @@
void Detector(YOLO_V8*& p) {
std::filesystem::path current_path = std::filesystem::current_path();
std::filesystem::path imgs_path = current_path / "images";
std::filesystem::path imgs_path = current_path / "images/detect/";
for (auto& i : std::filesystem::directory_iterator(imgs_path))
{
if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
@ -97,11 +97,75 @@ void Classifier(YOLO_V8*& p)
}
}
// Runs pose estimation on every .jpg/.png/.bmp under ./images/pose/ relative to
// the working directory, draws each detection's box, keypoints and confidence
// label, and shows the annotated image until a key is pressed.
// Assumes p->classes[0] is the object class name and classes[1..kpts_num] are
// the keypoint labels appended by ReadCocoYaml — TODO confirm against caller.
void PoseEstimator(YOLO_V8*& p)
{
    std::filesystem::path current_path = std::filesystem::current_path();
    std::cout << "current_path: " << current_path << std::endl;
    std::filesystem::path imgs_path = current_path / "images/pose/";
    for (auto& entry : std::filesystem::directory_iterator(imgs_path))
    {
        if (entry.path().extension() == ".jpg" || entry.path().extension() == ".png" || entry.path().extension() == ".bmp")
        {
            std::string img_path = entry.path().string();
            cv::Mat img = cv::imread(img_path);
            std::vector<DL_RESULT> res;
            p->RunSession(img, res);

            if (res.empty())
            {
                // Deliberately falls through so the (unannotated) image is still shown.
                std::cout << "No pose detected in image: " << img_path << std::endl;
            }

            for (auto& re : res)
            {
                // Random colors per detection so overlapping results stay distinguishable.
                cv::RNG rng(cv::getTickCount());
                cv::Scalar color_box(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));
                cv::Scalar color_point(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));

                cv::rectangle(img, re.box, color_box, 2);

                // Truncate confidence to two decimals for the label text.
                float confidence = floor(100 * re.confidence) / 100;
                std::cout << std::fixed << std::setprecision(2);
                std::string label_box = p->classes[re.classId] + " " +
                    std::to_string(confidence).substr(0, std::to_string(confidence).size() - 4);

                // Keypoint index kp maps to label classes[kp + 1] (index 0 is the box class).
                for (std::size_t kp = 0; kp < re.keyPoints.size(); kp++)
                {
                    cv::circle(img, re.keyPoints[kp], 5, color_point, -1);
                    std::string label_point = p->classes[kp + 1];
                    cv::putText(img, label_point, re.keyPoints[kp], cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0), 2);
                }

                // Filled background strip above the box, then the label text on top of it.
                cv::rectangle(
                    img,
                    cv::Point(re.box.x, re.box.y - 25),
                    cv::Point(re.box.x + label_box.length() * 15, re.box.y),
                    color_box,
                    cv::FILLED
                );
                cv::putText(
                    img,
                    label_box,
                    cv::Point(re.box.x, re.box.y - 5),
                    cv::FONT_HERSHEY_SIMPLEX,
                    0.75,
                    cv::Scalar(0, 0, 0),
                    2
                );
            }
            cv::imshow("Result of Detection", img);
            cv::waitKey(0);
            cv::destroyAllWindows();
        }
    }
}
int ReadCocoYaml(YOLO_V8*& p, const std::string& yamlPath = "coco.yaml") {
// Open the YAML file
std::ifstream file("coco.yaml");
std::ifstream file(yamlPath);
if (!file.is_open())
{
std::cerr << "Failed to open file" << std::endl;
@ -119,8 +183,25 @@ int ReadCocoYaml(YOLO_V8*& p) {
// Find the start and end of the names section
std::size_t start = 0;
std::size_t end = 0;
std::string kpt_num = "none";
int kpts = 0;
for (std::size_t i = 0; i < lines.size(); i++)
{
if (lines[i].find("kpt_shape:") != std::string::npos)
{
std::stringstream kpt_shape(lines[i]);
std::getline(kpt_shape, kpt_num, '[');
std::getline(kpt_shape, kpt_num, ',');
if (!kpt_num.empty()) {
try {
kpts = std::stoi(kpt_num);
std::cout << "kpt_num as integer: " << kpts << std::endl;
}
catch (const std::exception& e) {
std::cerr << "Error converting kpt_num to integer: " << e.what() << std::endl;
}
}
}
if (lines[i].find("names:") != std::string::npos)
{
start = i + 1;
@ -132,6 +213,7 @@ int ReadCocoYaml(YOLO_V8*& p) {
}
}
// Extract the names
std::vector<std::string> names;
for (std::size_t i = start; i < end; i++)
@ -142,8 +224,14 @@ int ReadCocoYaml(YOLO_V8*& p) {
std::getline(ss, name); // Extract the string after the delimiter
names.push_back(name);
}
if (kpts != 0) {
for (int i = 1; i <= kpts; i++) {
names.push_back(std::to_string(i));
}
}
p->classes = names;
p->kpts_num = kpts;
return 0;
}
@ -151,11 +239,11 @@ int ReadCocoYaml(YOLO_V8*& p) {
void DetectTest()
{
YOLO_V8* yoloDetector = new YOLO_V8;
ReadCocoYaml(yoloDetector);
ReadCocoYaml(yoloDetector, "./yaml/coco.yaml");
DL_INIT_PARAM params;
params.rectConfidenceThreshold = 0.1;
params.iouThreshold = 0.5;
params.modelPath = "yolov8n.onnx";
params.modelPath = "./models/yolov8n.onnx";
params.imgSize = { 640, 640 };
#ifdef USE_CUDA
params.cudaEnable = true;
@ -181,15 +269,46 @@ void ClsTest()
{
YOLO_V8* yoloDetector = new YOLO_V8;
std::string model_path = "cls.onnx";
ReadCocoYaml(yoloDetector);
ReadCocoYaml(yoloDetector, "./yaml/cls.yaml");
DL_INIT_PARAM params{ model_path, YOLO_CLS, {224, 224} };
yoloDetector->CreateSession(params);
Classifier(yoloDetector);
}
// Builds an ONNXRuntime session for the YOLOv8 pose model and runs the
// PoseEstimator demo over the sample images. Thresholds: 0.25 box confidence,
// 0.5 keypoint score, 0.7 NMS IoU.
void PoseTest()
{
    YOLO_V8* yoloDetector = new YOLO_V8;
    ReadCocoYaml(yoloDetector, "./yaml/coco8-pose.yaml");
    DL_INIT_PARAM params;
    params.rectConfidenceThreshold = 0.25;
    params.pointScoresThreshold = 0.5;
    params.iouThreshold = 0.7;
    params.modelPath = "./models/yolov8n-pose.onnx";
    params.imgSize = { 640, 640 };
#ifdef USE_CUDA
    params.cudaEnable = true;
    // GPU FP32 inference
    params.modelType = YOLO_POSE_V8;
    // GPU FP16 inference
    // Note: requires an FP16 ONNX export of the pose model.
    //params.modelType = YOLO_POSE_V8_HALF;
#else
    // CPU inference
    params.modelType = YOLO_POSE_V8;
    params.cudaEnable = false;
#endif
    yoloDetector->CreateSession(params);
    PoseEstimator(yoloDetector);
    delete yoloDetector;
}
// Entry point: uncomment exactly one demo to run. Pose estimation is active.
int main()
{
    //DetectTest();
    //ClsTest();
    PoseTest();
}