mirror of
https://github.com/ultralytics/ultralytics
synced 2026-04-21 14:07:18 +00:00
Add YOLOv8 pose estimation support to ONNXRuntime C++ example (#23786)
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent
7bd3b2ce19
commit
1ab234ed14
3 changed files with 244 additions and 20 deletions
|
|
@ -59,7 +59,7 @@ char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oIm
|
|||
switch (modelType)
|
||||
{
|
||||
case YOLO_DETECT_V8:
|
||||
case YOLO_POSE:
|
||||
case YOLO_POSE_V8:
|
||||
case YOLO_DETECT_V8_HALF:
|
||||
case YOLO_POSE_V8_HALF://LetterBox
|
||||
{
|
||||
|
|
@ -82,7 +82,7 @@ char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oIm
|
|||
int m = min(h, w);
|
||||
int top = (h - m) / 2;
|
||||
int left = (w - m) / 2;
|
||||
cv::resize(oImg(cv::Rect(left, top, m, m)), oImg, cv::Size(iImgSize.at(1), iImgSize.at(0)));
|
||||
cv::resize(oImg(cv::Rect(left, top, m, m)), oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -90,8 +90,8 @@ char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oIm
|
|||
}
|
||||
|
||||
|
||||
char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
|
||||
char* Ret = RET_OK;
|
||||
const char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
|
||||
const char* Ret = RET_OK;
|
||||
std::regex pattern("[\u4e00-\u9fa5]");
|
||||
bool result = std::regex_search(iParams.modelPath, pattern);
|
||||
if (result)
|
||||
|
|
@ -103,6 +103,7 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
|
|||
try
|
||||
{
|
||||
rectConfidenceThreshold = iParams.rectConfidenceThreshold;
|
||||
pointScoresThreshold = iParams.pointScoresThreshold;//change
|
||||
iouThreshold = iParams.iouThreshold;
|
||||
imgSize = iParams.imgSize;
|
||||
modelType = iParams.modelType;
|
||||
|
|
@ -136,15 +137,15 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
|
|||
{
|
||||
Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
|
||||
char* temp_buf = new char[50];
|
||||
strcpy(temp_buf, input_node_name.get());
|
||||
strcpy_s(temp_buf, 50, input_node_name.get());
|
||||
inputNodeNames.push_back(temp_buf);
|
||||
}
|
||||
size_t OutputNodesNum = session->GetOutputCount();
|
||||
for (size_t i = 0; i < OutputNodesNum; i++)
|
||||
{
|
||||
Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
|
||||
char* temp_buf = new char[10];
|
||||
strcpy(temp_buf, output_node_name.get());
|
||||
char* temp_buf = new char[50];
|
||||
strcpy_s(temp_buf, 50, output_node_name.get());
|
||||
outputNodeNames.push_back(temp_buf);
|
||||
}
|
||||
options = Ort::RunOptions{ nullptr };
|
||||
|
|
@ -157,10 +158,11 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
|
|||
const char* str2 = e.what();
|
||||
std::string result = std::string(str1) + std::string(str2);
|
||||
char* merged = new char[result.length() + 1];
|
||||
std::strcpy(merged, result.c_str());
|
||||
strcpy_s(merged, result.length() + 1, result.c_str());
|
||||
std::cout << merged << std::endl;
|
||||
delete[] merged;
|
||||
return "[YOLO_V8]:Create session failed.";
|
||||
Ret = "[YOLO_V8]:Create session failed.";
|
||||
return Ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -322,6 +324,104 @@ char* YOLO_V8::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::
|
|||
}
|
||||
break;
|
||||
}
|
||||
case YOLO_POSE_V8://change
|
||||
case YOLO_POSE_V8_HALF:
|
||||
{
|
||||
int signalResultNum = outputNodeDims[1];//56: "4+1+3*17"
|
||||
int strideNum = outputNodeDims[2];//8400
|
||||
//std::vector<int> class_ids;
|
||||
std::vector<float> confidences;
|
||||
std::vector<cv::Rect> boxes;
|
||||
std::vector<cv::Point2f> keyPoints;
|
||||
std::vector<float> keyPointsScores;
|
||||
cv::Mat rawData;
|
||||
if (modelType == YOLO_POSE_V8)
|
||||
{
|
||||
// FP32
|
||||
rawData = cv::Mat(signalResultNum, strideNum, CV_32F, output);
|
||||
}
|
||||
else
|
||||
{
|
||||
// FP16
|
||||
rawData = cv::Mat(signalResultNum, strideNum, CV_16F, output);
|
||||
rawData.convertTo(rawData, CV_32F);
|
||||
}
|
||||
// Note:
|
||||
// ultralytics add transpose operator to the output of yolov8 model.which make yolov8/v5/v7 has same shape
|
||||
// https://github.com/ultralytics/assets/releases/download/v8.4.0/yolov8n-pose.pt
|
||||
rawData = rawData.t();
|
||||
|
||||
float* pdata = (float*)rawData.data;
|
||||
for (int i = 0; i < strideNum; ++i)
|
||||
{
|
||||
float* classesScores = pdata + 4;
|
||||
|
||||
if (pdata[4] > rectConfidenceThreshold)
|
||||
{
|
||||
cv::Mat kpts = cv::Mat(kpts_num, 3, CV_32FC1, pdata + 5);//keypoints & scores
|
||||
confidences.push_back(pdata[4]);
|
||||
float x = pdata[0];
|
||||
float y = pdata[1];
|
||||
float w = pdata[2];
|
||||
float h = pdata[3];
|
||||
|
||||
int left = int((x - 0.5 * w) * resizeScales);
|
||||
int top = int((y - 0.5 * h) * resizeScales);
|
||||
|
||||
int width = int(w * resizeScales);
|
||||
int height = int(h * resizeScales);
|
||||
|
||||
boxes.push_back(cv::Rect(left, top, width, height));
|
||||
for (int kp = 0; kp < kpts.rows; ++kp) {
|
||||
float x = kpts.at<float>(kp, 0) * resizeScales; // x
|
||||
float y = kpts.at<float>(kp, 1) * resizeScales; // y
|
||||
float score = kpts.at<float>(kp, 2); // score
|
||||
keyPoints.push_back(cv::Point2f(x, y));
|
||||
keyPointsScores.push_back(kpts.at<float>(kp, 2));
|
||||
}
|
||||
}
|
||||
pdata += signalResultNum;
|
||||
}
|
||||
std::vector<int> nmsResult;
|
||||
cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);
|
||||
|
||||
for (int i = 0; i < nmsResult.size(); i++)
|
||||
{
|
||||
int idx = nmsResult[i];
|
||||
DL_RESULT result;
|
||||
result.classId = 0;
|
||||
result.confidence = confidences[idx];
|
||||
result.box = boxes[idx];
|
||||
result.keyPoints.clear();
|
||||
result.keyPointsScore.clear();
|
||||
for (int j = 0; j < this->classes.size() - 1; j++)
|
||||
{
|
||||
if (keyPointsScores[idx * (this->classes.size() - 1) + j] > pointScoresThreshold)
|
||||
{
|
||||
result.keyPoints.push_back(keyPoints[idx * (this->classes.size() - 1) + j]);
|
||||
result.keyPointsScore.push_back(keyPointsScores[idx * (this->classes.size() - 1) + j]);
|
||||
}
|
||||
}
|
||||
oResult.push_back(result);
|
||||
}
|
||||
|
||||
#ifdef benchmark
|
||||
clock_t starttime_4 = clock();
|
||||
double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
|
||||
double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
|
||||
double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
|
||||
if (cudaEnable)
|
||||
{
|
||||
std::cout << "[YOLO_V8(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "[YOLO_V8(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
|
||||
}
|
||||
#endif // benchmark
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "[YOLO_V8]: " << "Not support model type." << std::endl;
|
||||
}
|
||||
|
|
@ -332,7 +432,7 @@ char* YOLO_V8::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::
|
|||
|
||||
char* YOLO_V8::WarmUpSession() {
|
||||
clock_t starttime_1 = clock();
|
||||
cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(1), imgSize.at(0)), CV_8UC3);
|
||||
cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
|
||||
cv::Mat processedImg;
|
||||
PreProcess(iImg, imgSize, processedImg);
|
||||
if (modelType < 4)
|
||||
|
|
@ -371,4 +471,4 @@ char* YOLO_V8::WarmUpSession() {
|
|||
#endif
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
#pragma once
|
||||
|
||||
#define RET_OK nullptr
|
||||
//#define USE_CUDA
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
|
|
@ -25,7 +26,7 @@ enum MODEL_TYPE
|
|||
{
|
||||
//FLOAT32 MODEL
|
||||
YOLO_DETECT_V8 = 1,
|
||||
YOLO_POSE = 2,
|
||||
YOLO_POSE_V8 = 2,
|
||||
YOLO_CLS = 3,
|
||||
|
||||
//FLOAT16 MODEL
|
||||
|
|
@ -41,6 +42,7 @@ typedef struct _DL_INIT_PARAM
|
|||
MODEL_TYPE modelType = YOLO_DETECT_V8;
|
||||
std::vector<int> imgSize = { 640, 640 };
|
||||
float rectConfidenceThreshold = 0.6;
|
||||
float pointScoresThreshold = 0.6;
|
||||
float iouThreshold = 0.5;
|
||||
int keyPointsNum = 2;//Note:kpt number for pose
|
||||
bool cudaEnable = false;
|
||||
|
|
@ -54,6 +56,7 @@ typedef struct _DL_RESULT
|
|||
int classId;
|
||||
float confidence;
|
||||
cv::Rect box;
|
||||
std::vector<float> keyPointsScore;
|
||||
std::vector<cv::Point2f> keyPoints;
|
||||
} DL_RESULT;
|
||||
|
||||
|
|
@ -66,7 +69,7 @@ public:
|
|||
~YOLO_V8();
|
||||
|
||||
public:
|
||||
char* CreateSession(DL_INIT_PARAM& iParams);
|
||||
const char* CreateSession(DL_INIT_PARAM& iParams);
|
||||
|
||||
char* RunSession(cv::Mat& iImg, std::vector<DL_RESULT>& oResult);
|
||||
|
||||
|
|
@ -79,6 +82,7 @@ public:
|
|||
char* PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg);
|
||||
|
||||
std::vector<std::string> classes{};
|
||||
int kpts_num;
|
||||
|
||||
private:
|
||||
Ort::Env env;
|
||||
|
|
@ -91,6 +95,7 @@ private:
|
|||
MODEL_TYPE modelType;
|
||||
std::vector<int> imgSize;
|
||||
float rectConfidenceThreshold;
|
||||
float pointScoresThreshold;
|
||||
float iouThreshold;
|
||||
float resizeScales;//letterbox scale
|
||||
};
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
void Detector(YOLO_V8*& p) {
|
||||
std::filesystem::path current_path = std::filesystem::current_path();
|
||||
std::filesystem::path imgs_path = current_path / "images";
|
||||
std::filesystem::path imgs_path = current_path / "images/detect/";
|
||||
for (auto& i : std::filesystem::directory_iterator(imgs_path))
|
||||
{
|
||||
if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
|
||||
|
|
@ -97,11 +97,75 @@ void Classifier(YOLO_V8*& p)
|
|||
}
|
||||
}
|
||||
|
||||
void PoseEstimator(YOLO_V8*& p)
|
||||
{
|
||||
std::filesystem::path current_path = std::filesystem::current_path();
|
||||
std::cout << "current_path: " << current_path << std::endl;
|
||||
std::filesystem::path imgs_path = current_path / "images/pose/";
|
||||
|
||||
for (auto& i : std::filesystem::directory_iterator(imgs_path))
|
||||
{
|
||||
if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".bmp")
|
||||
{
|
||||
std::string img_path = i.path().string();
|
||||
cv::Mat img = cv::imread(img_path);
|
||||
std::vector<DL_RESULT> res;
|
||||
p->RunSession(img, res);
|
||||
if (res.empty())
|
||||
{
|
||||
std::cout << "No pose detected in image: " << img_path << std::endl;
|
||||
//continue;
|
||||
}
|
||||
for (auto& re : res)
|
||||
{
|
||||
cv::RNG rng(cv::getTickCount());
|
||||
cv::Scalar color_box(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));
|
||||
cv::Scalar color_point(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));
|
||||
|
||||
cv::rectangle(img, re.box, color_box, 2);
|
||||
|
||||
float confidence = floor(100 * re.confidence) / 100;
|
||||
std::cout << std::fixed << std::setprecision(2);
|
||||
std::string label_box = p->classes[re.classId] + " " +
|
||||
std::to_string(confidence).substr(0, std::to_string(confidence).size() - 4);
|
||||
|
||||
for (int i = 0; i < re.keyPoints.size(); i++)
|
||||
{
|
||||
cv::circle(img, re.keyPoints[i], 5, color_point, -1);
|
||||
std::string label_point = p->classes[i + 1];
|
||||
cv::putText(img, label_point, re.keyPoints[i], cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0), 2);
|
||||
}
|
||||
cv::rectangle(
|
||||
img,
|
||||
cv::Point(re.box.x, re.box.y - 25),
|
||||
cv::Point(re.box.x + label_box.length() * 15, re.box.y),
|
||||
color_box,
|
||||
cv::FILLED
|
||||
);
|
||||
|
||||
cv::putText(
|
||||
img,
|
||||
label_box,
|
||||
cv::Point(re.box.x, re.box.y - 5),
|
||||
cv::FONT_HERSHEY_SIMPLEX,
|
||||
0.75,
|
||||
cv::Scalar(0, 0, 0),
|
||||
2
|
||||
);
|
||||
}
|
||||
//std::cout << "Press any key to exit" << std::endl;
|
||||
cv::imshow("Result of Detection", img);
|
||||
cv::waitKey(0);
|
||||
cv::destroyAllWindows();
|
||||
|
||||
|
||||
int ReadCocoYaml(YOLO_V8*& p) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int ReadCocoYaml(YOLO_V8*& p, const std::string& yamlPath = "coco.yaml") {
|
||||
// Open the YAML file
|
||||
std::ifstream file("coco.yaml");
|
||||
std::ifstream file(yamlPath);
|
||||
if (!file.is_open())
|
||||
{
|
||||
std::cerr << "Failed to open file" << std::endl;
|
||||
|
|
@ -119,8 +183,25 @@ int ReadCocoYaml(YOLO_V8*& p) {
|
|||
// Find the start and end of the names section
|
||||
std::size_t start = 0;
|
||||
std::size_t end = 0;
|
||||
std::string kpt_num = "none";
|
||||
int kpts = 0;
|
||||
for (std::size_t i = 0; i < lines.size(); i++)
|
||||
{
|
||||
if (lines[i].find("kpt_shape:") != std::string::npos)
|
||||
{
|
||||
std::stringstream kpt_shape(lines[i]);
|
||||
std::getline(kpt_shape, kpt_num, '[');
|
||||
std::getline(kpt_shape, kpt_num, ',');
|
||||
if (!kpt_num.empty()) {
|
||||
try {
|
||||
kpts = std::stoi(kpt_num);
|
||||
std::cout << "kpt_num as integer: " << kpts << std::endl;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "Error converting kpt_num to integer: " << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (lines[i].find("names:") != std::string::npos)
|
||||
{
|
||||
start = i + 1;
|
||||
|
|
@ -132,6 +213,7 @@ int ReadCocoYaml(YOLO_V8*& p) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Extract the names
|
||||
std::vector<std::string> names;
|
||||
for (std::size_t i = start; i < end; i++)
|
||||
|
|
@ -142,8 +224,14 @@ int ReadCocoYaml(YOLO_V8*& p) {
|
|||
std::getline(ss, name); // Extract the string after the delimiter
|
||||
names.push_back(name);
|
||||
}
|
||||
if (kpts != 0) {
|
||||
for (int i = 1; i <= kpts; i++) {
|
||||
names.push_back(std::to_string(i));
|
||||
}
|
||||
}
|
||||
|
||||
p->classes = names;
|
||||
p->kpts_num = kpts;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -151,11 +239,11 @@ int ReadCocoYaml(YOLO_V8*& p) {
|
|||
void DetectTest()
|
||||
{
|
||||
YOLO_V8* yoloDetector = new YOLO_V8;
|
||||
ReadCocoYaml(yoloDetector);
|
||||
ReadCocoYaml(yoloDetector, "./yaml/coco.yaml");
|
||||
DL_INIT_PARAM params;
|
||||
params.rectConfidenceThreshold = 0.1;
|
||||
params.iouThreshold = 0.5;
|
||||
params.modelPath = "yolov8n.onnx";
|
||||
params.modelPath = "./models/yolov8n.onnx";
|
||||
params.imgSize = { 640, 640 };
|
||||
#ifdef USE_CUDA
|
||||
params.cudaEnable = true;
|
||||
|
|
@ -181,15 +269,46 @@ void ClsTest()
|
|||
{
|
||||
YOLO_V8* yoloDetector = new YOLO_V8;
|
||||
std::string model_path = "cls.onnx";
|
||||
ReadCocoYaml(yoloDetector);
|
||||
ReadCocoYaml(yoloDetector, "./yaml/cls.yaml");
|
||||
DL_INIT_PARAM params{ model_path, YOLO_CLS, {224, 224} };
|
||||
yoloDetector->CreateSession(params);
|
||||
Classifier(yoloDetector);
|
||||
}
|
||||
|
||||
void PoseTest()
|
||||
{
|
||||
YOLO_V8* yoloDetector = new YOLO_V8;
|
||||
ReadCocoYaml(yoloDetector, "./yaml/coco8-pose.yaml");
|
||||
DL_INIT_PARAM params;
|
||||
params.rectConfidenceThreshold = 0.25;
|
||||
params.pointScoresThreshold = 0.5;
|
||||
params.iouThreshold = 0.7;
|
||||
params.modelPath = "./models/yolov8n-pose.onnx";
|
||||
params.imgSize = { 640, 640 };
|
||||
#ifdef USE_CUDA
|
||||
params.cudaEnable = true;
|
||||
|
||||
// GPU FP32 inference
|
||||
params.modelType = YOLO_POSE_V8;
|
||||
// GPU FP16 inference
|
||||
//Note: change fp16 onnx model
|
||||
//params.modelType = YOLO_DETECT_V8_HALF;
|
||||
|
||||
#else
|
||||
// CPU inference
|
||||
params.modelType = YOLO_POSE_V8;
|
||||
params.cudaEnable = false;
|
||||
|
||||
#endif
|
||||
yoloDetector->CreateSession(params);
|
||||
PoseEstimator(yoloDetector);
|
||||
|
||||
delete yoloDetector;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
//DetectTest();
|
||||
ClsTest();
|
||||
//ClsTest();
|
||||
PoseTest();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue