diff --git a/examples/YOLO11-Triton-CPP/CMakeLists.txt b/examples/YOLO11-Triton-CPP/CMakeLists.txt new file mode 100644 index 0000000000..38865cd539 --- /dev/null +++ b/examples/YOLO11-Triton-CPP/CMakeLists.txt @@ -0,0 +1,60 @@ +cmake_minimum_required(VERSION 3.13) # target_link_directories() below was added in CMake 3.13 + +project(YOLO11TritonCPP VERSION 0.1) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Require external Triton client directory as a parameter +if(NOT DEFINED TRITON_CLIENT_DIR) + message(FATAL_ERROR "Please specify -DTRITON_CLIENT_DIR=/path/to/tritonclient") +endif() + +# Triton-related paths +set(Protobuf_DIR "${TRITON_CLIENT_DIR}/protobuf/lib/cmake/protobuf") +set(gRPC_DIR "${TRITON_CLIENT_DIR}/grpc") +set(c-ares_DIR "${TRITON_CLIENT_DIR}/c-ares/lib/cmake/c-ares") +set(TritonClient_DIR "${TRITON_CLIENT_DIR}/lib/cmake/TritonClient") +set(TritonCommon_DIR "${TRITON_CLIENT_DIR}/lib/cmake/TritonCommon") + +# Compiler optimizations +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c -mavx2 -O3 -ffast-math -march=native") + +# OpenCV setup +find_package(OpenCV REQUIRED) +include_directories(${OpenCV_INCLUDE_DIRS}) + +# Find Triton packages +find_package(TritonCommon REQUIRED) +find_package(TritonClient REQUIRED) + +# Project source files +set(PROJECT_SOURCES + main.cpp + inference.cpp + inference.hpp +) + +# Define executable target +add_executable(${PROJECT_NAME} ${PROJECT_SOURCES}) + +# Include directories +target_include_directories(${PROJECT_NAME} + PRIVATE + ${TRITON_CLIENT_DIR}/include + ${OpenCV_INCLUDE_DIRS} +) + +# Link directories +target_link_directories(${PROJECT_NAME} + PRIVATE + ${TRITON_CLIENT_DIR}/lib +) + +# Link libraries +target_link_libraries(${PROJECT_NAME} + PRIVATE + ${OpenCV_LIBS} + grpcclient +) diff --git a/examples/YOLO11-Triton-CPP/README.md b/examples/YOLO11-Triton-CPP/README.md new file mode 100644 index 0000000000..3328188ff4 --- /dev/null +++ b/examples/YOLO11-Triton-CPP/README.md @@ -0,0 +1,110 @@ +# YOLO11 Triton 
Inference Server C++ Client + +[![Ultralytics](https://img.shields.io/badge/Ultralytics-YOLO11-orange)](https://github.com/ultralytics/ultralytics) +[![Triton](https://img.shields.io/badge/NVIDIA-Triton-green)](https://github.com/triton-inference-server/server) + +This example demonstrates how to perform object detection using Ultralytics YOLO11 models deployed on the NVIDIA Triton Inference Server. The implementation highlights efficient image preprocessing, FP16 (half-precision) data conversion, seamless communication with the Triton server via gRPC, and visualization of detection results with bounding boxes and confidence scores. + +## ⚡ Features + +- **High-Performance Inference**: Utilizes FP16 (half-precision) data format for optimized memory usage and accelerated inference. +- **Non-Maximum Suppression (NMS)**: Removes duplicate detections to ensure precise object detection results. +- **Seamless Triton Integration**: Communicates with the NVIDIA Triton Inference Server via gRPC for efficient and scalable model serving. +- **Detection Visualization**: Annotates images with bounding boxes, class labels, and confidence scores for intuitive result interpretation. 
+ +## 🛠️ Dependencies + +Ensure you have the following dependencies installed before proceeding: + +| Dependency | Version | Description | +| ----------------------- | ------- | --------------------------------------------- | +| Triton Inference Server | 22.06 | Running with a deployed FP16 YOLO11 model | +| Triton Client libraries | 2.23 | Required for communication with Triton Server | +| C++ compiler | C++ 17+ | For compiling the C++ client application | +| OpenCV library | 3.4.15 | For image processing and visualization | +| CMake | 3.13+ | For building the project | + +For more information on Triton, see the [NVIDIA Triton Inference Server documentation](https://github.com/triton-inference-server/server) and explore [model deployment options with Ultralytics](https://docs.ultralytics.com/guides/model-deployment-options/). + +## 🏗️ Building the Project + +1. **Install the Triton Client libraries:** + + ```bash + wget https://github.com/triton-inference-server/server/releases/download/v2.23.0/v2.23.0_ubuntu2004.clients.tar.gz + mkdir tritonclient + tar -xvf v2.23.0_ubuntu2004.clients.tar.gz -C tritonclient + rm -rf v2.23.0_ubuntu2004.clients.tar.gz + ``` + +2. **Clone the Ultralytics repository:** + + ```bash + git clone https://github.com/ultralytics/ultralytics.git + cd ultralytics/examples/YOLO11-Triton-CPP + ``` + +3. **Configure and build the project using CMake:** + + ```bash + mkdir build + cd build + cmake .. -DTRITON_CLIENT_DIR=/path/to/tritonclient + make + ``` + +For additional guidance on integrating Ultralytics YOLO models with various platforms, check out the [Ultralytics integrations documentation](https://docs.ultralytics.com/integrations/). + +## 🚀 Usage + +1. **Deploy your FP16 (half-precision) YOLO11 model on a Triton Inference Server.** + Learn more about deploying models with [Ultralytics YOLO](https://docs.ultralytics.com/models/yolo11/). + +2. 
**Run the YOLO11-Triton-CPP application:** + + ```bash + ./YOLO11TritonCPP + ``` + +By default, the application will: + +- Connect to the Triton server at `localhost:8001` +- Use the model named `yolo11` with version `1` +- Process the image file `test.jpg` +- Save detection results to `output.jpg` + +For more on object detection workflows, see [Ultralytics object detection tasks](https://docs.ultralytics.com/tasks/detect/). + +## ⚙️ Configuration + +You can modify the following parameters in [main.cpp](main.cpp): + +```cpp +std::string triton_address = "localhost:8001"; +std::string model_name = "yolo11"; +std::string model_version = "1"; +std::string image_path = "test.jpg"; +std::string output_path = "output.jpg"; +std::vector<std::string> object_class_list = {"class1", "class2"}; +``` + +To learn more about configuring and customizing YOLO models, visit the [Ultralytics configuration guide](https://docs.ultralytics.com/usage/cfg/). + +## 🌟 Contributors + +Contributions are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the [main Ultralytics repository](https://github.com/ultralytics/ultralytics). + +- Ahmet Selim Demirel +- Doğan Mehmet Başoğlu +- Enes Uzun +- Elif Cansu Ada +- Mevlüt Ardıç +- Serhat Karaca + +[![Ultralytics open-source contributors](https://raw.githubusercontent.com/ultralytics/assets/main/im/image-contributors.png)](https://github.com/ultralytics/ultralytics/graphs/contributors) + +--- + +For more resources, explore the [Ultralytics documentation](https://docs.ultralytics.com/), [Ultralytics blog](https://www.ultralytics.com/blog), and [Ultralytics HUB](https://docs.ultralytics.com/hub/). + +**We encourage your contributions to make this project even better! 
🚀** diff --git a/examples/YOLO11-Triton-CPP/inference.cpp b/examples/YOLO11-Triton-CPP/inference.cpp new file mode 100644 index 0000000000..3a96d4ca22 --- /dev/null +++ b/examples/YOLO11-Triton-CPP/inference.cpp @@ -0,0 +1,304 @@ +#include "inference.hpp" +#include <immintrin.h> +#include <algorithm> +#include <cmath> +#include <cstring> +#include <iostream> +#include <vector> + +#define IOU_THRESHOLD 0.45 + +uint16_t float32_to_float16(float value) { // FP32 -> FP16 bit pattern using the F16C conversion intrinsic (rounding immediate 0) + __m128 input = _mm_set_ss(value); + return _mm_cvtsi128_si32(_mm_cvtps_ph(input, 0)); +} + +float float16_to_float32(uint16_t half) { // FP16 bit pattern -> FP32; zero, subnormal, Inf and NaN are decoded explicitly + uint16_t sign = (half & 0x8000) >> 15; + uint16_t exponent = (half & 0x7C00) >> 10; + uint16_t mantissa = (half & 0x03FF); + + if (exponent == 0) { + if (mantissa == 0) { + return sign ? -0.0f : 0.0f; + } + return std::ldexp(mantissa / 1024.0f, -14) * (sign ? -1.0f : 1.0f); + } + else if (exponent == 31) { + return mantissa ? NAN : (sign ? -INFINITY : INFINITY); + } + + float real_value = std::ldexp(1.0f + mantissa / 1024.0f, exponent - 15); + return sign ? -real_value : real_value; +} + + +void Image::preprocess(cv::Mat* img, std::vector<uint16_t>& triton_data, int input_w, int input_h) +{ // Letterbox img to input_w x input_h (grey 114 padding), convert BGR->RGB, scale to [0,1] and write planar CHW FP16 into triton_data, which must already hold 3*input_w*input_h elements + int w, h, x, y; + float r_w = input_w / (img->cols*1.0); + float r_h = input_h / (img->rows*1.0); + if (r_h > r_w) { + w = input_w; + h = r_w * img->rows; + x = 0; + y = (input_h - h) / 2; + } else { + w = r_h * img->cols; + h = input_h; + x = (input_w - w) / 2; + y = 0; + } + cv::Mat re(h, w, CV_8UC3); + cv::resize(*img, re, re.size(), 0, 0, cv::INTER_LINEAR); + cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(114, 114, 114)); + re.copyTo(out(cv::Rect(x, y, re.cols, re.rows))); + cv::cvtColor(out, out, cv::COLOR_BGR2RGB); + unsigned char* data = (unsigned char*)out.data; + int step = out.step; + + for (int yy = 0; yy < input_h; ++yy) + { + for (int kk = 0; kk < 3; ++kk) + { + for (int xx = 0; xx < input_w; ++xx) + { + float temp_f = data[yy * step + xx * 3 + kk] / 255.0f; + + triton_data[kk * input_w * input_h + yy * input_w + xx] = float32_to_float16(temp_f); + + 
} + } + } +} + +int getDetectionsFromTritonRawData(std::vector& detection_results, std::vector &detections, std::vector& object_class_list, float confidence_threshold, int image_width, int image_height) +{ + const size_t shape[3] = {1, object_class_list.size()+4, 8400}; + std::vector boxes; + for (size_t i = 0; i < shape[2]; i++) + { + + int x = int(detection_results[0 * shape[2] + i]); + int y = int(detection_results[1 * shape[2] + i]); + int w = int(detection_results[2 * shape[2] + i]); + int h = int(detection_results[3 * shape[2] + i]); + + for(size_t j =0 ; j < object_class_list.size(); j++) + { + if (detection_results[(4+j) * shape[2] + i] > 0.01) + { + BoundingBox box; + box.x = static_cast(x); + box.y = static_cast(y); + box.w = static_cast(w); + box.h = static_cast(h); + box.score = detection_results[(4 + j) * shape[2] + i]; + box.class_id = j; + boxes.push_back(box); + } + + } + + } + auto nms_boxes = NMS(boxes, IOU_THRESHOLD); + detections.clear(); + if(nms_boxes.size()==0) + { + return 0; + } + float scale_x = 0.0; + float scale_y = 0.0; + + int x1,y1 = 0; + int x2,y2 = 0; + + float shift_factor_x = 0.6; + float shift_factor_y = 0.5; + + int offset_shift = (image_width/640.0f)*10; + + if (image_width<=640) + { + scale_x = static_cast(image_width - 640.0f ) * 0.5 ; + scale_y = static_cast(image_height - 640.0f) * 0.5 ; + } + for (size_t i = 0; i < nms_boxes.size(); ++i) + { + if (nms_boxes[i].score< confidence_threshold) + { + continue; + } + struct detection_struct tespit_yapi ; + detections.push_back(tespit_yapi); + detections[detections.size() - 1].confidence_score = nms_boxes[i].score; + if (image_width==640) + { + scale_x = static_cast(image_width - 640.0f ) * 0.5 ; + scale_y = static_cast(image_height - 640.0f) * 0.5 ; + x1 = static_cast((nms_boxes[i].x - nms_boxes[i].w/2) + scale_x); + y1 = static_cast((nms_boxes[i].y - nms_boxes[i].h/2) + scale_y) ; + x2 = static_cast((nms_boxes[i].x + nms_boxes[i].w/2) + scale_x); + y2 = 
static_cast((nms_boxes[i].y + nms_boxes[i].h/2) + scale_y); + } + else if(image_width>=1080) + { + x1 = static_cast((nms_boxes[i].x - nms_boxes[i].w/2) * (image_width/640) ); + y1 = static_cast((nms_boxes[i].y - nms_boxes[i].h/2) * (image_width/640) - ((image_width - image_height) / 2.0)) ; + x2 = static_cast((nms_boxes[i].x + nms_boxes[i].w/2) * (image_width/640) ); + y2 = static_cast((nms_boxes[i].y + nms_boxes[i].h/2) * (image_width/640)- ((image_width - image_height) / 2.0)); + } + + float x_center, y_center, width, height; + x_center = (x1 + x2) / 2.0f; + y_center = (y1 + y2) / 2.0f; + width = x2 - x1; + height = y2 - y1; + detections[detections.size() - 1].bbox.x = x_center - width/2 ; + detections[detections.size() - 1].bbox.y = y_center - height/2; + detections[detections.size() - 1].bbox.width = width ; + detections[detections.size() - 1].bbox.height = height; + if (detections[detections.size() - 1].bbox.x <= 0) + detections[detections.size() - 1].bbox.x = offset_shift; + if (detections[detections.size() - 1].bbox.y <= 0) + detections[detections.size() - 1].bbox.y = offset_shift; + if (detections[detections.size() - 1].bbox.x + detections[detections.size() - 1].bbox.width >= image_width) + { + detections[detections.size() - 1].bbox.width -= detections[detections.size() - 1].bbox.x + detections[detections.size() - 1].bbox.width - image_width + offset_shift ; + } + if (detections[detections.size() - 1].bbox.y + detections[detections.size() - 1].bbox.height >= image_height) + { + detections[detections.size() - 1].bbox.height -= detections[detections.size() - 1].bbox.y + detections[detections.size() - 1].bbox.height - image_height + offset_shift ; + } + detections[detections.size() - 1].name = object_class_list[nms_boxes[i].class_id]; + detections[detections.size() - 1].class_id = nms_boxes[i].class_id; + } + return 0; +} + + +float IoU(const BoundingBox& box1, const BoundingBox& box2) { + float x1_min = box1.x - box1.w / 2.0f; + float y1_min = box1.y - box1.h 
/ 2.0f; + float x1_max = box1.x + box1.w / 2.0f; + float y1_max = box1.y + box1.h / 2.0f; + + float x2_min = box2.x - box2.w / 2.0f; + float y2_min = box2.y - box2.h / 2.0f; + float x2_max = box2.x + box2.w / 2.0f; + float y2_max = box2.y + box2.h / 2.0f; + + float inter_x_min = std::max(x1_min, x2_min); + float inter_y_min = std::max(y1_min, y2_min); + float inter_x_max = std::min(x1_max, x2_max); + float inter_y_max = std::min(y1_max, y2_max); + + float inter_width = inter_x_max - inter_x_min; + float inter_height = inter_y_max - inter_y_min; + + if (inter_width <= 0 || inter_height <= 0) + return 0.0f; + + float inter_area = inter_width * inter_height; + float area1 = (x1_max - x1_min) * (y1_max - y1_min); + float area2 = (x2_max - x2_min) * (y2_max - y2_min); + float union_area = area1 + area2 - inter_area; + + return inter_area / union_area; +} + +std::vector NMS(const std::vector& boxes, float iou_threshold) { + + std::vector result; + + std::vector sorted_boxes = boxes; + std::sort(sorted_boxes.begin(), sorted_boxes.end(), [](const BoundingBox& a, const BoundingBox& b) { + return a.score > b.score; + }); + + std::vector suppressed(sorted_boxes.size(), false); + + for (size_t i = 0; i < sorted_boxes.size(); ++i) { + if (suppressed[i]) + continue; + + result.push_back(sorted_boxes[i]); + + for (size_t j = i + 1; j < sorted_boxes.size(); ++j) { + if (suppressed[j]) + continue; + if (IoU(sorted_boxes[i], sorted_boxes[j]) > iou_threshold) { + suppressed[j] = true; + } + } + } + return result; +} + +TritonCommunication::TritonCommunication(std::string triton_address, std::string model_name, std::string model_version, int image_channel, int image_width, int image_height, int class_count) : options(model_name) +{ + triton::client::Error err; + this->triton_url = triton_address; + + this->options.model_version_ = model_version; + + this->shape = {1, image_channel, image_width, image_height}; + this->input_byte_size = image_channel * image_width * image_height * 
sizeof(uint16_t) ; + this->output_byte_size = (class_count + 4) * 8400 * sizeof(uint16_t); + + err = tc::InferenceServerGrpcClient::Create(&(this->client), this->triton_url); + if (!err.IsOk()) { + std::cout<< "Create grpc client error:"<IsServerReady(&live); + if (!err.IsOk() || !live) { + std::cout<< "Triton server is not live !"<IsModelReady(&model_ready,model_name,model_version); + if (!err.IsOk() || !model_ready) { + std::cerr << "Model:[" << model_name << "] has not been deployed on Triton Server. Triton Server Address:["< input0_ptr; + input0_ptr.reset(input0); + + err = input0_ptr->AppendRaw((const uint8_t*)image_data, this->input_byte_size); + + std::vector inputs = {input0_ptr.get()}; + + tc::InferResult* results; + err = client->Infer(&results, options, inputs); + results_ptr.reset(results); + + float *output0_data; + size_t output0_byte_size; + std::vector result_fp16_raw_data; + + results->RawData("output0", (const uint8_t**)&output0_data, &output0_byte_size); // output0 is a specific name for the output tensor. 
+ result_fp16_raw_data.resize(output0_byte_size/sizeof(uint16_t)); + + std::memcpy(result_fp16_raw_data.data(), output0_data, output0_byte_size); + std::vector float32_data(num_elements); + + for (size_t i = 0; i < num_elements; i++) { + float32_data[i] = float16_to_float32(result_fp16_raw_data[i]); + } + + output_raw_data = float32_data; + +} diff --git a/examples/YOLO11-Triton-CPP/inference.hpp b/examples/YOLO11-Triton-CPP/inference.hpp new file mode 100644 index 0000000000..58159ebb2c --- /dev/null +++ b/examples/YOLO11-Triton-CPP/inference.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include +#include "grpc_client.h" + +class Image +{ +public: + Image() = default; + static void preprocess(cv::Mat* image, std::vector& triton_data, int input_w, int input_h); +}; + +struct struct_yolo_output +{ + std::vector num_dets, det_classes; + std::vector det_boxes, det_scores; +}; + +struct BoundingBox { + float x, y, w, h; + float score; + int class_id; +}; + +struct detection_struct +{ + cv::Rect bbox; + int class_id; + std::string name; + double confidence_score; +}; + +// C-compatible declarations +#ifdef __cplusplus +extern "C" { +#endif + +int getDetectionsFromTritonRawData( + std::vector& detection_results, + std::vector& tespitler, + std::vector& object_class_list, + float confidence_threshold, + int image_width, + int image_height +); + +std::vector NMS(const std::vector& boxes, float iou_threshold); +float IoU(const BoundingBox& box1, const BoundingBox& box2); + +#ifdef __cplusplus +} +#endif + +namespace tc = triton::client; + +class TritonCommunication +{ +private: + std::unique_ptr client; + std::string triton_url; + std::vector shape; + tc::InferOptions options; + size_t input_byte_size; + size_t output_byte_size; + std::shared_ptr results_ptr; + +public: + std::vector output_raw_data; + + TritonCommunication(std::string triton_address, + std::string model_name, + std::string model_version, + int image_channel, + int image_width, + int image_height, + int 
class_count); + + void infer(uint16_t* triton_data); +}; diff --git a/examples/YOLO11-Triton-CPP/main.cpp b/examples/YOLO11-Triton-CPP/main.cpp new file mode 100644 index 0000000000..8e34232ef0 --- /dev/null +++ b/examples/YOLO11-Triton-CPP/main.cpp @@ -0,0 +1,74 @@ +#include "inference.hpp" +#include <chrono> +#include <iomanip> +#include <iostream> +#include <memory> +#include <sstream> +#include <string> +#include <vector> + +#define MODEL_INPUT_IMAGE_WIDTH 640 +#define MODEL_INPUT_IMAGE_HEIGHT 640 +#define NETWORK_THRESHOLD 0.50 +#define IMAGE_CHANNEL 3 + +double get_time_since_epoch_millis() +{ // Wall-clock time since the epoch in milliseconds, measured with microsecond resolution + using namespace std::chrono; + auto now = system_clock::now(); + auto duration = now.time_since_epoch(); + return duration_cast<microseconds>(duration).count() / 1000.0; +} + +int main(int argc, char *argv[]) +{ // Reads image_path, runs one inference on the Triton server, draws the detections and writes output_path + std::string triton_address= "localhost:8001"; + std::string model_name= "yolo11"; + std::string model_version= "1"; + std::string image_path = "test.jpg"; + std::string output_path = "output.jpg"; + std::vector<std::string> object_class_list = {"class1", "class2"}; + + std::vector<uint16_t> triton_request_data; + triton_request_data.resize(IMAGE_CHANNEL*MODEL_INPUT_IMAGE_WIDTH*MODEL_INPUT_IMAGE_HEIGHT); + std::vector<detection_struct> detections; + + std::shared_ptr<TritonCommunication> triton_communication = std::make_shared<TritonCommunication>(triton_address, model_name, model_version, IMAGE_CHANNEL, MODEL_INPUT_IMAGE_WIDTH, MODEL_INPUT_IMAGE_HEIGHT,object_class_list.size()); + + cv::Mat frame = cv::imread(image_path); + if (frame.empty()) + { + std::cerr << "Image couldn't read: " << image_path << std::endl; + return -1; + } + + int image_width = frame.cols; + int image_height = frame.rows; + + double preprocess_time = get_time_since_epoch_millis(); + Image::preprocess(&frame, triton_request_data, MODEL_INPUT_IMAGE_WIDTH, MODEL_INPUT_IMAGE_HEIGHT); + std::cout << "Preprocess time : " << (get_time_since_epoch_millis() - preprocess_time)<< " millisecond."<< std::endl; + + double infer_time = get_time_since_epoch_millis(); + triton_communication->infer(triton_request_data.data()); + std::cout << "Triton Server execute 
time : " << (get_time_since_epoch_millis() - infer_time) << " millisecond." << std::endl; + + getDetectionsFromTritonRawData(triton_communication->output_raw_data, detections, object_class_list, NETWORK_THRESHOLD, image_width, image_height); + + for (size_t i = 0; i < detections.size(); i++) + { + std::ostringstream oss; + oss << detections[i].name << " " + << std::fixed << std::setprecision(2) + << detections[i].confidence_score; + + cv::rectangle(frame, detections[i].bbox, cv::Scalar(255, 0, 0), 2); + cv::putText(frame, oss.str(), cv::Point((detections[i].bbox.x), (detections[i].bbox.y - 5)), cv::FONT_HERSHEY_DUPLEX, ((frame.cols / 640.0f) * 0.35), cv::Scalar(0, 0, 0), (int)(frame.cols / 640.0f) + 1); + cv::putText(frame, oss.str(), cv::Point((detections[i].bbox.x), (detections[i].bbox.y - 5)), cv::FONT_HERSHEY_DUPLEX, ((frame.cols / 640.0f) * 0.35), cv::Scalar(0xFF, 0xFF, 0xFF), (int)(frame.cols / 640.0f)); + } + + cv::imwrite(output_path, frame); + std::cout << "Result image saved!"<< std::endl; + + return 0; +}