Add YOLO11 Triton CPP Example (#20553)

Signed-off-by: Enes Uzun <42443500+uzunenes@users.noreply.github.com> Signed-off-by: Ahmet Selim Demirel <56585669+asdemirel@users.noreply.github.com> Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Enes Uzun <42443500+uzunenes@users.noreply.github.com> Co-authored-by: Doğan Mehmet Başoğlu <doganb00@gmail.com> Co-authored-by: mevlutardic <mardic1@ford.com.tr> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: mevlutardic <mevlutardic.1@hotmail.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
2026-05-24 09:38:39 +00:00 · 2025-05-11 01:09:20 +03:00 · 2025-05-11 01:09:20 +03:00 · 1bccbacc13
commit 1bccbacc13
parent e52675371b
5 changed files with 627 additions and 0 deletions
--- a/examples/YOLO11-Triton-CPP/CMakeLists.txt
+++ b/examples/YOLO11-Triton-CPP/CMakeLists.txt
@ -0,0 +1,60 @@
+# target_link_directories() (used below) was introduced in CMake 3.13.
+cmake_minimum_required(VERSION 3.13)
+
+project(YOLO11TritonCPP VERSION 0.1)
+
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Require external Triton client directory as a parameter
+if(NOT DEFINED TRITON_CLIENT_DIR)
+    message(FATAL_ERROR "Please specify -DTRITON_CLIENT_DIR=/path/to/tritonclient")
+endif()
+
+# Triton-related paths: tell find_package() where the package config files
+# shipped inside the Triton client bundle live.
+set(Protobuf_DIR "${TRITON_CLIENT_DIR}/protobuf/lib/cmake/protobuf")
+set(gRPC_DIR "${TRITON_CLIENT_DIR}/grpc")
+set(c-ares_DIR "${TRITON_CLIENT_DIR}/c-ares/lib/cmake/c-ares")
+set(TritonClient_DIR "${TRITON_CLIENT_DIR}/lib/cmake/TritonClient")
+set(TritonCommon_DIR "${TRITON_CLIENT_DIR}/lib/cmake/TritonCommon")
+
+# Compiler optimizations
+# -mf16c is required by the _mm_cvtps_ph intrinsic used for the FP32 -> FP16 conversion.
+# -ffast-math is intentionally NOT enabled: it implies -ffinite-math-only, under which
+# the NaN/Infinity handling in float16_to_float32() is no longer guaranteed to work.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c -mavx2 -O3 -march=native")
+
+# OpenCV setup
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+# Find Triton packages
+find_package(TritonCommon REQUIRED)
+find_package(TritonClient REQUIRED)
+
+# Project source files
+set(PROJECT_SOURCES
+    main.cpp
+    inference.cpp
+    inference.hpp
+)
+
+# Define executable target
+add_executable(${PROJECT_NAME} ${PROJECT_SOURCES})
+
+# Include directories
+target_include_directories(${PROJECT_NAME}
+    PRIVATE
+    ${TRITON_CLIENT_DIR}/include
+    ${OpenCV_INCLUDE_DIRS}
+)
+
+# Link directories
+target_link_directories(${PROJECT_NAME}
+    PRIVATE
+    ${TRITON_CLIENT_DIR}/lib
+)
+
+# Link libraries
+target_link_libraries(${PROJECT_NAME}
+    PRIVATE
+    ${OpenCV_LIBS}
+    grpcclient
+)
--- a/examples/YOLO11-Triton-CPP/README.md
+++ b/examples/YOLO11-Triton-CPP/README.md
@ -0,0 +1,110 @@
+# YOLO11 Triton Inference Server C++ Client
+
+[![Ultralytics](https://img.shields.io/badge/Ultralytics-YOLO11-orange)](https://github.com/ultralytics/ultralytics)
+[![Triton](https://img.shields.io/badge/NVIDIA-Triton-green)](https://github.com/triton-inference-server/server)
+
+This example demonstrates how to perform object detection using Ultralytics YOLO11 models deployed on the NVIDIA Triton Inference Server. The implementation highlights efficient image preprocessing, FP16 (half-precision) data conversion, seamless communication with the Triton server via gRPC, and visualization of detection results with bounding boxes and confidence scores.
+
+## ⚡ Features
+
+- **High-Performance Inference**: Utilizes FP16 (half-precision) data format for optimized memory usage and accelerated inference.
+- **Non-Maximum Suppression (NMS)**: Removes duplicate detections to ensure precise object detection results.
+- **Seamless Triton Integration**: Communicates with the NVIDIA Triton Inference Server via gRPC for efficient and scalable model serving.
+- **Detection Visualization**: Annotates images with bounding boxes, class labels, and confidence scores for intuitive result interpretation.
+
+## 🛠️ Dependencies
+
+Ensure you have the following dependencies installed before proceeding:
+
+| Dependency              | Version | Description                                   |
+| ----------------------- | ------- | --------------------------------------------- |
+| Triton Inference Server | 22.06   | Running with a deployed FP16 YOLO11 model     |
+| Triton Client libraries | 2.23    | Required for communication with Triton Server |
+| C++ compiler            | C++ 17+ | For compiling the C++ client application      |
+| OpenCV library          | 3.4.15  | For image processing and visualization        |
+| CMake                   | 3.13+   | For building the project                      |
+
+For more information on Triton, see the [NVIDIA Triton Inference Server documentation](https://github.com/triton-inference-server/server) and explore [model deployment options with Ultralytics](https://docs.ultralytics.com/guides/model-deployment-options/).
+
+## 🏗️ Building the Project
+
+1. **Install the Triton Client libraries:**
+
+   ```bash
+   wget https://github.com/triton-inference-server/server/releases/download/v2.23.0/v2.23.0_ubuntu2004.clients.tar.gz
+   mkdir tritonclient
+   tar -xvf v2.23.0_ubuntu2004.clients.tar.gz -C tritonclient
+   rm -rf v2.23.0_ubuntu2004.clients.tar.gz
+   ```
+
+2. **Clone the Ultralytics repository:**
+
+   ```bash
+   git clone https://github.com/ultralytics/ultralytics.git
+   cd ultralytics/examples/YOLO11-Triton-CPP
+   ```
+
+3. **Configure and build the project using CMake:**
+
+   ```bash
+   mkdir build
+   cd build
+   cmake .. -DTRITON_CLIENT_DIR=/path/to/tritonclient
+   make
+   ```
+
+For additional guidance on integrating Ultralytics YOLO models with various platforms, check out the [Ultralytics integrations documentation](https://docs.ultralytics.com/integrations/).
+
+## 🚀 Usage
+
+1. **Deploy your FP16 (half-precision) YOLO11 model on a Triton Inference Server.**  
+   Learn more about deploying models with [Ultralytics YOLO](https://docs.ultralytics.com/models/yolo11/).
+
+2. **Run the YOLO11-Triton-CPP application:**
+
+   ```bash
+   ./YOLO11TritonCPP
+   ```
+
+By default, the application will:
+
+- Connect to the Triton server at `localhost:8001`
+- Use the model named `yolo11` with version `1`
+- Process the image file `test.jpg`
+- Save detection results to `output.jpg`
+
+For more on object detection workflows, see [Ultralytics object detection tasks](https://docs.ultralytics.com/tasks/detect/).
+
+## ⚙️ Configuration
+
+You can modify the following parameters in [main.cpp](main.cpp):
+
+```cpp
+std::string triton_address = "localhost:8001";
+std::string model_name = "yolo11";
+std::string model_version = "1";
+std::string image_path = "test.jpg";
+std::string output_path = "output.jpg";
+std::vector<std::string> object_class_list = {"class1", "class2"};
+```
+
+To learn more about configuring and customizing YOLO models, visit the [Ultralytics configuration guide](https://docs.ultralytics.com/usage/cfg/).
+
+## 🌟 Contributors
+
+Contributions are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the [main Ultralytics repository](https://github.com/ultralytics/ultralytics).
+
+- Ahmet Selim Demirel
+- Doğan Mehmet Başoğlu
+- Enes Uzun
+- Elif Cansu Ada
+- Mevlüt Ardıç
+- Serhat Karaca
+
+[![Ultralytics open-source contributors](https://raw.githubusercontent.com/ultralytics/assets/main/im/image-contributors.png)](https://github.com/ultralytics/ultralytics/graphs/contributors)
+
+---
+
+For more resources, explore the [Ultralytics documentation](https://docs.ultralytics.com/), [Ultralytics blog](https://www.ultralytics.com/blog), and [Ultralytics HUB](https://docs.ultralytics.com/hub/).
+
+**We encourage your contributions to make this project even better! 🚀**
--- a/examples/YOLO11-Triton-CPP/inference.cpp
+++ b/examples/YOLO11-Triton-CPP/inference.cpp
@ -0,0 +1,304 @@
+#include "inference.hpp"
+
+#include <immintrin.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+// IoU value passed to NMS(): a box whose overlap with a higher-scoring box exceeds this is suppressed.
+#define IOU_THRESHOLD 0.45
+
+// Converts one single-precision float to its IEEE 754 half-precision bit pattern
+// using the F16C conversion intrinsic (round-to-nearest mode).
+uint16_t float32_to_float16(float value) {
+    const __m128 single_lane = _mm_set_ss(value);  // value in lane 0, remaining lanes zero
+    const __m128i packed_halves = _mm_cvtps_ph(single_lane, _MM_FROUND_TO_NEAREST_INT);
+    const int low_word = _mm_cvtsi128_si32(packed_halves);
+    // The converted half sits in the low 16 bits of the extracted word.
+    return static_cast<uint16_t>(low_word);
+}
+
+// Decodes an IEEE 754 half-precision bit pattern (1 sign, 5 exponent, 10 fraction bits)
+// into a single-precision float. Handles signed zero, subnormals, infinities and NaN.
+float float16_to_float32(uint16_t half) {
+    const bool negative = (half & 0x8000) != 0;
+    const int biased_exponent = (half >> 10) & 0x1F;
+    const int fraction = half & 0x03FF;
+
+    float magnitude;
+    if (biased_exponent == 31) {
+        // All-ones exponent: NaN when any fraction bit is set, otherwise infinity.
+        if (fraction != 0) {
+            return NAN;
+        }
+        magnitude = INFINITY;
+    } else if (biased_exponent == 0) {
+        // Zero and subnormals: no implicit leading one, fixed exponent of -14.
+        magnitude = std::ldexp(fraction / 1024.0f, -14);
+    } else {
+        // Normal numbers: implicit leading one, exponent bias of 15.
+        magnitude = std::ldexp(1.0f + fraction / 1024.0f, biased_exponent - 15);
+    }
+
+    return negative ? -magnitude : magnitude;
+}
+
+
+/**
+ * Letterboxes an image to the model input size and packs it as an FP16 tensor.
+ *
+ * The image is resized with its aspect ratio preserved, centred on a
+ * input_w x input_h canvas filled with the value 114, converted with
+ * COLOR_BGR2RGB, scaled to [0, 1] and written channel-by-channel (C, H, W).
+ *
+ * @param img          Source image; it is not modified. Nothing is written if it is null or empty.
+ * @param triton_data  Destination buffer; grown to 3 * input_w * input_h elements if it is smaller.
+ * @param input_w      Model input width in pixels.
+ * @param input_h      Model input height in pixels.
+ */
+void Image::preprocess(cv::Mat* img, std::vector<uint16_t>& triton_data, int input_w, int input_h) 
+{
+    if (img == nullptr || img->empty() || input_w <= 0 || input_h <= 0)
+    {
+        std::cerr << "Image::preprocess: invalid image or input size." << std::endl;
+        return;
+    }
+
+    // The packing loop below indexes up to 3 * input_w * input_h; never write past the buffer.
+    const size_t tensor_size = static_cast<size_t>(3) * input_w * input_h;
+    if (triton_data.size() < tensor_size)
+    {
+        triton_data.resize(tensor_size);
+    }
+
+    // Pick the smaller scale factor so the whole image fits, then centre it.
+    int w, h, x, y;
+    float r_w = input_w / (img->cols*1.0);
+    float r_h = input_h / (img->rows*1.0);
+    if (r_h > r_w) {
+        w = input_w;
+        h = r_w * img->rows;
+        if (h < 1) h = 1;  // extreme aspect ratios must still yield a non-empty resize target
+        x = 0;
+        y = (input_h - h) / 2;
+    } else {
+        w = r_h * img->cols;
+        if (w < 1) w = 1;
+        h = input_h;
+        x = (input_w - w) / 2;
+        y = 0;
+    }
+    cv::Mat re(h, w, CV_8UC3);
+    cv::resize(*img, re, re.size(), 0, 0, cv::INTER_LINEAR);
+    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(114, 114, 114));
+    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
+    cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
+
+    // Interleaved 8-bit pixels -> planar FP16 values in [0, 1].
+    const size_t plane_size = static_cast<size_t>(input_w) * input_h;
+    for (int yy = 0; yy < input_h; ++yy)
+    {
+        const unsigned char* row = out.ptr<unsigned char>(yy);
+        for (int kk = 0; kk < 3; ++kk)
+        {
+            for (int xx = 0; xx < input_w; ++xx)
+            {
+                const float normalized = row[xx * 3 + kk] / 255.0f;
+                triton_data[kk * plane_size + static_cast<size_t>(yy) * input_w + xx] = float32_to_float16(normalized);
+            }
+        }
+    }
+}
+
+/**
+ * Converts the raw FP32 model output into detections in original-image pixel coordinates.
+ *
+ * detection_results is read as a row-major [4 + class count][8400] array: rows 0-3 hold
+ * the box centre x, centre y, width and height in model-input pixels, and each following
+ * row holds the scores of one class. Candidates are passed through NMS() and then mapped
+ * back to the source image by undoing the letterbox applied in Image::preprocess
+ * (640 x 640 model input).
+ *
+ * @param detection_results     Raw model output converted to FP32.
+ * @param detections            Output list; cleared first, then filled with the surviving boxes.
+ * @param object_class_list     Class names; its size defines the number of score rows.
+ * @param confidence_threshold  Detections scoring below this value are dropped.
+ * @param image_width           Width of the original image in pixels.
+ * @param image_height          Height of the original image in pixels.
+ * @return 0 on success, -1 if the image size is not positive or detection_results is
+ *         too small for the expected layout.
+ */
+int getDetectionsFromTritonRawData(std::vector<float>& detection_results, std::vector<struct detection_struct> &detections, std::vector<std::string>& object_class_list, float confidence_threshold, int image_width, int image_height)
+{
+    const size_t num_anchors = 8400;      // candidate boxes produced by the model per image
+    const int model_input_size = 640;     // square model input this example is built around
+    const size_t num_classes = object_class_list.size();
+
+    detections.clear();
+
+    // Refuse to index past the end of the raw buffer or to divide by a zero image size.
+    if (image_width <= 0 || image_height <= 0 ||
+        detection_results.size() < (num_classes + 4) * num_anchors)
+    {
+        return -1;
+    }
+
+    std::vector<BoundingBox> boxes;
+    for (size_t i = 0; i < num_anchors; i++)
+    {
+        // Box geometry is kept at whole model pixels, as before.
+        const float x = static_cast<float>(static_cast<int>(detection_results[0 * num_anchors + i]));
+        const float y = static_cast<float>(static_cast<int>(detection_results[1 * num_anchors + i]));
+        const float w = static_cast<float>(static_cast<int>(detection_results[2 * num_anchors + i]));
+        const float h = static_cast<float>(static_cast<int>(detection_results[3 * num_anchors + i]));
+
+        for (size_t j = 0; j < num_classes; j++)
+        {
+            const float score = detection_results[(4 + j) * num_anchors + i];
+            // Boxes below the confidence threshold can never suppress a box that is kept
+            // (NMS only lets higher scores suppress lower ones), so dropping them here
+            // gives the same detections while keeping the NMS input small.
+            if (score > 0.01 && !(score < confidence_threshold))
+            {
+                BoundingBox box;
+                box.x = x;
+                box.y = y;
+                box.w = w;
+                box.h = h;
+                box.score = score;
+                box.class_id = static_cast<int>(j);
+                boxes.push_back(box);
+            }
+        }
+    }
+
+    const std::vector<BoundingBox> nms_boxes = NMS(boxes, IOU_THRESHOLD);
+    if (nms_boxes.empty())
+    {
+        return 0;
+    }
+
+    // Recompute the letterbox parameters exactly as Image::preprocess derives them:
+    // a single scale factor plus centring padding on one axis.
+    const float r_w = model_input_size / (image_width * 1.0);
+    const float r_h = model_input_size / (image_height * 1.0);
+    float scale = 0.0f;
+    int pad_x = 0;
+    int pad_y = 0;
+    if (r_h > r_w)
+    {
+        scale = r_w;
+        const int resized_h = static_cast<int>(r_w * image_height);
+        pad_y = (model_input_size - resized_h) / 2;
+    }
+    else
+    {
+        scale = r_h;
+        const int resized_w = static_cast<int>(r_h * image_width);
+        pad_x = (model_input_size - resized_w) / 2;
+    }
+
+    // Margin (in pixels) kept between a clamped box and the image border.
+    const int offset_shift = static_cast<int>((image_width / 640.0f) * 10);
+
+    for (size_t i = 0; i < nms_boxes.size(); ++i)
+    {
+        const BoundingBox& box = nms_boxes[i];
+        if (box.score < confidence_threshold)
+        {
+            continue;
+        }
+
+        // Centre/size -> corners, then model pixels -> original image pixels.
+        const int x1 = static_cast<int>((box.x - box.w / 2 - pad_x) / scale);
+        const int y1 = static_cast<int>((box.y - box.h / 2 - pad_y) / scale);
+        const int x2 = static_cast<int>((box.x + box.w / 2 - pad_x) / scale);
+        const int y2 = static_cast<int>((box.y + box.h / 2 - pad_y) / scale);
+
+        struct detection_struct detection;
+        detection.confidence_score = box.score;
+        detection.class_id = box.class_id;
+        detection.name = object_class_list[box.class_id];
+        detection.bbox.x = x1;
+        detection.bbox.y = y1;
+        detection.bbox.width = x2 - x1;
+        detection.bbox.height = y2 - y1;
+
+        // Keep the box inside the image, leaving offset_shift pixels to the border.
+        if (detection.bbox.x <= 0)
+            detection.bbox.x = offset_shift;
+        if (detection.bbox.y <= 0)
+            detection.bbox.y = offset_shift;
+        if (detection.bbox.x + detection.bbox.width >= image_width)
+        {
+            detection.bbox.width = image_width - offset_shift - detection.bbox.x;
+        }
+        if (detection.bbox.y + detection.bbox.height >= image_height)
+        {
+            detection.bbox.height = image_height - offset_shift - detection.bbox.y;
+        }
+
+        detections.push_back(detection);
+    }
+    return 0;
+}
+
+
+// Intersection-over-union of two boxes given as centre (x, y) plus width and height.
+// Returns 0 when the boxes do not overlap.
+float IoU(const BoundingBox& box1, const BoundingBox& box2) {
+    // Corner coordinates of the first box.
+    const float a_left = box1.x - box1.w / 2.0f;
+    const float a_top = box1.y - box1.h / 2.0f;
+    const float a_right = box1.x + box1.w / 2.0f;
+    const float a_bottom = box1.y + box1.h / 2.0f;
+
+    // Corner coordinates of the second box.
+    const float b_left = box2.x - box2.w / 2.0f;
+    const float b_top = box2.y - box2.h / 2.0f;
+    const float b_right = box2.x + box2.w / 2.0f;
+    const float b_bottom = box2.y + box2.h / 2.0f;
+
+    // Extent of the overlapping rectangle along each axis.
+    const float overlap_w = std::min(a_right, b_right) - std::max(a_left, b_left);
+    const float overlap_h = std::min(a_bottom, b_bottom) - std::max(a_top, b_top);
+    if (overlap_w <= 0 || overlap_h <= 0) {
+        return 0.0f;
+    }
+
+    const float overlap_area = overlap_w * overlap_h;
+    const float a_area = (a_right - a_left) * (a_bottom - a_top);
+    const float b_area = (b_right - b_left) * (b_bottom - b_top);
+    return overlap_area / (a_area + b_area - overlap_area);
+}
+
+// Greedy non-maximum suppression: boxes are visited from highest to lowest score and a
+// box is kept unless its IoU with an already kept box exceeds iou_threshold.
+// Class ids are not considered. The input vector is left untouched.
+std::vector<BoundingBox> NMS(const std::vector<BoundingBox>& boxes, float iou_threshold) {
+    std::vector<BoundingBox> candidates(boxes);
+    const auto higher_score_first = [](const BoundingBox& lhs, const BoundingBox& rhs) {
+        return lhs.score > rhs.score;
+    };
+    std::sort(candidates.begin(), candidates.end(), higher_score_first);
+
+    const size_t count = candidates.size();
+    std::vector<char> alive(count, 1);  // 0 once a candidate has been suppressed
+    std::vector<BoundingBox> kept;
+
+    for (size_t current = 0; current < count; ++current) {
+        if (!alive[current]) {
+            continue;
+        }
+
+        const BoundingBox& winner = candidates[current];
+        kept.push_back(winner);
+
+        // Knock out every remaining lower-scored candidate that overlaps the winner too much.
+        for (size_t other = current + 1; other < count; ++other) {
+            if (alive[other] && IoU(winner, candidates[other]) > iou_threshold) {
+                alive[other] = 0;
+            }
+        }
+    }
+    return kept;
+}
+
+/**
+ * Creates the gRPC client and verifies that the server and the model are usable.
+ *
+ * The process is terminated with exit code -1 if the client cannot be created,
+ * the server is not ready, or the model is not ready.
+ *
+ * @param triton_address  Server address handed to the gRPC client.
+ * @param model_name      Name of the model to query.
+ * @param model_version   Version of the model to query.
+ * @param image_channel   Number of input channels.
+ * @param image_width     Model input width in pixels.
+ * @param image_height    Model input height in pixels.
+ * @param class_count     Number of classes; used to size the expected output.
+ */
+TritonCommunication::TritonCommunication(std::string triton_address, std::string model_name, std::string model_version, int image_channel, int image_width, int image_height, int class_count) : options(model_name)
+{
+    triton::client::Error err;
+    this->triton_url = triton_address;
+    
+    this->options.model_version_ = model_version;
+
+    // Image::preprocess writes the tensor channel-by-channel, row-by-row, so the
+    // shape has to be N, C, H, W (height before width).
+    this->shape = {1, image_channel, image_height, image_width};
+    this->input_byte_size = static_cast<size_t>(image_channel) * image_width * image_height * sizeof(uint16_t);
+    // Expected FP16 output: (4 box values + one score per class) for each of 8400 candidates.
+    this->output_byte_size = (static_cast<size_t>(class_count) + 4) * 8400 * sizeof(uint16_t);
+
+    err = tc::InferenceServerGrpcClient::Create(&(this->client), this->triton_url);
+    if (!err.IsOk() || !this->client) {
+        // Without a client every following call would dereference a null pointer.
+        std::cout<< "Create grpc client error:"<<err.Message()<<std::endl;
+        std::exit(-1);
+    }
+
+    bool live = false;
+    err = client->IsServerReady(&live);
+    if (!err.IsOk() || !live) {
+        std::cout<< "Triton server is not live !"<<std::endl;
+        std::exit(-1);
+    }
+
+    bool model_ready = false;
+    err = client->IsModelReady(&model_ready,model_name,model_version);
+    if (!err.IsOk() || !model_ready) {
+        std::cerr << "Model:[" << model_name << "] has not been deployed on Triton Server. Triton Server Address:["<<triton_address <<"]"<<std::endl;
+        std::exit(-1);
+    }
+
+    std::cout<<"Triton server is LIVE and model is READY!"<<std::endl;
+}
+
+/**
+ * Sends one preprocessed FP16 image to the model and stores the output as FP32.
+ *
+ * The request uses the input tensor "images" and reads the output tensor "output0".
+ * output_raw_data always ends up with the expected number of elements
+ * (output_byte_size / 2); it stays zero-filled if any step fails, and an error is
+ * printed to std::cerr.
+ *
+ * @param image_data  Pointer to input_byte_size bytes of FP16 tensor data.
+ */
+void TritonCommunication::infer(uint16_t* image_data)
+{
+    const size_t expected_elements = output_byte_size / sizeof(uint16_t);
+
+    // Start from an all-zero result so a failed request can never leave stale or
+    // uninitialised values behind for the post-processing step.
+    output_raw_data.assign(expected_elements, 0.0f);
+    results_ptr.reset();
+
+    tc::Error err;
+    tc::InferInput* input0 = nullptr;
+
+    err = tc::InferInput::Create(&input0, "images", shape, "FP16"); // FP16 is the data type of the input tensor.
+    if (!err.IsOk() || input0 == nullptr) {
+        std::cerr << "Failed to create inference input: " << err.Message() << std::endl;
+        return;
+    }
+    std::unique_ptr<tc::InferInput> input0_ptr(input0);
+
+    err = input0_ptr->AppendRaw(reinterpret_cast<const uint8_t*>(image_data), this->input_byte_size);
+    if (!err.IsOk()) {
+        std::cerr << "Failed to attach input data: " << err.Message() << std::endl;
+        return;
+    }
+
+    std::vector<tc::InferInput*> inputs = {input0_ptr.get()};
+
+    tc::InferResult* results = nullptr;
+    err = client->Infer(&results, options, inputs);
+    results_ptr.reset(results);  // takes ownership; keeps the output buffer alive while it is read
+    if (!err.IsOk() || results == nullptr) {
+        std::cerr << "Inference request failed: " << err.Message() << std::endl;
+        return;
+    }
+
+    const uint8_t* output0_data = nullptr;
+    size_t output0_byte_size = 0;
+    err = results_ptr->RawData("output0", &output0_data, &output0_byte_size); // output0 is a specific name for the output tensor.
+    if (!err.IsOk() || output0_data == nullptr) {
+        std::cerr << "Failed to read output tensor: " << err.Message() << std::endl;
+        return;
+    }
+
+    // Never read more halves than the server actually returned.
+    const size_t received_elements = output0_byte_size / sizeof(uint16_t);
+    if (received_elements != expected_elements) {
+        std::cerr << "Unexpected output size: got " << received_elements
+                  << " values, expected " << expected_elements << std::endl;
+    }
+    const size_t convert_count = received_elements < expected_elements ? received_elements : expected_elements;
+
+    for (size_t i = 0; i < convert_count; i++) {
+        uint16_t half_bits;
+        std::memcpy(&half_bits, output0_data + i * sizeof(uint16_t), sizeof(half_bits));
+        output_raw_data[i] = float16_to_float32(half_bits);
+    }
+}
--- a/examples/YOLO11-Triton-CPP/inference.hpp
+++ b/examples/YOLO11-Triton-CPP/inference.hpp
@ -0,0 +1,79 @@
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include "grpc_client.h"
+
+// Groups the image preprocessing step; it only exposes a static helper.
+class Image 
+{
+public:
+    Image() = default;
+    // Resizes/pads `image` to input_w x input_h and writes the result into
+    // `triton_data` as FP16 values (implemented in inference.cpp).
+    static void preprocess(cv::Mat* image, std::vector<uint16_t>& triton_data, int input_w, int input_h);
+};
+
+// Container for detection counts, class ids, boxes and scores.
+// NOTE(review): not referenced by the sources added in this change — confirm whether it is still needed.
+struct struct_yolo_output
+{
+    std::vector<int> num_dets, det_classes;
+    std::vector<float> det_boxes, det_scores;
+};
+
+// Candidate box used by NMS()/IoU(): (x, y) is the box centre, (w, h) its size.
+struct BoundingBox {
+    float x, y, w, h;
+    float score;   // class score of this candidate
+    int class_id;  // index into the class-name list
+};
+
+// Final detection returned to the caller and drawn onto the image in main.cpp.
+struct detection_struct
+{
+    cv::Rect bbox;            // box in image pixels (top-left corner plus size)
+    int class_id;             // index into the class-name list
+    std::string name;         // class name looked up from that list
+    double confidence_score;  // score of the detection
+};
+
+// Post-processing helpers implemented in inference.cpp.
+// They take and return C++ standard-library types, so they are declared with ordinary
+// C++ linkage: an extern "C" wrapper would not make them callable from C and only
+// triggers compiler warnings about C-incompatible types.
+
+// Converts the raw FP32 model output into detections in original-image coordinates.
+int getDetectionsFromTritonRawData(
+    std::vector<float>& detection_results,
+    std::vector<struct detection_struct>& tespitler,
+    std::vector<std::string>& object_class_list,
+    float confidence_threshold,
+    int image_width,
+    int image_height
+);
+
+// Greedy non-maximum suppression over `boxes`, highest score first.
+std::vector<BoundingBox> NMS(const std::vector<BoundingBox>& boxes, float iou_threshold);
+// Intersection-over-union of two centre/size boxes.
+float IoU(const BoundingBox& box1, const BoundingBox& box2);
+
+// Short alias for the Triton client namespace.
+namespace tc = triton::client;
+
+// Wraps a Triton gRPC client for a single model: the constructor connects and checks
+// readiness, infer() sends one image and stores the converted output.
+class TritonCommunication
+{
+private:
+    std::unique_ptr<tc::InferenceServerGrpcClient> client;  // gRPC client created in the constructor
+    std::string triton_url;                                  // server address the client was created with
+    std::vector<int64_t> shape;                              // shape of the input tensor sent with each request
+    tc::InferOptions options;                                // model name and version used for requests
+    size_t input_byte_size;                                  // size in bytes of one FP16 input tensor
+    size_t output_byte_size;                                 // expected size in bytes of the FP16 output tensor
+    std::shared_ptr<tc::InferResult> results_ptr;            // owns the result object of the latest request
+
+public:
+    // Output of the latest infer() call, converted to FP32.
+    std::vector<float> output_raw_data;
+
+    // Connects to the server; terminates the process if the server or model is not ready.
+    TritonCommunication(std::string triton_address,
+                        std::string model_name,
+                        std::string model_version,
+                        int image_channel,
+                        int image_width,
+                        int image_height,
+                        int class_count);
+
+    // Runs one inference on the FP16 tensor pointed to by triton_data and fills output_raw_data.
+    void infer(uint16_t* triton_data);
+};
--- a/examples/YOLO11-Triton-CPP/main.cpp
+++ b/examples/YOLO11-Triton-CPP/main.cpp
@ -0,0 +1,74 @@
+#include "inference.hpp"
+#include <iostream>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <cstdint>
+#include <chrono>
+
+// Model input size passed to the preprocessing step and to the Triton client.
+#define MODEL_INPUT_IMAGE_WIDTH 640 
+#define MODEL_INPUT_IMAGE_HEIGHT 640
+// Confidence threshold handed to getDetectionsFromTritonRawData().
+#define NETWORK_THRESHOLD 0.50
+// Number of colour channels in the input tensor.
+#define IMAGE_CHANNEL 3
+
+// Returns the system-clock time since the epoch in milliseconds,
+// with the fractional part carrying microsecond resolution.
+double get_time_since_epoch_millis()
+{
+    namespace chr = std::chrono;
+    const chr::system_clock::duration since_epoch = chr::system_clock::now().time_since_epoch();
+    const auto whole_microseconds = chr::duration_cast<chr::microseconds>(since_epoch).count();
+    return whole_microseconds / 1000.0;
+}
+
+/**
+ * Runs one detection pass: connects to Triton, preprocesses the image, runs inference,
+ * post-processes the output, draws the detections and writes the annotated image.
+ *
+ * @return 0 on success, -1 if the image cannot be read, the model output cannot be
+ *         parsed, or the result image cannot be written.
+ */
+int main(int argc, char *argv[])
+{
+    // Command-line arguments are not used; edit the values below to configure the run.
+    (void)argc;
+    (void)argv;
+
+    std::string triton_address= "localhost:8001"; 
+    std::string model_name= "yolo11"; 
+    std::string model_version= "1";
+    std::string image_path = "test.jpg";
+    std::string output_path = "output.jpg";
+    std::vector<std::string> object_class_list = {"class1", "class2"};
+
+    std::vector<uint16_t> triton_request_data;
+    triton_request_data.resize(IMAGE_CHANNEL*MODEL_INPUT_IMAGE_WIDTH*MODEL_INPUT_IMAGE_HEIGHT);
+    std::vector<struct detection_struct> detections;
+
+    // Terminates the process itself if the server or the model is not available.
+    TritonCommunication triton_communication(triton_address, model_name, model_version, IMAGE_CHANNEL, MODEL_INPUT_IMAGE_WIDTH, MODEL_INPUT_IMAGE_HEIGHT, static_cast<int>(object_class_list.size()));
+
+    cv::Mat frame = cv::imread(image_path);
+    if (frame.empty())
+    {
+        std::cerr << "Image couldn't read: " << image_path << std::endl;
+        return -1;
+    }
+
+    int image_width = frame.cols;
+    int image_height = frame.rows;
+
+    double preprocess_time = get_time_since_epoch_millis();
+    Image::preprocess(&frame, triton_request_data, MODEL_INPUT_IMAGE_WIDTH, MODEL_INPUT_IMAGE_HEIGHT);
+    std::cout << "Preprocess time : " << (get_time_since_epoch_millis() - preprocess_time)<< " millisecond."<< std::endl;
+
+    double infer_time = get_time_since_epoch_millis();
+    triton_communication.infer(triton_request_data.data());
+    std::cout << "Triton Server execute time : " << (get_time_since_epoch_millis() - infer_time) << " millisecond." << std::endl;
+
+    if (getDetectionsFromTritonRawData(triton_communication.output_raw_data, detections, object_class_list, NETWORK_THRESHOLD, image_width, image_height) != 0)
+    {
+        std::cerr << "Model output couldn't be parsed." << std::endl;
+        return -1;
+    }
+
+    // Label size and stroke width grow with the image width (relative to 640 px).
+    const float font_scale = (frame.cols / 640.0f) * 0.35;
+    const int text_thickness = (int)(frame.cols / 640.0f);
+
+    for (const struct detection_struct& detection : detections)
+    {
+        std::ostringstream oss;
+        oss << detection.name << " "
+        << std::fixed << std::setprecision(2)
+        << detection.confidence_score;
+
+        const cv::Point label_origin(detection.bbox.x, detection.bbox.y - 5);
+
+        cv::rectangle(frame, detection.bbox, cv::Scalar(255, 0, 0), 2);
+        // Thicker black text underneath the white text acts as an outline.
+        cv::putText(frame, oss.str(), label_origin, cv::FONT_HERSHEY_DUPLEX, font_scale, cv::Scalar(0, 0, 0), text_thickness + 1);
+        cv::putText(frame, oss.str(), label_origin, cv::FONT_HERSHEY_DUPLEX, font_scale, cv::Scalar(0xFF, 0xFF, 0xFF), text_thickness);
+    }
+
+    if (!cv::imwrite(output_path, frame))
+    {
+        std::cerr << "Result image couldn't be saved: " << output_path << std::endl;
+        return -1;
+    }
+    std::cout << "Result image saved!"<< std::endl;
+
+    return 0;
+}