From bf44eb1bd1b5bd077e4c5ea23e599708b49ba88a Mon Sep 17 00:00:00 2001 From: Francesco Mattioli Date: Wed, 29 Oct 2025 05:45:40 +0100 Subject: [PATCH] Support IMX export and inference for classification (#21405) Signed-off-by: Lakshantha Dissanayake Co-authored-by: Lakshantha Dissanayake Co-authored-by: UltralyticsAssistant Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> Co-authored-by: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> Co-authored-by: Laughing-q <1185102784@qq.com> --- docs/en/integrations/sony-imx500.md | 109 ++++++++++++++++++++++++++-- ultralytics/engine/exporter.py | 4 +- ultralytics/utils/benchmarks.py | 4 +- ultralytics/utils/export/imx.py | 67 ++++++++++------- 4 files changed, 145 insertions(+), 39 deletions(-) diff --git a/docs/en/integrations/sony-imx500.md b/docs/en/integrations/sony-imx500.md index b5a06627af..4843532393 100644 --- a/docs/en/integrations/sony-imx500.md +++ b/docs/en/integrations/sony-imx500.md @@ -41,6 +41,7 @@ Currently, you can only export models that include the following tasks to IMX500 - [Object detection](https://docs.ultralytics.com/tasks/detect/) - [Pose estimation](https://docs.ultralytics.com/tasks/pose/) +- [Classification](https://docs.ultralytics.com/tasks/classify/) ## Usage Examples @@ -110,6 +111,36 @@ Export an Ultralytics YOLO11 model to IMX500 format and run inference with the e yolo predict model=yolo11n-pose_imx_model source='https://ultralytics.com/images/bus.jpg' ``` +!!! 
example "Classification" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a YOLO11n-cls PyTorch model + model = YOLO("yolo11n-cls.pt") + + # Export the model + model.export(format="imx", data="imagenet10") # exports with PTQ quantization by default + + # Load the exported model + imx_model = YOLO("yolo11n-cls_imx_model") + + # Run inference + results = imx_model("https://ultralytics.com/images/bus.jpg", imgsz=224) + ``` + + === "CLI" + + ```bash + # Export a YOLO11n-cls PyTorch model to imx format with Post-Training Quantization (PTQ) + yolo export model=yolo11n-cls.pt format=imx data=imagenet10 + + # Run inference with the exported model + yolo predict model=yolo11n-cls_imx_model source='https://ultralytics.com/images/bus.jpg' imgsz=224 + ``` + !!! warning The Ultralytics package installs additional export dependencies at runtime. The first time you run the export command, you may need to restart your console to ensure it works correctly. @@ -159,6 +190,18 @@ The export process will create an ONNX model for quantization validation, along └── yolo11n-pose_imx.pbtxt ``` + === "Classification" + + ```bash + yolo11n-cls_imx_model + ├── dnnParams.xml + ├── labels.txt + ├── packerOut.zip + ├── yolo11n-cls_imx.onnx + ├── yolo11n-cls_imx_MemoryReport.json + └── yolo11n-cls_imx.pbtxt + ``` + ## Using IMX500 Export in Deployment After exporting Ultralytics YOLO11n model to IMX500 format, it can be deployed to Raspberry Pi AI Camera for inference. 
@@ -299,20 +342,70 @@ Step 5: Run YOLO11 object detection and pose estimation by using the below scrip frame.display() ``` + === "Classification" + + ```python + import cv2 + import numpy as np + from modlib.apps import Annotator + from modlib.devices import AiCamera + from modlib.models import COLOR_FORMAT, MODEL_TYPE, Model + from modlib.models.post_processors import pp_cls + + + class YOLOClassification(Model): + """YOLO classification model for IMX500 deployment.""" + + def __init__(self): + """Initialize the YOLO classification model for IMX500 deployment.""" + super().__init__( + model_file="yolo11n-cls_imx_model/packerOut.zip", # replace with proper directory + model_type=MODEL_TYPE.CONVERTED, + color_format=COLOR_FORMAT.RGB, + preserve_aspect_ratio=False, + ) + + self.labels = np.genfromtxt("yolo11n-cls_imx_model/labels.txt", dtype=str, delimiter="\n") + + def post_process(self, output_tensors): + """Post-process the output tensors for classification.""" + return pp_cls(output_tensors) + + + device = AiCamera() + model = YOLOClassification() + device.deploy(model) + + annotator = Annotator() + + with device as stream: + for frame in stream: + for i, label in enumerate([model.labels[id] for id in frame.detections.class_id[:3]]): + text = f"{i + 1}. {label}: {frame.detections.confidence[i]:.2f}" + cv2.putText(frame.image, text, (50, 30 + 40 * (i + 1)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100, 0, 100), 2) + + frame.display() + ``` + ## Benchmarks -YOLOv8n, YOLO11n, YOLOv8n-pose and YOLO11n-pose benchmarks below were run by the Ultralytics team on Raspberry Pi AI Camera with `imx` model format measuring speed and accuracy. +YOLOv8n, YOLO11n, YOLOv8n-pose, YOLO11n-pose, YOLOv8n-cls and YOLO11n-cls benchmarks below were run by the Ultralytics team on Raspberry Pi AI Camera with `imx` model format measuring speed and accuracy. 
-| Model | Format | Status | Size of `packerOut.zip` (MB) | mAP50-95(B) | Inference time (ms/im) | -| ------------ | ------ | ------ | ---------------------------- | ----------- | ---------------------- | -| YOLOv8n | imx | ✅ | 2.1 | 0.470 | 58.79 | -| YOLO11n | imx | ✅ | 2.2 | 0.517 | 58.82 | -| YOLOv8n-pose | imx | ✅ | 2.0 | 0.687 | 58.79 | -| YOLO11n-pose | imx | ✅ | 2.1 | 0.788 | 62.50 | +| Model | Format | Size (pixels) | Size of `packerOut.zip` (MB) | mAP50-95(B) | Inference time (ms/im) | +| ------------ | ------ | ------------- | ---------------------------- | ----------- | ---------------------- | +| YOLOv8n | imx | 640 | 2.1 | 0.470 | 58.79 | +| YOLO11n | imx | 640 | 2.2 | 0.517 | 58.82 | +| YOLOv8n-pose | imx | 640 | 2.0 | 0.687 | 58.79 | +| YOLO11n-pose | imx | 640 | 2.1 | 0.788 | 62.50 | + +| Model | Format | Size (pixels) | Size of `packerOut.zip` (MB) | acc (top1) | acc (top5) | Inference time (ms/im) | +| ----------- | ------ | ------------- | ---------------------------- | ---------- | ---------- | ---------------------- | +| YOLOv8n-cls | imx | 224 | 2.3 | 0.25 | 0.5 | 33.31 | +| YOLO11n-cls | imx | 224 | 2.3 | 0.25 | 0.417 | 33.31 | !!! note - Validation for the above benchmarks were done using COCO128 dataset for detection models and COCO8-Pose dataset for pose estimation models + Validation for the above benchmarks was done using the COCO128 dataset for detection models, the COCO8-Pose dataset for pose estimation models, and ImageNet10 for classification models. ## What's Under the Hood? 
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 983e04342b..21f5eaa457 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -367,10 +367,10 @@ class Exporter: if not self.args.int8: LOGGER.warning("IMX export requires int8=True, setting int8=True.") self.args.int8 = True - if not self.args.nms: + if not self.args.nms and model.task in {"detect", "pose"}: LOGGER.warning("IMX export requires nms=True, setting nms=True.") self.args.nms = True - if model.task not in {"detect", "pose"}: + if model.task not in {"detect", "pose", "classify"}: raise ValueError("IMX export only supported for detection and pose estimation models.") if not hasattr(model, "names"): model.names = default_class_names() diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py index d433b81557..443bb91589 100644 --- a/ultralytics/utils/benchmarks.py +++ b/ultralytics/utils/benchmarks.py @@ -144,7 +144,9 @@ def benchmark( if format == "imx": assert not is_end2end assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported" - assert model.task == "detect", "IMX only supported for detection task" + assert model.task in {"detect", "classify", "pose"}, ( + "IMX export is only supported for detection, classification and pose estimation tasks" + ) assert "C2f" in model.__str__(), "IMX only supported for YOLOv8n and YOLO11n" if format == "rknn": assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet" diff --git a/ultralytics/utils/export/imx.py b/ultralytics/utils/export/imx.py index a72ea31acc..3241fe5066 100644 --- a/ultralytics/utils/export/imx.py +++ b/ultralytics/utils/export/imx.py @@ -6,6 +6,7 @@ import subprocess import types from pathlib import Path +import numpy as np import torch from ultralytics.nn.modules import Detect, Pose @@ -13,6 +14,32 @@ from ultralytics.utils import LOGGER from ultralytics.utils.tal import make_anchors from 
ultralytics.utils.torch_utils import copy_attr +# Configuration for Model Compression Toolkit (MCT) quantization +MCT_CONFIG = { + "YOLO11": { + "detect": { + "layer_names": ["sub", "mul_2", "add_14", "cat_21"], + "weights_memory": 2585350.2439, + "n_layers": 238, + }, + "pose": { + "layer_names": ["sub", "mul_2", "add_14", "cat_22", "cat_23", "mul_4", "add_15"], + "weights_memory": 2437771.67, + "n_layers": 257, + }, + "classify": {"layer_names": [], "weights_memory": np.inf, "n_layers": 112}, + }, + "YOLOv8": { + "detect": {"layer_names": ["sub", "mul", "add_6", "cat_17"], "weights_memory": 2550540.8, "n_layers": 168}, + "pose": { + "layer_names": ["add_7", "mul_2", "cat_19", "mul", "sub", "add_6", "cat_18"], + "weights_memory": 2482451.85, + "n_layers": 187, + }, + "classify": {"layer_names": [], "weights_memory": np.inf, "n_layers": 73}, + }, +} + class FXModel(torch.nn.Module): """ @@ -200,30 +227,13 @@ def torch2imx( tpc = get_target_platform_capabilities(tpc_version="4.0", device_type="imx500") bit_cfg = mct.core.BitWidthConfig() - if "C2PSA" in model.__str__(): # YOLO11 - if model.task == "detect": - layer_names = ["sub", "mul_2", "add_14", "cat_21"] - weights_memory = 2585350.2439 - n_layers = 238 # 238 layers for fused YOLO11n - elif model.task == "pose": - layer_names = ["sub", "mul_2", "add_14", "cat_22", "cat_23", "mul_4", "add_15"] - weights_memory = 2437771.67 - n_layers = 257 # 257 layers for fused YOLO11n-pose - else: # YOLOv8 - if model.task == "detect": - layer_names = ["sub", "mul", "add_6", "cat_17"] - weights_memory = 2550540.8 - n_layers = 168 # 168 layers for fused YOLOv8n - elif model.task == "pose": - layer_names = ["add_7", "mul_2", "cat_19", "mul", "sub", "add_6", "cat_18"] - weights_memory = 2482451.85 - n_layers = 187 # 187 layers for fused YOLO11n-pose + mct_config = MCT_CONFIG["YOLO11" if "C2PSA" in model.__str__() else "YOLOv8"][model.task] # Check if the model has the expected number of layers - if len(list(model.modules())) != 
n_layers: + if len(list(model.modules())) != mct_config["n_layers"]: raise ValueError("IMX export only supported for YOLOv8n and YOLO11n models.") - for layer_name in layer_names: + for layer_name in mct_config["layer_names"]: bit_cfg.set_manual_activation_bit_width([mct.core.common.network_editors.NodeNameFilter(layer_name)], 16) config = mct.core.CoreConfig( @@ -232,7 +242,7 @@ def torch2imx( bit_width_config=bit_cfg, ) - resource_utilization = mct.core.ResourceUtilization(weights_memory=weights_memory) + resource_utilization = mct.core.ResourceUtilization(weights_memory=mct_config["weights_memory"]) quant_model = ( mct.gptq.pytorch_gradient_post_training_quantization( # Perform Gradient-Based Post Training Quantization @@ -255,13 +265,14 @@ def torch2imx( )[0] ) - quant_model = NMSWrapper( - model=quant_model, - score_threshold=conf or 0.001, - iou_threshold=iou, - max_detections=max_det, - task=model.task, - ) + if model.task != "classify": + quant_model = NMSWrapper( + model=quant_model, + score_threshold=conf or 0.001, + iou_threshold=iou, + max_detections=max_det, + task=model.task, + ) f = Path(str(file).replace(file.suffix, "_imx_model")) f.mkdir(exist_ok=True)