Fix YOLO26 OpenVINO export with int8=True and update tflite wrapper (#23185)

Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Jing Qiu 2026-01-15 00:22:43 +08:00 committed by GitHub
parent d54577783f
commit 9fa6d9884b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 63 additions and 43 deletions

View file

@ -210,9 +210,7 @@ jobs:
run: uv cache prune --ci
SlowTests:
# TODO: Tests disabled to debug YOLO26 compatibility
# if: (github.event_name == 'workflow_dispatch' && github.event.inputs.tests == 'true') || github.event_name == 'schedule'
if: false
if: (github.event_name == 'workflow_dispatch' && github.event.inputs.tests == 'true') || github.event_name == 'schedule'
timeout-minutes: 360
runs-on: ${{ matrix.os }}
strategy:
@ -345,9 +343,7 @@ jobs:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
RaspberryPi:
# TODO: Tests disabled to debug YOLO26 compatibility
# if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true')
if: false
if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true')
timeout-minutes: 120
runs-on: raspberry-pi
steps:

View file

@ -15,6 +15,14 @@ keywords: Ultralytics, TensorFlow, SavedModel, Protocol Buffer, TensorFlow Lite,
<br><br><hr><br>
## ::: ultralytics.utils.export.tensorflow._tf_decode_boxes
<br><br><hr><br>
## ::: ultralytics.utils.export.tensorflow._tf_kpts_decode
<br><br><hr><br>
## ::: ultralytics.utils.export.tensorflow.onnx2saved_model
<br><br><hr><br>

View file

@ -5,7 +5,7 @@ from unittest import mock
import torch
from tests import MODEL
from tests import MODEL, SOURCE
from ultralytics import YOLO
from ultralytics.cfg import get_cfg
from ultralytics.engine.exporter import Exporter
@ -24,7 +24,7 @@ def test_export():
exporter.add_callback("on_export_start", test_func)
assert test_func in exporter.callbacks["on_export_start"], "callback test failed"
f = exporter(model=YOLO("yolo26n.yaml").model)
YOLO(f)(ASSETS) # exported model inference
YOLO(f)(SOURCE) # exported model inference
def test_detect():

View file

@ -144,7 +144,9 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
@pytest.mark.slow
@pytest.mark.skipif(not checks.IS_PYTHON_MINIMUM_3_10, reason="TFLite export requires Python>=3.10")
@pytest.mark.skipif(
not checks.IS_PYTHON_MINIMUM_3_10 or not TORCH_1_13, reason="TFLite export requires Python>=3.10 and torch>=1.13"
)
@pytest.mark.skipif(
not LINUX or IS_RASPBERRYPI,
reason="Test disabled as TF suffers from install conflicts on Windows, macOS and Raspberry Pi",

View file

@ -463,6 +463,9 @@ class Exporter:
)
if tfjs and (ARM64 and LINUX):
raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
if ncnn and hasattr(model.model[-1], "one2one_cv2"):
del model.model[-1].one2one_cv2 # Disable end2end branch for NCNN export as it does not support topk
LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
# Recommend OpenVINO if export and Intel CPU
if SETTINGS.get("openvino_msg"):
if is_intel():
@ -791,7 +794,6 @@ class Exporter:
f".*{head_module_name}/.*/Sub*",
f".*{head_module_name}/.*/Mul*",
f".*{head_module_name}/.*/Div*",
f".*{head_module_name}\\.dfl.*",
],
types=["Sigmoid"],
)

View file

@ -174,17 +174,7 @@ class Detect(nn.Module):
self.anchors, self.strides = (a.transpose(0, 1) for a in make_anchors(x["feats"], self.stride, 0.5))
self.shape = shape
boxes = x["boxes"]
if self.export and self.format in {"tflite", "edgetpu"}:
# Precompute normalization factor to increase numerical stability
# See https://github.com/ultralytics/ultralytics/issues/7371
grid_h = shape[2]
grid_w = shape[3]
grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=boxes.device).reshape(1, 4, 1)
norm = self.strides / (self.stride[0] * grid_size)
dbox = self.decode_bboxes(self.dfl(boxes) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
else:
dbox = self.decode_bboxes(self.dfl(boxes), self.anchors.unsqueeze(0)) * self.strides
dbox = self.decode_bboxes(self.dfl(x["boxes"]), self.anchors.unsqueeze(0)) * self.strides
return dbox
def bias_init(self):
@ -636,14 +626,7 @@ class Pose(Detect):
bs = kpts.shape[0]
if self.export:
y = kpts.view(bs, *self.kpt_shape, -1)
if self.format in {"tflite", "edgetpu"}:
# Precompute normalization factor to increase numerical stability
grid_h, grid_w = self.shape[2], self.shape[3]
grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
norm = self.strides / (self.stride[0] * grid_size)
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
else:
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
if ndim == 3:
a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
return a.view(bs, self.nk, -1)
@ -758,20 +741,9 @@ class Pose26(Pose):
ndim = self.kpt_shape[1]
bs = kpts.shape[0]
if self.export:
if self.format in {
"tflite",
"edgetpu",
}: # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
# Precompute normalization factor to increase numerical stability
y = kpts.view(bs, *self.kpt_shape, -1)
grid_h, grid_w = self.shape[2], self.shape[3]
grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
norm = self.strides / (self.stride[0] * grid_size)
a = (y[:, :, :2] + self.anchors) * norm
else:
# NCNN fix
y = kpts.view(bs, *self.kpt_shape, -1)
a = (y[:, :, :2] + self.anchors) * self.strides
y = kpts.view(bs, *self.kpt_shape, -1)
# NCNN fix
a = (y[:, :, :2] + self.anchors) * self.strides
if ndim == 3:
a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
return a.view(bs, self.nk, -1)

View file

@ -2,21 +2,61 @@
from __future__ import annotations
from functools import partial
from pathlib import Path
import numpy as np
import torch
from ultralytics.nn.modules import Detect, Pose, Pose26
from ultralytics.utils import LOGGER
from ultralytics.utils.downloads import attempt_download_asset
from ultralytics.utils.files import spaces_in_path
from ultralytics.utils.tal import make_anchors
def tf_wrapper(model: torch.nn.Module) -> torch.nn.Module:
    """Patch Detect-derived heads on a model for TensorFlow export compatibility.

    Rebinds the box-decoding method (and the keypoint-decoding method for Pose
    heads) of every Detect-based module to TF-friendly implementations that
    normalize coordinates by the grid size for numerical stability.

    Args:
        model: Model whose Detect-derived head modules are patched in place.

    Returns:
        The same model instance with patched heads.
    """
    import types  # hoisted out of the loop; only needed during export

    for m in model.modules():
        if not isinstance(m, Detect):
            continue
        m._get_decode_boxes = types.MethodType(_tf_decode_boxes, m)
        if isinstance(m, Pose):
            # Pose26 heads use a different anchor offset; bind the flag via partial
            m.kpts_decode = types.MethodType(partial(_tf_kpts_decode, is_pose26=type(m) is Pose26), m)
    return model
def _tf_decode_boxes(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
    """Decode bounding boxes for TensorFlow export, normalizing by grid size for numerical stability."""
    feat_shape = x["feats"][0].shape  # BCHW
    raw = x["boxes"]
    # Recompute anchors/strides when the input shape changed (skipped for the IMX format)
    if self.format != "imx" and (self.dynamic or self.shape != feat_shape):
        self.anchors, self.strides = (a.transpose(0, 1) for a in make_anchors(x["feats"], self.stride, 0.5))
        self.shape = feat_shape
    h, w = feat_shape[2:4]
    wh = torch.tensor([w, h, w, h], device=raw.device).reshape(1, 4, 1)
    # Fold the per-level stride and grid size into one normalization factor
    scale = self.strides / (self.stride[0] * wh)
    return self.decode_bboxes(self.dfl(raw) * scale, self.anchors.unsqueeze(0) * scale[:, :2])
def _tf_kpts_decode(self, kpts: torch.Tensor, is_pose26: bool = False) -> torch.Tensor:
    """Decode keypoints for TensorFlow export, normalizing coordinates by grid size."""
    dims = self.kpt_shape[1]
    batch = kpts.shape[0]
    pts = kpts.view(batch, *self.kpt_shape, -1)
    # Precompute normalization factor to increase numerical stability
    h, w = self.shape[2:4]
    wh = torch.tensor([w, h], device=pts.device).reshape(1, 2, 1)
    scale = self.strides / (self.stride[0] * wh)
    if is_pose26:
        decoded = (pts[:, :, :2] + self.anchors) * scale
    else:
        decoded = (pts[:, :, :2] * 2.0 + (self.anchors - 0.5)) * scale
    if dims == 3:
        # Third channel is keypoint visibility/confidence
        decoded = torch.cat((decoded, pts[:, :, 2:3].sigmoid()), 2)
    return decoded.view(batch, self.nk, -1)
def onnx2saved_model(
onnx_file: str,
output_dir: Path,