Fix YOLO26 OpenVINO export with int8=True and update tflite wrapper (#23185)

Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Jing Qiu 2026-01-15 00:22:43 +08:00 committed by GitHub
parent d54577783f
commit 9fa6d9884b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 63 additions and 43 deletions

View file

@ -210,9 +210,7 @@ jobs:
run: uv cache prune --ci
SlowTests:
# TODO: Tests disabled to debug YOLO26 compatibility
# if: (github.event_name == 'workflow_dispatch' && github.event.inputs.tests == 'true') || github.event_name == 'schedule'
if: false
if: (github.event_name == 'workflow_dispatch' && github.event.inputs.tests == 'true') || github.event_name == 'schedule'
timeout-minutes: 360
runs-on: ${{ matrix.os }}
strategy:
@ -345,9 +343,7 @@ jobs:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
RaspberryPi:
# TODO: Tests disabled to debug YOLO26 compatibility
# if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true')
if: false
if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true')
timeout-minutes: 120
runs-on: raspberry-pi
steps:

View file

@ -15,6 +15,14 @@ keywords: Ultralytics, TensorFlow, SavedModel, Protocol Buffer, TensorFlow Lite,
<br><br><hr><br>
## ::: ultralytics.utils.export.tensorflow._tf_decode_boxes
<br><br><hr><br>
## ::: ultralytics.utils.export.tensorflow._tf_kpts_decode
<br><br><hr><br>
## ::: ultralytics.utils.export.tensorflow.onnx2saved_model
<br><br><hr><br>

View file

@ -5,7 +5,7 @@ from unittest import mock
import torch
from tests import MODEL
from tests import MODEL, SOURCE
from ultralytics import YOLO
from ultralytics.cfg import get_cfg
from ultralytics.engine.exporter import Exporter
@ -24,7 +24,7 @@ def test_export():
exporter.add_callback("on_export_start", test_func)
assert test_func in exporter.callbacks["on_export_start"], "callback test failed"
f = exporter(model=YOLO("yolo26n.yaml").model)
YOLO(f)(ASSETS) # exported model inference
YOLO(f)(SOURCE) # exported model inference
def test_detect():

View file

@ -144,7 +144,9 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
@pytest.mark.slow
@pytest.mark.skipif(not checks.IS_PYTHON_MINIMUM_3_10, reason="TFLite export requires Python>=3.10")
@pytest.mark.skipif(
not checks.IS_PYTHON_MINIMUM_3_10 or not TORCH_1_13, reason="TFLite export requires Python>=3.10 and torch>=1.13"
)
@pytest.mark.skipif(
not LINUX or IS_RASPBERRYPI,
reason="Test disabled as TF suffers from install conflicts on Windows, macOS and Raspberry Pi",

View file

@ -463,6 +463,9 @@ class Exporter:
)
if tfjs and (ARM64 and LINUX):
raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
if ncnn and hasattr(model.model[-1], "one2one_cv2"):
del model.model[-1].one2one_cv2 # Disable end2end branch for NCNN export as it does not support topk
LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
# Recommend OpenVINO if export and Intel CPU
if SETTINGS.get("openvino_msg"):
if is_intel():
@ -791,7 +794,6 @@ class Exporter:
f".*{head_module_name}/.*/Sub*",
f".*{head_module_name}/.*/Mul*",
f".*{head_module_name}/.*/Div*",
f".*{head_module_name}\\.dfl.*",
],
types=["Sigmoid"],
)

View file

@ -174,17 +174,7 @@ class Detect(nn.Module):
self.anchors, self.strides = (a.transpose(0, 1) for a in make_anchors(x["feats"], self.stride, 0.5))
self.shape = shape
boxes = x["boxes"]
if self.export and self.format in {"tflite", "edgetpu"}:
# Precompute normalization factor to increase numerical stability
# See https://github.com/ultralytics/ultralytics/issues/7371
grid_h = shape[2]
grid_w = shape[3]
grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=boxes.device).reshape(1, 4, 1)
norm = self.strides / (self.stride[0] * grid_size)
dbox = self.decode_bboxes(self.dfl(boxes) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
else:
dbox = self.decode_bboxes(self.dfl(boxes), self.anchors.unsqueeze(0)) * self.strides
dbox = self.decode_bboxes(self.dfl(x["boxes"]), self.anchors.unsqueeze(0)) * self.strides
return dbox
def bias_init(self):
@ -636,14 +626,7 @@ class Pose(Detect):
bs = kpts.shape[0]
if self.export:
y = kpts.view(bs, *self.kpt_shape, -1)
if self.format in {"tflite", "edgetpu"}:
# Precompute normalization factor to increase numerical stability
grid_h, grid_w = self.shape[2], self.shape[3]
grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
norm = self.strides / (self.stride[0] * grid_size)
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
else:
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
if ndim == 3:
a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
return a.view(bs, self.nk, -1)
@ -758,20 +741,9 @@ class Pose26(Pose):
ndim = self.kpt_shape[1]
bs = kpts.shape[0]
if self.export:
if self.format in {
"tflite",
"edgetpu",
}: # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
# Precompute normalization factor to increase numerical stability
y = kpts.view(bs, *self.kpt_shape, -1)
grid_h, grid_w = self.shape[2], self.shape[3]
grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
norm = self.strides / (self.stride[0] * grid_size)
a = (y[:, :, :2] + self.anchors) * norm
else:
# NCNN fix
y = kpts.view(bs, *self.kpt_shape, -1)
a = (y[:, :, :2] + self.anchors) * self.strides
y = kpts.view(bs, *self.kpt_shape, -1)
# NCNN fix
a = (y[:, :, :2] + self.anchors) * self.strides
if ndim == 3:
a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
return a.view(bs, self.nk, -1)

View file

@ -2,21 +2,61 @@
from __future__ import annotations
from functools import partial
from pathlib import Path
import numpy as np
import torch
from ultralytics.nn.modules import Detect, Pose, Pose26
from ultralytics.utils import LOGGER
from ultralytics.utils.downloads import attempt_download_asset
from ultralytics.utils.files import spaces_in_path
from ultralytics.utils.tal import make_anchors
def tf_wrapper(model: torch.nn.Module) -> torch.nn.Module:
    """Patch Detect-derived heads on a model for TensorFlow export compatibility.

    Rebinds the box-decoding method (and the keypoint-decoding method for Pose
    heads) of every Detect-based module to TF-friendly implementations that
    normalize coordinates by the grid size for numerical stability.

    Args:
        model: Model whose Detect-derived head modules are patched in place.

    Returns:
        The same model instance with patched heads.
    """
    import types  # hoisted out of the loop; only needed during export

    for m in model.modules():
        if not isinstance(m, Detect):
            continue
        m._get_decode_boxes = types.MethodType(_tf_decode_boxes, m)
        if isinstance(m, Pose):
            # Pose26 heads use a different anchor offset; bind the flag via partial
            m.kpts_decode = types.MethodType(partial(_tf_kpts_decode, is_pose26=type(m) is Pose26), m)
    return model
def _tf_decode_boxes(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
    """Decode bounding boxes for TensorFlow export, normalizing by grid size for numerical stability."""
    feat_shape = x["feats"][0].shape  # BCHW
    raw = x["boxes"]
    # Recompute anchors/strides when the input shape changed (skipped for the IMX format)
    if self.format != "imx" and (self.dynamic or self.shape != feat_shape):
        self.anchors, self.strides = (a.transpose(0, 1) for a in make_anchors(x["feats"], self.stride, 0.5))
        self.shape = feat_shape
    h, w = feat_shape[2:4]
    wh = torch.tensor([w, h, w, h], device=raw.device).reshape(1, 4, 1)
    # Fold the per-level stride and grid size into one normalization factor
    scale = self.strides / (self.stride[0] * wh)
    return self.decode_bboxes(self.dfl(raw) * scale, self.anchors.unsqueeze(0) * scale[:, :2])
def _tf_kpts_decode(self, kpts: torch.Tensor, is_pose26: bool = False) -> torch.Tensor:
    """Decode keypoints for TensorFlow export, normalizing coordinates by grid size."""
    dims = self.kpt_shape[1]
    batch = kpts.shape[0]
    pts = kpts.view(batch, *self.kpt_shape, -1)
    # Precompute normalization factor to increase numerical stability
    h, w = self.shape[2:4]
    wh = torch.tensor([w, h], device=pts.device).reshape(1, 2, 1)
    scale = self.strides / (self.stride[0] * wh)
    if is_pose26:
        decoded = (pts[:, :, :2] + self.anchors) * scale
    else:
        decoded = (pts[:, :, :2] * 2.0 + (self.anchors - 0.5)) * scale
    if dims == 3:
        # Third channel is keypoint visibility/confidence
        decoded = torch.cat((decoded, pts[:, :, 2:3].sigmoid()), 2)
    return decoded.view(batch, self.nk, -1)
def onnx2saved_model(
onnx_file: str,
output_dir: Path,