From 6aa70cd983a3c2f85ee45f167525a1a16a458671 Mon Sep 17 00:00:00 2001 From: Onuralp SEZER Date: Tue, 14 Apr 2026 13:54:51 +0300 Subject: [PATCH 01/25] docs: remove outdated Snyk badge from security documentation and update old links (#24221) Signed-off-by: Onuralp SEZER --- docs/en/help/security.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/help/security.md b/docs/en/help/security.md index 6acc7737ee..bdac9f49c7 100644 --- a/docs/en/help/security.md +++ b/docs/en/help/security.md @@ -9,9 +9,9 @@ At [Ultralytics](https://www.ultralytics.com/), the security of our users' data ## Snyk Scanning -We utilize [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct comprehensive security scans on Ultralytics repositories. Snyk's robust scanning capabilities extend beyond dependency checks; it also examines our code and Dockerfiles for various vulnerabilities. By identifying and addressing these issues proactively, we ensure a higher level of security and reliability for our users. +We utilize [Snyk](https://security.snyk.io/package/pip/ultralytics) to conduct comprehensive security scans on Ultralytics repositories. Snyk's robust scanning capabilities extend beyond dependency checks; it also examines our code and Dockerfiles for various vulnerabilities. By identifying and addressing these issues proactively, we ensure a higher level of security and reliability for our users. -[![ultralytics](https://snyk.io/test/github/ultralytics/ultralytics/badge.svg)](https://snyk.io/advisor/python/ultralytics) +[![ultralytics](https://img.shields.io/badge/Snyk_security-monitored-8A2BE2)](https://security.snyk.io/package/pip/ultralytics) ## GitHub CodeQL Scanning @@ -51,7 +51,7 @@ These tools ensure proactive identification and resolution of security issues, e ### How does Ultralytics use Snyk for security scanning? -Ultralytics utilizes [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct thorough security scans on its repositories. 
Snyk extends beyond basic dependency checks, examining the code and Dockerfiles for various vulnerabilities. By proactively identifying and resolving potential security issues, Snyk helps ensure that Ultralytics' open-source projects remain secure and reliable. +Ultralytics utilizes [Snyk](https://security.snyk.io/package/pip/ultralytics) to conduct thorough security scans on its repositories. Snyk extends beyond basic dependency checks, examining the code and Dockerfiles for various vulnerabilities. By proactively identifying and resolving potential security issues, Snyk helps ensure that Ultralytics' open-source projects remain secure and reliable. To see the Snyk badge and learn more about its deployment, check the [Snyk Scanning section](#snyk-scanning). From 6b5c2a86f7437d95063b6f1182af4c1126a458f0 Mon Sep 17 00:00:00 2001 From: Jin Xu Date: Wed, 15 Apr 2026 05:50:51 +0800 Subject: [PATCH 02/25] Update annotation viewer zoom shortcut docs (#24214) Signed-off-by: Jin Xu Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> --- docs/en/platform/data/annotation.md | 4 ++++ docs/en/platform/data/datasets.md | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/en/platform/data/annotation.md b/docs/en/platform/data/annotation.md index c30fa14c1e..e982df56e5 100644 --- a/docs/en/platform/data/annotation.md +++ b/docs/en/platform/data/annotation.md @@ -444,6 +444,10 @@ Efficient annotation with keyboard shortcuts: | `Delete` / `Backspace` | Delete selected annotation | | `1-9` | Select class 1-9 | | `Cmd/Ctrl+Scroll` | Zoom in/out | + | `Cmd/Ctrl++` or `Cmd/Ctrl+=` | Zoom in | + | `Cmd/Ctrl+-` | Zoom out | + | `Cmd/Ctrl+0` | Reset to fit | + | `Space+Drag` | Pan canvas when zoomed | | `Shift+Click` | Multi-select annotations | | `Cmd/Ctrl+A` | Select all annotations | diff --git a/docs/en/platform/data/datasets.md b/docs/en/platform/data/datasets.md index e9c4c5bdf9..9241b7e479 100644 --- 
a/docs/en/platform/data/datasets.md +++ b/docs/en/platform/data/datasets.md @@ -268,7 +268,9 @@ Click any image to open the fullscreen viewer with: - **Edit**: Enter annotation mode to add or modify labels - **Download**: Download the original image file - **Delete**: Delete the image from the dataset -- **Zoom**: `Cmd/Ctrl+Scroll` to zoom in/out +- **Zoom**: `Cmd/Ctrl+Scroll`, `Cmd/Ctrl++`, or `Cmd/Ctrl+=` to zoom in, and `Cmd/Ctrl+-` to zoom out +- **Reset view**: `Cmd/Ctrl + 0` or the reset button to fit the image to the viewer +- **Pan**: Hold `Space` and drag to pan the canvas when zoomed - **Pixel view**: Toggle pixelated rendering for close inspection ![Ultralytics Platform Datasets Fullscreen Viewer With Metadata Panel](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-datasets-fullscreen-viewer-with-metadata-panel.avif) From 826cd7f35744549c565261eb2a4d5e46ba3293d7 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 14 Apr 2026 23:54:20 +0200 Subject: [PATCH 03/25] Fix Sentry bugs 2026-04-14 (#24220) --- ultralytics/utils/checks.py | 5 ++++- ultralytics/utils/export/rknn.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 4471793c4d..e6a217363b 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -478,7 +478,10 @@ def check_requirements(requirements=ROOT.parent / "requirements.txt", exclude=() text=True, ) return subprocess.check_output( - f"pip install --no-cache-dir {packages} {commands}", shell=True, stderr=subprocess.STDOUT, text=True + f'"{sys.executable}" -m pip install --no-cache-dir {packages} {commands}', + shell=True, + stderr=subprocess.STDOUT, + text=True, ) s = " ".join(f'"{x}"' for x in pkgs) # console string diff --git a/ultralytics/utils/export/rknn.py b/ultralytics/utils/export/rknn.py index 2f51a66bce..d1aacec58b 100644 --- a/ultralytics/utils/export/rknn.py +++ b/ultralytics/utils/export/rknn.py @@ 
-27,7 +27,7 @@ def onnx2rknn( from ultralytics.utils.checks import check_requirements LOGGER.info(f"\n{prefix} starting export with rknn-toolkit2...") - check_requirements("rknn-toolkit2") + check_requirements("rknn-toolkit2>=2.3.2") check_requirements("onnx<1.19.0") # fix AttributeError: module 'onnx' has no attribute 'mapping' if IS_COLAB: From da03154f86a74541899977031e3f9211d24add6f Mon Sep 17 00:00:00 2001 From: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> Date: Wed, 15 Apr 2026 14:59:21 +0800 Subject: [PATCH 04/25] Fix table format in `annotation.md` (#24232) --- docs/en/platform/data/annotation.md | 50 ++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/en/platform/data/annotation.md b/docs/en/platform/data/annotation.md index e982df56e5..b30ac869f9 100644 --- a/docs/en/platform/data/annotation.md +++ b/docs/en/platform/data/annotation.md @@ -434,22 +434,22 @@ Efficient annotation with keyboard shortcuts: === "General" - | Shortcut | Action | - | ---------------------- | -------------------------- | - | `Cmd/Ctrl+S` | Save annotations | - | `Cmd/Ctrl+Z` | Undo | - | `Cmd/Ctrl+Shift+Z` | Redo | - | `Cmd/Ctrl+Y` | Redo (alternative) | - | `Escape` | Save / Deselect / Exit | - | `Delete` / `Backspace` | Delete selected annotation | - | `1-9` | Select class 1-9 | - | `Cmd/Ctrl+Scroll` | Zoom in/out | - | `Cmd/Ctrl++` or `Cmd/Ctrl+=` | Zoom in | - | `Cmd/Ctrl+-` | Zoom out | - | `Cmd/Ctrl+0` | Reset to fit | - | `Space+Drag` | Pan canvas when zoomed | - | `Shift+Click` | Multi-select annotations | - | `Cmd/Ctrl+A` | Select all annotations | + | Shortcut | Action | + | ----------------------------- | ---------------------------- | + | `Cmd/Ctrl+S` | Save annotations | + | `Cmd/Ctrl+Z` | Undo | + | `Cmd/Ctrl+Shift+Z` | Redo | + | `Cmd/Ctrl+Y` | Redo (alternative) | + | `Escape` | Save / Deselect / Exit | + | `Delete` / `Backspace` | Delete selected annotation | + | `1-9` | Select class 1-9 | + | 
`Cmd/Ctrl+Scroll` | Zoom in/out | + | `Cmd/Ctrl++` or `Cmd/Ctrl+=` | Zoom in | + | `Cmd/Ctrl+-` | Zoom out | + | `Cmd/Ctrl+0` | Reset to fit | + | `Space+Drag` | Pan canvas when zoomed | + | `Shift+Click` | Multi-select annotations | + | `Cmd/Ctrl+A` | Select all annotations | === "Modes" @@ -460,15 +460,15 @@ Efficient annotation with keyboard shortcuts: === "Drawing" - | Shortcut | Action | - | -------------- | --------------------------------------------------------- | - | `Click+Drag` | Draw bounding box (detect/OBB) | - | `Click` | Add polygon point (segment) / Place skeleton (pose) | - | `Right-click` | Complete polygon / Add SAM negative point | - | `Shift` + `click`/`right-click` | Place multiple SAM points before applying (auto-apply on) | - | `A` | Toggle auto-apply (Smart mode) | - | `Enter` | Complete polygon / Confirm pose / Save SAM annotation | - | `Escape` | Cancel pose / Save SAM annotation / Deselect / Exit | + | Shortcut | Action | + | ------------------------------- | ----------------------------------------------------------- | + | `Click+Drag` | Draw bounding box (detect/OBB) | + | `Click` | Add polygon point (segment) / Place skeleton (pose) | + | `Right-click` | Complete polygon / Add SAM negative point | + | `Shift` + `click`/`right-click` | Place multiple SAM points before applying (auto-apply on) | + | `A` | Toggle auto-apply (Smart mode) | + | `Enter` | Complete polygon / Confirm pose / Save SAM annotation | + | `Escape` | Cancel pose / Save SAM annotation / Deselect / Exit | === "Arrange (Z-Order)" From 4730551b1ff6c3c82837cb173dc7245f21752e3d Mon Sep 17 00:00:00 2001 From: Lakshantha Dissanayake Date: Wed, 15 Apr 2026 00:00:55 -0700 Subject: [PATCH 05/25] Update `axelera-runtime` installation command to allow prerelease versions (#24230) --- ultralytics/nn/backends/axelera.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ultralytics/nn/backends/axelera.py b/ultralytics/nn/backends/axelera.py index 
2720614f38..03222f0a25 100644 --- a/ultralytics/nn/backends/axelera.py +++ b/ultralytics/nn/backends/axelera.py @@ -28,7 +28,7 @@ class AxeleraBackend(BaseBackend): except ImportError: check_requirements( "axelera-rt==1.6.0rc3", - cmds="--extra-index-url https://software.axelera.ai/artifactory/api/pypi/axelera-pypi/simple", + cmds="--extra-index-url https://software.axelera.ai/artifactory/api/pypi/axelera-pypi/simple --pre", ) from axelera.runtime import op From a38a7705eee19bbf177cf1632f3e21ebbb6d0cdc Mon Sep 17 00:00:00 2001 From: Onuralp SEZER Date: Wed, 15 Apr 2026 10:10:22 +0300 Subject: [PATCH 06/25] fix: OpenVINO 2026 segfault on conda CI (#24224) Signed-off-by: Onuralp SEZER Co-authored-by: UltralyticsAssistant Co-authored-by: Lakshantha Dissanayake --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 84864ccdd3..c578eca8ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -435,7 +435,7 @@ jobs: channel-priority: true activate-environment: anaconda-client-env - name: Install Ultralytics package from conda-forge - run: conda install -c pytorch -c conda-forge pytorch-cpu torchvision ultralytics "openvino!=2026.0.0" + run: conda install -c pytorch -c conda-forge pytorch-cpu torchvision ultralytics "openvino<2026" - name: Install pip packages run: uv pip install pytest - name: Check environment From c4315b27fe4ff65ac8e12b937553421ead5a8d87 Mon Sep 17 00:00:00 2001 From: Murat Raimbekov Date: Wed, 15 Apr 2026 15:49:00 +0600 Subject: [PATCH 07/25] Clarify tt100k uses 221 annotation categories with 45 trainable classes (#24234) Co-authored-by: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> --- docs/en/datasets/detect/tt100k.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/datasets/detect/tt100k.md b/docs/en/datasets/detect/tt100k.md index 4fbeaf9860..f672af7c69 100644 --- a/docs/en/datasets/detect/tt100k.md 
+++ b/docs/en/datasets/detect/tt100k.md @@ -8,7 +8,7 @@ keywords: TT100K, Tsinghua-Tencent 100K, traffic sign detection, YOLO26, dataset The [Tsinghua-Tencent 100K (TT100K)](https://cg.cs.tsinghua.edu.cn/traffic-sign/) is a large-scale traffic sign benchmark dataset created from 100,000 Tencent Street View panoramas. This dataset is specifically designed for traffic sign detection and classification in real-world conditions, providing researchers and developers with a comprehensive resource for building robust traffic sign recognition systems. -The dataset contains **100,000 images** with over **30,000 traffic sign instances** across **221 different categories**. These images capture large variations in illuminance, weather conditions, viewing angles, and distances, making it ideal for training models that need to perform reliably in diverse real-world scenarios. +The dataset contains **100,000 images** with over **30,000 traffic sign instances** across **221 annotation categories**. The original paper applies a 100-instance threshold per class for supervised training, yielding a commonly used **45-class** subset; however, the provided Ultralytics dataset configuration retains all **221 annotated categories**, many of which are very sparse. These images capture large variations in illuminance, weather conditions, viewing angles, and distances, making it ideal for training models that need to perform reliably in diverse real-world scenarios. 
This dataset is particularly valuable for: From f647b25f8b9baf351c72060dccb656746782aab4 Mon Sep 17 00:00:00 2001 From: Murat Raimbekov Date: Wed, 15 Apr 2026 18:47:25 +0600 Subject: [PATCH 08/25] Docs: add cli tabs for all 12 yolo solutions (#24217) Signed-off-by: Onuralp SEZER Co-authored-by: Onuralp SEZER Co-authored-by: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> Co-authored-by: UltralyticsAssistant --- docs/en/usage/cli.md | 89 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md index b503342654..12f2828a71 100644 --- a/docs/en/usage/cli.md +++ b/docs/en/usage/cli.md @@ -237,7 +237,7 @@ You can then pass this file as `cfg=default_copy.yaml` along with any additional ## Solutions Commands -Ultralytics provides ready-to-use solutions for common computer vision applications through the CLI. These solutions simplify the implementation of complex tasks like object counting, workout monitoring, and queue management. +Ultralytics provides ready-to-use solutions for common computer vision applications through the CLI. The `yolo solutions` command exposes object counting, cropping, blurring, workout monitoring, heatmaps, instance segmentation, VisionEye, speed estimation, queue management, analytics, Streamlit inference, and zone-based tracking — see the [Solutions](../solutions/index.md) page for the full catalog. Run `yolo solutions help` to list every supported solution and its arguments. !!! 
example @@ -250,6 +250,26 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions count source="path/to/video.mp4" # specify video file path ``` + === "Crop" + + Crop detected objects and save them to disk: + + ```bash + yolo solutions crop show=True + yolo solutions crop source="path/to/video.mp4" # specify video file path + yolo solutions crop classes="[0, 2]" # crop only selected classes + ``` + + === "Blur" + + Blur detected objects in a video for privacy or to highlight other regions: + + ```bash + yolo solutions blur show=True + yolo solutions blur source="path/to/video.mp4" # specify video file path + yolo solutions blur classes="[0, 5]" # blur only selected classes + ``` + === "Workout" Monitor workout exercises using a pose model: @@ -259,8 +279,49 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions workout source="path/to/video.mp4" # specify video file path # Use keypoints for ab-workouts - yolo solutions workout kpts=[5, 11, 13] # left side - yolo solutions workout kpts=[6, 12, 14] # right side + yolo solutions workout kpts="[5, 11, 13]" # left side + yolo solutions workout kpts="[6, 12, 14]" # right side + ``` + + === "Heatmap" + + Generate a heatmap showing object density and movement patterns: + + ```bash + yolo solutions heatmap show=True + yolo solutions heatmap source="path/to/video.mp4" # specify video file path + yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO # customize colormap + yolo solutions heatmap region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" # restrict heatmap to a region + ``` + + === "Isegment" + + Run instance segmentation with tracking on a video: + + ```bash + yolo solutions isegment show=True + yolo solutions isegment source="path/to/video.mp4" # specify video file path + yolo solutions isegment classes="[0, 5]" # segment only selected classes + ``` + + === "VisionEye" + + Draw object-to-observer sightlines with VisionEye: + 
+ ```bash + yolo solutions visioneye show=True + yolo solutions visioneye source="path/to/video.mp4" # specify video file path + yolo solutions visioneye classes="[0, 5]" # monitor only selected classes + ``` + + === "Speed" + + Estimate the speed of moving objects in a video: + + ```bash + yolo solutions speed show=True + yolo solutions speed source="path/to/video.mp4" # specify video file path + yolo solutions speed meter_per_pixel=0.05 # set scale for real-world units ``` === "Queue" @@ -273,6 +334,18 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions queue region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" # configure queue coordinates ``` + === "Analytics" + + Generate analytical charts (line, bar, area, or pie) from tracked detections: + + ```bash + yolo solutions analytics show=True + yolo solutions analytics source="path/to/video.mp4" # specify video file path + yolo solutions analytics analytics_type="pie" show=True + yolo solutions analytics analytics_type="bar" show=True + yolo solutions analytics analytics_type="area" show=True + ``` + === "Inference" Perform object detection, instance segmentation, or pose estimation in a web browser using Streamlit: @@ -282,6 +355,16 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions inference model="path/to/model.pt" # use custom model ``` + === "TrackZone" + + Track objects only inside a specified polygonal zone: + + ```bash + yolo solutions trackzone show=True + yolo solutions trackzone source="path/to/video.mp4" # specify video file path + yolo solutions trackzone region="[(150, 150), (1130, 150), (1130, 570), (150, 570)]" # configure zone coordinates + ``` + === "Help" View available solutions and their options: From 1bd9712f6c7a2d30e225d763e8438875c89efc30 Mon Sep 17 00:00:00 2001 From: Tim Schoonbeek <54444435+TimSchoonbeek@users.noreply.github.com> Date: Thu, 16 Apr 2026 03:53:40 +0200 Subject: [PATCH 09/25] 
Fix: corrected undocumented scaling of track_buffer in trackers (#24247) --- ultralytics/trackers/byte_tracker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py index 05aa364026..dac74d5b3f 100644 --- a/ultralytics/trackers/byte_tracker.py +++ b/ultralytics/trackers/byte_tracker.py @@ -241,7 +241,7 @@ class BYTETracker: removed_stracks (list[STrack]): List of removed tracks. frame_id (int): The current frame ID. args (Namespace): Command-line arguments. - max_time_lost (int): The maximum frames for a track to be considered as 'lost'. + max_frames_lost (int): The maximum frames for a track to be considered as 'lost'. kalman_filter (KalmanFilterXYAH): Kalman Filter object. Methods: @@ -276,7 +276,7 @@ class BYTETracker: self.frame_id = 0 self.args = args - self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer) + self.max_frames_lost = args.track_buffer self.kalman_filter = self.get_kalmanfilter() self.reset_id() @@ -377,7 +377,7 @@ class BYTETracker: activated_stracks.append(track) # Step 5: Update state for track in self.lost_stracks: - if self.frame_id - track.end_frame > self.max_time_lost: + if self.frame_id - track.end_frame > self.max_frames_lost: track.mark_removed() removed_stracks.append(track) From 5dc005dfaa5017d47a76750d27d16c400d745c52 Mon Sep 17 00:00:00 2001 From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:08:34 +0600 Subject: [PATCH 10/25] Check for `lrpc` attribute before attempting fuse during YOLOE model export (#24239) Signed-off-by: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Co-authored-by: UltralyticsAssistant --- ultralytics/engine/exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index d0c241ba18..4565f86f46 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ 
-476,7 +476,7 @@ class Exporter: m.agnostic_nms = self.args.agnostic_nms m.xyxy = self.args.nms and fmt != "coreml" m.shape = None # reset cached shape for new export input size - if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models + if hasattr(model, "pe") and hasattr(m, "fuse") and not hasattr(m, "lrpc"): # for YOLOE models m.fuse(model.pe.to(self.device)) elif isinstance(m, C2f) and not is_tf_format: # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph From 518d31c1eef2015a2d7d6497b5b32eb27f72fe21 Mon Sep 17 00:00:00 2001 From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:16:05 +0600 Subject: [PATCH 11/25] Return the model instance when calling `fuse` method (#24246) Signed-off-by: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Co-authored-by: UltralyticsAssistant --- ultralytics/engine/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 903de57f9b..ee953fbe1f 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -426,7 +426,7 @@ class Model(torch.nn.Module): self._check_is_pytorch_model() return self.model.info(detailed=detailed, verbose=verbose, imgsz=imgsz) - def fuse(self) -> None: + def fuse(self) -> Model: """Fuse Conv2d and BatchNorm2d layers in the model for optimized inference. 
This method iterates through the model's modules and fuses consecutive Conv2d and BatchNorm2d layers into a @@ -444,6 +444,7 @@ class Model(torch.nn.Module): """ self._check_is_pytorch_model() self.model.fuse() + return self def embed( self, From ca24fdd77d01d31439df593cb8e9be2c73a07eac Mon Sep 17 00:00:00 2001 From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:29:35 +0600 Subject: [PATCH 12/25] SAM3: Skip geometry token when using text prompts (#24244) Co-authored-by: UltralyticsAssistant Co-authored-by: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> --- ultralytics/models/sam/predict.py | 3 ++- ultralytics/models/sam/sam3/sam3_image.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 700a9a1513..b3b9831a23 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -2273,8 +2273,9 @@ class SAM3SemanticPredictor(SAM3Predictor): """Run inference on the extracted features with optional bounding boxes and labels.""" # NOTE: priority: bboxes > text > pre-set classes nc = 1 if bboxes is not None else len(text) if text is not None else len(self.model.names) - geometric_prompt = self._get_dummy_prompt(nc) + geometric_prompt = None if bboxes is not None: + geometric_prompt = self._get_dummy_prompt(nc) for i in range(len(bboxes)): geometric_prompt.append_boxes(bboxes[[i]], labels[[i]]) if text is None: diff --git a/ultralytics/models/sam/sam3/sam3_image.py b/ultralytics/models/sam/sam3/sam3_image.py index 105aecaf86..92efe160c5 100644 --- a/ultralytics/models/sam/sam3/sam3_image.py +++ b/ultralytics/models/sam/sam3/sam3_image.py @@ -290,15 +290,18 @@ class SAM3SemanticModel(torch.nn.Module): self, backbone_out, batch=len(text_ids) ) backbone_out.update({k: v for k, v in self.text_embeddings.items()}) - with torch.profiler.record_function("SAM3Image._encode_prompt"): - prompt, prompt_mask = 
self._encode_prompt(img_feats, img_pos_embeds, vis_feat_sizes, geometric_prompt) # index text features (note that regardless of early or late fusion, the batch size of # `txt_feats` is always the number of *prompts* in the encoder) txt_feats = backbone_out["language_features"][:, text_ids] txt_masks = backbone_out["language_mask"][text_ids] - # encode text - prompt = torch.cat([txt_feats, prompt], dim=0) - prompt_mask = torch.cat([txt_masks, prompt_mask], dim=1) + if geometric_prompt is not None: + with torch.profiler.record_function("SAM3Image._encode_prompt"): + geo_prompt, geo_mask = self._encode_prompt(img_feats, img_pos_embeds, vis_feat_sizes, geometric_prompt) + prompt = torch.cat([txt_feats, geo_prompt], dim=0) + prompt_mask = torch.cat([txt_masks, geo_mask], dim=1) + else: + prompt = txt_feats + prompt_mask = txt_masks # Run the encoder with torch.profiler.record_function("SAM3Image._run_encoder"): From dd7f30e51ee529af838c68278c0418b87e6c1f49 Mon Sep 17 00:00:00 2001 From: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> Date: Thu, 16 Apr 2026 15:29:59 +0800 Subject: [PATCH 13/25] Update solution tests to use cached session assets (#24237) Co-authored-by: UltralyticsAssistant --- tests/conftest.py | 45 ++++++++++++++- tests/test_engine.py | 18 +++--- tests/test_exports.py | 4 +- tests/test_python.py | 28 ++++++---- tests/test_solutions.py | 119 +++++++++++++++++++--------------------- 5 files changed, 127 insertions(+), 87 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6a6b644d6d..d53768aff4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,49 @@ import shutil from pathlib import Path +import pytest + + +@pytest.fixture(scope="session") +def solution_assets(): + """Session-scoped fixture to cache solution test assets. + + Lazily downloads solution assets into a persistent directory (WEIGHTS_DIR/solution_assets) and returns a callable + that resolves asset names to cached paths. 
+ """ + from ultralytics.utils import ASSETS_URL, WEIGHTS_DIR + from ultralytics.utils.downloads import safe_download + + # Use persistent directory alongside weights + cache_dir = WEIGHTS_DIR / "solution_assets" + cache_dir.mkdir(parents=True, exist_ok=True) + + # Define all assets needed for solution tests + assets = { + # Videos + "demo_video": "solutions_ci_demo.mp4", + "crop_video": "decelera_landscape_min.mov", + "pose_video": "solution_ci_pose_demo.mp4", + "parking_video": "solution_ci_parking_demo.mp4", + "vertical_video": "solution_vertical_demo.mp4", + # Parking manager files + "parking_areas": "solution_ci_parking_areas.json", + "parking_model": "solutions_ci_parking_model.pt", + } + + asset_paths = {} + + def get_asset(name): + """Return the cached path for a named solution asset, downloading it on first use.""" + if name not in asset_paths: + asset_path = cache_dir / assets[name] + if not asset_path.exists(): + safe_download(url=f"{ASSETS_URL}/{asset_path.name}", dir=cache_dir) + asset_paths[name] = asset_path + return asset_paths[name] + + return get_asset + def pytest_addoption(parser): """Add custom command-line options to pytest.""" @@ -55,5 +98,5 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): # Remove directories models = [path for x in {"*.mlpackage", "*_openvino_model"} for path in WEIGHTS_DIR.rglob(x)] - for directory in [WEIGHTS_DIR / "path with spaces", *models]: + for directory in [WEIGHTS_DIR / "solution_assets", WEIGHTS_DIR / "path with spaces", *models]: shutil.rmtree(directory, ignore_errors=True) diff --git a/tests/test_engine.py b/tests/test_engine.py index a198b2685d..b7ef006e14 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -25,7 +25,7 @@ def test_export(): """Test model exporting functionality by adding a callback and verifying its execution.""" exporter = Exporter() exporter.add_callback("on_export_start", test_func) - assert test_func in exporter.callbacks["on_export_start"], "callback test 
failed" + assert test_func in exporter.callbacks["on_export_start"], "on_export_start callback not registered" f = exporter(model=YOLO("yolo26n.yaml").model) YOLO(f)(SOURCE) # exported model inference @@ -76,7 +76,7 @@ def test_task(trainer_cls, validator_cls, predictor_cls, data, model, weights): # Trainer trainer = trainer_cls(overrides=overrides) trainer.add_callback("on_train_start", test_func) - assert test_func in trainer.callbacks["on_train_start"], "callback test failed" + assert test_func in trainer.callbacks["on_train_start"], "on_train_start callback not registered" trainer.train() # Validator @@ -85,13 +85,13 @@ def test_task(trainer_cls, validator_cls, predictor_cls, data, model, weights): cfg.imgsz = 32 val = validator_cls(args=cfg) val.add_callback("on_val_start", test_func) - assert test_func in val.callbacks["on_val_start"], "callback test failed" + assert test_func in val.callbacks["on_val_start"], "on_val_start callback not registered" val(model=trainer.best) # Predictor pred = predictor_cls(overrides={"imgsz": [64, 64]}) pred.add_callback("on_predict_start", test_func) - assert test_func in pred.callbacks["on_predict_start"], "callback test failed" + assert test_func in pred.callbacks["on_predict_start"], "on_predict_start callback not registered" # Determine model path for prediction model_path = weights if weights else trainer.best @@ -99,10 +99,10 @@ def test_task(trainer_cls, validator_cls, predictor_cls, data, model, weights): # Confirm there is no issue with sys.argv being empty with mock.patch.object(sys, "argv", []): result = pred(source=ASSETS, model=model_path) - assert len(result), "predictor test failed" + assert len(result) > 0, f"Predictor returned no results for {model}" else: result = pred(source=ASSETS, model=model_path) - assert len(result), "predictor test failed" + assert len(result) > 0, f"Predictor returned no results for {model}" # Test resume functionality with pytest.raises(AssertionError): @@ -198,6 +198,6 @@ def 
test_train_reuses_loaded_checkpoint_model(monkeypatch): model.train(data="coco8.yaml", epochs=1) - assert captured["trainer"].model is original_model - assert captured["cfg"] == original_model.yaml - assert captured["weights"] is original_model + assert captured["trainer"].model is original_model, "Trainer model does not match original" + assert captured["cfg"] == original_model.yaml, f"Config mismatch: {captured['cfg']} != {original_model.yaml}" + assert captured["weights"] is original_model, "Weights do not match original model" diff --git a/tests/test_exports.py b/tests/test_exports.py index 0eb11dee5d..38fcd668ba 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -64,8 +64,8 @@ def test_torch2onnx_serializes_concurrent_exports(monkeypatch, tmp_path): for thread in threads: thread.join() - assert not errors - assert max_active == 1 + assert not errors, f"Concurrent export errors: {errors}" + assert max_active == 1, f"Expected max 1 concurrent export, got {max_active}" @pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1") diff --git a/tests/test_python.py b/tests/test_python.py index 692577633b..871c7bb8af 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -82,7 +82,7 @@ def test_predict_txt(tmp_path): for src in SOURCES_LIST: f.write(f"{src}\n") results = YOLO(MODEL)(source=file, imgsz=32) - assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images + assert len(results) == 7, f"Expected 7 results from source list, got {len(results)}" @pytest.mark.skipif(True, reason="disabled for testing") @@ -94,7 +94,7 @@ def test_predict_csv_multi_row(tmp_path): writer.writerow(["source"]) writer.writerows([[src] for src in SOURCES_LIST]) results = YOLO(MODEL)(source=file, imgsz=32) - assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images + assert len(results) == 7, f"Expected 7 results from multi-row CSV, got {len(results)}" @pytest.mark.skipif(True, reason="disabled for testing") @@ -105,7 +105,7 @@ def 
test_predict_csv_single_row(tmp_path): writer = csv.writer(f) writer.writerow(SOURCES_LIST) results = YOLO(MODEL)(source=file, imgsz=32) - assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images + assert len(results) == 7, f"Expected 7 results from single-row CSV, got {len(results)}" @pytest.mark.parametrize("model_name", MODELS) @@ -156,7 +156,7 @@ def test_predict_gray_and_4ch(tmp_path): for f in source_rgba, source_grayscale, source_non_utf, source_spaces: for source in Image.open(f), cv2.imread(str(f)), f: results = model(source, save=True, verbose=True, imgsz=32) - assert len(results) == 1 # verify that an image was run + assert len(results) == 1, f"Expected 1 result for {f.name}, got {len(results)}" f.unlink() # cleanup @@ -335,16 +335,21 @@ def test_labels_and_crops(): assert len(cls_idxs) >= 2, f"Expected at least 2 detections, got {len(cls_idxs)}" # Check label path labels = save_path / f"labels/{im_name}.txt" - assert labels.exists() + assert labels.exists(), f"Label file {labels} does not exist" # Check detections match label count - assert len(r.boxes.data) == len([line for line in labels.read_text().splitlines() if line]) + label_count = len([line for line in labels.read_text().splitlines() if line]) + assert len(r.boxes.data) == label_count, f"Box count {len(r.boxes.data)} != label count {label_count}" # Check crops path and files crop_dirs = list((save_path / "crops").iterdir()) crop_files = [f for p in crop_dirs for f in p.glob("*")] # Crop directories match detections - assert all(r.names.get(c) in {d.name for d in crop_dirs} for c in cls_idxs) + crop_dir_names = {d.name for d in crop_dirs} + assert all(r.names.get(c) in crop_dir_names for c in cls_idxs), ( + f"Crop dirs {crop_dir_names} don't match classes {cls_idxs}" + ) # Same number of crops as detections - assert len([f for f in crop_files if im_name in f.name]) == len(r.boxes.data) + crop_count = len([f for f in crop_files if im_name in f.name]) + assert crop_count == len(r.boxes.data), f"Crop 
count {crop_count} != detection count {len(r.boxes.data)}" @pytest.mark.skipif(not ONLINE, reason="environment is offline") @@ -383,9 +388,10 @@ def test_safe_download_unzips_local_path_archive(tmp_path): zf.write(path, arcname=path.relative_to(tmp_path)) extracted = safe_download(archive, dir=tmp_path / "datasets", unzip=True, progress=False) - assert extracted == (tmp_path / "datasets" / dataset_dir.name) - assert (extracted / "data.yaml").is_file() - assert (extracted / "images" / "val").is_dir() + expected_path = tmp_path / "datasets" / dataset_dir.name + assert extracted == expected_path, f"Extracted path {extracted} != expected {expected_path}" + assert (extracted / "data.yaml").is_file(), f"data.yaml not found in {extracted}" + assert (extracted / "images" / "val").is_dir(), f"images/val not found in {extracted}" @pytest.mark.skipif(not ONLINE, reason="environment is offline") diff --git a/tests/test_solutions.py b/tests/test_solutions.py index dd05a7267e..5892efc7a5 100644 --- a/tests/test_solutions.py +++ b/tests/test_solutions.py @@ -18,13 +18,6 @@ from ultralytics.utils.torch_utils import TORCH_2_4 # Predefined argument values SHOW = False -DEMO_VIDEO = "solutions_ci_demo.mp4" # for all the solutions, except workout, object cropping and parking management -CROP_VIDEO = "decelera_landscape_min.mov" # for object cropping solution -POSE_VIDEO = "solution_ci_pose_demo.mp4" # only for workouts monitoring solution -PARKING_VIDEO = "solution_ci_parking_demo.mp4" # only for parking management solution -PARKING_AREAS_JSON = "solution_ci_parking_areas.json" # only for parking management solution -PARKING_MODEL = "solutions_ci_parking_model.pt" # only for parking management solution -VERTICAL_VIDEO = "solution_vertical_demo.mp4" # only for vertical line counting REGION = [(10, 200), (540, 200), (540, 180), (10, 180)] # for object counting, speed estimation and queue management HORIZONTAL_LINE = [(10, 200), (540, 200)] # for object counting VERTICAL_LINE = [(320, 
0), (320, 400)] # for object counting @@ -50,129 +43,129 @@ def process_video(solution, video_path: str, needs_frame_count: bool = False): @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled for testing due to --slow test errors after YOLOE PR.") @pytest.mark.parametrize( - "name, solution_class, needs_frame_count, video, kwargs", + "name, solution_class, needs_frame_count, video_key, kwargs_update", [ ( "ObjectCounter", solutions.ObjectCounter, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), ( "ObjectCounter", solutions.ObjectCounter, False, - DEMO_VIDEO, + "demo_video", {"region": HORIZONTAL_LINE, "model": MODEL, "show": SHOW}, ), ( "ObjectCounterVertical", solutions.ObjectCounter, False, - DEMO_VIDEO, + "vertical_video", {"region": VERTICAL_LINE, "model": MODEL, "show": SHOW}, ), ( "ObjectCounterwithOBB", solutions.ObjectCounter, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": "yolo26n-obb.pt", "show": SHOW}, ), ( "Heatmap", solutions.Heatmap, False, - DEMO_VIDEO, + "demo_video", {"colormap": cv2.COLORMAP_PARULA, "model": MODEL, "show": SHOW, "region": None}, ), ( "HeatmapWithRegion", solutions.Heatmap, False, - DEMO_VIDEO, + "demo_video", {"colormap": cv2.COLORMAP_PARULA, "region": REGION, "model": MODEL, "show": SHOW}, ), ( "SpeedEstimator", solutions.SpeedEstimator, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), ( "QueueManager", solutions.QueueManager, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), ( "LineAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "line", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), ( "PieAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "pie", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), ( "BarAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "bar", 
"model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), ( "AreaAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "area", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), - ("TrackZone", solutions.TrackZone, False, DEMO_VIDEO, {"region": REGION, "model": MODEL, "show": SHOW}), + ("TrackZone", solutions.TrackZone, False, "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}), ( "ObjectCropper", solutions.ObjectCropper, False, - CROP_VIDEO, + "crop_video", {"temp_crop_dir": "cropped-detections", "model": MODEL, "show": SHOW}, ), ( "ObjectBlurrer", solutions.ObjectBlurrer, False, - DEMO_VIDEO, + "demo_video", {"blur_ratio": 0.02, "model": MODEL, "show": SHOW}, ), ( "InstanceSegmentation", solutions.InstanceSegmentation, False, - DEMO_VIDEO, + "demo_video", {"model": "yolo26n-seg.pt", "show": SHOW}, ), - ("VisionEye", solutions.VisionEye, False, DEMO_VIDEO, {"model": MODEL, "show": SHOW}), + ("VisionEye", solutions.VisionEye, False, "demo_video", {"model": MODEL, "show": SHOW}), ( "RegionCounter", solutions.RegionCounter, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), - ("AIGym", solutions.AIGym, False, POSE_VIDEO, {"kpts": [6, 8, 10], "show": SHOW}), + ("AIGym", solutions.AIGym, False, "pose_video", {"kpts": [6, 8, 10], "show": SHOW}), ( "ParkingManager", solutions.ParkingManagement, False, - PARKING_VIDEO, - {"temp_model": str(PARKING_MODEL), "show": SHOW, "temp_json_file": str(PARKING_AREAS_JSON)}, + "parking_video", + {"model": "parking_model", "show": SHOW, "json_file": "parking_areas"}, ), ( "StreamlitInference", @@ -183,34 +176,31 @@ def process_video(solution, video_path: str, needs_frame_count: bool = False): ), ], ) -def test_solution(name, solution_class, needs_frame_count, video, kwargs, tmp_path): +def test_solution(name, solution_class, needs_frame_count, video_key, kwargs_update, tmp_path, solution_assets): """Test individual Ultralytics solution with video 
processing and parameter validation.""" - if video: - if name != "ObjectCounterVertical": - safe_download(url=f"{ASSETS_URL}/{video}", dir=tmp_path) - else: - safe_download(url=f"{ASSETS_URL}/{VERTICAL_VIDEO}", dir=tmp_path) - if name == "ParkingManager": - safe_download(url=f"{ASSETS_URL}/{PARKING_AREAS_JSON}", dir=tmp_path) - safe_download(url=f"{ASSETS_URL}/{PARKING_MODEL}", dir=tmp_path) + # Get video path from persistent cache (no copying needed, read-only access) + video_path = str(solution_assets(video_key)) if video_key else None - elif name == "StreamlitInference": + # Update kwargs to use cached paths for parking manager + kwargs = {} + for key, value in kwargs_update.items(): + if key.startswith("temp_"): + kwargs[key.replace("temp_", "")] = str(tmp_path / value) + elif value == "parking_model": + kwargs[key] = str(solution_assets("parking_model")) + elif value == "parking_areas": + kwargs[key] = str(solution_assets("parking_areas")) + else: + kwargs[key] = value + + if name == "StreamlitInference": if checks.check_imshow(): # do not merge with elif above solution_class(**kwargs).inference() # requires interactive GUI environment return - # Update kwargs to use tmp_path - kwargs_updated = {} - for key in kwargs: - if key.startswith("temp_"): - kwargs_updated[key.replace("temp_", "")] = str(tmp_path / kwargs[key]) - else: - kwargs_updated[key] = kwargs[key] - - video = VERTICAL_VIDEO if name == "ObjectCounterVertical" else video process_video( - solution=solution_class(**kwargs_updated), - video_path=str(tmp_path / video), + solution=solution_class(**kwargs), + video_path=video_path, needs_frame_count=needs_frame_count, ) @@ -220,7 +210,7 @@ def test_left_click_selection(): dc = solutions.DistanceCalculation() dc.boxes, dc.track_ids = [[10, 10, 50, 50]], [1] dc.mouse_event_for_distance(cv2.EVENT_LBUTTONDOWN, 30, 30, None, None) - assert 1 in dc.selected_boxes + assert 1 in dc.selected_boxes, f"Expected track_id 1 in selected_boxes, got 
{dc.selected_boxes}" def test_right_click_reset(): @@ -228,8 +218,8 @@ def test_right_click_reset(): dc = solutions.DistanceCalculation() dc.selected_boxes, dc.left_mouse_count = {1: [10, 10, 50, 50]}, 1 dc.mouse_event_for_distance(cv2.EVENT_RBUTTONDOWN, 0, 0, None, None) - assert not dc.selected_boxes - assert dc.left_mouse_count == 0 + assert not dc.selected_boxes, f"Expected empty selected_boxes after reset, got {dc.selected_boxes}" + assert dc.left_mouse_count == 0, f"Expected left_mouse_count=0 after reset, got {dc.left_mouse_count}" def test_parking_json_none(): @@ -249,7 +239,7 @@ def test_analytics_graph_not_supported(): analytics.process(im0=np.zeros((640, 480, 3), dtype=np.uint8), frame_number=0) assert False, "Expected ValueError for unsupported chart type" except ValueError as e: - assert "Unsupported analytics_type" in str(e) + assert "Unsupported analytics_type" in str(e), f"Expected 'Unsupported analytics_type' in error, got: {e}" def test_area_chart_padding(): @@ -257,7 +247,7 @@ def test_area_chart_padding(): analytics = solutions.Analytics(analytics_type="area") analytics.update_graph(frame_number=1, count_dict={"car": 2}, plot="area") plot_im = analytics.update_graph(frame_number=2, count_dict={"car": 3, "person": 1}, plot="area") - assert plot_im is not None + assert plot_im is not None, "Area chart plot returned None" def test_config_update_method_with_invalid_argument(): @@ -267,7 +257,7 @@ def test_config_update_method_with_invalid_argument(): obj.update(invalid_key=123) assert False, "Expected ValueError for invalid update argument" except ValueError as e: - assert "is not a valid solution argument" in str(e) + assert "is not a valid solution argument" in str(e), f"Expected validation error message, got: {e}" def test_plot_with_no_masks(): @@ -275,7 +265,7 @@ def test_plot_with_no_masks(): im0 = np.zeros((640, 480, 3), dtype=np.uint8) isegment = solutions.InstanceSegmentation(model="yolo26n-seg.pt") results = isegment(im0) - assert 
results.plot_im is not None + assert results.plot_im is not None, "Instance segmentation plot returned None" def test_streamlit_handle_video_upload_creates_file(): @@ -291,10 +281,11 @@ def test_streamlit_handle_video_upload_creates_file(): output_path = "ultralytics.mp4" else: output_path = None - assert output_path == "ultralytics.mp4" - assert os.path.exists("ultralytics.mp4") + assert output_path == "ultralytics.mp4", f"Expected output_path 'ultralytics.mp4', got {output_path}" + assert os.path.exists("ultralytics.mp4"), "ultralytics.mp4 file not created" with open("ultralytics.mp4", "rb") as f: - assert f.read() == b"fake video content" + content = f.read() + assert content == b"fake video content", f"File content mismatch: {content}" os.remove("ultralytics.mp4") @@ -329,7 +320,7 @@ def test_similarity_search_complete(tmp_path): img.save(image_dir / f"test_image_{i}.jpg") searcher = solutions.VisualAISearch(data=str(image_dir)) results = searcher("a red and white object") - assert results + assert results, "Similarity search returned empty results" def test_distance_calculation_process_method(): @@ -347,9 +338,9 @@ def test_distance_calculation_process_method(): frame = np.zeros((480, 640, 3), dtype=np.uint8) with patch.object(dc, "extract_tracks"), patch.object(dc, "display_output"), patch("cv2.setMouseCallback"): result = dc.process(frame) - assert isinstance(result, SolutionResults) - assert result.total_tracks == 2 - assert result.pixels_distance > 0 + assert isinstance(result, SolutionResults), f"Expected SolutionResults, got {type(result)}" + assert result.total_tracks == 2, f"Expected 2 tracks, got {result.total_tracks}" + assert result.pixels_distance > 0, f"Expected positive distance, got {result.pixels_distance}" def test_object_crop_with_show_True(): From 3108aa614d801b6b4a18ef7d1755b8c53dffe8a4 Mon Sep 17 00:00:00 2001 From: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> Date: Thu, 16 Apr 2026 20:00:28 +0800 Subject: [PATCH 14/25] 
`ultralytics 8.4.38` Unify args naming for standalone export functions (#24120) Signed-off-by: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> Signed-off-by: Onuralp SEZER Signed-off-by: Glenn Jocher Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> Co-authored-by: Onuralp SEZER Co-authored-by: UltralyticsAssistant Co-authored-by: Glenn Jocher Co-authored-by: Lakshantha Dissanayake --- docs/en/integrations/sony-imx500.md | 24 +++---- tests/test_exports.py | 5 +- ultralytics/__init__.py | 2 +- ultralytics/engine/exporter.py | 88 ++++++++++++++++--------- ultralytics/nn/backends/coreml.py | 6 +- ultralytics/utils/export/axelera.py | 26 ++++---- ultralytics/utils/export/coreml.py | 18 ++--- ultralytics/utils/export/engine.py | 58 +++++++++------- ultralytics/utils/export/executorch.py | 15 ++--- ultralytics/utils/export/imx.py | 26 ++++---- ultralytics/utils/export/mnn.py | 28 +++++--- ultralytics/utils/export/ncnn.py | 30 ++++----- ultralytics/utils/export/openvino.py | 27 ++++---- ultralytics/utils/export/paddle.py | 13 ++-- ultralytics/utils/export/rknn.py | 22 ++++--- ultralytics/utils/export/tensorflow.py | 42 ++++++++---- ultralytics/utils/export/torchscript.py | 17 +++-- 17 files changed, 251 insertions(+), 196 deletions(-) diff --git a/docs/en/integrations/sony-imx500.md b/docs/en/integrations/sony-imx500.md index 5d2eed7f96..40a65ab822 100644 --- a/docs/en/integrations/sony-imx500.md +++ b/docs/en/integrations/sony-imx500.md @@ -205,9 +205,9 @@ The export process will create an ONNX model for quantization validation, along ├── dnnParams.xml ├── labels.txt ├── packerOut.zip - ├── yolo11n_imx.onnx - ├── yolo11n_imx_MemoryReport.json - └── yolo11n_imx.pbtxt + ├── model_imx.onnx + ├── model_imx_MemoryReport.json + └── model_imx.pbtxt ``` === "Pose Estimation" @@ -217,9 +217,9 @@ The export process will create an ONNX model for quantization validation, along ├── dnnParams.xml ├── labels.txt ├── packerOut.zip - 
├── yolo11n-pose_imx.onnx - ├── yolo11n-pose_imx_MemoryReport.json - └── yolo11n-pose_imx.pbtxt + ├── model_imx.onnx + ├── model_imx_MemoryReport.json + └── model_imx.pbtxt ``` === "Classification" @@ -229,9 +229,9 @@ The export process will create an ONNX model for quantization validation, along ├── dnnParams.xml ├── labels.txt ├── packerOut.zip - ├── yolo11n-cls_imx.onnx - ├── yolo11n-cls_imx_MemoryReport.json - └── yolo11n-cls_imx.pbtxt + ├── model_imx.onnx + ├── model_imx_MemoryReport.json + └── model_imx.pbtxt ``` === "Instance Segmentation" @@ -241,9 +241,9 @@ The export process will create an ONNX model for quantization validation, along ├── dnnParams.xml ├── labels.txt ├── packerOut.zip - ├── yolo11n-seg_imx.onnx - ├── yolo11n-seg_imx_MemoryReport.json - └── yolo11n-seg_imx.pbtxt + ├── model_imx.onnx + ├── model_imx_MemoryReport.json + └── model_imx.pbtxt ``` ## Using IMX500 Export in Deployment diff --git a/tests/test_exports.py b/tests/test_exports.py index 38fcd668ba..51b73040d5 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -341,7 +341,7 @@ def test_export_executorch(): file = YOLO(MODEL).export(format="executorch", imgsz=32) assert Path(file).exists(), f"ExecuTorch export failed, directory not found: {file}" # Check that .pte file exists in the exported directory - pte_file = Path(file) / Path(MODEL).with_suffix(".pte").name + pte_file = Path(file) / "model.pte" assert pte_file.exists(), f"ExecuTorch .pte file not found: {pte_file}" # Check that metadata.yaml exists metadata_file = Path(file) / "metadata.yaml" @@ -359,8 +359,7 @@ def test_export_executorch_matrix(task): file = YOLO(TASK2MODEL[task]).export(format="executorch", imgsz=32) assert Path(file).exists(), f"ExecuTorch export failed for task '{task}', directory not found: {file}" # Check that .pte file exists in the exported directory - model_name = Path(TASK2MODEL[task]).with_suffix(".pte").name - pte_file = Path(file) / model_name + pte_file = Path(file) / "model.pte" 
assert pte_file.exists(), f"ExecuTorch .pte file not found for task '{task}': {pte_file}" # Check that metadata.yaml exists metadata_file = Path(file) / "metadata.yaml" diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index a939faa1cb..1184e63c07 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license -__version__ = "8.4.37" +__version__ = "8.4.38" import importlib import os diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 4565f86f46..32ee9d544f 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -601,9 +601,9 @@ class Exporter: from ultralytics.utils.export.torchscript import torch2torchscript return torch2torchscript( - NMSModel(self.model, self.args) if self.args.nms else self.model, - self.im, - self.file, + model=NMSModel(self.model, self.args) if self.args.nms else self.model, + im=self.im, + output_file=self.file.with_suffix(".torchscript"), optimize=self.args.optimize, metadata=self.metadata, prefix=prefix, @@ -692,9 +692,9 @@ class Exporter: @try_export def export_openvino(self, prefix=colorstr("OpenVINO:")): """Export YOLO model to OpenVINO format.""" - from ultralytics.utils.export import torch2openvino + from ultralytics.utils.export.openvino import torch2openvino - # OpenVINO <= 2025.1.0 error on macOS 15.4+: https://github.com/openvinotoolkit/openvino/issues/30023" + # OpenVINO <= 2025.1.0 error on macOS 15.4+: https://github.com/openvinotoolkit/openvino/issues/30023 check_requirements("openvino>=2025.2.0" if MACOS and MACOS_VERSION >= "15.4" else "openvino>=2024.0.0") import openvino as ov @@ -757,16 +757,26 @@ class Exporter: """Export YOLO model to PaddlePaddle format.""" from ultralytics.utils.export.paddle import torch2paddle - return torch2paddle(self.model, self.im, self.file, self.metadata, prefix) + return torch2paddle( + model=self.model, + im=self.im, + 
output_dir=str(self.file).replace(self.file.suffix, f"_paddle_model{os.sep}"), + metadata=self.metadata, + prefix=prefix, + ) @try_export def export_mnn(self, prefix=colorstr("MNN:")): """Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN.""" from ultralytics.utils.export.mnn import onnx2mnn - f_onnx = self.export_onnx() return onnx2mnn( - f_onnx, self.file, half=self.args.half, int8=self.args.int8, metadata=self.metadata, prefix=prefix + onnx_file=self.export_onnx(), + output_file=self.file.with_suffix(".mnn"), + half=self.args.half, + int8=self.args.int8, + metadata=self.metadata, + prefix=prefix, ) @try_export @@ -775,9 +785,9 @@ class Exporter: from ultralytics.utils.export.ncnn import torch2ncnn return torch2ncnn( - self.model, - self.im, - self.file, + model=self.model, + im=self.im, + output_dir=str(self.file).replace(self.file.suffix, "_ncnn_model/"), half=self.args.half, metadata=self.metadata, device=self.device, @@ -986,9 +996,7 @@ class Exporter: """Export YOLO model to TensorFlow GraphDef *.pb format https://github.com/leimao/Frozen-Graph-TensorFlow.""" from ultralytics.utils.export.tensorflow import keras2pb - f = self.file.with_suffix(".pb") - keras2pb(keras_model, f, prefix) - return f + return keras2pb(keras_model, output_file=self.file.with_suffix(".pb"), prefix=prefix) @try_export def export_tflite(self, prefix=colorstr("TensorFlow Lite:")): @@ -1016,11 +1024,13 @@ class Exporter: from ultralytics.utils.export.axelera import torch2axelera + output_dir = self.file.parent / f"{self.file.stem}_axelera_model" return torch2axelera( model=self.model, - file=self.file, + output_dir=output_dir, calibration_dataset=self.get_int8_calibration_dataloader(prefix), transform_fn=self._transform_fn, + model_name=self.file.stem, metadata=self.metadata, prefix=prefix, ) @@ -1032,7 +1042,13 @@ class Exporter: check_executorch_requirements() from ultralytics.utils.export.executorch import torch2executorch - return torch2executorch(self.model, 
self.file, self.im, metadata=self.metadata, prefix=prefix) + return torch2executorch( + model=self.model, + im=self.im, + output_dir=str(self.file).replace(self.file.suffix, "_executorch_model/"), + metadata=self.metadata, + prefix=prefix, + ) @try_export def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")): @@ -1055,10 +1071,9 @@ class Exporter: from ultralytics.utils.export.tensorflow import tflite2edgetpu LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...") - tflite2edgetpu(tflite_file=tflite_model, output_dir=tflite_model.parent, prefix=prefix) - f = str(tflite_model).replace(".tflite", "_edgetpu.tflite") # Edge TPU model - self._add_tflite_metadata(f) - return f + output_file = tflite2edgetpu(tflite_file=tflite_model, output_dir=tflite_model.parent, prefix=prefix) + self._add_tflite_metadata(output_file) + return output_file @try_export def export_tfjs(self, prefix=colorstr("TensorFlow.js:")): @@ -1066,12 +1081,15 @@ class Exporter: check_requirements("tensorflowjs") from ultralytics.utils.export.tensorflow import pb2tfjs - f = str(self.file).replace(self.file.suffix, "_web_model") # js dir - f_pb = str(self.file.with_suffix(".pb")) # *.pb path - pb2tfjs(pb_file=f_pb, output_dir=f, half=self.args.half, int8=self.args.int8, prefix=prefix) - # Add metadata - YAML.save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml - return f + output_dir = pb2tfjs( + pb_file=str(self.file.with_suffix(".pb")), + output_dir=str(self.file).replace(self.file.suffix, "_web_model/"), + half=self.args.half, + int8=self.args.int8, + prefix=prefix, + ) + YAML.save(Path(output_dir) / "metadata.yaml", self.metadata) + return output_dir @try_export def export_rknn(self, prefix=colorstr("RKNN:")): @@ -1080,7 +1098,13 @@ class Exporter: self.args.opset = min(self.args.opset or 19, 19) # rknn-toolkit expects opset<=19 f_onnx = self.export_onnx() - return onnx2rknn(f_onnx, name=self.args.name, metadata=self.metadata, prefix=prefix) + 
return onnx2rknn( + onnx_file=f_onnx, + output_dir=str(self.file).replace(self.file.suffix, f"_rknn_model{os.sep}"), + name=self.args.name, + metadata=self.metadata, + prefix=prefix, + ) @try_export def export_imx(self, prefix=colorstr("IMX:")): @@ -1120,11 +1144,11 @@ class Exporter: check_apt_requirements(["openjdk-17-jre"]) return torch2imx( - self.model, - self.file, - self.args.conf, - self.args.iou, - self.args.max_det, + model=self.model, + output_dir=str(self.file).replace(self.file.suffix, "_imx_model/"), + conf=self.args.conf, + iou=self.args.iou, + max_det=self.args.max_det, metadata=self.metadata, dataset=self.get_int8_calibration_dataloader(prefix), prefix=prefix, diff --git a/ultralytics/nn/backends/coreml.py b/ultralytics/nn/backends/coreml.py index 9f96c66ec7..d36dfc2e56 100644 --- a/ultralytics/nn/backends/coreml.py +++ b/ultralytics/nn/backends/coreml.py @@ -32,7 +32,9 @@ class CoreMLBackend(BaseBackend): LOGGER.info(f"Loading {weight} for CoreML inference...") self.model = ct.models.MLModel(weight) - self.dynamic = self.model.get_spec().description.input[0].type.HasField("multiArrayType") + spec = self.model.get_spec() + self.input_name = spec.description.input[0].name + self.dynamic = spec.description.input[0].type.HasField("multiArrayType") # Load metadata self.apply_metadata(dict(self.model.user_defined_metadata)) @@ -50,7 +52,7 @@ class CoreMLBackend(BaseBackend): h, w = im.shape[1:3] im = im.transpose(0, 3, 1, 2) if self.dynamic else Image.fromarray((im[0] * 255).astype("uint8")) - y = self.model.predict({"image": im}) + y = self.model.predict({self.input_name: im}) if "confidence" in y: # NMS included from ultralytics.utils.ops import xywh2xyxy diff --git a/ultralytics/utils/export/axelera.py b/ultralytics/utils/export/axelera.py index cdd3b0f390..35cd24f516 100644 --- a/ultralytics/utils/export/axelera.py +++ b/ultralytics/utils/export/axelera.py @@ -16,24 +16,26 @@ from ultralytics.utils.checks import check_requirements def torch2axelera( 
model: torch.nn.Module, - file: str | Path, + output_dir: Path | str, calibration_dataset: torch.utils.data.DataLoader, transform_fn: Callable[[Any], np.ndarray], + model_name: str = "model", metadata: dict | None = None, prefix: str = "", -) -> Path: +) -> str: """Convert a YOLO model to Axelera format. Args: model (torch.nn.Module): Source YOLO model for quantization. - file (str | Path): Source model file path used to derive output names. + output_dir (Path | str): Directory to save the exported Axelera model. calibration_dataset (torch.utils.data.DataLoader): Calibration dataloader for quantization. transform_fn (Callable[[Any], np.ndarray]): Calibration preprocessing transform function. + model_name (str, optional): Name for the compiled model. Defaults to "model". metadata (dict | None, optional): Optional metadata to save as YAML. Defaults to None. prefix (str, optional): Prefix for log messages. Defaults to "". Returns: - (Path): Path to exported Axelera model directory. + (str): Path to exported Axelera model directory. 
""" prev_protobuf = os.environ.get("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" @@ -51,10 +53,8 @@ def torch2axelera( LOGGER.info(f"\n{prefix} starting export with Axelera compiler...") - file = Path(file) - model_name = file.stem - export_path = Path(f"{model_name}_axelera_model") - export_path.mkdir(exist_ok=True) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) axelera_model_metadata = extract_ultralytics_metadata(model) config = CompilerConfig( @@ -71,22 +71,22 @@ def torch2axelera( config=config, transform_fn=transform_fn, ) - compiler.compile(model=qmodel, config=config, output_dir=export_path) + compiler.compile(model=qmodel, config=config, output_dir=output_dir) for artifact in [f"{model_name}.axm", "compiler_config_final.toml"]: artifact_path = Path(artifact) if artifact_path.exists(): - artifact_path.replace(export_path / artifact_path.name) + artifact_path.replace(output_dir / artifact_path.name) # Remove intermediate compiler artifacts, keeping only the compiled model and config. 
keep_suffixes = {".axm"} keep_names = {"compiler_config_final.toml", "metadata.yaml"} - for f in export_path.iterdir(): + for f in output_dir.iterdir(): if f.is_file() and f.suffix not in keep_suffixes and f.name not in keep_names: f.unlink() if metadata is not None: - YAML.save(export_path / "metadata.yaml", metadata) + YAML.save(output_dir / "metadata.yaml", metadata) # Restore original PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION value if prev_protobuf is None: @@ -94,4 +94,4 @@ def torch2axelera( else: os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = prev_protobuf - return export_path + return str(output_dir) diff --git a/ultralytics/utils/export/coreml.py b/ultralytics/utils/export/coreml.py index 12c19dd129..6c6c72f15f 100644 --- a/ultralytics/utils/export/coreml.py +++ b/ultralytics/utils/export/coreml.py @@ -46,7 +46,7 @@ class IOSDetectModel(nn.Module): def pipeline_coreml( model: Any, - output_shape: tuple, + output_shape: tuple[int, ...], metadata: dict, mlmodel: bool = False, iou: float = 0.45, @@ -59,7 +59,7 @@ def pipeline_coreml( Args: model: CoreML model. - output_shape (tuple): Output shape tuple from the exporter. + output_shape (tuple[int, ...]): Output shape tuple from the exporter. metadata (dict): Model metadata. mlmodel (bool): Whether the model is an MLModel (vs MLProgram). iou (float): IoU threshold for NMS. @@ -168,13 +168,13 @@ def torch2coreml( inputs: list, im: torch.Tensor, classifier_names: list[str] | None, - coreml_file: Path | str | None = None, + output_file: Path | str | None = None, mlmodel: bool = False, half: bool = False, int8: bool = False, metadata: dict | None = None, prefix: str = "", -): +) -> Any: """Export a PyTorch model to CoreML ``.mlpackage`` or ``.mlmodel`` format. Args: @@ -182,7 +182,7 @@ def torch2coreml( inputs (list): CoreML input descriptions for the model. im (torch.Tensor): Example input tensor for tracing. classifier_names (list[str] | None): Class names for classifier config, or None if not a classifier. 
- coreml_file (Path | str | None): Output file path, or None to skip saving. + output_file (Path | str | None): Output file path, or None to skip saving. mlmodel (bool): Whether to export as ``.mlmodel`` (neural network) instead of ``.mlpackage`` (ML program). half (bool): Whether to quantize to FP16. int8 (bool): Whether to quantize to INT8. @@ -229,14 +229,14 @@ def torch2coreml( ct_model.version = m.pop("version", "") ct_model.user_defined_metadata.update({k: str(v) for k, v in m.items()}) - if coreml_file is not None: + if output_file is not None: try: - ct_model.save(str(coreml_file)) # save *.mlpackage + ct_model.save(str(output_file)) # save *.mlpackage except Exception as e: LOGGER.warning( f"{prefix} CoreML export to *.mlpackage failed ({e}), reverting to *.mlmodel export. " f"Known coremltools Python 3.11 and Windows bugs https://github.com/apple/coremltools/issues/1928." ) - coreml_file = Path(coreml_file).with_suffix(".mlmodel") - ct_model.save(str(coreml_file)) + output_file = Path(output_file).with_suffix(".mlmodel") + ct_model.save(str(output_file)) return ct_model diff --git a/ultralytics/utils/export/engine.py b/ultralytics/utils/export/engine.py index a2ce93e458..ab619761c4 100644 --- a/ultralytics/utils/export/engine.py +++ b/ultralytics/utils/export/engine.py @@ -44,46 +44,54 @@ def best_onnx_opset(onnx: types.ModuleType, cuda: bool = False) -> int: @ThreadingLocked() def torch2onnx( - torch_model: torch.nn.Module, - im: torch.Tensor, - onnx_file: str, + model: torch.nn.Module, + im: torch.Tensor | tuple[torch.Tensor, ...], + output_file: Path | str, opset: int = 14, - input_names: list[str] = ["images"], - output_names: list[str] = ["output0"], - dynamic: bool | dict = False, -) -> None: + input_names: list[str] | None = None, + output_names: list[str] | None = None, + dynamic: dict | None = None, +) -> str: """Export a PyTorch model to ONNX format. Args: - torch_model (torch.nn.Module): The PyTorch model to export. 
- im (torch.Tensor): Example input tensor for the model. - onnx_file (str): Path to save the exported ONNX file. + model (torch.nn.Module): The PyTorch model to export. + im (torch.Tensor | tuple[torch.Tensor, ...]): Example input tensor(s) for tracing. + output_file (Path | str): Path to save the exported ONNX file. opset (int): ONNX opset version to use for export. - input_names (list[str]): List of input tensor names. - output_names (list[str]): List of output tensor names. - dynamic (bool | dict, optional): Whether to enable dynamic axes. + input_names (list[str] | None): List of input tensor names. Defaults to ``["images"]``. + output_names (list[str] | None): List of output tensor names. Defaults to ``["output0"]``. + dynamic (dict | None): Dictionary specifying dynamic axes for inputs and outputs. + + Returns: + (str): Path to the exported ONNX file. Notes: Setting `do_constant_folding=True` may cause issues with DNN inference for torch>=1.12. """ + if input_names is None: + input_names = ["images"] + if output_names is None: + output_names = ["output0"] kwargs = {"dynamo": False} if TORCH_2_4 else {} torch.onnx.export( - torch_model, + model, im, - onnx_file, + output_file, verbose=False, opset_version=opset, do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False input_names=input_names, output_names=output_names, - dynamic_axes=dynamic or None, + dynamic_axes=dynamic, **kwargs, ) + return str(output_file) def onnx2engine( onnx_file: str, - engine_file: str | None = None, + output_file: Path | str | None = None, workspace: int | None = None, half: bool = False, int8: bool = False, @@ -94,12 +102,12 @@ def onnx2engine( metadata: dict | None = None, verbose: bool = False, prefix: str = "", -) -> None: +) -> str: """Export a YOLO model to TensorRT engine format. Args: onnx_file (str): Path to the ONNX file to be converted. - engine_file (str | None): Path to save the generated TensorRT engine file. 
+ output_file (Path | str | None): Path to save the generated TensorRT engine file. workspace (int | None): Workspace size in GB for TensorRT. half (bool, optional): Enable FP16 precision. int8 (bool, optional): Enable INT8 precision. @@ -111,6 +119,9 @@ def onnx2engine( verbose (bool, optional): Enable verbose logging. prefix (str, optional): Prefix for log messages. + Returns: + (str): Path to the exported engine file. + Raises: ValueError: If DLA is enabled on non-Jetson devices or required precision is not set. RuntimeError: If the ONNX file cannot be parsed. @@ -122,7 +133,7 @@ def onnx2engine( """ import tensorrt as trt - engine_file = engine_file or Path(onnx_file).with_suffix(".engine") + output_file = output_file or Path(onnx_file).with_suffix(".engine") logger = trt.Logger(trt.Logger.INFO) if verbose: @@ -178,7 +189,7 @@ def onnx2engine( if int8 and not is_trt10: # deprecated in TensorRT 10, causes internal errors config.set_calibration_profile(profile) - LOGGER.info(f"{prefix} building {'INT8' if int8 else 'FP' + ('16' if half else '32')} engine as {engine_file}") + LOGGER.info(f"{prefix} building {'INT8' if int8 else 'FP' + ('16' if half else '32')} engine as {output_file}") if int8: config.set_flag(trt.BuilderFlag.INT8) config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED @@ -263,16 +274,17 @@ def onnx2engine( engine = builder.build_serialized_network(network, config) if engine is None: raise RuntimeError("TensorRT engine build failed, check logs for errors") - with open(engine_file, "wb") as t: + with open(output_file, "wb") as t: if metadata is not None: meta = json.dumps(metadata) t.write(len(meta).to_bytes(4, byteorder="little", signed=True)) t.write(meta.encode()) t.write(engine) else: - with builder.build_engine(network, config) as engine, open(engine_file, "wb") as t: + with builder.build_engine(network, config) as engine, open(output_file, "wb") as t: if metadata is not None: meta = json.dumps(metadata) t.write(len(meta).to_bytes(4, 
byteorder="little", signed=True)) t.write(meta.encode()) t.write(engine.serialize()) + return str(output_file) diff --git a/ultralytics/utils/export/executorch.py b/ultralytics/utils/export/executorch.py index 5606be8acc..15e00805b9 100644 --- a/ultralytics/utils/export/executorch.py +++ b/ultralytics/utils/export/executorch.py @@ -39,8 +39,8 @@ def _executorch_kpts_decode(self, kpts: torch.Tensor, is_pose26: bool = False) - def torch2executorch( model: torch.nn.Module, - file: Path | str, - sample_input: torch.Tensor, + im: torch.Tensor, + output_dir: Path | str, metadata: dict | None = None, prefix: str = "", ) -> str: @@ -48,8 +48,8 @@ def torch2executorch( Args: model (torch.nn.Module): The PyTorch model to export. - file (Path | str): Source model file path used to derive output names. - sample_input (torch.Tensor): Example input tensor for tracing/export. + im (torch.Tensor): Example input tensor for tracing/export. + output_dir (Path | str): Directory to save the exported ExecuTorch model. metadata (dict | None, optional): Optional metadata to save as YAML. prefix (str, optional): Prefix for log messages. 
@@ -62,13 +62,12 @@ def torch2executorch( LOGGER.info(f"\n{prefix} starting export with ExecuTorch {executorch_version.__version__}...") - file = Path(file) - output_dir = Path(str(file).replace(file.suffix, "_executorch_model")) + output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) - pte_file = output_dir / file.with_suffix(".pte").name + pte_file = output_dir / "model.pte" et_program = to_edge_transform_and_lower( - torch.export.export(model, (sample_input,)), + torch.export.export(model, (im,)), partitioner=[XnnpackPartitioner()], ).to_executorch() pte_file.write_bytes(et_program.buffer) diff --git a/ultralytics/utils/export/imx.py b/ultralytics/utils/export/imx.py index ba8e462d71..21689891fa 100644 --- a/ultralytics/utils/export/imx.py +++ b/ultralytics/utils/export/imx.py @@ -203,7 +203,7 @@ class NMSWrapper(torch.nn.Module): def torch2imx( model: torch.nn.Module, - file: Path | str, + output_dir: Path | str, conf: float, iou: float, max_det: int, @@ -211,7 +211,7 @@ def torch2imx( gptq: bool = False, dataset=None, prefix: str = "", -): +) -> str: """Export YOLO model to IMX format for deployment on Sony IMX500 devices. This function quantizes a YOLO model using Model Compression Toolkit (MCT) and exports it to IMX format compatible @@ -220,7 +220,7 @@ def torch2imx( Args: model (torch.nn.Module): The YOLO model to export. Must be YOLOv8n or YOLO11n. - file (Path | str): Output file path for the exported model. + output_dir (Path | str): Directory to save the exported IMX model. conf (float): Confidence threshold for NMS post-processing. iou (float): IoU threshold for NMS post-processing. max_det (int): Maximum number of detections to return. @@ -231,7 +231,7 @@ def torch2imx( prefix (str, optional): Logging prefix string. Defaults to "". Returns: - (Path): Path to the exported IMX model directory. + (str): Path to the exported IMX model directory. Raises: ValueError: If the model is not a supported YOLOv8n or YOLO11n variant. 
@@ -239,7 +239,7 @@ def torch2imx( Examples: >>> from ultralytics import YOLO >>> model = YOLO("yolo11n.pt") - >>> path = torch2imx(model, "model.imx", conf=0.25, iou=0.7, max_det=300) + >>> path = torch2imx(model, "output_dir/", conf=0.25, iou=0.7, max_det=300) Notes: - Requires model_compression_toolkit, onnx, edgemdt_tpc, and edge-mdt-cl packages @@ -309,9 +309,9 @@ def torch2imx( task=model.task, ) - f = Path(str(file).replace(file.suffix, "_imx_model")) - f.mkdir(exist_ok=True) - onnx_model = f / Path(str(file.name).replace(file.suffix, "_imx.onnx")) # js dir + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + onnx_model = output_dir / "model_imx.onnx" with onnx_export_patch(): mct.exporter.pytorch_export_model( @@ -319,7 +319,7 @@ def torch2imx( ) model_onnx = onnx.load(onnx_model) # load onnx model - for k, v in metadata.items(): + for k, v in (metadata or {}).items(): meta = model_onnx.metadata_props.add() meta.key, meta.value = k, str(v) @@ -334,12 +334,12 @@ def torch2imx( raise FileNotFoundError("imxconv-pt not found. Install with: pip install imx500-converter[pt]") subprocess.run( - [str(imxconv), "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"], + [str(imxconv), "-i", str(onnx_model), "-o", str(output_dir), "--no-input-persistency", "--overwrite-output"], check=True, ) # Needed for imx models. 
- with open(f / "labels.txt", "w", encoding="utf-8") as file: - file.writelines([f"{name}\n" for _, name in model.names.items()]) + with open(output_dir / "labels.txt", "w", encoding="utf-8") as labels_file: + labels_file.writelines([f"{name}\n" for _, name in model.names.items()]) - return f + return str(output_dir) diff --git a/ultralytics/utils/export/mnn.py b/ultralytics/utils/export/mnn.py index 3cebf890ec..0142f83385 100644 --- a/ultralytics/utils/export/mnn.py +++ b/ultralytics/utils/export/mnn.py @@ -9,8 +9,8 @@ from ultralytics.utils import LOGGER def onnx2mnn( - f_onnx: str, - file: Path | str, + onnx_file: str, + output_file: Path | str, half: bool = False, int8: bool = False, metadata: dict | None = None, @@ -19,8 +19,8 @@ def onnx2mnn( """Convert an ONNX model to MNN format. Args: - f_onnx (str): Path to the source ONNX file. - file (Path | str): Source model path used to derive the output ``.mnn`` path. + onnx_file (str): Path to the source ONNX file. + output_file (Path | str): Path to save the exported MNN model. half (bool): Whether to enable FP16 conversion. int8 (bool): Whether to enable INT8 weight quantization. metadata (dict | None): Optional metadata embedded via ``--bizCode``. 
@@ -33,23 +33,31 @@ def onnx2mnn( from ultralytics.utils.torch_utils import TORCH_1_10 assert TORCH_1_10, "MNN export requires torch>=1.10.0 to avoid segmentation faults" - assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}" + assert Path(onnx_file).exists(), f"failed to export ONNX file: {onnx_file}" check_requirements("MNN>=2.9.6") import MNN from MNN.tools import mnnconvert LOGGER.info(f"\n{prefix} starting export with MNN {MNN.version()}...") - file = Path(file) - f = str(file.with_suffix(".mnn")) # MNN model file - mnn_args = ["", "-f", "ONNX", "--modelFile", f_onnx, "--MNNModel", f, "--bizCode", json.dumps(metadata or {})] + mnn_args = [ + "", + "-f", + "ONNX", + "--modelFile", + onnx_file, + "--MNNModel", + str(output_file), + "--bizCode", + json.dumps(metadata or {}), + ] if int8: mnn_args.extend(("--weightQuantBits", "8")) if half: mnn_args.append("--fp16") mnnconvert.convert(mnn_args) # Remove scratch file created during model convert optimize - convert_scratch = file.parent / ".__convert_external_data.bin" + convert_scratch = Path(output_file).parent / ".__convert_external_data.bin" if convert_scratch.exists(): convert_scratch.unlink() - return f + return str(output_file) diff --git a/ultralytics/utils/export/ncnn.py b/ultralytics/utils/export/ncnn.py index 28d33517d1..177e267ee7 100644 --- a/ultralytics/utils/export/ncnn.py +++ b/ultralytics/utils/export/ncnn.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os from pathlib import Path import torch @@ -13,7 +12,7 @@ from ultralytics.utils import LOGGER, YAML def torch2ncnn( model: torch.nn.Module, im: torch.Tensor, - file: Path | str, + output_dir: Path | str, half: bool = False, metadata: dict | None = None, device: torch.device | None = None, @@ -24,7 +23,7 @@ def torch2ncnn( Args: model (torch.nn.Module): The PyTorch model to export. im (torch.Tensor): Example input tensor for tracing. - file (Path | str): Source model path used to derive the output directory. 
+ output_dir (Path | str): Directory to save the exported NCNN model. half (bool): Whether to enable FP16 export. metadata (dict | None): Optional metadata saved as ``metadata.yaml``. device (torch.device | None): Device the model lives on. @@ -41,23 +40,22 @@ def torch2ncnn( import pnnx LOGGER.info(f"\n{prefix} starting export with NCNN {ncnn.__version__} and PNNX {pnnx.__version__}...") - file = Path(file) - f = Path(str(file).replace(file.suffix, f"_ncnn_model{os.sep}")) + output_dir = Path(output_dir) ncnn_args = dict( - ncnnparam=(f / "model.ncnn.param").as_posix(), - ncnnbin=(f / "model.ncnn.bin").as_posix(), - ncnnpy=(f / "model_ncnn.py").as_posix(), + ncnnparam=(output_dir / "model.ncnn.param").as_posix(), + ncnnbin=(output_dir / "model.ncnn.bin").as_posix(), + ncnnpy=(output_dir / "model_ncnn.py").as_posix(), ) pnnx_args = dict( - ptpath=(f / "model.pt").as_posix(), - pnnxparam=(f / "model.pnnx.param").as_posix(), - pnnxbin=(f / "model.pnnx.bin").as_posix(), - pnnxpy=(f / "model_pnnx.py").as_posix(), - pnnxonnx=(f / "model.pnnx.onnx").as_posix(), + ptpath=(output_dir / "model.pt").as_posix(), + pnnxparam=(output_dir / "model.pnnx.param").as_posix(), + pnnxbin=(output_dir / "model.pnnx.bin").as_posix(), + pnnxpy=(output_dir / "model_pnnx.py").as_posix(), + pnnxonnx=(output_dir / "model.pnnx.onnx").as_posix(), ) - f.mkdir(exist_ok=True) # make ncnn_model directory + output_dir.mkdir(parents=True, exist_ok=True) # make ncnn_model directory device_type = device.type if device is not None else "cpu" pnnx.export(model, inputs=im, **ncnn_args, **pnnx_args, fp16=half, device=device_type) @@ -65,5 +63,5 @@ def torch2ncnn( Path(f_debug).unlink(missing_ok=True) if metadata: - YAML.save(f / "metadata.yaml", metadata) # add metadata.yaml - return str(f) + YAML.save(output_dir / "metadata.yaml", metadata) # add metadata.yaml + return str(output_dir) diff --git a/ultralytics/utils/export/openvino.py b/ultralytics/utils/export/openvino.py index c804a982d5..d15f07dbda 
100644 --- a/ultralytics/utils/export/openvino.py +++ b/ultralytics/utils/export/openvino.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os from pathlib import Path from typing import Any @@ -13,25 +12,25 @@ from ultralytics.utils import LOGGER def torch2openvino( model: torch.nn.Module, - im: torch.Tensor, - file: Path | str | None = None, + im: torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor, ...], + output_dir: Path | str | None = None, dynamic: bool = False, half: bool = False, int8: bool = False, calibration_dataset: Any | None = None, ignored_scope: dict | None = None, prefix: str = "", -) -> str: +) -> Any: """Export a PyTorch model to OpenVINO format with optional INT8 quantization. Args: model (torch.nn.Module): The model to export (may be NMS-wrapped). - im (torch.Tensor): Example input tensor. - file (Path | str | None): Source model path used to derive output directory. + im (torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor, ...]): Example input tensor(s) for tracing. + output_dir (Path | str | None): Directory to save the exported OpenVINO model. dynamic (bool): Whether to use dynamic input shapes. half (bool): Whether to compress to FP16. int8 (bool): Whether to apply INT8 quantization. - calibration_dataset (nn.Dataset): Dataset for nncf.Dataset (required when ``int8=True``). + calibration_dataset (nncf.Dataset | None): Dataset for INT8 calibration (required when ``int8=True``). ignored_scope (dict | None): Kwargs passed to ``nncf.IgnoredScope`` for head patterns. prefix (str): Prefix for log messages. 
@@ -42,7 +41,8 @@ def torch2openvino( LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...") - ov_model = ov.convert_model(model, input=None if dynamic else [im.shape], example_input=im) + input_shape = [i.shape for i in im] if isinstance(im, (list, tuple)) else im.shape + ov_model = ov.convert_model(model, input=None if dynamic else input_shape, example_input=im) if int8: import nncf @@ -53,10 +53,9 @@ def torch2openvino( ignored_scope=ignored_scope, ) - if file is not None: - file = Path(file) - suffix = f"_{'int8_' if int8 else ''}openvino_model{os.sep}" - f = str(file).replace(file.suffix, suffix) - f_ov = str(Path(f) / file.with_suffix(".xml").name) - ov.save_model(ov_model, f_ov, compress_to_fp16=half) + if output_dir is not None: + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + output_file = output_dir / "model.xml" + ov.save_model(ov_model, output_file, compress_to_fp16=half) return ov_model diff --git a/ultralytics/utils/export/paddle.py b/ultralytics/utils/export/paddle.py index 841a01a919..7d3f467019 100644 --- a/ultralytics/utils/export/paddle.py +++ b/ultralytics/utils/export/paddle.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os from pathlib import Path import torch @@ -13,7 +12,7 @@ from ultralytics.utils import ARM64, IS_JETSON, LOGGER, YAML def torch2paddle( model: torch.nn.Module, im: torch.Tensor, - file: Path | str, + output_dir: Path | str, metadata: dict | None = None, prefix: str = "", ) -> str: @@ -22,7 +21,7 @@ def torch2paddle( Args: model (torch.nn.Module): The PyTorch model to export. im (torch.Tensor): Example input tensor for tracing. - file (Path | str): Source model path used to derive the output directory. + output_dir (Path | str): Directory to save the exported PaddlePaddle model. metadata (dict | None): Optional metadata saved as ``metadata.yaml``. prefix (str): Prefix for log messages. 
@@ -47,10 +46,8 @@ def torch2paddle( from x2paddle.convert import pytorch2paddle LOGGER.info(f"\n{prefix} starting export with X2Paddle {x2paddle.__version__}...") - file = Path(file) - f = str(file).replace(file.suffix, f"_paddle_model{os.sep}") - pytorch2paddle(module=model, save_dir=f, jit_type="trace", input_examples=[im]) # export + pytorch2paddle(module=model, save_dir=output_dir, jit_type="trace", input_examples=[im]) # export if metadata: - YAML.save(Path(f) / "metadata.yaml", metadata) # add metadata.yaml - return f + YAML.save(Path(output_dir) / "metadata.yaml", metadata) # add metadata.yaml + return str(output_dir) diff --git a/ultralytics/utils/export/rknn.py b/ultralytics/utils/export/rknn.py index d1aacec58b..1c8da6ba9d 100644 --- a/ultralytics/utils/export/rknn.py +++ b/ultralytics/utils/export/rknn.py @@ -8,21 +8,23 @@ from ultralytics.utils import IS_COLAB, LOGGER, YAML def onnx2rknn( - f_onnx: str, + onnx_file: str, + output_dir: Path | str, name: str = "rk3588", metadata: dict | None = None, prefix: str = "", -) -> Path: +) -> str: """Export an ONNX model to RKNN format for Rockchip NPUs. Args: - f_onnx (str): Path to the source ONNX file (already exported, opset <=19). + onnx_file (str): Path to the source ONNX file (already exported, opset <=19). + output_dir (Path | str): Directory to save the exported RKNN model. name (str): Target platform name (e.g. ``"rk3588"``). metadata (dict | None): Metadata saved as ``metadata.yaml``. prefix (str): Prefix for log messages. Returns: - (Path): Path to the exported ``_rknn_model`` directory. + (str): Path to the exported ``_rknn_model`` directory. 
""" from ultralytics.utils.checks import check_requirements @@ -38,14 +40,14 @@ def onnx2rknn( from rknn.api import RKNN - export_path = Path(f"{Path(f_onnx).stem}_rknn_model") - export_path.mkdir(exist_ok=True) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) rknn = RKNN(verbose=False) rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform=name) - rknn.load_onnx(model=f_onnx) + rknn.load_onnx(model=onnx_file) rknn.build(do_quantization=False) # TODO: Add quantization support - rknn.export_rknn(str(export_path / f"{Path(f_onnx).stem}-{name}.rknn")) + rknn.export_rknn(str(output_dir / f"{Path(onnx_file).stem}-{name}.rknn")) if metadata: - YAML.save(export_path / "metadata.yaml", metadata) - return export_path + YAML.save(output_dir / "metadata.yaml", metadata) + return str(output_dir) diff --git a/ultralytics/utils/export/tensorflow.py b/ultralytics/utils/export/tensorflow.py index c70a709875..322edb76c9 100644 --- a/ultralytics/utils/export/tensorflow.py +++ b/ultralytics/utils/export/tensorflow.py @@ -59,19 +59,19 @@ def _tf_kpts_decode(self, kpts: torch.Tensor, is_pose26: bool = False) -> torch. def onnx2saved_model( onnx_file: str, - output_dir: Path, + output_dir: Path | str, int8: bool = False, - images: np.ndarray = None, + images: np.ndarray | None = None, disable_group_convolution: bool = False, - prefix="", + prefix: str = "", ): """Convert an ONNX model to TensorFlow SavedModel format using onnx2tf. Args: onnx_file (str): ONNX file path. - output_dir (Path): Output directory path for the SavedModel. + output_dir (Path | str): Output directory path for the SavedModel. int8 (bool, optional): Enable INT8 quantization. Defaults to False. - images (np.ndarray, optional): Calibration images for INT8 quantization in BHWC format. + images (np.ndarray | None, optional): Calibration images for INT8 quantization in BHWC format. 
disable_group_convolution (bool, optional): Disable group convolution optimization. Defaults to False. prefix (str, optional): Logging prefix. Defaults to "". @@ -82,6 +82,7 @@ def onnx2saved_model( - Requires onnx2tf package. Downloads calibration data if INT8 quantization is enabled. - Removes temporary files and renames quantized models after conversion. """ + output_dir = Path(output_dir) # Pre-download calibration file to fix https://github.com/PINTO0309/onnx2tf/issues/545 onnx2tf_file = Path("calibration_image_sample_data_20x128x128x3_float32.npy") if not onnx2tf_file.exists(): @@ -118,7 +119,7 @@ def onnx2saved_model( verbosity="error", # note INT8-FP16 activation bug https://github.com/ultralytics/ultralytics/issues/15873 output_integer_quantized_tflite=int8, custom_input_op_name_np_data_path=np_data, - enable_batchmatmul_unfold=True and not int8, # fix lower no. of detected objects on GPU delegate + enable_batchmatmul_unfold=not int8, # fix lower no. of detected objects on GPU delegate output_signaturedefs=True, # fix error with Attention block group convolution disable_group_convolution=disable_group_convolution, # fix error with group convolution ) @@ -133,14 +134,17 @@ def onnx2saved_model( return keras_model -def keras2pb(keras_model, file: Path, prefix=""): +def keras2pb(keras_model, output_file: Path | str, prefix: str = "") -> str: """Convert a Keras model to TensorFlow GraphDef (.pb) format. Args: keras_model (keras.Model): Keras model to convert to frozen graph format. - file (Path): Output file path (suffix will be changed to .pb). + output_file (Path | str): Output file path (suffix will be changed to .pb). prefix (str, optional): Logging prefix. Defaults to "". + Returns: + (str): Path to the exported ``.pb`` file. + Notes: Creates a frozen graph by converting variables to constants for inference optimization. 
""" @@ -152,10 +156,14 @@ def keras2pb(keras_model, file: Path, prefix=""): m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)) frozen_func = convert_variables_to_constants_v2(m) frozen_func.graph.as_graph_def() - tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(file.parent), name=file.name, as_text=False) + output_file = Path(output_file) + tf.io.write_graph( + graph_or_graph_def=frozen_func.graph, logdir=str(output_file.parent), name=output_file.name, as_text=False + ) + return str(output_file) -def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str = ""): +def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str = "") -> str: """Convert a TensorFlow Lite model to Edge TPU format using the Edge TPU compiler. Args: @@ -163,6 +171,9 @@ def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str output_dir (str | Path): Output directory path for the compiled Edge TPU model. prefix (str, optional): Logging prefix. Defaults to "". + Returns: + (str): Path to the exported Edge TPU model file. + Notes: Requires the Edge TPU compiler to be installed. The function compiles the TFLite model for optimal performance on Google's Edge TPU hardware accelerator. @@ -180,9 +191,10 @@ def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str ) LOGGER.info(f"{prefix} running '{cmd}'") subprocess.run(cmd, shell=True) + return str(Path(output_dir) / f"{Path(tflite_file).stem}_edgetpu.tflite") -def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = False, prefix: str = ""): +def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = False, prefix: str = "") -> str: """Convert a TensorFlow GraphDef (.pb) model to TensorFlow.js format. Args: @@ -192,6 +204,9 @@ def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = Fals int8 (bool, optional): Enable INT8 quantization. 
Defaults to False. prefix (str, optional): Logging prefix. Defaults to "". + Returns: + (str): Path to the exported TensorFlow.js model directory. + Notes: Requires tensorflowjs package. Uses tensorflowjs_converter command-line tool for conversion. Handles spaces in file paths and warns if output directory contains spaces. @@ -204,8 +219,8 @@ def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = Fals LOGGER.info(f"\n{prefix} starting export with tensorflowjs {tfjs.__version__}...") gd = tf.Graph().as_graph_def() # TF GraphDef - with open(pb_file, "rb") as file: - gd.ParseFromString(file.read()) + with open(pb_file, "rb") as f: + gd.ParseFromString(f.read()) outputs = ",".join(gd_outputs(gd)) LOGGER.info(f"\n{prefix} output node names: {outputs}") @@ -220,6 +235,7 @@ def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = Fals if " " in output_dir: LOGGER.warning(f"{prefix} your model may not work correctly with spaces in path '{output_dir}'.") + return str(output_dir) def gd_outputs(gd): diff --git a/ultralytics/utils/export/torchscript.py b/ultralytics/utils/export/torchscript.py index 6805eac217..077b69f075 100644 --- a/ultralytics/utils/export/torchscript.py +++ b/ultralytics/utils/export/torchscript.py @@ -13,35 +13,34 @@ from ultralytics.utils import LOGGER, TORCH_VERSION def torch2torchscript( model: torch.nn.Module, im: torch.Tensor, - file: Path | str, + output_file: Path | str, optimize: bool = False, metadata: dict | None = None, prefix: str = "", -) -> Path: +) -> str: """Export a PyTorch model to TorchScript format. Args: model (torch.nn.Module): The PyTorch model to export (may be NMS-wrapped). im (torch.Tensor): Example input tensor for tracing. - file (Path | str): Source model file path used to derive output path. + output_file (Path | str): Path to save the exported TorchScript model. optimize (bool): Whether to optimize for mobile deployment. 
metadata (dict | None): Optional metadata to embed in the TorchScript archive. prefix (str): Prefix for log messages. Returns: - (Path): Path to the exported ``.torchscript`` file. + (str): Path to the exported ``.torchscript`` file. """ LOGGER.info(f"\n{prefix} starting export with torch {TORCH_VERSION}...") - file = Path(file) - f = file.with_suffix(".torchscript") + output_file = str(output_file) ts = torch.jit.trace(model, im, strict=False) extra_files = {"config.txt": json.dumps(metadata or {})} # torch._C.ExtraFilesMap() if optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html LOGGER.info(f"{prefix} optimizing for mobile...") from torch.utils.mobile_optimizer import optimize_for_mobile - optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files) + optimize_for_mobile(ts)._save_for_lite_interpreter(output_file, _extra_files=extra_files) else: - ts.save(str(f), _extra_files=extra_files) - return f + ts.save(output_file, _extra_files=extra_files) + return output_file From d93fb45e033d9447fe58918b1265687d8aabb0b5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 16 Apr 2026 15:43:22 +0200 Subject: [PATCH 15/25] Fix redirected URLs (#24255) --- CONTRIBUTING.md | 10 +++++----- README.md | 2 +- README.zh-CN.md | 2 +- docs/en/datasets/explorer/index.md | 6 +++--- docs/en/guides/heatmaps.md | 2 +- docs/en/guides/ros-quickstart.md | 2 +- docs/en/guides/vertex-ai-deployment-with-docker.md | 4 ++-- docs/en/help/contributing.md | 6 +++--- docs/en/index.md | 2 +- docs/en/integrations/axelera.md | 2 +- docs/en/integrations/google-colab.md | 4 ++-- docs/en/integrations/ibm-watsonx.md | 2 +- docs/en/integrations/weights-biases.md | 2 +- docs/en/platform/account/billing.md | 2 +- docs/en/platform/account/settings.md | 2 +- docs/en/platform/account/teams.md | 6 +++--- docs/en/platform/api/index.md | 2 +- docs/en/platform/index.md | 2 +- docs/en/platform/train/projects.md | 2 +- 
.../environments/google_cloud_quickstart_tutorial.md | 2 +- docs/en/yolov5/tutorials/train_custom_data.md | 2 +- examples/RTDETR-ONNXRuntime-Python/README.md | 2 +- examples/YOLO-Series-ONNXRuntime-Rust/README.md | 4 ++-- examples/YOLOv8-ONNXRuntime-CPP/README.md | 4 ++-- examples/YOLOv8-ONNXRuntime-Rust/README.md | 2 +- .../YOLOv8-Segmentation-ONNXRuntime-Python/README.md | 2 +- 26 files changed, 40 insertions(+), 40 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dacd46261d..4939441c29 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,9 +29,9 @@ First-time contributors are expected to submit small, well-scoped pull requests. #### Established Contributors -Pull requests from established contributors generally receive higher review priority. Actions and results are fundamental to the [Ultralytics Mission & Values](https://handbook.ultralytics.com/mission-vision-values/). There is no specific threshold to becoming an 'established contributor' as it's impossible to fit all individuals to the same standard. The Ultralytics Team notices those who make consistent, high-quality contributions that follow the Ultralytics standards. +Pull requests from established contributors generally receive higher review priority. Actions and results are fundamental to the [Ultralytics Mission & Values](https://handbook.ultralytics.com/mission-vision-values). There is no specific threshold to becoming an 'established contributor' as it's impossible to fit all individuals to the same standard. The Ultralytics Team notices those who make consistent, high-quality contributions that follow the Ultralytics standards. -Following our [contributing guidelines](./CONTRIBUTING.md) and [our Development Workflow](https://handbook.ultralytics.com/workflows/development/) is the best way to improve your chances for your work to be reviewed, accepted, and/or recognized; this is not a guarantee. 
In addition, contributors with a strong track record of meaningful contributions to notable open-source projects may be treated as established contributors, even if they are technically first-time contributors to Ultralytics. +Following our [contributing guidelines](./CONTRIBUTING.md) and [our Development Workflow](https://handbook.ultralytics.com/workflows/development) is the best way to improve your chances for your work to be reviewed, accepted, and/or recognized; this is not a guarantee. In addition, contributors with a strong track record of meaningful contributions to notable open-source projects may be treated as established contributors, even if they are technically first-time contributors to Ultralytics. #### Feature PRs @@ -156,11 +156,11 @@ We highly value bug reports as they help us improve the quality and reliability Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.ultralytics.com/legal/agpl-3-0-software-license) for its repositories. This license promotes [openness](https://en.wikipedia.org/wiki/Openness), [transparency](https://www.ultralytics.com/glossary/transparency-in-ai), and [collaborative improvement](https://en.wikipedia.org/wiki/Collaborative_software) in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation. -We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3) to contribute effectively and ethically to the Ultralytics open-source community. +We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) to contribute effectively and ethically to the Ultralytics open-source community. ## 🌍 Open-Sourcing Your YOLO Project Under AGPL-3.0 -Using Ultralytics YOLO models or code in your project? 
The [AGPL-3.0 license](https://opensource.org/license/agpl-v3) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. +Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. ### Why AGPL-3.0 Compliance Matters @@ -179,7 +179,7 @@ Complying means making the **complete corresponding source code** of your projec - **Use Ultralytics Template:** Start with the [Ultralytics template repository](https://github.com/ultralytics/template) for a clean, modular setup integrating YOLO. 2. **License Your Project:** - - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3). + - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0). - Add a notice at the top of each source file indicating the license. 3. **Publish Your Source Code:** diff --git a/README.md b/README.md index f67fbdc864..7d6ddf4b1c 100644 --- a/README.md +++ b/README.md @@ -252,7 +252,7 @@ We look forward to your contributions to help make the Ultralytics ecosystem eve Ultralytics offers two licensing options to suit different needs: -- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is perfect for students, researchers, and enthusiasts. It encourages open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. +- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is perfect for students, researchers, and enthusiasts. 
It encourages open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. - **Ultralytics Enterprise License**: Designed for commercial use, this license allows for the seamless integration of Ultralytics software and AI models into commercial products and services, bypassing the open-source requirements of AGPL-3.0. If your use case involves commercial deployment, please contact us via [Ultralytics Licensing](https://www.ultralytics.com/license). ## 📞 Contact diff --git a/README.zh-CN.md b/README.zh-CN.md index a064cc07b5..8179b41271 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -252,7 +252,7 @@ Ultralytics 支持广泛的 YOLO 模型,从早期的版本如 [YOLOv3](https:/ Ultralytics 提供两种许可选项以满足不同需求: -- **AGPL-3.0 许可证**:这种经 [OSI 批准](https://opensource.org/license/agpl-v3)的开源许可证非常适合学生、研究人员和爱好者。它鼓励开放协作和知识共享。有关完整详细信息,请参阅 [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 文件。 +- **AGPL-3.0 许可证**:这种经 [OSI 批准](https://opensource.org/license/agpl-3.0)的开源许可证非常适合学生、研究人员和爱好者。它鼓励开放协作和知识共享。有关完整详细信息,请参阅 [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 文件。 - **Ultralytics 企业许可证**:专为商业用途设计,此许可证允许将 Ultralytics 软件和 AI 模型无缝集成到商业产品和服务中,绕过 AGPL-3.0 的开源要求。如果您的使用场景涉及商业部署,请通过 [Ultralytics 授权许可](https://www.ultralytics.com/license)与我们联系。 ## 📞 联系方式 diff --git a/docs/en/datasets/explorer/index.md b/docs/en/datasets/explorer/index.md index 4844474705..7d119ba806 100644 --- a/docs/en/datasets/explorer/index.md +++ b/docs/en/datasets/explorer/index.md @@ -39,7 +39,7 @@ pip install ultralytics[explorer] !!! tip - Explorer works on embedding/semantic search & SQL querying and is powered by [LanceDB](https://lancedb.com/) serverless vector database. Unlike traditional in-memory DBs, it is persisted on disk without sacrificing performance, so you can scale locally to large datasets like COCO without running out of memory. 
+ Explorer works on embedding/semantic search & SQL querying and is powered by [LanceDB](https://www.lancedb.com/) serverless vector database. Unlike traditional in-memory DBs, it is persisted on disk without sacrificing performance, so you can scale locally to large datasets like COCO without running out of memory. ## Explorer API @@ -68,7 +68,7 @@ yolo explorer ### What is Ultralytics Explorer and how can it help with CV datasets? -Ultralytics Explorer is a powerful tool designed for exploring [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. By leveraging technologies like [LanceDB](https://lancedb.com/), Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process. +Ultralytics Explorer is a powerful tool designed for exploring [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. By leveraging technologies like [LanceDB](https://www.lancedb.com/), Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process. Learn more about the [Explorer API](api.md). 
@@ -80,7 +80,7 @@ To manually install the optional dependencies needed for Ultralytics Explorer, y pip install ultralytics[explorer] ``` -These dependencies are essential for the full functionality of semantic search and SQL querying. By including libraries powered by [LanceDB](https://lancedb.com/), the installation ensures that the database operations remain efficient and scalable, even for large datasets like [COCO](../detect/coco.md). +These dependencies are essential for the full functionality of semantic search and SQL querying. By including libraries powered by [LanceDB](https://www.lancedb.com/), the installation ensures that the database operations remain efficient and scalable, even for large datasets like [COCO](../detect/coco.md). ### How can I use the GUI version of Ultralytics Explorer? diff --git a/docs/en/guides/heatmaps.md b/docs/en/guides/heatmaps.md index a3a27cecac..9c49d1b874 100644 --- a/docs/en/guides/heatmaps.md +++ b/docs/en/guides/heatmaps.md @@ -216,4 +216,4 @@ cv2.destroyAllWindows() ### Why should businesses choose Ultralytics YOLO26 for heatmap generation in data analysis? -Ultralytics YOLO26 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO26's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans). 
+Ultralytics YOLO26 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO26's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/pricing). diff --git a/docs/en/guides/ros-quickstart.md b/docs/en/guides/ros-quickstart.md index f2f4594c16..aa6eb76096 100644 --- a/docs/en/guides/ros-quickstart.md +++ b/docs/en/guides/ros-quickstart.md @@ -243,7 +243,7 @@ Using YOLO, it is possible to extract and combine information from both RGB and !!! warning "RGB-D Cameras" - When working with depth images, it is essential to ensure that the RGB and depth images are correctly aligned. RGB-D cameras, such as the [Intel RealSense](https://realsenseai.com/) series, provide synchronized RGB and depth images, making it easier to combine information from both sources. If using separate RGB and depth cameras, it is crucial to calibrate them to ensure accurate alignment. + When working with depth images, it is essential to ensure that the RGB and depth images are correctly aligned. RGB-D cameras, such as the [Intel RealSense](https://www.realsenseai.com/) series, provide synchronized RGB and depth images, making it easier to combine information from both sources. If using separate RGB and depth cameras, it is crucial to calibrate them to ensure accurate alignment. 
#### Depth Step-by-Step Usage diff --git a/docs/en/guides/vertex-ai-deployment-with-docker.md b/docs/en/guides/vertex-ai-deployment-with-docker.md index 7352616722..8612e93c22 100644 --- a/docs/en/guides/vertex-ai-deployment-with-docker.md +++ b/docs/en/guides/vertex-ai-deployment-with-docker.md @@ -27,7 +27,7 @@ Before we start, you will need to create a Google Cloud Platform (GCP) project. ## Prerequisites 1. Install [Docker](https://docs.docker.com/engine/install/) on your machine. -2. Install the [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) and [authenticate for using the gcloud CLI](https://cloud.google.com/docs/authentication/gcloud). +2. Install the [Google Cloud SDK](https://docs.cloud.google.com/sdk/docs/install-sdk) and [authenticate for using the gcloud CLI](https://docs.cloud.google.com/docs/authentication/gcloud). 3. It is highly recommended that you go through the [Docker Quickstart Guide for Ultralytics](https://docs.ultralytics.com/guides/docker-quickstart/), because you will need to extend one of the official Ultralytics Docker images while following this guide. ## 1. Create an inference backend with FastAPI @@ -507,7 +507,7 @@ docker push YOUR_REGION-docker.pkg.dev/YOUR_PROJECT_ID/YOUR_REPOSITORY_NAME/IMAG Wait for the process to complete. You should now see the image in your Artifact Registry repository. -For more specific instructions on how to work with images in Artifact Registry, see the Artifact Registry documentation: [Push and pull images](https://cloud.google.com/artifact-registry/docs/docker/pushing-and-pulling). +For more specific instructions on how to work with images in Artifact Registry, see the Artifact Registry documentation: [Push and pull images](https://docs.cloud.google.com/artifact-registry/docs/docker/pushing-and-pulling). ## 4. 
Import your model in Vertex AI diff --git a/docs/en/help/contributing.md b/docs/en/help/contributing.md index 32e930ef6e..143af3d1a0 100644 --- a/docs/en/help/contributing.md +++ b/docs/en/help/contributing.md @@ -207,11 +207,11 @@ We highly value bug reports as they help us improve the quality and reliability Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.ultralytics.com/legal/agpl-3-0-software-license) for its repositories. This license promotes [openness](https://en.wikipedia.org/wiki/Openness), [transparency](https://www.ultralytics.com/glossary/transparency-in-ai), and [collaborative improvement](https://en.wikipedia.org/wiki/Collaborative_software) in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation. -We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3) to contribute effectively and ethically to the Ultralytics open-source community. +We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) to contribute effectively and ethically to the Ultralytics open-source community. ## 🌍 Open-Sourcing Your YOLO Project Under AGPL-3.0 -Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-v3) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. +Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. 
### Why AGPL-3.0 Compliance Matters @@ -230,7 +230,7 @@ Complying means making the **complete corresponding source code** of your projec - **Use Ultralytics Template:** Start with the [Ultralytics template repository](https://github.com/ultralytics/template) for a clean, modular setup integrating YOLO. 2. **License Your Project:** - - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3). + - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0). - Add a notice at the top of each source file indicating the license. 3. **Publish Your Source Code:** diff --git a/docs/en/index.md b/docs/en/index.md index dc085171a3..862712974b 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -164,7 +164,7 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde Ultralytics offers two licensing options to accommodate diverse use cases: -- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details. +- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details. - **Enterprise License**: Designed for commercial use, this license permits seamless integration of Ultralytics software and AI models into commercial goods and services, bypassing the open-source requirements of AGPL-3.0. If your scenario involves embedding our solutions into a commercial offering, reach out through [Ultralytics Licensing](https://www.ultralytics.com/license). 
Our licensing strategy is designed to ensure that any improvements to our open-source projects are returned to the community. We believe in open source, and our mission is to ensure that our contributions can be used and expanded in ways that benefit everyone. diff --git a/docs/en/integrations/axelera.md b/docs/en/integrations/axelera.md index ae49e77214..d58a6ac356 100644 --- a/docs/en/integrations/axelera.md +++ b/docs/en/integrations/axelera.md @@ -6,7 +6,7 @@ keywords: Axelera AI, Metis AIPU, Voyager SDK, Edge AI, YOLOv8, YOLO11, YOLO26, # Axelera AI Export and Deployment -Ultralytics partners with [Axelera AI](https://www.axelera.ai/) to enable high-performance, energy-efficient inference on [Edge AI](https://www.ultralytics.com/glossary/edge-ai) devices. Export and deploy **Ultralytics YOLO models** directly to the **Metis® AIPU** using the **Voyager SDK**. +Ultralytics partners with [Axelera AI](https://axelera.ai/) to enable high-performance, energy-efficient inference on [Edge AI](https://www.ultralytics.com/glossary/edge-ai) devices. Export and deploy **Ultralytics YOLO models** directly to the **Metis® AIPU** using the **Voyager SDK**. ![Axelera AI edge deployment ecosystem for YOLO](https://github.com/user-attachments/assets/c97a0297-390d-47df-bb13-ff1aa499f34a) diff --git a/docs/en/integrations/google-colab.md b/docs/en/integrations/google-colab.md index c5f758576a..af3269d3c7 100644 --- a/docs/en/integrations/google-colab.md +++ b/docs/en/integrations/google-colab.md @@ -103,7 +103,7 @@ If you'd like to dive deeper into Google Colab, here are a few resources to guid - **[Image Segmentation with Ultralytics YOLO26 on Google Colab](https://www.ultralytics.com/blog/image-segmentation-with-ultralytics-yolo11-on-google-colab)**: Explore how to perform image segmentation tasks using YOLO26 in the Google Colab environment, with practical examples using datasets like the Roboflow Carparts Segmentation Dataset. 
-- **[Curated Notebooks](https://colab.google/notebooks/)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas. +- **[Curated Notebooks](https://developers.google.com/colab)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas. - **[Google Colab's Medium Page](https://medium.com/google-colab)**: You can find tutorials, updates, and community contributions here that can help you better understand and utilize this tool. @@ -130,7 +130,7 @@ Google Colab offers several advantages for training YOLO26 models: - **Integration with Google Drive:** Easily store and access datasets and models. - **Collaboration:** Share notebooks with others and collaborate in real-time. -For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://colab.google/notebooks/). +For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://developers.google.com/colab). ### How can I handle Google Colab session timeouts during YOLO26 training? diff --git a/docs/en/integrations/ibm-watsonx.md b/docs/en/integrations/ibm-watsonx.md index 0df5e21ea1..518aaeda7d 100644 --- a/docs/en/integrations/ibm-watsonx.md +++ b/docs/en/integrations/ibm-watsonx.md @@ -87,7 +87,7 @@ Then, you can import the needed packages. For this tutorial, we will use a [marine litter dataset](https://www.kaggle.com/datasets/atiqishrak/trash-dataset-icra19) available on Kaggle. With this dataset, we will custom-train a YOLO26 model to detect and classify litter and biological objects in underwater images. -We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. 
Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials". +We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials". Copy and paste your Kaggle username and API key into the following code. Then run the code to install the API and load the dataset into Watsonx. diff --git a/docs/en/integrations/weights-biases.md b/docs/en/integrations/weights-biases.md index 6b4ac5dc56..1b0534b1da 100644 --- a/docs/en/integrations/weights-biases.md +++ b/docs/en/integrations/weights-biases.md @@ -214,7 +214,7 @@ These features help in tracking experiments, optimizing models, and collaboratin After running your training script with W&B integration: 1. A link to your W&B dashboard will be provided in the console output. -2. Click on the link or go to [wandb.ai](https://wandb.ai/) and log in to your account. +2. Click on the link or go to [wandb.ai](https://wandb.ai/site) and log in to your account. 3. Navigate to your project to view detailed metrics, visualizations, and model performance data. The dashboard offers insights into your model's training process, allowing you to analyze and improve your YOLO26 models effectively. diff --git a/docs/en/platform/account/billing.md b/docs/en/platform/account/billing.md index 2cc39a4ee1..7564546386 100644 --- a/docs/en/platform/account/billing.md +++ b/docs/en/platform/account/billing.md @@ -84,7 +84,7 @@ For organizations with advanced needs: - SLA guarantees (coming soon) - Enterprise support -See [Ultralytics Licensing](https://www.ultralytics.com/licensing) for Enterprise plan details. 
+See [Ultralytics Licensing](https://www.ultralytics.com/license) for Enterprise plan details. ## Credits diff --git a/docs/en/platform/account/settings.md b/docs/en/platform/account/settings.md index 7a126c4096..0de67bd8e8 100644 --- a/docs/en/platform/account/settings.md +++ b/docs/en/platform/account/settings.md @@ -237,7 +237,7 @@ From this tab you can: - **Compare features** across Free, Pro, and Enterprise tiers - **Upgrade to Pro** to unlock more storage, models, team collaboration, and priority GPU access -- **Review Enterprise** capabilities including SSO/SAML, RBAC, and commercial licensing — see [Ultralytics Licensing](https://www.ultralytics.com/licensing) +- **Review Enterprise** capabilities including SSO/SAML, RBAC, and commercial licensing — see [Ultralytics Licensing](https://www.ultralytics.com/license) See [Billing](billing.md) for detailed plan information, pricing, and upgrade instructions. diff --git a/docs/en/platform/account/teams.md b/docs/en/platform/account/teams.md index ae7b8633f4..52a25d45e3 100644 --- a/docs/en/platform/account/teams.md +++ b/docs/en/platform/account/teams.md @@ -101,11 +101,11 @@ Enterprise plans include additional capabilities for organizations with advanced !!! warning "License Expiration" - If your Enterprise license expires, workspace access is blocked until the license is renewed. See [Ultralytics Licensing](https://www.ultralytics.com/licensing) for details. + If your Enterprise license expires, workspace access is blocked until the license is renewed. See [Ultralytics Licensing](https://www.ultralytics.com/license) for details. ### Getting Started with Enterprise -Enterprise plans are provisioned by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/licensing) for plan details. Once your enterprise configuration is set up, you'll receive a provisioning invite to accept as the team Owner, after which you can invite your team members. 
+Enterprise plans are provisioned by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/license) for plan details. Once your enterprise configuration is set up, you'll receive a provisioning invite to accept as the team Owner, after which you can invite your team members. ## FAQ @@ -123,4 +123,4 @@ All team members share a single credit balance. The Owner and Admins can top up ### How do I upgrade from Pro to Enterprise? -Enterprise pricing and provisioning are handled directly by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/licensing) for plan details. +Enterprise pricing and provisioning are handled directly by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/license) for plan details. diff --git a/docs/en/platform/api/index.md b/docs/en/platform/api/index.md index 8497b1fa7d..b5662ef636 100644 --- a/docs/en/platform/api/index.md +++ b/docs/en/platform/api/index.md @@ -2183,7 +2183,7 @@ Webhooks notify your server of Platform events via HTTP POST callbacks: **All plans**: Training webhooks via the Python SDK (real-time metrics, completion notifications) work automatically on every plan -- no configuration required. - **Enterprise only**: Custom webhook endpoints that send HTTP POST callbacks to your own server URL require an Enterprise plan. See [Ultralytics Licensing](https://www.ultralytics.com/licensing) for details. + **Enterprise only**: Custom webhook endpoints that send HTTP POST callbacks to your own server URL require an Enterprise plan. See [Ultralytics Licensing](https://www.ultralytics.com/license) for details. --- diff --git a/docs/en/platform/index.md b/docs/en/platform/index.md index 062ecc2efb..eb8bbb1dbc 100644 --- a/docs/en/platform/index.md +++ b/docs/en/platform/index.md @@ -473,4 +473,4 @@ See [Models Export](train/models.md#export-model), the [Export mode guide](../mo ??? question "Can I use Platform models commercially?" 
- Free and Pro plans use the AGPL license. For commercial use without AGPL requirements, see [Ultralytics Licensing](https://www.ultralytics.com/licensing). + Free and Pro plans use the AGPL license. For commercial use without AGPL requirements, see [Ultralytics Licensing](https://www.ultralytics.com/license). diff --git a/docs/en/platform/train/projects.md b/docs/en/platform/train/projects.md index 8757654afc..8c75f48c95 100644 --- a/docs/en/platform/train/projects.md +++ b/docs/en/platform/train/projects.md @@ -40,7 +40,7 @@ Enter your project details: - **Name**: A descriptive name for your project (a random name is auto-generated) - **Description**: Optional notes about the project purpose - **Visibility**: Public (anyone can view) or Private (only you can access) -- **License**: Optional license for your project (AGPL-3.0, Apache-2.0, MIT, GPL-3.0, BSD-3-Clause, LGPL-3.0, MPL-2.0, EUPL-1.1, Unlicense, Ultralytics-Enterprise, and more). The **Ultralytics-Enterprise** license is for commercial use without AGPL requirements and is available with an Enterprise plan — see [Ultralytics Licensing](https://www.ultralytics.com/licensing). +- **License**: Optional license for your project (AGPL-3.0, Apache-2.0, MIT, GPL-3.0, BSD-3-Clause, LGPL-3.0, MPL-2.0, EUPL-1.1, Unlicense, Ultralytics-Enterprise, and more). The **Ultralytics-Enterprise** license is for commercial use without AGPL requirements and is available with an Enterprise plan — see [Ultralytics Licensing](https://www.ultralytics.com/license). 
![Ultralytics Platform New Project Dialog Name Visibility License](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-new-project-dialog-name-visibility-license.avif) diff --git a/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md index f507986bc8..8c591409e2 100644 --- a/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md +++ b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md @@ -8,7 +8,7 @@ keywords: YOLOv5, Google Cloud Platform, GCP, Deep Learning VM, object detection Embarking on the journey of [artificial intelligence (AI)](https://www.ultralytics.com/glossary/artificial-intelligence-ai) and [machine learning (ML)](https://www.ultralytics.com/glossary/machine-learning-ml) can be exhilarating, especially when you leverage the power and flexibility of a [cloud computing](https://www.ultralytics.com/glossary/cloud-computing) platform. Google Cloud Platform (GCP) offers robust tools tailored for ML enthusiasts and professionals alike. One such tool is the Deep Learning VM, preconfigured for data science and ML tasks. In this tutorial, we will navigate the process of setting up [Ultralytics YOLOv5](../../models/yolov5.md) on a [GCP Deep Learning VM](https://docs.cloud.google.com/deep-learning-vm/docs). Whether you're taking your first steps in ML or you're a seasoned practitioner, this guide provides a clear pathway to implementing [object detection](https://www.ultralytics.com/glossary/object-detection) models powered by YOLOv5. -🆓 Plus, if you're a new GCP user, you're in luck with a [$300 free credit offer](https://cloud.google.com/free/docs/free-cloud-features#free-trial) to kickstart your projects. +🆓 Plus, if you're a new GCP user, you're in luck with a [$300 free credit offer](https://docs.cloud.google.com/free/docs/free-cloud-features) to kickstart your projects. 
In addition to GCP, explore other accessible quickstart options for YOLOv5, like our [Google Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab for a browser-based experience, or the scalability of [Amazon AWS](./aws_quickstart_tutorial.md). Furthermore, container aficionados can utilize our official Docker image available on [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) Docker Pulls for an encapsulated environment, following our [Docker Quickstart Guide](../../guides/docker-quickstart.md). diff --git a/docs/en/yolov5/tutorials/train_custom_data.md b/docs/en/yolov5/tutorials/train_custom_data.md index 426a73fc18..6d4048ff83 100644 --- a/docs/en/yolov5/tutorials/train_custom_data.md +++ b/docs/en/yolov5/tutorials/train_custom_data.md @@ -39,7 +39,7 @@ Developing a custom [object detection](https://docs.ultralytics.com/tasks/detect Ultralytics provides two licensing options to accommodate diverse usage scenarios: - - **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is ideal for students, researchers, and enthusiasts passionate about open collaboration and knowledge sharing. It requires derived works to be shared under the same license. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. + - **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is ideal for students, researchers, and enthusiasts passionate about open collaboration and knowledge sharing. It requires derived works to be shared under the same license. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. 
- **Enterprise License**: Designed for commercial applications, this license permits the seamless integration of Ultralytics software and AI models into commercial products and services without the open-source stipulations of AGPL-3.0. If your project requires commercial deployment, request an [Enterprise License](https://www.ultralytics.com/license). Explore our licensing options further on the [Ultralytics Licensing](https://www.ultralytics.com/license) page. diff --git a/examples/RTDETR-ONNXRuntime-Python/README.md b/examples/RTDETR-ONNXRuntime-Python/README.md index 398a662586..b558b6f092 100644 --- a/examples/RTDETR-ONNXRuntime-Python/README.md +++ b/examples/RTDETR-ONNXRuntime-Python/README.md @@ -16,7 +16,7 @@ pip install -r requirements.txt ### Installing `onnxruntime-gpu` (Optional) -For accelerated inference using an NVIDIA GPU, install the **`onnxruntime-gpu`** package. Ensure you have the correct [NVIDIA drivers](https://www.nvidia.com/Download/index.aspx) and [CUDA toolkit](https://developer.nvidia.com/cuda-toolkit) installed first. Consult the official [ONNX Runtime GPU documentation](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility information and setup instructions. +For accelerated inference using an NVIDIA GPU, install the **`onnxruntime-gpu`** package. Ensure you have the correct [NVIDIA drivers](https://www.nvidia.com/Download/index.aspx) and [CUDA toolkit](https://developer.nvidia.com/cuda/toolkit) installed first. Consult the official [ONNX Runtime GPU documentation](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility information and setup instructions. 
```bash pip install onnxruntime-gpu diff --git a/examples/YOLO-Series-ONNXRuntime-Rust/README.md b/examples/YOLO-Series-ONNXRuntime-Rust/README.md index cbb8839158..40292746f4 100644 --- a/examples/YOLO-Series-ONNXRuntime-Rust/README.md +++ b/examples/YOLO-Series-ONNXRuntime-Rust/README.md @@ -12,7 +12,7 @@ This repository provides a [Rust](https://rust-lang.org/) demo showcasing key [U - **Extensive Model Compatibility**: Supports a wide range of YOLO versions including [YOLOv5](https://docs.ultralytics.com/models/yolov5/), [YOLOv6](https://docs.ultralytics.com/models/yolov6/), [YOLOv7](https://docs.ultralytics.com/models/yolov7/), [YOLOv8](https://docs.ultralytics.com/models/yolov8/), [YOLOv9](https://docs.ultralytics.com/models/yolov9/), [YOLOv10](https://docs.ultralytics.com/models/yolov10/), [YOLO11](https://docs.ultralytics.com/models/yolo11/), [YOLO-World](https://docs.ultralytics.com/models/yolo-world/), [RT-DETR](https://docs.ultralytics.com/models/rtdetr/), and others. - **Versatile Task Coverage**: Includes examples for `Classification`, `Segmentation`, `Detection`, `Pose`, and `OBB`. - **Precision Flexibility**: Works seamlessly with `FP16` and `FP32` precision [ONNX models](https://docs.ultralytics.com/integrations/onnx/). -- **Execution Providers**: Accelerated support for `CPU`, [CUDA](https://developer.nvidia.com/cuda-toolkit), [CoreML](https://developer.apple.com/documentation/coreml), and [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/). +- **Execution Providers**: Accelerated support for `CPU`, [CUDA](https://developer.nvidia.com/cuda/toolkit), [CoreML](https://developer.apple.com/documentation/coreml), and [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/). - **Dynamic Input Shapes**: Dynamically adjusts to variable `batch`, `width`, and `height` dimensions for flexible model input. - **Flexible Data Loading**: The `DataLoader` component handles images, folders, videos, and real-time video streams. 
- **Real-Time Display and Video Export**: The `Viewer` provides real-time frame visualization and video export functions, similar to OpenCV’s `imshow()` and `imwrite()`. @@ -45,7 +45,7 @@ This repository provides a [Rust](https://rust-lang.org/) demo showcasing key [U ### 2. [Optional] Install CUDA, CuDNN, and TensorRT -- The CUDA execution provider requires [NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) version `12.x`. +- The CUDA execution provider requires [NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda/toolkit) version `12.x`. - The TensorRT execution provider requires both CUDA `12.x` and [NVIDIA TensorRT](https://developer.nvidia.com/tensorrt) `10.x`. Ensure [cuDNN](https://developer.nvidia.com/cudnn) is also correctly installed. ### 3. [Optional] Install ffmpeg diff --git a/examples/YOLOv8-ONNXRuntime-CPP/README.md b/examples/YOLOv8-ONNXRuntime-CPP/README.md index ec5b3cf314..49fa403e75 100644 --- a/examples/YOLOv8-ONNXRuntime-CPP/README.md +++ b/examples/YOLOv8-ONNXRuntime-CPP/README.md @@ -8,7 +8,7 @@ This example provides a practical guide on performing inference with [Ultralytic - **Deployment-Friendly:** Well-suited for deployment in industrial and production environments. - **Performance:** Offers faster [inference latency](https://www.ultralytics.com/glossary/inference-latency) compared to OpenCV's DNN module on both CPU and [GPU](https://www.ultralytics.com/glossary/gpu-graphics-processing-unit). -- **Acceleration:** Supports FP32 and [FP16 (Half Precision)](https://www.ultralytics.com/glossary/half-precision) inference acceleration using [NVIDIA CUDA](https://developer.nvidia.com/cuda-toolkit). +- **Acceleration:** Supports FP32 and [FP16 (Half Precision)](https://www.ultralytics.com/glossary/half-precision) inference acceleration using [NVIDIA CUDA](https://developer.nvidia.com/cuda/toolkit). 
## ☕ Note @@ -85,7 +85,7 @@ Ensure you have the following dependencies installed: | [OpenCV](https://opencv.org/releases/) | >=4.0.0 | Required for image loading and preprocessing. | | C++ Compiler | C++17 Support | Needed for features like ``. ([GCC](https://gcc.gnu.org/), [Clang](https://clang.llvm.org/), [MSVC](https://visualstudio.microsoft.com/vs/features/cplusplus/)) | | [CMake](https://cmake.org/download/) | >=3.18 | Cross-platform build system generator. Version 3.18+ recommended for better CUDA support discovery. | -| [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) (Optional) | >=11.4, <12.0 | Required for GPU acceleration via ONNX Runtime's CUDA Execution Provider. **Must be CUDA 11.x**. | +| [CUDA Toolkit](https://developer.nvidia.com/cuda/toolkit) (Optional) | >=11.4, <12.0 | Required for GPU acceleration via ONNX Runtime's CUDA Execution Provider. **Must be CUDA 11.x**. | | [cuDNN](https://developer.nvidia.com/cudnn) (CUDA required) | =8.x | Required by CUDA Execution Provider. **Must be cuDNN 8.x** compatible with your CUDA 11.x version. | **Important Notes:** diff --git a/examples/YOLOv8-ONNXRuntime-Rust/README.md b/examples/YOLOv8-ONNXRuntime-Rust/README.md index 6a9b6552f5..cf99fc1536 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/README.md +++ b/examples/YOLOv8-ONNXRuntime-Rust/README.md @@ -35,7 +35,7 @@ Please follow the official Rust installation guide: [https://www.rust-lang.org/t ### 3. [Optional] Install CUDA & CuDNN & TensorRT -- The CUDA execution provider requires [CUDA](https://developer.nvidia.com/cuda-toolkit) v11.6+. +- The CUDA execution provider requires [CUDA](https://developer.nvidia.com/cuda/toolkit) v11.6+. - The TensorRT execution provider requires CUDA v11.4+ and [TensorRT](https://developer.nvidia.com/tensorrt) v8.4+. You may also need [cuDNN](https://developer.nvidia.com/cudnn). 
## ▶️ Get Started diff --git a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md index c5fed39006..280f7bc5be 100644 --- a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md +++ b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md @@ -54,7 +54,7 @@ We welcome contributions to improve this demo! If you encounter bugs, have featu ## 📄 License -This project is licensed under the AGPL-3.0 License. For detailed information, please see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file or read the full [AGPL-3.0 license text](https://opensource.org/license/agpl-v3). +This project is licensed under the AGPL-3.0 License. For detailed information, please see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file or read the full [AGPL-3.0 license text](https://opensource.org/license/agpl-3.0). ## 🙏 Acknowledgments From eefe58d89a3c8875c1a7d47a3eb60150f9e09d0e Mon Sep 17 00:00:00 2001 From: ShuaiLYU Date: Thu, 16 Apr 2026 22:21:31 -0500 Subject: [PATCH 16/25] # model.model.model[-1].savpe.init_weights() --- train_yoloe26.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_yoloe26.py b/train_yoloe26.py index 1d298018a6..f5925d594a 100644 --- a/train_yoloe26.py +++ b/train_yoloe26.py @@ -379,7 +379,7 @@ if args.trainer == "YOLOETrainerFromScratch" or args.trainer== "YOLOESegTrainerF elif args.trainer == "YOLOEVPTrainer": print("Using YOLOEVPTrainer for training.") # reinit the model.model.savpe. - model.model.model[-1].savpe.init_weights() + # model.model.model[-1].savpe.init_weights() # freeze every layer except of the savpe module. 
head_index = len(model.model.model) - 1 From d4380d3c643d68921f6c60f199deba7902c7827a Mon Sep 17 00:00:00 2001 From: ShuaiLYU Date: Thu, 16 Apr 2026 23:06:32 -0500 Subject: [PATCH 17/25] add old_engine1cache --- train_yoloe26.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/train_yoloe26.py b/train_yoloe26.py index f5925d594a..f6af379d79 100644 --- a/train_yoloe26.py +++ b/train_yoloe26.py @@ -258,6 +258,51 @@ DATA_CONFIG["old_enginecache"] = dict( ) ) +DATA_CONFIG["old_enginecache"] = dict( + train=dict( + grounding_data=[ + dict( + img_path="../datasets/Objects365v1/images/train", + json_file="../datasets/Objects365v1/annotations/objects365_train_segm.engine.cache", + ) , + dict( + img_path="../datasets/flickr/full_images/", + json_file="../datasets/flickr/annotations/final_flickr_separateGT_train_segm.engine.cache" + ), + dict( + img_path="../datasets/mixed_grounding/gqa/images", + json_file="../datasets/mixed_grounding/annotations/final_mixed_train_no_coco_segm.engine.cache" + ), + ] + ), + val=dict( + yolo_data=["../datasets/lvis.yaml"] + ) +) + +DATA_CONFIG["old_engine1cache"] = dict( + train=dict( + grounding_data=[ + dict( + img_path="../datasets/Objects365v1/images/train", + json_file="../datasets/Objects365v1/annotations/objects365_train_segm.engine1.cache", + ) , + dict( + img_path="../datasets/flickr/full_images/", + json_file="../datasets/flickr/annotations/final_flickr_separateGT_train_segm.engine1.cache" + ), + dict( + img_path="../datasets/mixed_grounding/gqa/images", + json_file="../datasets/mixed_grounding/annotations/final_mixed_train_no_coco_segm.engine1.cache" + ), + ] + ), + val=dict( + yolo_data=["../datasets/lvis.yaml"] + ) +) + + import ultralytics,os From 11ed37279baa7c0959dd63fcbe13bd2496202c51 Mon Sep 17 00:00:00 2001 From: ShuaiLYU Date: Fri, 17 Apr 2026 00:51:14 -0500 Subject: [PATCH 18/25] feat: add missing text embeddings preprocessing in preprocess_batch --- 
ultralytics/models/yolo/world/train.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ultralytics/models/yolo/world/train.py b/ultralytics/models/yolo/world/train.py index 19dc4ccd30..75a7fe3b8a 100644 --- a/ultralytics/models/yolo/world/train.py +++ b/ultralytics/models/yolo/world/train.py @@ -166,6 +166,14 @@ class WorldTrainer(DetectionTrainer): # Add text features texts = list(itertools.chain(*batch["texts"])) + missing = [t for t in texts if t not in self.text_embeddings] + if missing: + missing_unique = list(set(missing)) + LOGGER.warning(f"Missing {len(missing_unique)} text embeddings, generating on the fly: {missing_unique}") + new_feats = unwrap_model(self.model).get_text_pe(missing_unique, len(missing_unique), cache_clip_model=False) + for name, feat in zip(missing_unique, new_feats.squeeze(0)): + self.text_embeddings[name] = feat + txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to( self.device, non_blocking=self.device.type == "cuda" ) From 0579ac71fc3c0ac0b136ec94597d186fb65073c2 Mon Sep 17 00:00:00 2001 From: Murat Raimbekov Date: Fri, 17 Apr 2026 12:08:35 +0600 Subject: [PATCH 19/25] Add CLI parity for region, security, and parking solutions (#24251) Co-authored-by: UltralyticsAssistant Co-authored-by: Onuralp SEZER --- docs/en/guides/parking-management.md | 9 ++++++++ docs/en/guides/region-counting.md | 6 ++++++ docs/en/guides/security-alarm-system.md | 9 ++++++++ docs/en/usage/cli.md | 28 +++++++++++++++++++++++++ ultralytics/cfg/__init__.py | 14 ++++++++++++- ultralytics/solutions/security_alarm.py | 2 +- 6 files changed, 66 insertions(+), 2 deletions(-) diff --git a/docs/en/guides/parking-management.md b/docs/en/guides/parking-management.md index d4619d3c40..7037ff9fed 100644 --- a/docs/en/guides/parking-management.md +++ b/docs/en/guides/parking-management.md @@ -109,6 +109,15 @@ Parking management with [Ultralytics YOLO26](https://github.com/ultralytics/ultr cv2.destroyAllWindows() # destroy all opened windows 
``` + === "CLI" + + ```bash + yolo solutions parking source="path/to/video.mp4" json_file="bounding_boxes.json" show=True + ``` + + !!! note + Create parking zone annotations first using `ParkingPtsSelection()` in Python (Step 2 above), then pass the JSON file to the CLI command. + ### `ParkingManagement` Arguments Here's a table with the `ParkingManagement` arguments: diff --git a/docs/en/guides/region-counting.md b/docs/en/guides/region-counting.md index b5e5b2482d..3c5a22d02f 100644 --- a/docs/en/guides/region-counting.md +++ b/docs/en/guides/region-counting.md @@ -87,6 +87,12 @@ keywords: object counting, regions, YOLO26, computer vision, Ultralytics, effici cv2.destroyAllWindows() # destroy all opened windows ``` + === "CLI" + + ```bash + yolo solutions region source="path/to/video.mp4" show=True region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" + ``` + !!! tip "Ultralytics Example Code" The Ultralytics region counting module is available in our [examples section](https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-Region-Counter/yolov8_region_counter.py). You can explore this example for code customization and modify it to suit your specific use case. diff --git a/docs/en/guides/security-alarm-system.md b/docs/en/guides/security-alarm-system.md index 11aba1a9f0..18c9911656 100644 --- a/docs/en/guides/security-alarm-system.md +++ b/docs/en/guides/security-alarm-system.md @@ -79,6 +79,15 @@ The Security Alarm System Project utilizing Ultralytics YOLO26 integrates advanc cv2.destroyAllWindows() # destroy all opened windows ``` + === "CLI" + + ```bash + yolo solutions security source="path/to/video.mp4" show=True + ``` + + !!! note + Email alerts require the Python API to call `.authenticate()`. The CLI provides detection and visualization only. + When you run the code, you will receive a single email notification if any object is detected. The notification is sent immediately, not repeatedly. 
You can customize the code to suit your project requirements. #### Email Received Sample diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md index 12f2828a71..55e72c72b9 100644 --- a/docs/en/usage/cli.md +++ b/docs/en/usage/cli.md @@ -365,6 +365,34 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions trackzone region="[(150, 150), (1130, 150), (1130, 570), (150, 570)]" # configure zone coordinates ``` + === "Region" + + Count objects inside specific polygonal regions: + + ```bash + yolo solutions region show=True + yolo solutions region source="path/to/video.mp4" # specify video file path + yolo solutions region region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" # configure region coordinates + ``` + + === "Security" + + Run security alarm monitoring with object detection: + + ```bash + yolo solutions security show=True + yolo solutions security source="path/to/video.mp4" # specify video file path + ``` + + === "Parking" + + Monitor parking lot occupancy using pre-defined zones: + + ```bash + yolo solutions parking source="path/to/video.mp4" json_file="bounding_boxes.json" # requires pre-built JSON + yolo solutions parking source="path/to/video.mp4" json_file="bounding_boxes.json" model="yolo26n.pt" + ``` + === "Help" View available solutions and their options: diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index c9bc904137..185fc08ae4 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -48,6 +48,9 @@ SOLUTION_MAP = { "analytics": "Analytics", "inference": "Inference", "trackzone": "TrackZone", + "region": "RegionCounter", + "security": "SecurityAlarm", + "parking": "ParkingManagement", "help": None, } @@ -111,7 +114,16 @@ SOLUTIONS_HELP_MSG = f""" 6. Track objects within specific zones yolo solutions trackzone source="path/to/video.mp4" region="[(150, 150), (1130, 150), (1130, 570), (150, 570)]" - 7. Streamlit real-time webcam inference GUI + 7. 
Count objects inside specific regions + yolo solutions region source="path/to/video.mp4" region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" + + 8. Run security alarm monitoring (email alerts require Python API) + yolo solutions security source="path/to/video.mp4" + + 9. Monitor parking occupancy (create JSON annotations first via Python ParkingPtsSelection) + yolo solutions parking source="path/to/video.mp4" json_file="bounding_boxes.json" + + 10. Streamlit real-time webcam inference GUI yolo streamlit-predict """ CLI_HELP_MSG = f""" diff --git a/ultralytics/solutions/security_alarm.py b/ultralytics/solutions/security_alarm.py index e92cdcb09c..a0a949f761 100644 --- a/ultralytics/solutions/security_alarm.py +++ b/ultralytics/solutions/security_alarm.py @@ -140,7 +140,7 @@ class SecurityAlarm(BaseSolution): annotator.box_label(box, label=self.names[cls], color=colors(cls, True)) total_det = len(self.clss) - if total_det >= self.records and not self.email_sent: # Only send email if not sent before + if total_det >= self.records and not self.email_sent and self.server: self.send_email(im0, total_det) self.email_sent = True From 08612af9500e0a7181efcf845721f96494540ca1 Mon Sep 17 00:00:00 2001 From: Murat Raimbekov Date: Fri, 17 Apr 2026 12:57:20 +0600 Subject: [PATCH 20/25] Add supported model variants note to sony-imx500 integration page (#24248) Co-authored-by: Jing Qiu <61612323+Laughing-q@users.noreply.github.com> --- docs/en/integrations/sony-imx500.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/integrations/sony-imx500.md b/docs/en/integrations/sony-imx500.md index 40a65ab822..4f11a8dc11 100644 --- a/docs/en/integrations/sony-imx500.md +++ b/docs/en/integrations/sony-imx500.md @@ -44,6 +44,10 @@ Currently, you can only export models that include the following tasks to IMX500 - [Classification](https://docs.ultralytics.com/tasks/classify/) - [Instance segmentation](https://docs.ultralytics.com/tasks/segment/) +!!! 
note "Supported model variants" + + IMX export is designed and benchmarked for **YOLOv8n** and **YOLO11n** (nano). Other architectures and model scales are not supported. + ## Usage Examples Export an Ultralytics YOLO11 model to IMX500 format and run inference with the exported model. From 4912cff3b694312226f1267e29cf874e562a91b5 Mon Sep 17 00:00:00 2001 From: Mason Date: Fri, 17 Apr 2026 16:04:59 +0800 Subject: [PATCH 21/25] Fix in-place obb `gt_bboxes` modification in `RotatedTaskAlignedAssigner` (#24260) --- ultralytics/utils/tal.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py index c23996944f..5fb868ffa6 100644 --- a/ultralytics/utils/tal.py +++ b/ultralytics/utils/tal.py @@ -373,15 +373,16 @@ class RotatedTaskAlignedAssigner(TaskAlignedAssigner): Returns: (torch.Tensor): Boolean mask of positive anchors with shape (b, n_boxes, h*w). """ - wh_mask = gt_bboxes[..., 2:4] < self.stride[0] - gt_bboxes[..., 2:4] = torch.where( + gt_bboxes_clone = gt_bboxes.clone() + wh_mask = gt_bboxes_clone[..., 2:4] < self.stride[0] + gt_bboxes_clone[..., 2:4] = torch.where( (wh_mask * mask_gt).bool(), - torch.tensor(self.stride_val, dtype=gt_bboxes.dtype, device=gt_bboxes.device), - gt_bboxes[..., 2:4], + torch.tensor(self.stride_val, dtype=gt_bboxes_clone.dtype, device=gt_bboxes_clone.device), + gt_bboxes_clone[..., 2:4], ) # (b, n_boxes, 5) --> (b, n_boxes, 4, 2) - corners = xywhr2xyxyxyxy(gt_bboxes) + corners = xywhr2xyxyxyxy(gt_bboxes_clone) # (b, n_boxes, 1, 2) a, b, _, d = corners.split(1, dim=-2) ab = b - a From f4d0fda2cb5aa9925f3b08c56fc883664cf3872d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 17 Apr 2026 13:56:11 +0200 Subject: [PATCH 22/25] Harden retry behavior in docker and links workflows (#24261) --- .github/workflows/docker.yml | 40 ++++++++++++++++++++++++++---------- .github/workflows/links.yml | 2 ++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git 
a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f4116d20da..7322ee2a0d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -118,24 +118,42 @@ jobs: uses: docker/setup-buildx-action@v4 - name: Login to Docker Hub - uses: docker/login-action@v4 + uses: ultralytics/actions/retry@main + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} + run: | + if ! out=$(printf '%s' "$DOCKERHUB_TOKEN" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin 2>&1); then + printf '%s\n' "$out" >&2 + exit 1 + fi + echo "Logged in to docker.io" - name: Login to GHCR - uses: docker/login-action@v4 + uses: ultralytics/actions/retry@main + env: + GHCR_USERNAME: ${{ github.repository_owner }} + GHCR_TOKEN: ${{ secrets._GITHUB_TOKEN }} with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets._GITHUB_TOKEN }} + run: | + if ! out=$(printf '%s' "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin 2>&1); then + printf '%s\n' "$out" >&2 + exit 1 + fi + echo "Logged in to ghcr.io" - name: Login to NVIDIA NGC - uses: docker/login-action@v4 + uses: ultralytics/actions/retry@main + env: + NVIDIA_NGC_API_KEY: ${{ secrets.NVIDIA_NGC_API_KEY }} with: - registry: nvcr.io - username: $oauthtoken - password: ${{ secrets.NVIDIA_NGC_API_KEY }} + run: | + if ! 
out=$(printf '%s' "$NVIDIA_NGC_API_KEY" | docker login nvcr.io -u '$oauthtoken' --password-stdin 2>&1); then + printf '%s\n' "$out" >&2 + exit 1 + fi + echo "Logged in to nvcr.io" - name: Retrieve Ultralytics version id: get_version diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 23645a6559..2cf1059f3e 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -35,6 +35,7 @@ jobs: timeout_minutes: 60 retry_delay_seconds: 1800 retries: 2 + backoff: fixed run: | lychee \ --scheme https \ @@ -70,6 +71,7 @@ jobs: timeout_minutes: 60 retry_delay_seconds: 1800 retries: 2 + backoff: fixed run: | lychee \ --scheme https \ From 6108bad693bbc9d4ca20f2499472ebb0979f2702 Mon Sep 17 00:00:00 2001 From: ShuaiLYU Date: Sat, 18 Apr 2026 09:25:55 -0500 Subject: [PATCH 23/25] add YOLOEDetectVpValidator --- ultralytics/cfg/default.yaml | 1 + ultralytics/models/yolo/yoloe/train.py | 20 +++++- ultralytics/models/yolo/yoloe/val.py | 86 ++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index 14c9214f5c..8eaf077c49 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -132,3 +132,4 @@ cfg: # (str, optional) path to a config.yaml that overrides defaults # Tracker settings ------------------------------------------------------------------------------------------------------ tracker: botsort.yaml # (str) tracker config file: botsort.yaml or bytetrack.yaml +refer_data: # (str, optional) path to data.yaml for reference class names and colors during tracking diff --git a/ultralytics/models/yolo/yoloe/train.py b/ultralytics/models/yolo/yoloe/train.py index e42a5fa059..84b4189b71 100644 --- a/ultralytics/models/yolo/yoloe/train.py +++ b/ultralytics/models/yolo/yoloe/train.py @@ -15,7 +15,7 @@ from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK from ultralytics.utils.torch_utils import unwrap_model from 
..world.train_world import WorldTrainerFromScratch -from .val import YOLOEDetectValidator +from .val import YOLOEDetectValidator,YOLOEDetectVpValidator class YOLOETrainer(DetectionTrainer): @@ -95,7 +95,8 @@ class YOLOETrainer(DetectionTrainer): Returns: (Dataset): YOLO dataset configured for training or validation. """ - gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32) + self.load_vp = False + gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32) return build_yolo_dataset( self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train" ) @@ -265,6 +266,8 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch): (YOLOConcatDataset | Dataset): YOLO dataset configured for training or validation, with visual prompts for training mode. """ + self.load_vp = True + self.refer_data=self.args.refer_data dataset = super().build_dataset(img_path, mode, batch) if isinstance(dataset, YOLOConcatDataset): for d in dataset.datasets: @@ -281,3 +284,16 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch): d.transforms.append(LoadVisualPrompt()) else: self.train_loader.dataset.transforms.append(LoadVisualPrompt()) + + def preprocess_batch(self, batch): + """Preprocess a batch of images for YOLOE training, moving visual prompts to the appropriate device.""" + batch = super().preprocess_batch(batch) + batch["visuals"] = batch["visuals"].to(self.device, non_blocking=True) + return batch + + def get_validator(self): + """Return a YOLOEDetectValidator for YOLOE model validation.""" + self.loss_names = "box", "cls", "dfl" + return YOLOEDetectVpValidator( + self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks + ) \ No newline at end of file diff --git a/ultralytics/models/yolo/yoloe/val.py b/ultralytics/models/yolo/yoloe/val.py index 48ee5d3a10..02388c7e12 100644 --- a/ultralytics/models/yolo/yoloe/val.py +++ b/ultralytics/models/yolo/yoloe/val.py @@ -200,6 +200,92 @@ 
class YOLOEDetectValidator(DetectionValidator): return stats + +class YOLOEDetectVpValidator(YOLOEDetectValidator): + """YOLOE detection validator that supports both text and visual prompt embeddings.""" + @smart_inference_mode() + def __call__( + self, + trainer: Any | None = None, + model: YOLOEModel | str | None = None, + vp_weight: float= 1.0, + ) -> dict[str, Any]: + """ + Run validation on the model using either text or visual prompt embeddings. + + This method validates the model using either text prompts or visual prompts, depending on the load_vp flag. + It supports validation during training (using a trainer object) or standalone validation with a provided + model. For visual prompts, reference data can be specified to extract embeddings from a different dataset. + + Args: + trainer (object, optional): Trainer object containing the model and device. + model (YOLOEModel | str, optional): Model to validate. Required if trainer is not provided. + refer_data (str, optional): Path to reference data for visual prompts. + load_vp (bool): Whether to load visual prompts. If False, text prompts are used. + vp_weight (float): Weight for visual prompt embeddings when combining with text embeddings. Default is 1.0. + + Returns: + (dict): Validation statistics containing metrics computed during validation. 
+ """ + if trainer is not None: + self.device = trainer.device + model = trainer.ema.ema + refer_data=self.args.refer_data + if refer_data: + LOGGER.info("Validate using the visual prompt.") + if vp_weight<1: + LOGGER.info(f"Using vp_weight {vp_weight} to combine visual and text prompt embeddings.") + self.args.half = False + # Directly use the same dataloader for visual embeddings extracted during training + vp_data = check_det_dataset(trainer.refer_data) + names = [name.split("/", 1)[0] for name in list(vp_data["names"].values())] + dataloader = self.get_vpe_dataloader(vp_data) + vpe = self.get_visual_pe(dataloader, model) + if vp_weight<1: + vpe= vpe*vp_weight + (1-vp_weight)*model.get_text_pe(names) + model.set_classes(names, vpe) + + stats = DetectionValidator.__call__(self,trainer, model) + else: + self.device = select_device(self.args.device, verbose=False) + + if isinstance(model, (str, Path)): + from ultralytics.nn.tasks import load_checkpoint + + model, _ = load_checkpoint(model, device=self.device) # model, ckpt + model.eval().to(self.device) + refer_data=self.args.refer_data + vp_data = check_det_dataset(refer_data) + names = [name.split("/", 1)[0] for name in list(vp_data["names"].values())] + + + LOGGER.info("Validate using the visual prompt.") + if vp_weight<1: + LOGGER.info(f"Using vp_weight {vp_weight} to combine visual and text prompt embeddings.") + + + self.args.half = False + + dataloader = self.get_vpe_dataloader(vp_data) + vpe = self.get_visual_pe(dataloader, model) + + if vp_weight<1: + vpe= vpe*vp_weight + (1-vp_weight)*model.get_text_pe(names) + + + + model.set_classes(names, vpe) + stats = DetectionValidator.__call__(self,model=deepcopy(model)) + + return stats + + + + + + + + class YOLOESegValidator(YOLOEDetectValidator, SegmentationValidator): """YOLOE segmentation validator that supports both text and visual prompt embeddings.""" From e27bb67517a37f6dad88cfc9f041c1dd4317ffde Mon Sep 17 00:00:00 2001 From: ShuaiLYU Date: Sun, 19 Apr 
2026 19:56:24 -0500 Subject: [PATCH 24/25] add refer_data arg back in train_yoloe26.py --- train_yoloe26.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_yoloe26.py b/train_yoloe26.py index f6af379d79..f71dc241e8 100644 --- a/train_yoloe26.py +++ b/train_yoloe26.py @@ -539,7 +539,7 @@ train_args=dict( data=data, weight_decay=args.weight_decay, single_cls=single_cls, # for YOLOEPEFreeTrainer freeze=freeze, # for YOLOEVPTrainer - # refer_data=refer_data, # for YOLOEVPTrainer) + refer_data=refer_data, # for YOLOEVPTrainer) save_json=args.save_json, ) From 81a1ff48af86c14595cd754182fbd19d83e3a667 Mon Sep 17 00:00:00 2001 From: ShuaiLYU Date: Sun, 19 Apr 2026 19:57:18 -0500 Subject: [PATCH 25/25] freeze cv4 for vp train --- train_yoloe26.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_yoloe26.py b/train_yoloe26.py index f71dc241e8..bdee99c986 100644 --- a/train_yoloe26.py +++ b/train_yoloe26.py @@ -445,7 +445,7 @@ elif args.trainer == "YOLOEVPTrainer": # f"{head_index}.{name}.1.norm", # f"{head_index}.{name}.2.norm", # ] ) - continue + freeze.append(f"{head_index}.{name}") else: freeze.append(f"{head_index}.{name}")