Merge branch 'main' into reid-task-official-clean

This commit is contained in:
Laughing-q 2026-04-16 21:55:47 +08:00
commit 6790c9a1ad
132 changed files with 1927 additions and 1273 deletions

View file

@ -74,19 +74,19 @@ jobs:
uv pip list
- name: Benchmark DetectionModel
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}.pt' imgsz=160 verbose=0.218
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}.pt' imgsz=160 verbose=0.216
- name: Benchmark ClassificationModel
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-cls.pt' imgsz=160 verbose=0.249
- name: Benchmark YOLOWorld DetectionModel
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov8s-worldv2.pt' imgsz=160 verbose=0.337
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov8s-worldv2.pt' imgsz=160 verbose=0.335
- name: Benchmark SegmentationModel
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.230
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.229
- name: Benchmark PoseModel
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.194
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.185
- name: Benchmark OBBModel
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-obb.pt' imgsz=160 verbose=0.372
@ -345,15 +345,15 @@ jobs:
yolo checks
uv pip list
- name: Benchmark DetectionModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n.pt' imgsz=160 verbose=0.218
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n.pt' imgsz=160 verbose=0.216
- name: Benchmark ClassificationModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-cls.pt' imgsz=160 verbose=0.249
- name: Benchmark YOLOWorld DetectionModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolov8s-worldv2.pt' imgsz=160 verbose=0.337
run: python -m ultralytics.cfg.__init__ benchmark model='yolov8s-worldv2.pt' imgsz=160 verbose=0.335
- name: Benchmark SegmentationModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-seg.pt' imgsz=160 verbose=0.230
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-seg.pt' imgsz=160 verbose=0.229
- name: Benchmark PoseModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-pose.pt' imgsz=160 verbose=0.194
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-pose.pt' imgsz=160 verbose=0.185
- name: Benchmark OBBModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-obb.pt' imgsz=160 verbose=0.372
- name: Benchmark Summary
@ -435,7 +435,7 @@ jobs:
channel-priority: true
activate-environment: anaconda-client-env
- name: Install Ultralytics package from conda-forge
run: conda install -c pytorch -c conda-forge pytorch-cpu torchvision ultralytics "openvino!=2026.0.0"
run: conda install -c pytorch -c conda-forge pytorch-cpu torchvision ultralytics "openvino<2026"
- name: Install pip packages
run: uv pip install pytest
- name: Check environment

View file

@ -227,7 +227,7 @@ jobs:
- name: Run Benchmarks
if: (github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true') && (matrix.platforms == 'linux/amd64' || matrix.dockerfile == 'Dockerfile-arm64') && matrix.dockerfile != 'Dockerfile' && matrix.dockerfile != 'Dockerfile-conda'
run: docker run ultralytics/ultralytics:${{ (matrix.tags == 'latest-python' && 'latest-python-export') || (matrix.tags == 'latest' && 'latest-export') || matrix.tags }} yolo benchmark model=yolo26n.pt imgsz=160 verbose=0.218
run: docker run ultralytics/ultralytics:${{ (matrix.tags == 'latest-python' && 'latest-python-export') || (matrix.tags == 'latest' && 'latest-export') || matrix.tags }} yolo benchmark model=yolo26n.pt imgsz=160 verbose=0.216
- name: Push All Images
if: github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')

View file

@ -29,9 +29,9 @@ First-time contributors are expected to submit small, well-scoped pull requests.
#### Established Contributors
Pull requests from established contributors generally receive higher review priority. Actions and results are fundamental to the [Ultralytics Mission & Values](https://handbook.ultralytics.com/mission-vision-values/). There is no specific threshold to becoming an 'established contributor' as it's impossible to fit all individuals to the same standard. The Ultralytics Team notices those who make consistent, high-quality contributions that follow the Ultralytics standards.
Pull requests from established contributors generally receive higher review priority. Actions and results are fundamental to the [Ultralytics Mission & Values](https://handbook.ultralytics.com/mission-vision-values). There is no specific threshold to becoming an 'established contributor' as it's impossible to fit all individuals to the same standard. The Ultralytics Team notices those who make consistent, high-quality contributions that follow the Ultralytics standards.
Following our [contributing guidelines](./CONTRIBUTING.md) and [our Development Workflow](https://handbook.ultralytics.com/workflows/development/) is the best way to improve your chances for your work to be reviewed, accepted, and/or recognized; this is not a guarantee. In addition, contributors with a strong track record of meaningful contributions to notable open-source projects may be treated as established contributors, even if they are technically first-time contributors to Ultralytics.
Following our [contributing guidelines](./CONTRIBUTING.md) and [our Development Workflow](https://handbook.ultralytics.com/workflows/development) is the best way to improve your chances for your work to be reviewed, accepted, and/or recognized; this is not a guarantee. In addition, contributors with a strong track record of meaningful contributions to notable open-source projects may be treated as established contributors, even if they are technically first-time contributors to Ultralytics.
#### Feature PRs
@ -156,11 +156,11 @@ We highly value bug reports as they help us improve the quality and reliability
Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.ultralytics.com/legal/agpl-3-0-software-license) for its repositories. This license promotes [openness](https://en.wikipedia.org/wiki/Openness), [transparency](https://www.ultralytics.com/glossary/transparency-in-ai), and [collaborative improvement](https://en.wikipedia.org/wiki/Collaborative_software) in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation.
We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3) to contribute effectively and ethically to the Ultralytics open-source community.
We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) to contribute effectively and ethically to the Ultralytics open-source community.
## 🌍 Open-Sourcing Your YOLO Project Under AGPL-3.0
Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-v3) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open.
Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open.
### Why AGPL-3.0 Compliance Matters
@ -179,7 +179,7 @@ Complying means making the **complete corresponding source code** of your projec
- **Use Ultralytics Template:** Start with the [Ultralytics template repository](https://github.com/ultralytics/template) for a clean, modular setup integrating YOLO.
2. **License Your Project:**
- Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3).
- Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0).
- Add a notice at the top of each source file indicating the license.
3. **Publish Your Source Code:**

View file

@ -252,7 +252,7 @@ We look forward to your contributions to help make the Ultralytics ecosystem eve
Ultralytics offers two licensing options to suit different needs:
- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is perfect for students, researchers, and enthusiasts. It encourages open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details.
- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is perfect for students, researchers, and enthusiasts. It encourages open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details.
- **Ultralytics Enterprise License**: Designed for commercial use, this license allows for the seamless integration of Ultralytics software and AI models into commercial products and services, bypassing the open-source requirements of AGPL-3.0. If your use case involves commercial deployment, please contact us via [Ultralytics Licensing](https://www.ultralytics.com/license).
## 📞 Contact

View file

@ -252,7 +252,7 @@ Ultralytics 支持广泛的 YOLO 模型,从早期的版本如 [YOLOv3](https:/
Ultralytics 提供两种许可选项以满足不同需求:
- **AGPL-3.0 许可证**:这种经 [OSI 批准](https://opensource.org/license/agpl-v3)的开源许可证非常适合学生、研究人员和爱好者。它鼓励开放协作和知识共享。有关完整详细信息,请参阅 [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 文件。
- **AGPL-3.0 许可证**:这种经 [OSI 批准](https://opensource.org/license/agpl-3.0)的开源许可证非常适合学生、研究人员和爱好者。它鼓励开放协作和知识共享。有关完整详细信息,请参阅 [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 文件。
- **Ultralytics 企业许可证**:专为商业用途设计,此许可证允许将 Ultralytics 软件和 AI 模型无缝集成到商业产品和服务中,绕过 AGPL-3.0 的开源要求。如果您的使用场景涉及商业部署,请通过 [Ultralytics 授权许可](https://www.ultralytics.com/license)与我们联系。
## 📞 联系方式

View file

@ -41,8 +41,8 @@ RUN sed -i 's/^\( *"tensorflowjs\)>=.*\(".*\)/\1>=3.9.0\2/' pyproject.toml && \
# Pip install onnxruntime-gpu, torch, torchvision and ultralytics, then remove build files
RUN uv pip install --system \
https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.18.0-cp38-cp38-linux_aarch64.whl \
https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.2.0-cp38-cp38-linux_aarch64.whl \
https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.17.2+c1d70fe-cp38-cp38-linux_aarch64.whl && \
https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl \
https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl && \
# Need lower version of 'numpy' for TensorRT export
uv pip install --system numpy==1.23.5 && \
uv pip install --system -e ".[export]" && \

View file

@ -8,7 +8,7 @@ keywords: TT100K, Tsinghua-Tencent 100K, traffic sign detection, YOLO26, dataset
The [Tsinghua-Tencent 100K (TT100K)](https://cg.cs.tsinghua.edu.cn/traffic-sign/) is a large-scale traffic sign benchmark dataset created from 100,000 Tencent Street View panoramas. This dataset is specifically designed for traffic sign detection and classification in real-world conditions, providing researchers and developers with a comprehensive resource for building robust traffic sign recognition systems.
The dataset contains **100,000 images** with over **30,000 traffic sign instances** across **221 different categories**. These images capture large variations in illuminance, weather conditions, viewing angles, and distances, making it ideal for training models that need to perform reliably in diverse real-world scenarios.
The dataset contains **100,000 images** with over **30,000 traffic sign instances** across **221 annotation categories**. The original paper applies a 100-instance threshold per class for supervised training, yielding a commonly used **45-class** subset; however, the provided Ultralytics dataset configuration retains all **221 annotated categories**, many of which are very sparse. These images capture large variations in illuminance, weather conditions, viewing angles, and distances, making it ideal for training models that need to perform reliably in diverse real-world scenarios.
This dataset is particularly valuable for:

View file

@ -8,7 +8,7 @@ keywords: Ultralytics, Explorer API, dataset exploration, SQL queries, similarit
!!! warning "Community Note ⚠️"
As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
## Introduction
@ -331,13 +331,6 @@ Start creating your own CV dataset exploration reports using the Explorer API. F
Try our [GUI Demo](dashboard.md) based on Explorer API
## Coming Soon
- [ ] Merge specific labels from datasets. Example - Import all `person` labels from COCO and `car` labels from Cityscapes
- [ ] Remove images that have a higher similarity index than the given threshold
- [ ] Automatically persist new datasets after merging/removing entries
- [ ] Advanced Dataset Visualizations
## FAQ
### What is the Ultralytics Explorer API used for?

View file

@ -8,7 +8,7 @@ keywords: Ultralytics Explorer GUI, semantic search, vector similarity, SQL quer
!!! warning "Community Note ⚠️"
As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
Explorer GUI is built on the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries, and natural language queries using the Ask AI feature powered by LLMs.

View file

@ -45,7 +45,7 @@ Install `ultralytics` and run `yolo explorer` in your terminal to run custom que
!!! warning "Community Note ⚠️"
As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
## Setup

View file

@ -8,7 +8,7 @@ keywords: Ultralytics Explorer, CV datasets, semantic search, SQL queries, vecto
!!! warning "Community Note ⚠️"
As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).
<p>
<img width="1709" alt="Ultralytics Explorer dataset visualization GUI" src="https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/explorer-dashboard-screenshot-1.avif">
@ -39,7 +39,7 @@ pip install ultralytics[explorer]
!!! tip
Explorer works on embedding/semantic search & SQL querying and is powered by [LanceDB](https://lancedb.com/) serverless vector database. Unlike traditional in-memory DBs, it is persisted on disk without sacrificing performance, so you can scale locally to large datasets like COCO without running out of memory.
Explorer works on embedding/semantic search & SQL querying and is powered by [LanceDB](https://www.lancedb.com/) serverless vector database. Unlike traditional in-memory DBs, it is persisted on disk without sacrificing performance, so you can scale locally to large datasets like COCO without running out of memory.
## Explorer API
@ -68,7 +68,7 @@ yolo explorer
### What is Ultralytics Explorer and how can it help with CV datasets?
Ultralytics Explorer is a powerful tool designed for exploring [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. By leveraging technologies like [LanceDB](https://lancedb.com/), Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process.
Ultralytics Explorer is a powerful tool designed for exploring [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. By leveraging technologies like [LanceDB](https://www.lancedb.com/), Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process.
Learn more about the [Explorer API](api.md).
@ -80,7 +80,7 @@ To manually install the optional dependencies needed for Ultralytics Explorer, y
pip install ultralytics[explorer]
```
These dependencies are essential for the full functionality of semantic search and SQL querying. By including libraries powered by [LanceDB](https://lancedb.com/), the installation ensures that the database operations remain efficient and scalable, even for large datasets like [COCO](../detect/coco.md).
These dependencies are essential for the full functionality of semantic search and SQL querying. By including libraries powered by [LanceDB](https://www.lancedb.com/), the installation ensures that the database operations remain efficient and scalable, even for large datasets like [COCO](../detect/coco.md).
### How can I use the GUI version of Ultralytics Explorer?

View file

@ -36,9 +36,10 @@ Before you start to follow this guide:
- Visit our documentation, [Quick Start Guide: NVIDIA Jetson with Ultralytics YOLO26](nvidia-jetson.md) to set up your NVIDIA Jetson device with Ultralytics YOLO26
- Install [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) according to the JetPack version
- For JetPack 4.6.4, install [DeepStream 6.0.1](https://docs.nvidia.com/metropolis/deepstream/6.0.1/dev-guide/text/DS_Quickstart.html)
- For JetPack 5.1.3, install [DeepStream 6.3](https://docs.nvidia.com/metropolis/deepstream/6.3/dev-guide/text/DS_Quickstart.html)
- For JetPack 6.1, install [DeepStream 7.1](https://docs.nvidia.com/metropolis/deepstream/7.0/dev-guide/text/DS_Overview.html)
- For JetPack 4.6.4, install [DeepStream 6.0.1](https://archive.docs.nvidia.com/metropolis/deepstream/6.0.1/dev-guide/text/DS_Quickstart.html)
- For JetPack 5.1.3, install [DeepStream 6.3](https://archive.docs.nvidia.com/metropolis/deepstream/6.3/dev-guide/text/DS_Quickstart.html)
- For JetPack 6.1, install [DeepStream 7.1](https://docs.nvidia.com/metropolis/deepstream/7.1/text/DS_Overview.html)
- For JetPack 7.1, install [DeepStream 9.0](https://docs.nvidia.com/metropolis/deepstream/9.0/text/DS_Overview.html)
!!! tip

View file

@ -216,4 +216,4 @@ cv2.destroyAllWindows()
### Why should businesses choose Ultralytics YOLO26 for heatmap generation in data analysis?
Ultralytics YOLO26 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO26's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans).
Ultralytics YOLO26 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO26's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/pricing).

View file

@ -70,7 +70,7 @@ Use metrics like AP50, F1-score, or custom metrics to evaluate the model's perfo
### Log Results
It's crucial to log both the performance metrics and the corresponding hyperparameters for future reference. Ultralytics YOLO automatically saves these results in CSV format.
It's crucial to log both the performance metrics and the corresponding hyperparameters for future reference. Ultralytics YOLO automatically saves these results in NDJSON format.
### Repeat
@ -90,6 +90,7 @@ The following table lists the default search space parameters for hyperparameter
| `warmup_momentum` | `float` | `(0.0, 0.95)` | Initial momentum during warmup phase. Gradually increases to the final momentum value |
| `box` | `float` | `(1.0, 20.0)` | Bounding box loss weight in the total loss function. Balances box regression vs classification |
| `cls` | `float` | `(0.1, 4.0)` | Classification loss weight in the total loss function. Higher values emphasize correct class prediction |
| `cls_pw` | `float` | `(0.0, 1.0)` | Class weighting power for handling class imbalance. Higher values increase weight on rare classes |
| `dfl` | `float` | `(0.4, 12.0)` | DFL (Distribution Focal Loss) weight in the total loss function. Higher values emphasize precise bounding box localization |
| `hsv_h` | `float` | `(0.0, 0.1)` | Random hue augmentation range in HSV color space. Helps model generalize across color variations |
| `hsv_s` | `float` | `(0.0, 0.9)` | Random saturation augmentation range in HSV space. Simulates different lighting conditions |
@ -186,8 +187,8 @@ runs/
├── ...
└── tune/
├── best_hyperparameters.yaml
├── best_fitness.png
├── tune_results.csv
├── tune_fitness.png
├── tune_results.ndjson
├── tune_scatter_plots.png
└── weights/
├── last.pt
@ -236,7 +237,7 @@ This YAML file contains the best-performing hyperparameters found during the tun
copy_paste: 0.0
```
#### best_fitness.png
#### tune_fitness.png
This is a plot displaying fitness (typically a performance metric like AP50) against the number of iterations. It helps you visualize how well the genetic algorithm performed over time.
@ -247,23 +248,59 @@ This is a plot displaying fitness (typically a performance metric like AP50) aga
<img width="640" src="https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/best-fitness.avif" alt="Hyperparameter Tuning Fitness vs Iteration">
</p>
#### tune_results.csv
#### tune_results.ndjson
A CSV file containing detailed results of each iteration during the tuning. Each row in the file represents one iteration, and it includes metrics like fitness score, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), as well as the hyperparameters used.
An NDJSON file containing detailed results of each tuning iteration. Each line is one JSON object with the aggregate fitness, tuned hyperparameters, and per-dataset metrics. Single-dataset and multi-dataset tuning use the same file format.
- **Format**: CSV
- **Format**: NDJSON
- **Usage**: Per-iteration results tracking.
- **Example**:
```csv
fitness,lr0,lrf,momentum,weight_decay,warmup_epochs,warmup_momentum,box,cls,dfl,hsv_h,hsv_s,hsv_v,degrees,translate,scale,shear,perspective,flipud,fliplr,mosaic,mixup,copy_paste
0.05021,0.01,0.01,0.937,0.0005,3.0,0.8,7.5,0.5,1.5,0.015,0.7,0.4,0.0,0.1,0.5,0.0,0.0,0.0,0.5,1.0,0.0,0.0
0.07217,0.01003,0.00967,0.93897,0.00049,2.79757,0.81075,7.5,0.50746,1.44826,0.01503,0.72948,0.40658,0.0,0.0987,0.4922,0.0,0.0,0.0,0.49729,1.0,0.0,0.0
0.06584,0.01003,0.00855,0.91009,0.00073,3.42176,0.95,8.64301,0.54594,1.72261,0.01503,0.59179,0.40658,0.0,0.0987,0.46955,0.0,0.0,0.0,0.49729,0.80187,0.0,0.0
```
A pretty-printed example is shown below for readability. In the actual `.ndjson` file, each object is stored on a single line.
```json
{
"iteration": 1,
"fitness": 0.23345,
"hyperparameters": {
"lr0": 0.01,
"lrf": 0.01,
"momentum": 0.937,
"weight_decay": 0.0005
},
"datasets": {
"coco8": {
"fitness": 0.28992
},
"coco8-grayscale": {
"fitness": 0.17697
}
}
}
{
"iteration": 2,
"fitness": 0.23661,
"hyperparameters": {
"lr0": 0.0062,
"lrf": 0.01,
"momentum": 0.90058,
"weight_decay": 0.0
},
"datasets": {
"coco8": {
"fitness": 0.29561
},
"coco8-grayscale": {
"fitness": 0.1776
}
}
}
```
#### tune_scatter_plots.png
This file contains scatter plots generated from `tune_results.csv`, helping you visualize relationships between different hyperparameters and performance metrics. Note that hyperparameters initialized to 0 will not be tuned, such as `degrees` and `shear` below.
This file contains scatter plots generated from `tune_results.ndjson`, helping you visualize relationships between different hyperparameters and performance metrics. Note that hyperparameters initialized to 0 will not be tuned, such as `degrees` and `shear` below.
- **Format**: PNG
- **Usage**: Exploratory data analysis

View file

@ -143,7 +143,7 @@ pip install torch torchvision --index-url https://download.pytorch.org/whl/cu130
When running PyTorch 2.9.1 on NVIDIA DGX Spark, you may encounter the following `UserWarning` when initializing CUDA (e.g. running `yolo checks`, `yolo predict`, etc.):
```text
```
UserWarning: Found GPU0 NVIDIA GB10 which is of cuda capability 12.1.
Minimum and Maximum cuda capability supported by this version of PyTorch is (8.0) - (12.0)
```

View file

@ -270,11 +270,11 @@ The above ultralytics installation will install Torch and Torchvision. However,
pip uninstall torch torchvision
```
2. Install `torch 2.2.0` and `torchvision 0.17.2` according to JP5.1.2
2. Install `torch 2.1.0` and `torchvision 0.16.2` according to JP5.1.2
```bash
pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.2.0-cp38-cp38-linux_aarch64.whl
pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.17.2+c1d70fe-cp38-cp38-linux_aarch64.whl
pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl
```
!!! note
@ -415,14 +415,14 @@ Even though all model exports work on NVIDIA Jetson, we have only included **PyT
<figure style="text-align: center;">
<img src="https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/jetson-orin-nano-super-benchmarks-coco128.avif" alt="Jetson Orin Nano Super Benchmarks">
<figcaption style="font-style: italic; color: gray;">Benchmarked with Ultralytics 8.3.157</figcaption>
<figcaption style="font-style: italic; color: gray;">Benchmarked with Ultralytics 8.4.33</figcaption>
</figure>
#### NVIDIA Jetson Orin NX 16GB
<figure style="text-align: center;">
<img src="https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/jetson-orin-nx-16-benchmarks-coco128.avif" alt="Jetson Orin NX 16GB Benchmarks">
<figcaption style="font-style: italic; color: gray;">Benchmarked with Ultralytics 8.3.157</figcaption>
<figcaption style="font-style: italic; color: gray;">Benchmarked with Ultralytics 8.4.33</figcaption>
</figure>
### Detailed Comparison Tables
@ -619,92 +619,92 @@ The below table represents the benchmark results for five different models (YOLO
!!! tip "Performance"
=== "YOLO11n"
=== "YOLO26n"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 5.4 | 0.5101 | 13.70 |
| TorchScript | ✅ | 10.5 | 0.5082 | 13.69 |
| ONNX | ✅ | 10.2 | 0.5081 | 14.47 |
| OpenVINO | ✅ | 10.4 | 0.5058 | 56.66 |
| TensorRT (FP32) | ✅ | 12.0 | 0.5081 | 7.44 |
| TensorRT (FP16) | ✅ | 8.2 | 0.5061 | 4.53 |
| TensorRT (INT8) | ✅ | 5.4 | 0.4825 | 3.70 |
| TF SavedModel | ✅ | 25.9 | 0.5077 | 116.23 |
| TF GraphDef | ✅ | 10.3 | 0.5077 | 114.92 |
| TF Lite | ✅ | 10.3 | 0.5077 | 340.75 |
| MNN | ✅ | 10.1 | 0.5059 | 76.26 |
| NCNN | ✅ | 10.2 | 0.5031 | 45.03 |
| PyTorch | ✅ | 5.3 | 0.4790 | 15.60 |
| TorchScript | ✅ | 9.8 | 0.4770 | 12.60 |
| ONNX | ✅ | 9.5 | 0.4760 | 15.76 |
| OpenVINO | ✅ | 9.6 | 0.4820 | 56.23 |
| TensorRT (FP32) | ✅ | 11.3 | 0.4770 | 7.53 |
| TensorRT (FP16) | ✅ | 8.1 | 0.4800 | 4.57 |
| TensorRT (INT8) | ✅ | 5.3 | 0.4490 | 3.80 |
| TF SavedModel | ✅ | 24.6 | 0.4760 | 118.33 |
| TF GraphDef | ✅ | 9.5 | 0.4760 | 116.30 |
| TF Lite | ✅ | 9.9 | 0.4760 | 286.00 |
| MNN | ✅ | 9.4 | 0.4760 | 68.77 |
| NCNN | ✅ | 9.3 | 0.4810 | 47.50 |
=== "YOLO11s"
=== "YOLO26s"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 18.4 | 0.5790 | 20.90 |
| TorchScript | ✅ | 36.5 | 0.5781 | 21.22 |
| ONNX | ✅ | 36.3 | 0.5781 | 25.07 |
| OpenVINO | ✅ | 36.4 | 0.5810 | 122.98 |
| TensorRT (FP32) | ✅ | 37.9 | 0.5783 | 13.02 |
| TensorRT (FP16) | ✅ | 21.8 | 0.5779 | 6.93 |
| TensorRT (INT8) | ✅ | 12.2 | 0.5735 | 5.08 |
| TF SavedModel | ✅ | 91.0 | 0.5782 | 250.65 |
| TF GraphDef | ✅ | 36.4 | 0.5782 | 252.69 |
| TF Lite | ✅ | 36.3 | 0.5782 | 998.68 |
| MNN | ✅ | 36.2 | 0.5781 | 188.01 |
| NCNN | ✅ | 36.2 | 0.5784 | 101.37 |
| PyTorch | ✅ | 20.0 | 0.5730 | 22.83 |
| TorchScript | ✅ | 36.8 | 0.5670 | 21.83 |
| ONNX | ✅ | 36.5 | 0.5664 | 26.29 |
| OpenVINO | ✅ | 36.7 | 0.5653 | 127.09 |
| TensorRT (FP32) | ✅ | 38.2 | 0.5664 | 13.60 |
| TensorRT (FP16) | ✅ | 21.3 | 0.5649 | 7.17 |
| TensorRT (INT8) | ✅ | 12.7 | 0.5468 | 5.25 |
| TF SavedModel | ✅ | 92.2 | 0.5665 | 263.69 |
| TF GraphDef | ✅ | 36.5 | 0.5665 | 268.21 |
| TF Lite | ✅ | 36.9 | 0.5665 | 949.63 |
| MNN | ✅ | 36.4 | 0.5644 | 184.68 |
| NCNN | ✅ | 36.4 | 0.5697 | 107.48 |
=== "YOLO11m"
=== "YOLO26m"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 38.8 | 0.6266 | 46.50 |
| TorchScript | ✅ | 77.3 | 0.6307 | 47.95 |
| ONNX | ✅ | 76.9 | 0.6307 | 53.06 |
| OpenVINO | ✅ | 77.1 | 0.6284 | 301.63 |
| TensorRT (FP32) | ✅ | 78.8 | 0.6305 | 27.86 |
| TensorRT (FP16) | ✅ | 41.7 | 0.6309 | 13.50 |
| TensorRT (INT8) | ✅ | 23.2 | 0.6291 | 9.12 |
| TF SavedModel | ✅ | 192.7 | 0.6307 | 622.24 |
| TF GraphDef | ✅ | 77.1 | 0.6307 | 628.74 |
| TF Lite | ✅ | 77.0 | 0.6307 | 2997.93 |
| MNN | ✅ | 76.8 | 0.6299 | 509.96 |
| NCNN | ✅ | 76.8 | 0.6284 | 292.99 |
| PyTorch | ✅ | 43.0 | 0.6220 | 44.43 |
| TorchScript | ✅ | 78.5 | 0.6230 | 44.00 |
| ONNX | ✅ | 78.2 | 0.6225 | 53.44 |
| OpenVINO | ✅ | 78.3 | 0.6186 | 303.26 |
| TensorRT (FP32) | ✅ | 80.0 | 0.6217 | 28.19 |
| TensorRT (FP16) | ✅ | 42.6 | 0.6225 | 13.59 |
| TensorRT (INT8) | ✅ | 23.4 | 0.5817 | 9.30 |
| TF SavedModel | ✅ | 196.3 | 0.6229 | 636.03 |
| TF GraphDef | ✅ | 78.2 | 0.6229 | 659.57 |
| TF Lite | ✅ | 78.5 | 0.6229 | 2905.17 |
| MNN | ✅ | 78.0 | 0.6168 | 500.09 |
| NCNN | ✅ | 78.0 | 0.6224 | 332.39 |
=== "YOLO11l"
=== "YOLO26l"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 49.0 | 0.6364 | 56.50 |
| TorchScript | ✅ | 97.6 | 0.6409 | 62.51 |
| ONNX | ✅ | 97.0 | 0.6399 | 68.35 |
| OpenVINO | ✅ | 97.3 | 0.6378 | 376.03 |
| TensorRT (FP32) | ✅ | 99.2 | 0.6396 | 35.59 |
| TensorRT (FP16) | ✅ | 52.1 | 0.6361 | 17.48 |
| TensorRT (INT8) | ✅ | 30.9 | 0.6207 | 11.87 |
| TF SavedModel | ✅ | 243.1 | 0.6409 | 807.47 |
| TF GraphDef | ✅ | 97.2 | 0.6409 | 822.88 |
| TF Lite | ✅ | 97.1 | 0.6409 | 3792.23 |
| MNN | ✅ | 96.9 | 0.6372 | 631.16 |
| NCNN | ✅ | 96.9 | 0.6364 | 350.46 |
| PyTorch | ✅ | 51.0 | 0.6230 | 60.97 |
| TorchScript | ✅ | 95.5 | 0.6250 | 56.20 |
| ONNX | ✅ | 95.0 | 0.6247 | 68.12 |
| OpenVINO | ✅ | 95.3 | 0.6238 | 397.84 |
| TensorRT (FP32) | ✅ | 97.1 | 0.6250 | 35.88 |
| TensorRT (FP16) | ✅ | 51.4 | 0.6225 | 17.42 |
| TensorRT (INT8) | ✅ | 30.0 | 0.5923 | 11.83 |
| TF SavedModel | ✅ | 238.4 | 0.6245 | 835.83 |
| TF GraphDef | ✅ | 95.0 | 0.6245 | 852.16 |
| TF Lite | ✅ | 95.4 | 0.6245 | 3650.85 |
| MNN | ✅ | 94.8 | 0.6257 | 612.37 |
| NCNN | ✅ | 94.8 | 0.6323 | 405.45 |
=== "YOLO11x"
=== "YOLO26x"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 109.3 | 0.7005 | 90.00 |
| TorchScript | ✅ | 218.1 | 0.6901 | 113.40 |
| ONNX | ✅ | 217.5 | 0.6901 | 122.94 |
| OpenVINO | ✅ | 217.8 | 0.6876 | 713.1 |
| TensorRT (FP32) | ✅ | 219.5 | 0.6904 | 66.93 |
| TensorRT (FP16) | ✅ | 112.2 | 0.6892 | 32.58 |
| TensorRT (INT8) | ✅ | 61.5 | 0.6612 | 19.90 |
| TF SavedModel | ✅ | 544.3 | 0.6900 | 1605.4 |
| TF GraphDef | ✅ | 217.8 | 0.6900 | 2961.8 |
| TF Lite | ✅ | 217.6 | 0.6900 | 8234.86 |
| MNN | ✅ | 217.3 | 0.6893 | 1254.18 |
| NCNN | ✅ | 217.3 | 0.6849 | 725.50 |
| PyTorch | ✅ | 113.2 | 0.6561 | 98.44 |
| TorchScript | ✅ | 214.0 | 0.6593 | 98.00 |
| ONNX | ✅ | 212.9 | 0.6595 | 122.43 |
| OpenVINO | ✅ | 213.2 | 0.6592 | 760.72 |
| TensorRT (FP32) | ✅ | 215.1 | 0.6593 | 67.17 |
| TensorRT (FP16) | ✅ | 110.2 | 0.6637 | 32.60 |
| TensorRT (INT8) | ✅ | 59.9 | 0.6170 | 19.99 |
| TF SavedModel | ✅ | 533.3 | 0.6593 | 1647.06 |
| TF GraphDef | ✅ | 212.9 | 0.6593 | 1670.30 |
| TF Lite | ✅ | 213.3 | 0.6590 | 8066.30 |
| MNN | ✅ | 212.8 | 0.6600 | 1227.90 |
| NCNN | ✅ | 212.8 | 0.6666 | 782.24 |
Benchmarked with Ultralytics 8.3.157
Benchmarked with Ultralytics 8.4.33
!!! note
@ -714,92 +714,92 @@ The below table represents the benchmark results for five different models (YOLO
!!! tip "Performance"
=== "YOLO11n"
=== "YOLO26n"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 5.4 | 0.5101 | 12.90 |
| TorchScript | ✅ | 10.5 | 0.5082 | 13.17 |
| ONNX | ✅ | 10.2 | 0.5081 | 15.43 |
| OpenVINO | ✅ | 10.4 | 0.5058 | 39.80 |
| TensorRT (FP32) | ✅ | 11.8 | 0.5081 | 7.94 |
| TensorRT (FP16) | ✅ | 8.1 | 0.5085 | 4.73 |
| TensorRT (INT8) | ✅ | 5.4 | 0.4786 | 3.90 |
| TF SavedModel | ✅ | 25.9 | 0.5077 | 88.48 |
| TF GraphDef | ✅ | 10.3 | 0.5077 | 86.67 |
| TF Lite | ✅ | 10.3 | 0.5077 | 302.55 |
| MNN | ✅ | 10.1 | 0.5059 | 52.73 |
| NCNN | ✅ | 10.2 | 0.5031 | 32.04 |
| PyTorch | ✅ | 5.3 | 0.4799 | 13.90 |
| TorchScript | ✅ | 9.8 | 0.4787 | 11.60 |
| ONNX | ✅ | 9.5 | 0.4763 | 14.18 |
| OpenVINO | ✅ | 9.6 | 0.4819 | 40.19 |
| TensorRT (FP32) | ✅ | 11.4 | 0.4770 | 7.01 |
| TensorRT (FP16) | ✅ | 8.0 | 0.4789 | 4.13 |
| TensorRT (INT8) | ✅ | 5.5 | 0.4489 | 3.49 |
| TF SavedModel | ✅ | 24.6 | 0.4764 | 92.34 |
| TF GraphDef | ✅ | 9.5 | 0.4764 | 92.06 |
| TF Lite | ✅ | 9.9 | 0.4764 | 254.43 |
| MNN | ✅ | 9.4 | 0.4760 | 48.55 |
| NCNN | ✅ | 9.3 | 0.4805 | 34.31 |
=== "YOLO11s"
=== "YOLO26s"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 18.4 | 0.5790 | 21.70 |
| TorchScript | ✅ | 36.5 | 0.5781 | 22.71 |
| ONNX | ✅ | 36.3 | 0.5781 | 26.49 |
| OpenVINO | ✅ | 36.4 | 0.5810 | 84.73 |
| TensorRT (FP32) | ✅ | 37.8 | 0.5783 | 13.77 |
| TensorRT (FP16) | ✅ | 21.2 | 0.5796 | 7.31 |
| TensorRT (INT8) | ✅ | 12.0 | 0.5735 | 5.33 |
| TF SavedModel | ✅ | 91.0 | 0.5782 | 185.06 |
| TF GraphDef | ✅ | 36.4 | 0.5782 | 186.45 |
| TF Lite | ✅ | 36.3 | 0.5782 | 882.58 |
| MNN | ✅ | 36.2 | 0.5775 | 126.36 |
| NCNN | ✅ | 36.2 | 0.5784 | 66.73 |
| PyTorch | ✅ | 19.5 | 0.5738 | 20.40 |
| TorchScript | ✅ | 36.8 | 0.5664 | 19.20 |
| ONNX | ✅ | 36.5 | 0.5664 | 24.35 |
| OpenVINO | ✅ | 36.7 | 0.5653 | 88.18 |
| TensorRT (FP32) | ✅ | 38.5 | 0.5664 | 12.62 |
| TensorRT (FP16) | ✅ | 21.5 | 0.5652 | 6.41 |
| TensorRT (INT8) | ✅ | 12.6 | 0.5468 | 4.78 |
| TF SavedModel | ✅ | 92.2 | 0.5665 | 195.16 |
| TF GraphDef | ✅ | 36.5 | 0.5665 | 197.57 |
| TF Lite | ✅ | 36.9 | 0.5665 | 827.48 |
| MNN | ✅ | 36.4 | 0.5649 | 123.47 |
| NCNN | ✅ | 36.4 | 0.5697 | 74.04 |
=== "YOLO11m"
=== "YOLO26m"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 38.8 | 0.6266 | 45.00 |
| TorchScript | ✅ | 77.3 | 0.6307 | 51.87 |
| ONNX | ✅ | 76.9 | 0.6307 | 56.00 |
| OpenVINO | ✅ | 77.1 | 0.6284 | 202.69 |
| TensorRT (FP32) | ✅ | 78.7 | 0.6305 | 30.38 |
| TensorRT (FP16) | ✅ | 41.8 | 0.6302 | 14.48 |
| TensorRT (INT8) | ✅ | 23.2 | 0.6291 | 9.74 |
| TF SavedModel | ✅ | 192.7 | 0.6307 | 445.58 |
| TF GraphDef | ✅ | 77.1 | 0.6307 | 460.94 |
| TF Lite | ✅ | 77.0 | 0.6307 | 2653.65 |
| MNN | ✅ | 76.8 | 0.6308 | 339.38 |
| NCNN | ✅ | 76.8 | 0.6284 | 187.64 |
| PyTorch | ✅ | 42.2 | 0.6237 | 38.60 |
| TorchScript | ✅ | 78.5 | 0.6227 | 40.50 |
| ONNX | ✅ | 78.2 | 0.6225 | 48.87 |
| OpenVINO | ✅ | 78.3 | 0.6186 | 205.69 |
| TensorRT (FP32) | ✅ | 80.1 | 0.6217 | 24.69 |
| TensorRT (FP16) | ✅ | 42.6 | 0.6225 | 11.66 |
| TensorRT (INT8) | ✅ | 23.4 | 0.5817 | 8.22 |
| TF SavedModel | ✅ | 196.3 | 0.6229 | 451.48 |
| TF GraphDef | ✅ | 78.2 | 0.6229 | 460.94 |
| TF Lite | ✅ | 78.5 | 0.6229 | 2555.53 |
| MNN | ✅ | 78.0 | 0.6217 | 333.33 |
| NCNN | ✅ | 78.0 | 0.6224 | 214.60 |
=== "YOLO11l"
=== "YOLO26l"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 49.0 | 0.6364 | 56.60 |
| TorchScript | ✅ | 97.6 | 0.6409 | 66.72 |
| ONNX | ✅ | 97.0 | 0.6399 | 71.92 |
| OpenVINO | ✅ | 97.3 | 0.6378 | 254.17 |
| TensorRT (FP32) | ✅ | 99.2 | 0.6406 | 38.89 |
| TensorRT (FP16) | ✅ | 51.9 | 0.6363 | 18.59 |
| TensorRT (INT8) | ✅ | 30.9 | 0.6207 | 12.60 |
| TF SavedModel | ✅ | 243.1 | 0.6409 | 575.98 |
| TF GraphDef | ✅ | 97.2 | 0.6409 | 583.79 |
| TF Lite | ✅ | 97.1 | 0.6409 | 3353.41 |
| MNN | ✅ | 96.9 | 0.6367 | 421.33 |
| NCNN | ✅ | 96.9 | 0.6364 | 228.26 |
| PyTorch | ✅ | 50.7 | 0.6258 | 48.60 |
| TorchScript | ✅ | 95.5 | 0.6249 | 51.60 |
| ONNX | ✅ | 95.0 | 0.6247 | 61.95 |
| OpenVINO | ✅ | 95.3 | 0.6238 | 272.47 |
| TensorRT (FP32) | ✅ | 97.1 | 0.6250 | 31.64 |
| TensorRT (FP16) | ✅ | 51.4 | 0.6225 | 14.77 |
| TensorRT (INT8) | ✅ | 30.0 | 0.5923 | 10.49 |
| TF SavedModel | ✅ | 238.4 | 0.6245 | 596.46 |
| TF GraphDef | ✅ | 95.0 | 0.6245 | 606.10 |
| TF Lite | ✅ | 95.4 | 0.6245 | 3275.55 |
| MNN | ✅ | 94.8 | 0.6247 | 408.15 |
| NCNN | ✅ | 94.8 | 0.6323 | 262.99 |
=== "YOLO11x"
=== "YOLO26x"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 109.3 | 0.7005 | 98.50 |
| TorchScript | ✅ | 218.1 | 0.6901 | 123.03 |
| ONNX | ✅ | 217.5 | 0.6901 | 129.55 |
| OpenVINO | ✅ | 217.8 | 0.6876 | 483.44 |
| TensorRT (FP32) | ✅ | 219.6 | 0.6904 | 75.92 |
| TensorRT (FP16) | ✅ | 112.1 | 0.6885 | 35.78 |
| TensorRT (INT8) | ✅ | 61.6 | 0.6592 | 21.60 |
| TF SavedModel | ✅ | 544.3 | 0.6900 | 1120.43 |
| TF GraphDef | ✅ | 217.7 | 0.6900 | 1172.35 |
| TF Lite | ✅ | 217.6 | 0.6900 | 7283.63 |
| MNN | ✅ | 217.3 | 0.6877 | 840.16 |
| NCNN | ✅ | 217.3 | 0.6849 | 474.41 |
| PyTorch | ✅ | 113.2 | 0.6561 | 84.40 |
| TorchScript | ✅ | 213.5 | 0.6594 | 91.20 |
| ONNX | ✅ | 212.9 | 0.6595 | 109.34 |
| OpenVINO | ✅ | 213.2 | 0.6592 | 520.88 |
| TensorRT (FP32) | ✅ | 215.1 | 0.6593 | 57.18 |
| TensorRT (FP16) | ✅ | 109.7 | 0.6632 | 26.76 |
| TensorRT (INT8) | ✅ | 60.0 | 0.6170 | 17.32 |
| TF SavedModel | ✅ | 533.3 | 0.6593 | 1170.50 |
| TF GraphDef | ✅ | 212.9 | 0.6593 | 1217.87 |
| TF Lite | ✅ | 213.3 | 0.6593 | 7247.11 |
| MNN | ✅ | 212.8 | 0.6591 | 820.90 |
| NCNN | ✅ | 212.8 | 0.6666 | 534.30 |
Benchmarked with Ultralytics 8.3.157
Benchmarked with Ultralytics 8.4.33
!!! note

View file

@ -243,7 +243,7 @@ Using YOLO, it is possible to extract and combine information from both RGB and
!!! warning "RGB-D Cameras"
When working with depth images, it is essential to ensure that the RGB and depth images are correctly aligned. RGB-D cameras, such as the [Intel RealSense](https://realsenseai.com/) series, provide synchronized RGB and depth images, making it easier to combine information from both sources. If using separate RGB and depth cameras, it is crucial to calibrate them to ensure accurate alignment.
When working with depth images, it is essential to ensure that the RGB and depth images are correctly aligned. RGB-D cameras, such as the [Intel RealSense](https://www.realsenseai.com/) series, provide synchronized RGB and depth images, making it easier to combine information from both sources. If using separate RGB and depth cameras, it is crucial to calibrate them to ensure accurate alignment.
#### Depth Step-by-Step Usage

View file

@ -70,15 +70,10 @@ pip install -U ultralytics sahi
### Import Modules and Download Resources
Here's how to import the necessary modules and download a YOLO26 model and some test images:
Here's how to download some test images:
```python
from sahi.utils.file import download_from_url
from sahi.utils.ultralytics import download_yolo26n_model
# Download YOLO26 model
model_path = "models/yolo26n.pt"
download_yolo26n_model(model_path)
# Download test images
download_from_url(
@ -102,7 +97,7 @@ from sahi import AutoDetectionModel
detection_model = AutoDetectionModel.from_pretrained(
model_type="ultralytics",
model_path=model_path,
model_path="yolo26n.pt",
confidence_threshold=0.3,
device="cpu", # or 'cuda:0'
)
@ -110,17 +105,14 @@ detection_model = AutoDetectionModel.from_pretrained(
### Perform Standard Prediction
Perform standard inference using an image path or a numpy image.
Perform standard inference using an image path.
```python
from sahi.predict import get_prediction
from sahi.utils.cv import read_image
# With an image path
result = get_prediction("demo_data/small-vehicles1.jpeg", detection_model)
# With a numpy image
result_with_np_image = get_prediction(read_image("demo_data/small-vehicles1.jpeg"), detection_model)
result.export_visuals(export_dir="demo_data/", hide_conf=True)
```
### Visualize Results
@ -128,10 +120,13 @@ result_with_np_image = get_prediction(read_image("demo_data/small-vehicles1.jpeg
Export and visualize the predicted bounding boxes and masks:
```python
from IPython.display import Image
from PIL import Image
result.export_visuals(export_dir="demo_data/")
Image("demo_data/prediction_visual.png")
# Open the predicted image
processed_image = Image.open("demo_data/prediction_visual.png")
# Display the predicted image
processed_image.show()
```
## Sliced Inference with YOLO26
@ -139,6 +134,7 @@ Image("demo_data/prediction_visual.png")
Perform sliced inference by specifying the slice dimensions and overlap ratios:
```python
from PIL import Image
from sahi.predict import get_sliced_prediction
result = get_sliced_prediction(
@ -149,6 +145,15 @@ result = get_sliced_prediction(
overlap_height_ratio=0.2,
overlap_width_ratio=0.2,
)
# Export results
result.export_visuals(export_dir="demo_data/", hide_conf=True)
# Open the predicted image
processed_image = Image.open("demo_data/prediction_visual.png")
# Display the predicted image
processed_image.show()
```
## Handling Prediction Results
@ -175,7 +180,7 @@ from sahi.predict import predict
predict(
model_type="ultralytics",
model_path="path/to/yolo26n.pt",
model_path="yolo26n.pt",
model_device="cpu", # or 'cuda:0'
model_confidence_threshold=0.4,
source="path/to/dir",
@ -219,21 +224,20 @@ Integrating Ultralytics YOLO26 with SAHI (Slicing Aided Hyper Inference) for sli
pip install -U ultralytics sahi
```
Then, download a YOLO26 model and test images:
Then, download test images:
```python
from sahi.utils.file import download_from_url
from sahi.utils.ultralytics import download_yolo26n_model
# Download YOLO26 model
model_path = "models/yolo26n.pt"
download_yolo26n_model(model_path)
# Download test images
download_from_url(
"https://raw.githubusercontent.com/obss/sahi/main/demo/demo_data/small-vehicles1.jpeg",
"demo_data/small-vehicles1.jpeg",
)
download_from_url(
"https://raw.githubusercontent.com/obss/sahi/main/demo/demo_data/terrain2.png",
"demo_data/terrain2.png",
)
```
For more detailed instructions, refer to our [Sliced Inference guide](#sliced-inference-with-yolo26).
@ -253,10 +257,13 @@ Learn more about the [benefits of sliced inference](#benefits-of-sliced-inferenc
Yes, you can visualize prediction results when using YOLO26 with SAHI. Here's how you can export and visualize the results:
```python
from IPython.display import Image
from PIL import Image
result.export_visuals(export_dir="demo_data/")
Image("demo_data/prediction_visual.png")
result.export_visuals(export_dir="demo_data/", hide_conf=True)
processed_image = Image.open("demo_data/prediction_visual.png")
processed_image.show()
```
This command will save the visualized predictions to the specified directory, and you can then load the image to view it in your notebook or application. For a detailed guide, check out the [Standard Inference section](#visualize-results).

View file

@ -27,7 +27,7 @@ Before we start, you will need to create a Google Cloud Platform (GCP) project.
## Prerequisites
1. Install [Docker](https://docs.docker.com/engine/install/) on your machine.
2. Install the [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) and [authenticate for using the gcloud CLI](https://cloud.google.com/docs/authentication/gcloud).
2. Install the [Google Cloud SDK](https://docs.cloud.google.com/sdk/docs/install-sdk) and [authenticate for using the gcloud CLI](https://docs.cloud.google.com/docs/authentication/gcloud).
3. It is highly recommended that you go through the [Docker Quickstart Guide for Ultralytics](https://docs.ultralytics.com/guides/docker-quickstart/), because you will need to extend one of the official Ultralytics Docker images while following this guide.
## 1. Create an inference backend with FastAPI
@ -507,7 +507,7 @@ docker push YOUR_REGION-docker.pkg.dev/YOUR_PROJECT_ID/YOUR_REPOSITORY_NAME/IMAG
Wait for the process to complete. You should now see the image in your Artifact Registry repository.
For more specific instructions on how to work with images in Artifact Registry, see the Artifact Registry documentation: [Push and pull images](https://cloud.google.com/artifact-registry/docs/docker/pushing-and-pulling).
For more specific instructions on how to work with images in Artifact Registry, see the Artifact Registry documentation: [Push and pull images](https://docs.cloud.google.com/artifact-registry/docs/docker/pushing-and-pulling).
## 4. Import your model in Vertex AI

View file

@ -42,7 +42,7 @@ Before we can merge your pull request, you must sign our [Contributor License Ag
After submitting your pull request, the CLA bot will guide you through the signing process. To sign the CLA, simply add a comment in your PR stating:
```text
```
I have read the CLA Document and I sign the CLA
```
@ -207,11 +207,11 @@ We highly value bug reports as they help us improve the quality and reliability
Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.ultralytics.com/legal/agpl-3-0-software-license) for its repositories. This license promotes [openness](https://en.wikipedia.org/wiki/Openness), [transparency](https://www.ultralytics.com/glossary/transparency-in-ai), and [collaborative improvement](https://en.wikipedia.org/wiki/Collaborative_software) in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation.
We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3) to contribute effectively and ethically to the Ultralytics open-source community.
We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) to contribute effectively and ethically to the Ultralytics open-source community.
## 🌍 Open-Sourcing Your YOLO Project Under AGPL-3.0
Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-v3) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open.
Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open.
### Why AGPL-3.0 Compliance Matters
@ -230,7 +230,7 @@ Complying means making the **complete corresponding source code** of your projec
- **Use Ultralytics Template:** Start with the [Ultralytics template repository](https://github.com/ultralytics/template) for a clean, modular setup integrating YOLO.
2. **License Your Project:**
- Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3).
- Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0).
- Add a notice at the top of each source file indicating the license.
3. **Publish Your Source Code:**
@ -295,7 +295,7 @@ Contributing to Ultralytics YOLO open-source repositories improves the software,
To sign the Contributor License Agreement (CLA), follow the instructions provided by the CLA bot after submitting your pull request. This process ensures that your contributions are properly licensed under the AGPL-3.0 license, maintaining the legal integrity of the open-source project. Add a comment in your pull request stating:
```text
```
I have read the CLA Document and I sign the CLA
```

View file

@ -9,9 +9,9 @@ At [Ultralytics](https://www.ultralytics.com/), the security of our users' data
## Snyk Scanning
We utilize [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct comprehensive security scans on Ultralytics repositories. Snyk's robust scanning capabilities extend beyond dependency checks; it also examines our code and Dockerfiles for various vulnerabilities. By identifying and addressing these issues proactively, we ensure a higher level of security and reliability for our users.
We utilize [Snyk](https://security.snyk.io/package/pip/ultralytics) to conduct comprehensive security scans on Ultralytics repositories. Snyk's robust scanning capabilities extend beyond dependency checks; it also examines our code and Dockerfiles for various vulnerabilities. By identifying and addressing these issues proactively, we ensure a higher level of security and reliability for our users.
[![ultralytics](https://snyk.io/advisor/python/ultralytics/badge.svg)](https://snyk.io/advisor/python/ultralytics)
[![ultralytics](https://img.shields.io/badge/Snyk_security-monitored-8A2BE2)](https://security.snyk.io/package/pip/ultralytics)
## GitHub CodeQL Scanning
@ -51,7 +51,7 @@ These tools ensure proactive identification and resolution of security issues, e
### How does Ultralytics use Snyk for security scanning?
Ultralytics utilizes [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct thorough security scans on its repositories. Snyk extends beyond basic dependency checks, examining the code and Dockerfiles for various vulnerabilities. By proactively identifying and resolving potential security issues, Snyk helps ensure that Ultralytics' open-source projects remain secure and reliable.
Ultralytics utilizes [Snyk](https://security.snyk.io/package/pip/ultralytics) to conduct thorough security scans on its repositories. Snyk extends beyond basic dependency checks, examining the code and Dockerfiles for various vulnerabilities. By proactively identifying and resolving potential security issues, Snyk helps ensure that Ultralytics' open-source projects remain secure and reliable.
To see the Snyk badge and learn more about its deployment, check the [Snyk Scanning section](#snyk-scanning).

View file

@ -164,7 +164,7 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde
Ultralytics offers two licensing options to accommodate diverse use cases:
- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details.
- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details.
- **Enterprise License**: Designed for commercial use, this license permits seamless integration of Ultralytics software and AI models into commercial goods and services, bypassing the open-source requirements of AGPL-3.0. If your scenario involves embedding our solutions into a commercial offering, reach out through [Ultralytics Licensing](https://www.ultralytics.com/license).
Our licensing strategy is designed to ensure that any improvements to our open-source projects are returned to the community. We believe in open source, and our mission is to ensure that our contributions can be used and expanded in ways that benefit everyone.

View file

@ -6,7 +6,7 @@ keywords: Axelera AI, Metis AIPU, Voyager SDK, Edge AI, YOLOv8, YOLO11, YOLO26,
# Axelera AI Export and Deployment
Ultralytics partners with [Axelera AI](https://www.axelera.ai/) to enable high-performance, energy-efficient inference on [Edge AI](https://www.ultralytics.com/glossary/edge-ai) devices. Export and deploy **Ultralytics YOLO models** directly to the **Metis® AIPU** using the **Voyager SDK**.
Ultralytics partners with [Axelera AI](https://axelera.ai/) to enable high-performance, energy-efficient inference on [Edge AI](https://www.ultralytics.com/glossary/edge-ai) devices. Export and deploy **Ultralytics YOLO models** directly to the **Metis® AIPU** using the **Voyager SDK**.
![Axelera AI edge deployment ecosystem for YOLO](https://github.com/user-attachments/assets/c97a0297-390d-47df-bb13-ff1aa499f34a)
@ -148,20 +148,21 @@ Export your trained YOLO models using the standard Ultralytics export command.
### Export Arguments
| Argument | Type | Default | Description |
| :--------- | :--------------- | :--------------- | :------------------------------------------------------------------------------------------- |
| `format` | `str` | `'axelera'` | Target format for Axelera Metis AIPU hardware |
| `imgsz` | `int` or `tuple` | `640` | Image size for model input |
| `int8` | `bool` | `True` | Enable [INT8 quantization](https://www.ultralytics.com/glossary/model-quantization) for AIPU |
| `data` | `str` | `'coco128.yaml'` | [Dataset](https://docs.ultralytics.com/datasets/) config for quantization calibration |
| `fraction` | `float` | `1.0` | Fraction of dataset for calibration (100-400 images recommended) |
| `device` | `str` | `None` | Export device: GPU (`device=0`) or CPU (`device=cpu`) |
| Argument | Type | Default | Description |
| :--------- | :--------------- | :--------------- | :-------------------------------------------------------------------------------------------------------------------------------------- |
| `format` | `str` | `'axelera'` | Target format for Axelera Metis AIPU hardware. |
| `imgsz` | `int` or `tuple` | `640` | Image size for model input. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |
| `int8` | `bool` | `True` | Enable [INT8 quantization](https://www.ultralytics.com/glossary/model-quantization) for AIPU. |
| `data` | `str` | `'coco128.yaml'` | [Dataset](https://docs.ultralytics.com/datasets/) config for quantization calibration. |
| `fraction` | `float` | `1.0` | Fraction of dataset for calibration (100-400 images recommended). |
| `device` | `str` | `None` | Export device: GPU (`device=0`) or CPU (`device=cpu`). |
For all export options, see the [Export Mode documentation](https://docs.ultralytics.com/modes/export/).
### Output Structure
```text
```
yolo26n_axelera_model/
├── yolo26n.axm # Axelera model file
└── metadata.yaml # Model metadata (classes, image size, etc.)

View file

@ -117,15 +117,16 @@ Before diving into the usage instructions, be sure to check out the range of [YO
### Export Arguments
| Argument | Type | Default | Description |
| -------- | ---------------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `format` | `str` | `'coreml'` | Target format for the exported model, defining compatibility with various deployment environments. |
| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. |
| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. |
| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal [accuracy](https://www.ultralytics.com/glossary/accuracy) loss, primarily for edge devices. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS), essential for accurate and efficient detection post-processing. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |
| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`). |
| Argument | Type | Default | Description |
| --------- | ---------------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `format` | `str` | `'coreml'` | Target format for the exported model, defining compatibility with various deployment environments. |
| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. |
| `dynamic` | `bool` | `False` | Allows dynamic input sizes, enhancing flexibility in handling varying image dimensions. |
| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. |
| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal [accuracy](https://www.ultralytics.com/glossary/accuracy) loss, primarily for edge devices. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS), essential for accurate and efficient detection post-processing. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |
| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`). |
!!! tip

View file

@ -80,8 +80,10 @@ Exporting YOLO26 models to ExecuTorch is straightforward:
# Export the model to ExecuTorch format
model.export(format="executorch") # creates 'yolo26n_executorch_model' directory
# Load the exported ExecuTorch model
executorch_model = YOLO("yolo26n_executorch_model")
# Run inference on a single image
results = executorch_model.predict("https://ultralytics.com/images/bus.jpg")
```
@ -101,16 +103,18 @@ Exporting YOLO26 models to ExecuTorch is straightforward:
When exporting to ExecuTorch format, you can specify the following arguments:
| Argument | Type | Default | Description |
| -------- | --------------- | ------- | ------------------------------------------ |
| `imgsz` | `int` or `list` | `640` | Image size for model input (height, width) |
| `device` | `str` | `'cpu'` | Device to use for export (`'cpu'`) |
| Argument | Type | Default | Description |
| -------- | ---------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
| `format` | `str` | `'executorch'` | Target format for the exported model, defining compatibility with various deployment environments. |
| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |
| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`). |
### Output Structure
The ExecuTorch export creates a directory containing the model and metadata:
```text
```
yolo26n_executorch_model/
├── yolo26n.pte # ExecuTorch model file
└── metadata.yaml # Model metadata (classes, image size, etc.)

View file

@ -103,7 +103,7 @@ If you'd like to dive deeper into Google Colab, here are a few resources to guid
- **[Image Segmentation with Ultralytics YOLO26 on Google Colab](https://www.ultralytics.com/blog/image-segmentation-with-ultralytics-yolo11-on-google-colab)**: Explore how to perform image segmentation tasks using YOLO26 in the Google Colab environment, with practical examples using datasets like the Roboflow Carparts Segmentation Dataset.
- **[Curated Notebooks](https://colab.google/notebooks/)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas.
- **[Curated Notebooks](https://developers.google.com/colab)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas.
- **[Google Colab's Medium Page](https://medium.com/google-colab)**: You can find tutorials, updates, and community contributions here that can help you better understand and utilize this tool.
@ -130,7 +130,7 @@ Google Colab offers several advantages for training YOLO26 models:
- **Integration with Google Drive:** Easily store and access datasets and models.
- **Collaboration:** Share notebooks with others and collaborate in real-time.
For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://colab.google/notebooks/).
For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://developers.google.com/colab).
### How can I handle Google Colab session timeouts during YOLO26 training?

View file

@ -87,7 +87,7 @@ Then, you can import the needed packages.
For this tutorial, we will use a [marine litter dataset](https://www.kaggle.com/datasets/atiqishrak/trash-dataset-icra19) available on Kaggle. With this dataset, we will custom-train a YOLO26 model to detect and classify litter and biological objects in underwater images.
We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials".
We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials".
Copy and paste your Kaggle username and API key into the following code. Then run the code to install the API and load the dataset into Watsonx.

View file

@ -99,7 +99,7 @@ For more details about the export process, visit the [Ultralytics documentation
1. **Performance**: OpenVINO delivers high-performance inference by utilizing the power of Intel CPUs, integrated and discrete GPUs, and FPGAs.
2. **Support for Heterogeneous Execution**: OpenVINO provides an API to write once and deploy on any supported Intel hardware (CPU, GPU, FPGA, VPU, etc.).
3. **Model Optimizer**: OpenVINO provides a Model Optimizer that imports, converts, and optimizes models from popular [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) frameworks such as PyTorch, [TensorFlow](https://www.ultralytics.com/glossary/tensorflow), TensorFlow Lite, Keras, ONNX, PaddlePaddle, and Caffe.
4. **Ease of Use**: The toolkit comes with more than [80 tutorial notebooks](https://github.com/openvinotoolkit/openvino_notebooks) (including [YOLOv8 optimization](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov8-optimization)) teaching different aspects of the toolkit.
4. **Ease of Use**: The toolkit comes with more than [80 tutorial notebooks](https://github.com/openvinotoolkit/openvino_notebooks) (including [YOLO26 optimization](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov26-optimization)) teaching different aspects of the toolkit.
## OpenVINO Export Structure
@ -136,7 +136,7 @@ This approach is ideal for fast prototyping or deployment when you don't need fu
### Inference with OpenVINO Runtime
The OpenVINO Runtime provides a unified API for inference across all supported Intel hardware. It also provides advanced capabilities like load balancing across Intel hardware and asynchronous execution. For more information on running inference, refer to the [YOLO26 notebooks](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov11-optimization).
The OpenVINO Runtime provides a unified API for inference across all supported Intel hardware. It also provides advanced capabilities like load balancing across Intel hardware and asynchronous execution. For more information on running inference, refer to the [YOLO26 notebooks](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov26-optimization).
Remember, you'll need the XML and BIN files as well as any application-specific settings like input size, scale factor for normalization, etc., to correctly set up and use the model with the Runtime.

View file

@ -185,6 +185,7 @@ Export an Ultralytics YOLO11 model to IMX500 format and run inference with the e
| `int8` | `bool` | `True` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal [accuracy](https://www.ultralytics.com/glossary/accuracy) loss, primarily for edge devices. |
| `data` | `str` | `'coco8.yaml'` | Path to the [dataset](https://docs.ultralytics.com/datasets/) configuration file (default: `coco8.yaml`), essential for quantization. |
| `fraction` | `float` | `1.0` | Specifies the fraction of the dataset to use for INT8 quantization calibration. Allows for calibrating on a subset of the full dataset, useful for experiments or when resources are limited. If not specified with INT8 enabled, the full dataset will be used. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the exported model. When `True`, `conf`, `iou`, and `agnostic_nms` are also accepted. |
| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`). |
!!! tip
@ -204,9 +205,9 @@ The export process will create an ONNX model for quantization validation, along
├── dnnParams.xml
├── labels.txt
├── packerOut.zip
├── yolo11n_imx.onnx
├── yolo11n_imx_MemoryReport.json
└── yolo11n_imx.pbtxt
├── model_imx.onnx
├── model_imx_MemoryReport.json
└── model_imx.pbtxt
```
=== "Pose Estimation"
@ -216,9 +217,9 @@ The export process will create an ONNX model for quantization validation, along
├── dnnParams.xml
├── labels.txt
├── packerOut.zip
├── yolo11n-pose_imx.onnx
├── yolo11n-pose_imx_MemoryReport.json
└── yolo11n-pose_imx.pbtxt
├── model_imx.onnx
├── model_imx_MemoryReport.json
└── model_imx.pbtxt
```
=== "Classification"
@ -228,9 +229,9 @@ The export process will create an ONNX model for quantization validation, along
├── dnnParams.xml
├── labels.txt
├── packerOut.zip
├── yolo11n-cls_imx.onnx
├── yolo11n-cls_imx_MemoryReport.json
└── yolo11n-cls_imx.pbtxt
├── model_imx.onnx
├── model_imx_MemoryReport.json
└── model_imx.pbtxt
```
=== "Instance Segmentation"
@ -240,9 +241,9 @@ The export process will create an ONNX model for quantization validation, along
├── dnnParams.xml
├── labels.txt
├── packerOut.zip
├── yolo11n-seg_imx.onnx
├── yolo11n-seg_imx_MemoryReport.json
└── yolo11n-seg_imx.pbtxt
├── model_imx.onnx
├── model_imx_MemoryReport.json
└── model_imx.pbtxt
```
## Using IMX500 Export in Deployment

View file

@ -112,6 +112,7 @@ All [Ultralytics YOLO26 models](../models/index.md) are designed to support expo
| `format` | `str` | `'torchscript'` | Target format for the exported model, defining compatibility with various deployment environments. |
| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. |
| `dynamic` | `bool` | `False` | Allows dynamic input sizes, enhancing flexibility in handling varying image dimensions. |
| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. |
| `optimize` | `bool` | `False` | Applies optimization for mobile devices, potentially reducing model size and improving performance. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS), essential for accurate and efficient detection post-processing. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |

View file

@ -214,7 +214,7 @@ These features help in tracking experiments, optimizing models, and collaboratin
After running your training script with W&B integration:
1. A link to your W&B dashboard will be provided in the console output.
2. Click on the link or go to [wandb.ai](https://wandb.ai/) and log in to your account.
2. Click on the link or go to [wandb.ai](https://wandb.ai/site) and log in to your account.
3. Navigate to your project to view detailed metrics, visualizations, and model performance data.
The dashboard offers insights into your model's training process, allowing you to analyze and improve your YOLO26 models effectively.

View file

@ -1,7 +1,7 @@
---
comments: true
description: Discover FastSAM, a real-time CNN-based solution for segmenting any object in an image. Efficient, competitive, and ideal for various vision tasks.
keywords: FastSAM, Fast Segment Anything Model, Ultralytics, real-time segmentation, CNN, YOLOv8-seg, object segmentation, image processing, computer vision
keywords: FastSAM, Fast Segment Anything Model, Ultralytics, real-time segmentation, instance segmentation, FastSAM vs YOLO, FastSAM vs SAM, YOLOv8-seg, YOLO26-seg, zero-shot segmentation, object segmentation, Meta
---
# Fast Segment Anything Model (FastSAM)
@ -54,21 +54,22 @@ This table presents the available models with their specific pretrained weights,
## FastSAM Comparison vs YOLO
Here we compare Meta's SAM 2 models, including the smallest SAM2-t variant, with Ultralytics smallest segmentation model, [YOLO11n-seg](../tasks/segment.md):
Here we compare Meta's SAM 2 models, including the smallest SAM2-t variant, with Ultralytics segmentation models, including [YOLO26n-seg](yolo26.md):
| Model | Size<br><sup>(MB)</sup> | Parameters<br><sup>(M)</sup> | Speed (CPU)<br><sup>(ms/im)</sup> |
| ---------------------------------------------------------------------------------------------- | ----------------------- | ---------------------------- | --------------------------------- |
| [Meta SAM-b](sam.md) | 375 | 93.7 | 49401 |
| [Meta SAM2-b](sam-2.md) | 162 | 80.8 | 31901 |
| [Meta SAM2-t](sam-2.md) | 78.1 | 38.9 | 25997 |
| [MobileSAM](mobile-sam.md) | 40.7 | 10.1 | 25381 |
| [FastSAM-s](fast-sam.md) with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.7 | 11.8 | 55.9 |
| Ultralytics [YOLOv8n-seg](yolov8.md) | **6.7** (11.7x smaller) | **3.4** (11.4x less) | **24.5** (1061x faster) |
| Ultralytics [YOLO11n-seg](yolo11.md) | **5.9** (13.2x smaller) | **2.9** (13.4x less) | **30.1** (864x faster) |
| [Meta SAM-b](sam.md) | 375 | 93.7 | 41703 |
| [Meta SAM2-b](sam-2.md) | 162 | 80.8 | 28867 |
| [Meta SAM2-t](sam-2.md) | 78.1 | 38.9 | 23430 |
| [MobileSAM](mobile-sam.md) | 40.7 | 10.1 | 23802 |
| [FastSAM-s](fast-sam.md) with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.9 | 11.8 | 58.0 |
| Ultralytics [YOLOv8n-seg](yolov8.md) | **7.1** (11.0x smaller) | **3.4** (11.4x less) | **24.8** (945x faster) |
| Ultralytics [YOLO11n-seg](yolo11.md) | **6.2** (12.6x smaller) | **2.9** (13.4x less) | **24.3** (964x faster) |
| Ultralytics [YOLO26n-seg](yolo26.md) | **6.7** (11.7x smaller) | **2.7** (14.4x less) | **25.2** (930x faster) |
This comparison demonstrates the substantial differences in model sizes and speeds between SAM variants and YOLO segmentation models. While SAM provides unique automatic segmentation capabilities, YOLO models, particularly YOLOv8n-seg and YOLO11n-seg, are significantly smaller, faster, and more computationally efficient.
This comparison demonstrates the substantial differences in model sizes and speeds between SAM variants and YOLO segmentation models. While SAM provides unique automatic segmentation capabilities, YOLO models, particularly YOLOv8n-seg, YOLO11n-seg, and YOLO26n-seg, are significantly smaller, faster, and more computationally efficient.
Tests run on a 2025 Apple M4 Pro with 24GB of RAM using `torch==2.6.0` and `ultralytics==8.3.90`. To reproduce this test:
SAM speeds measured with PyTorch, YOLO speeds measured with ONNX Runtime. Tests run on a 2025 Apple MacBook Air (M4) with 16GB of RAM using `torch==2.10.0`, `ultralytics==8.4.31`, and `onnxruntime==1.24.4`. To reproduce this test:
!!! example
@ -88,10 +89,12 @@ Tests run on a 2025 Apple M4 Pro with 24GB of RAM using `torch==2.6.0` and `ultr
model.info()
model(ASSETS)
# Profile YOLO models
for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt"]:
# Profile YOLO models (ONNX)
for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt", "yolo26n-seg.pt"]:
model = YOLO(file_name)
model.info()
onnx_path = model.export(format="onnx", dynamic=True)
model = YOLO(onnx_path)
model(ASSETS)
```

View file

@ -1,7 +1,7 @@
---
comments: true
description: Discover MobileSAM, a lightweight and fast image segmentation model for mobile and edge applications. Compare its performance with SAM and YOLO models.
keywords: MobileSAM, image segmentation, lightweight model, fast segmentation, mobile applications, SAM, Tiny-ViT, YOLO, Ultralytics
keywords: MobileSAM, image segmentation, lightweight segmentation, mobile segmentation, MobileSAM vs SAM, MobileSAM vs YOLO, Tiny-ViT, YOLO26-seg, edge AI segmentation, Ultralytics, Meta
---
![MobileSAM lightweight image segmentation model logo](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/logo2.png)
@ -35,21 +35,22 @@ The table below outlines the available MobileSAM model, its pretrained weights,
## MobileSAM Comparison vs YOLO
The following comparison highlights the differences between Meta's SAM variants, MobileSAM, and Ultralytics' smallest segmentation models, including [YOLO11n-seg](../models/yolo11.md):
The following comparison highlights the differences between Meta's SAM variants, MobileSAM, and Ultralytics segmentation models, including [YOLO26n-seg](yolo26.md):
| Model | Size<br><sup>(MB)</sup> | Parameters<br><sup>(M)</sup> | Speed (CPU)<br><sup>(ms/im)</sup> |
| ------------------------------------------------------------------------------- | ----------------------- | ---------------------------- | --------------------------------- |
| Meta SAM-b | 375 | 93.7 | 49401 |
| Meta SAM2-b | 162 | 80.8 | 31901 |
| Meta SAM2-t | 78.1 | 38.9 | 25997 |
| MobileSAM | 40.7 | 10.1 | 25381 |
| FastSAM-s with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.7 | 11.8 | 55.9 |
| Ultralytics YOLOv8n-seg | **6.7** (11.7x smaller) | **3.4** (11.4x less) | **24.5** (1061x faster) |
| Ultralytics YOLO11n-seg | **5.9** (13.2x smaller) | **2.9** (13.4x less) | **30.1** (864x faster) |
| Meta SAM-b | 375 | 93.7 | 41703 |
| Meta SAM2-b | 162 | 80.8 | 28867 |
| Meta SAM2-t | 78.1 | 38.9 | 23430 |
| MobileSAM | 40.7 | 10.1 | 23802 |
| FastSAM-s with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.9 | 11.8 | 58.0 |
| Ultralytics YOLOv8n-seg | **7.1** (11.0x smaller) | **3.4** (11.4x less) | **24.8** (945x faster) |
| Ultralytics YOLO11n-seg | **6.2** (12.6x smaller) | **2.9** (13.4x less) | **24.3** (964x faster) |
| Ultralytics YOLO26n-seg | **6.7** (11.7x smaller) | **2.7** (14.4x less) | **25.2** (930x faster) |
This comparison demonstrates the substantial differences in model size and speed between SAM variants and YOLO segmentation models. While SAM models offer unique automatic segmentation capabilities, YOLO models—especially YOLOv8n-seg and YOLO11n-seg—are significantly smaller, faster, and more computationally efficient.
This comparison demonstrates the substantial differences in model size and speed between SAM variants and YOLO segmentation models. While SAM models offer unique automatic segmentation capabilities, YOLO models—especially YOLOv8n-seg, YOLO11n-seg, and YOLO26n-seg—are significantly smaller, faster, and more computationally efficient.
Tests were conducted on a 2025 Apple M4 Pro with 24GB RAM using `torch==2.6.0` and `ultralytics==8.3.90`. To reproduce these results:
SAM speeds measured with PyTorch, YOLO speeds measured with ONNX Runtime. Tests run on a 2025 Apple MacBook Air (M4) with 16GB of RAM using `torch==2.10.0`, `ultralytics==8.4.31`, and `onnxruntime==1.24.4`. To reproduce these results:
!!! example
@ -69,10 +70,12 @@ Tests were conducted on a 2025 Apple M4 Pro with 24GB RAM using `torch==2.6.0` a
model.info()
model(ASSETS)
# Profile YOLO models
for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt"]:
# Profile YOLO models (ONNX)
for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt", "yolo26n-seg.pt"]:
model = YOLO(file_name)
model.info()
onnx_path = model.export(format="onnx", dynamic=True)
model = YOLO(onnx_path)
model(ASSETS)
```
@ -182,7 +185,7 @@ To automatically annotate your dataset with the Ultralytics framework, use the `
```python
from ultralytics.data.annotator import auto_annotate
auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="mobile_sam.pt")
auto_annotate(data="path/to/images", det_model="yolo26x.pt", sam_model="mobile_sam.pt")
```
{% include "macros/sam-auto-annotate.md" %}

View file

@ -48,13 +48,19 @@ YOLO12, released in early 2025, introduces an attention-centric architecture tha
YOLO12 supports a variety of computer vision tasks. The table below shows task support and the operational modes (Inference, Validation, Training, and Export) enabled for each:
| Model Type | Task | Inference | Validation | Training | Export |
| -------------------------------------------------------------------------------------------------------------- | -------------------------------------- | --------- | ---------- | -------- | ------ |
| [YOLO12](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12.yaml) | [Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-seg](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-seg.yaml) | [Segmentation](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-pose](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-pose.yaml) | [Pose](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-cls](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-cls.yaml) | [Classification](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-obb](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-obb.yaml) | [OBB](../tasks/obb.md) | ✅ | ✅ | ✅ | ✅ |
!!! warning "Pretrained weights availability"
Only detection weights (`yolo12n.pt`, `yolo12s.pt`, `yolo12m.pt`, `yolo12l.pt`, `yolo12x.pt`) are released on [ultralytics/assets](https://github.com/ultralytics/assets/releases). Segmentation, classification, pose, and OBB architectures are defined in [ultralytics/cfg/models/12/](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/12), so those variants support training from scratch from the `.yaml` config, but no pretrained `.pt` files are currently available for them. For pretrained segmentation, pose, classification, or OBB checkpoints, Ultralytics recommends [YOLO11](yolo11.md) or [YOLO26](yolo26.md).
| Model Type | Task | Pretrained Weights | Inference | Validation | Training | Export |
| -------------------------------------------------------------------------------------------------------------- | -------------------------------------- | ------------------ | --------- | ---------- | -------- | ------ |
| [YOLO12](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12.yaml) | [Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-seg](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-seg.yaml) | [Segmentation](../tasks/segment.md) | ❌ | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-pose](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-pose.yaml) | [Pose](../tasks/pose.md) | ❌ | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-cls](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-cls.yaml) | [Classification](../tasks/classify.md) | ❌ | ✅ | ✅ | ✅ | ✅ |
| [YOLO12-obb](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-obb.yaml) | [OBB](../tasks/obb.md) | ❌ | ✅ | ✅ | ✅ | ✅ |
All YOLO12 architectures support every mode once a trained checkpoint is available. The `Pretrained Weights` column indicates only whether Ultralytics publishes an official pretrained `.pt` on [ultralytics/assets](https://github.com/ultralytics/assets/releases): for segmentation, pose, classification, and OBB, you must train your own checkpoint from the corresponding `.yaml` before running inference, validation, or export.
## Performance Metrics
@ -173,7 +179,7 @@ YOLO12 incorporates several key innovations to balance speed and accuracy. The A
### What [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks does YOLO12 support?
YOLO12 is a versatile model that supports a wide range of core computer vision tasks. It excels in object [detection](../tasks/detect.md), instance [segmentation](../tasks/segment.md), image [classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and oriented object detection (OBB) ([see details](../tasks/obb.md)). This comprehensive task support makes YOLO12 a powerful tool for diverse applications, from [robotics](https://www.ultralytics.com/glossary/robotics) and autonomous driving to medical imaging and industrial inspection. Each of these tasks can be performed in Inference, Validation, Training, and Export modes.
YOLO12 is a versatile model that supports a wide range of core computer vision tasks. It excels in object [detection](../tasks/detect.md), instance [segmentation](../tasks/segment.md), image [classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and oriented object detection (OBB) ([see details](../tasks/obb.md)). This comprehensive task support makes YOLO12 a powerful tool for diverse applications, from [robotics](https://www.ultralytics.com/glossary/robotics) and autonomous driving to medical imaging and industrial inspection. Note that pretrained `.pt` weights are currently published for detection only; the segmentation, pose, classification, and OBB architectures are provided as `.yaml` configs for training from scratch.
### How does YOLO12 compare to other YOLO models and competitors like RT-DETR?

View file

@ -50,7 +50,7 @@ We regret any inconvenience this may cause and will strive to update this docume
YOLOv4 is a powerful and efficient object detection model that strikes a balance between speed and accuracy. Its use of unique features and bag of freebies techniques during training allows it to perform excellently in real-time object detection tasks. YOLOv4 can be trained and used by anyone with a conventional GPU, making it accessible and practical for a wide range of applications including [surveillance systems](https://www.ultralytics.com/blog/shattering-the-surveillance-status-quo-with-vision-ai), [autonomous vehicles](https://www.ultralytics.com/solutions/ai-in-automotive), and [industrial automation](https://www.ultralytics.com/blog/improving-manufacturing-with-computer-vision).
For those looking to implement object detection in their projects, YOLOv4 remains a strong contender, especially when real-time performance is a priority. While Ultralytics currently focuses on supporting newer YOLO versions like [YOLOv8](https://docs.ultralytics.com/models/yolov8/) and [YOLO11](https://docs.ultralytics.com/models/yolo11/), the architectural innovations introduced in YOLOv4 have influenced the development of these later models.
For those looking to implement object detection in their projects, YOLOv4 remains a strong contender, especially when real-time performance is a priority. While Ultralytics currently focuses on supporting newer YOLO versions like [YOLO11](yolo11.md) and [YOLO26](yolo26.md), the architectural innovations introduced in YOLOv4 have influenced the development of these later models.
## Citations and Acknowledgments

View file

@ -367,7 +367,7 @@ Below are code examples for using each source type:
Example `.streams` text file:
```text
```
rtsp://example.com/media1.mp4
rtsp://example.com/media2.mp4
rtmp://example2.com/live

View file

@ -415,6 +415,49 @@ To resume training from an interrupted session, set the `resume` argument to `Tr
Check the section on [Resuming Interrupted Trainings](#resuming-interrupted-trainings) for more information.
### How do I train a model on an imbalanced dataset?
Class imbalance occurs when some classes have significantly fewer examples than others in your training data. This can cause the model to perform poorly on rare classes. Ultralytics YOLO supports class weighting through the `cls_pw` argument to address this issue.
The `cls_pw` argument controls class weighting power based on inverse class frequency:
- `cls_pw=0.0` (default): Disables class weighting
- `cls_pw=1.0`: Applies full inverse frequency weighting
- Values between `0.0` and `1.0`: Provide partial weighting for moderate imbalance
The class weights are computed as `(1.0 / class_counts) ^ cls_pw` and normalized so their mean equals 1.0.
!!! example "Training on Imbalanced Dataset"
=== "Python"
```python
from ultralytics import YOLO
# Load a pretrained model
model = YOLO("yolo26n.pt")
# Train with full class weighting for severely imbalanced data
results = model.train(data="custom.yaml", epochs=100, imgsz=640, cls_pw=1.0)
# Or use partial weighting (0.25) for moderate imbalance
results = model.train(data="custom.yaml", epochs=100, imgsz=640, cls_pw=0.25)
```
=== "CLI"
```bash
# Train with full inverse frequency weighting
yolo detect train data=custom.yaml model=yolo26n.pt epochs=100 imgsz=640 cls_pw=1.0
# Train with partial weighting for moderate imbalance
yolo detect train data=custom.yaml model=yolo26n.pt epochs=100 imgsz=640 cls_pw=0.25
```
!!! tip
Start with `cls_pw=0.25` for moderately imbalanced datasets and increase to `1.0` if the rare classes still underperform. You can check the computed class weights in the training logs to verify the weight distribution.
### Can I train YOLO26 models on Apple silicon chips?
Yes, Ultralytics YOLO26 supports training on Apple silicon chips utilizing the Metal Performance Shaders (MPS) framework. Specify 'mps' as your training device.

View file

@ -103,7 +103,7 @@ Use the search bar to find events by resource name or event description.
Filter by time period using the date range picker:
- Select a start and end date
- No default date filter (shows all events)
- The page defaults to the last 30 days
- Custom date ranges supported
![Ultralytics Platform Activity Page Date Range Picker Expanded](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/activity-page-date-range-picker-expanded.avif)
@ -123,7 +123,7 @@ Each event displays:
Some actions support undo directly from the Activity feed:
- **Settings changes**: Click **Undo** next to a settings update event to revert the change
- **Settings changes**: Click **Undo** next to a recent settings update event to revert the change
- Undo is available for a short time window after the action
## Pagination
@ -134,9 +134,13 @@ The Activity feed supports pagination:
- Navigate between pages using the pagination controls
- Page size is configurable via URL query parameter
## API Access
## API Routes
Access activity programmatically via the [REST API](../api/index.md#activity-api):
The Activity feed is powered by browser-authenticated routes — it is not exposed as a public API and cannot be accessed with an API key. The route shapes are listed below for reference; to view, mark, or archive activity, use the Activity feed in the platform UI.
!!! note "Browser Session Only"
The routes shown below require an active platform browser session. The `Authorization: Bearer YOUR_API_KEY` header in the examples will not authenticate these routes — they are documented only to describe how the in-app feed talks to the server.
=== "List Activity"

View file

@ -14,11 +14,10 @@ keywords: Ultralytics Platform, API keys, authentication, remote training, secur
Create a new API key:
1. Go to **Settings > Profile**
2. Scroll to the **API Keys** section
3. Click **Create Key**
4. Enter a name for the key (e.g., "Training Server")
5. Click **Create Key**
1. Go to **Settings > API Keys**
2. Click **Create Key**
3. Enter a name for the key (e.g., "Training Server")
4. Click **Create Key**
![Ultralytics Platform Settings Profile Tab Create Api Key Dialog](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/settings-profile-tab-create-api-key-dialog.avif)
@ -115,10 +114,10 @@ Enable metric streaming with your key.
!!! warning "Package Version Requirement"
Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform.
Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform.
```bash
pip install "ultralytics>=8.4.14"
pip install "ultralytics>=8.4.35"
```
```bash
@ -132,7 +131,7 @@ See [Cloud Training](../train/cloud-training.md#remote-training) for the complet
### View Keys
All keys are listed in `Settings > Profile` under the API Keys section:
All keys are listed on the `Settings > API Keys` tab:
Each key card shows the key name, the full decrypted key value (copyable), relative creation time, and a revoke button.
@ -208,7 +207,7 @@ Solutions:
1. Verify key is copied correctly (including the `ul_` prefix)
2. Check key hasn't been revoked
3. Confirm environment variable is set
4. Ensure you're using `ultralytics>=8.4.14`
4. Ensure you're using `ultralytics>=8.4.35`
### Permission Denied
@ -246,7 +245,7 @@ Keys don't expire automatically. They remain valid until revoked. Consider imple
### Can I see my key after creation?
Yes, full key values are visible in the key list on `Settings > Profile`. The Platform decrypts and displays your keys so you can copy them anytime.
Yes, full key values are visible in the key list on `Settings > API Keys`. The Platform decrypts and displays your keys so you can copy them anytime.
### Are keys region-specific?

View file

@ -84,7 +84,7 @@ For organizations with advanced needs:
- SLA guarantees (coming soon)
- Enterprise support
Contact [sales@ultralytics.com](mailto:sales@ultralytics.com) for Enterprise pricing.
See [Ultralytics Licensing](https://www.ultralytics.com/license) for Enterprise plan details.
## Credits

View file

@ -129,7 +129,7 @@ Yes, Ultralytics Platform implements:
No, data region is selected during signup and cannot be changed. To use a different region:
1. Export your data
2. Create a new account in desired region
2. Create a new account in the desired region
3. Re-upload your data
This ensures data residency compliance.

View file

@ -8,11 +8,11 @@ keywords: Ultralytics Platform, settings, profile, preferences, GDPR, data expor
[Ultralytics Platform](https://platform.ultralytics.com) settings allow you to configure your profile, social links, workspace preferences, and manage your data with GDPR-compliant export and deletion options.
Settings is organized into five tabs: `Profile`, `Plans`, `Billing`, `Teams`, and `Trash`.
Settings is organized into six tabs: `Profile`, `API Keys`, `Plans`, `Billing`, `Teams`, and `Trash`.
## Profile Tab
The `Profile` tab contains your profile information, social links, API keys, data region, and account management options.
The `Profile` tab contains your profile information, social links, data region, and account management options.
### Profile Information
@ -95,10 +95,6 @@ Manage email addresses linked to your account in the `Profile` tab:
Your primary email is used for notifications and account recovery. Only verified emails can be set as primary.
### API Keys
API keys are managed directly on the `Profile` tab. See [API Keys](api-keys.md) for full documentation.
### Data Region
View your data region on the `Profile` tab:
@ -221,6 +217,10 @@ Permanently delete your account:
- Server logs (90 days)
- Legal compliance records
## API Keys Tab
The `API Keys` tab lets you create and manage API keys for remote training and inference. See [API Keys](api-keys.md) for full documentation.
## Plans Tab
The `Plans` tab lets you compare available plans and upgrade or downgrade your subscription.
@ -237,7 +237,7 @@ From this tab you can:
- **Compare features** across Free, Pro, and Enterprise tiers
- **Upgrade to Pro** to unlock more storage, models, team collaboration, and priority GPU access
- **Contact Sales** for Enterprise needs including SSO/SAML and commercial licensing
- **Review Enterprise** capabilities including SSO/SAML, RBAC, and commercial licensing — see [Ultralytics Licensing](https://www.ultralytics.com/license)
See [Billing](billing.md) for detailed plan information, pricing, and upgrade instructions.

View file

@ -21,7 +21,7 @@ Teams allow multiple users to work together under a shared workspace:
!!! note "Plan Requirement"
Creating a team requires a [Pro or Enterprise plan](billing.md#plans). You can upgrade from Settings or when clicking **+ Create Team** in the workspace switcher.
Team workspaces require a [Pro or Enterprise plan](billing.md#plans). You can start team setup before upgrading, but the workspace must be on a Pro or Enterprise plan to use team features.
## Creating a Team
@ -87,7 +87,7 @@ Admins and Owners can invite new members to the team:
![Ultralytics Platform Teams Invite Member Dialog](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/settings-teams-invite-member-dialog.avif)
The invitee receives an email invitation with a link to accept and join the team. Invitations expire after 7 days. Once accepted, the team workspace appears in the invitee's workspace switcher.
The invitee receives an email invitation with a link to accept and join the team. Invitations expire after 7 days. Once accepted, the team workspace appears in the invitee's workspace switcher. If an invite is missed, resend or cancel it from the Teams tab and send a fresh invite.
!!! note "Admin Invites"
@ -101,17 +101,11 @@ Enterprise plans include additional capabilities for organizations with advanced
!!! warning "License Expiration"
If your Enterprise license expires, workspace access is blocked until renewed. Contact [sales@ultralytics.com](mailto:sales@ultralytics.com) to renew.
If your Enterprise license expires, workspace access is blocked until the license is renewed. See [Ultralytics Licensing](https://www.ultralytics.com/license) for details.
### Getting Started with Enterprise
Enterprise plans are provisioned by the Ultralytics team:
1. Contact [sales@ultralytics.com](mailto:sales@ultralytics.com)
2. Discuss your team size, credit needs, and compliance requirements
3. Receive a provisioning invite with your enterprise configuration
4. Accept the invite to become the team Owner
5. Invite your team members
Enterprise plans are provisioned by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/license) for plan details. Once your enterprise configuration is set up, you'll receive a provisioning invite to accept as the team Owner, after which you can invite your team members.
## FAQ
@ -129,4 +123,4 @@ All team members share a single credit balance. The Owner and Admins can top up
### How do I upgrade from Pro to Enterprise?
Contact [sales@ultralytics.com](mailto:sales@ultralytics.com) to discuss Enterprise pricing and provisioning. The Ultralytics team will handle the upgrade and configuration.
Enterprise pricing and provisioning are handled directly by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/license) for plan details.

View file

@ -87,6 +87,8 @@ Recover a deleted item:
The item returns to its original location with all data intact.
If the original slug is already taken, the platform restores the item with a unique available slug so you can access it immediately.
### Restore Behavior
| Resource | Restore Behavior |
@ -115,7 +117,7 @@ Permanently delete all items immediately:
!!! warning "Irreversible Action"
Emptying Trash permanently deletes all items immediately. This action cannot be undone and all data will be lost.
Emptying Trash permanently deletes all items immediately. This action cannot be undone and all data will be lost, including attached deployments, export jobs, and stored files tied to the trashed resources.
### Delete Single Item Permanently
@ -125,6 +127,8 @@ To permanently delete one item without waiting:
2. Click the **Delete** button
3. Confirm deletion
For projects, permanent deletion also removes related deployments and export files that belong to the deleted workspace resources.
## Storage and Trash
Items in Trash still count toward your storage quota:

View file

@ -52,11 +52,11 @@ graph LR
## Authentication
Most API requests require authentication via API key. Public endpoints (listing public datasets, projects, and models) support anonymous read access without a key.
Resource APIs such as datasets, projects, models, training, exports, and predictions use API-key authentication. Public endpoints (listing public datasets, projects, and models) support anonymous read access without a key. Account-oriented routes — including activity, settings, teams, billing, and GDPR flows — currently require an authenticated browser session and are not available via API key.
### Get API Key
1. Go to `Settings` > `Profile` (API Keys section)
1. Go to `Settings` > `API Keys`
2. Click `Create Key`
3. Copy the generated key
@ -66,7 +66,7 @@ See [API Keys](../account/api-keys.md) for detailed instructions.
Include your API key in all requests:
```
```http
Authorization: Bearer YOUR_API_KEY
```
@ -122,7 +122,7 @@ The API uses a two-layer rate limiting system to protect against abuse while kee
When throttled, the API returns `429` with retry metadata:
```
```http
Retry-After: 12
X-RateLimit-Reset: 2026-02-21T12:34:56.000Z
```
@ -145,7 +145,7 @@ Each category has an independent counter per API key. For example, making 20 pre
### Dedicated Endpoints (Unlimited)
[Dedicated endpoints](../deploy/endpoints.md) are **not subject to API key rate limits**. When you deploy a model to a dedicated endpoint, requests to that endpoint URL (e.g., `https://predict-abc123.run.app/predict`) go directly to your dedicated service with no rate limiting from the Platform. You're paying for the compute, so you get unlimited throughput up to your endpoint's scaling configuration.
[Dedicated endpoints](../deploy/endpoints.md) are **not subject to API key rate limits**. When you deploy a model to a dedicated endpoint, requests to that endpoint URL (e.g., `https://predict-abc123.run.app/predict`) go directly to your dedicated service with no rate limiting from the Platform. You're paying for the compute, so you get throughput from your dedicated service configuration rather than the shared API limits.
!!! tip "Handling Rate Limits"
@ -192,7 +192,7 @@ Create, browse, and manage labeled image datasets for training YOLO models. See
### List Datasets
```
```http
GET /api/datasets
```
@ -262,7 +262,7 @@ GET /api/datasets
### Get Dataset
```
```http
GET /api/datasets/{datasetId}
```
@ -270,7 +270,7 @@ Returns full dataset details including metadata, class names, and split counts.
### Create Dataset
```
```http
POST /api/datasets
```
@ -293,7 +293,7 @@ POST /api/datasets
### Update Dataset
```
```http
PATCH /api/datasets/{datasetId}
```
@ -309,7 +309,7 @@ PATCH /api/datasets/{datasetId}
### Delete Dataset
```
```http
DELETE /api/datasets/{datasetId}
```
@ -317,7 +317,7 @@ Soft-deletes the dataset (moved to [trash](../account/trash.md), recoverable for
### Clone Dataset
```
```http
POST /api/datasets/{datasetId}/clone
```
@ -336,7 +336,7 @@ Creates a copy of the dataset with all images and labels. Only public datasets c
### Export Dataset
```
```http
GET /api/datasets/{datasetId}/export
```
@ -359,7 +359,7 @@ Returns a JSON response with a signed download URL for the latest dataset export
### Create Dataset Version
```
```http
POST /api/datasets/{datasetId}/export
```
@ -386,7 +386,7 @@ All fields are optional. The `description` field is a user-provided label for th
### Update Version Description
```
```http
PATCH /api/datasets/{datasetId}/export
```
@ -411,7 +411,7 @@ Update the description of an existing version. Owner-only.
### Get Class Statistics
```
```http
GET /api/datasets/{datasetId}/class-stats
```
@ -454,7 +454,7 @@ Returns class distribution, location heatmap, and dimension statistics. Results
### Get Models Trained on Dataset
```
```http
GET /api/datasets/{datasetId}/models
```
@ -495,7 +495,7 @@ Returns models that were trained using this dataset.
### Auto-Annotate Dataset
```
```http
POST /api/datasets/{datasetId}/predict
```
@ -512,7 +512,7 @@ Run YOLO inference on dataset images to auto-generate annotations. Uses a select
### Dataset Ingest
```
```http
POST /api/datasets/ingest
```
@ -532,7 +532,7 @@ graph LR
#### List Images
```
```http
GET /api/datasets/{datasetId}/images
```
@ -552,7 +552,7 @@ GET /api/datasets/{datasetId}/images
#### Get Signed Image URLs
```
```http
POST /api/datasets/{datasetId}/images/urls
```
@ -560,13 +560,13 @@ Get signed URLs for a batch of image hashes (for display in the browser).
#### Delete Image
```
```http
DELETE /api/datasets/{datasetId}/images/{hash}
```
#### Get Image Labels
```
```http
GET /api/datasets/{datasetId}/images/{hash}/labels
```
@ -574,7 +574,7 @@ Returns annotations and class names for a specific image.
#### Update Image Labels
```
```http
PUT /api/datasets/{datasetId}/images/{hash}/labels
```
@ -594,13 +594,13 @@ PUT /api/datasets/{datasetId}/images/{hash}/labels
Move images between splits (train/val/test) within a dataset:
```
```http
PATCH /api/datasets/{datasetId}/images/bulk
```
Bulk delete images:
```
```http
DELETE /api/datasets/{datasetId}/images/bulk
```
@ -612,7 +612,7 @@ Organize your models into projects. Each model belongs to one project. See [Proj
### List Projects
```
```http
GET /api/projects
```
@ -626,13 +626,13 @@ GET /api/projects
### Get Project
```
```http
GET /api/projects/{projectId}
```
### Create Project
```
```http
POST /api/projects
```
@ -659,13 +659,13 @@ POST /api/projects
### Update Project
```
```http
PATCH /api/projects/{projectId}
```
### Delete Project
```
```http
DELETE /api/projects/{projectId}
```
@ -673,7 +673,7 @@ Soft-deletes the project (moved to [trash](../account/trash.md)).
### Clone Project
```
```http
POST /api/projects/{projectId}/clone
```
@ -681,13 +681,13 @@ POST /api/projects/{projectId}/clone
Upload a project icon (multipart form with image file):
```
```http
POST /api/projects/{projectId}/icon
```
Remove the project icon:
```
```http
DELETE /api/projects/{projectId}/icon
```
@ -699,7 +699,7 @@ Manage trained YOLO models — view metrics, download weights, run inference, an
### List Models
```
```http
GET /api/models
```
@ -714,7 +714,7 @@ GET /api/models
### List Completed Models
```
```http
GET /api/models/completed
```
@ -722,13 +722,13 @@ Returns models that have finished training (for use in model selectors and deplo
### Get Model
```
```http
GET /api/models/{modelId}
```
### Create Model
```
```http
POST /api/models
```
@ -748,19 +748,19 @@ POST /api/models
### Update Model
```
```http
PATCH /api/models/{modelId}
```
### Delete Model
```
```http
DELETE /api/models/{modelId}
```
### Download Model Files
```
```http
GET /api/models/{modelId}/files
```
@ -768,7 +768,7 @@ Returns signed download URLs for model files.
### Clone Model
```
```http
POST /api/models/{modelId}/clone
```
@ -794,7 +794,7 @@ Clone a public model to one of your projects.
### Track Download
```
```http
POST /api/models/{modelId}/track-download
```
@ -802,7 +802,7 @@ Track model download analytics.
### Run Inference
```
```http
POST /api/models/{modelId}/predict
```
@ -863,7 +863,7 @@ POST /api/models/{modelId}/predict
### Get Predict Token
```
```http
POST /api/models/{modelId}/predict/token
```
@ -871,7 +871,7 @@ Get a short-lived token for direct prediction requests. The token bypasses the A
### Warmup Model
```
```http
POST /api/models/{modelId}/predict/warmup
```
@ -895,7 +895,7 @@ graph LR
### Start Training
```
```http
POST /api/training/start
```
@ -947,7 +947,7 @@ POST /api/training/start
### Get Training Status
```
```http
GET /api/models/{modelId}/training
```
@ -955,7 +955,7 @@ Returns the current training job status, metrics, and progress for a model.
### Cancel Training
```
```http
DELETE /api/models/{modelId}/training
```
@ -965,7 +965,7 @@ Terminates the running compute instance and marks the job as cancelled.
## Deployments API
Deploy models to dedicated inference endpoints with auto-scaling, health checks, and monitoring. See [Endpoints documentation](../deploy/endpoints.md).
Deploy models to dedicated inference endpoints with health checks and monitoring. New deployments use scale-to-zero by default, and the API accepts an optional `resources` object. See [Endpoints documentation](../deploy/endpoints.md).
```mermaid
graph LR
@ -980,7 +980,7 @@ graph LR
### List Deployments
```
```http
GET /api/deployments
```
@ -995,7 +995,7 @@ GET /api/deployments
### Create Deployment
```
```http
POST /api/deployments
```
@ -1015,34 +1015,38 @@ POST /api/deployments
}
```
| Field | Type | Required | Description |
| ----------- | ------ | -------- | ------------------------------------------------------------------ |
| `modelId` | string | Yes | Model ID to deploy |
| `name` | string | Yes | Deployment name |
| `region` | string | Yes | Deployment region |
| `resources` | object | No | Resource configuration (cpu, memoryGi, minInstances, maxInstances) |
| Field | Type | Required | Description |
| ----------- | ------ | -------- | -------------------------------------------------------------------------- |
| `modelId` | string | Yes | Model ID to deploy |
| `name` | string | Yes | Deployment name |
| `region` | string | Yes | Deployment region |
| `resources` | object | No | Resource configuration (`cpu`, `memoryGi`, `minInstances`, `maxInstances`) |
Creates a dedicated inference endpoint in the specified region. The endpoint is globally accessible via a unique URL.
!!! note "Default Resources"
The deployment dialog currently submits fixed defaults of `cpu=1`, `memoryGi=2`, `minInstances=0`, and `maxInstances=1`. The API route accepts a `resources` object, but plan limits cap `minInstances` at `0` and `maxInstances` at `1`.
!!! tip "Region Selection"
Choose a region close to your users for lowest latency. The platform UI shows latency estimates for all 43 available regions.
### Get Deployment
```
```http
GET /api/deployments/{deploymentId}
```
### Delete Deployment
```
```http
DELETE /api/deployments/{deploymentId}
```
### Start Deployment
```
```http
POST /api/deployments/{deploymentId}/start
```
@ -1050,7 +1054,7 @@ Resume a stopped deployment.
### Stop Deployment
```
```http
POST /api/deployments/{deploymentId}/stop
```
@ -1058,7 +1062,7 @@ Pause a running deployment (stops billing).
### Health Check
```
```http
GET /api/deployments/{deploymentId}/health
```
@ -1066,7 +1070,7 @@ Returns the health status of the deployment endpoint.
### Run Inference on Deployment
```
```http
POST /api/deployments/{deploymentId}/predict
```
@ -1083,7 +1087,7 @@ Send an image directly to a deployment endpoint for inference. Functionally equi
### Get Metrics
```
```http
GET /api/deployments/{deploymentId}/metrics
```
@ -1098,7 +1102,7 @@ Returns request counts, latency, and error rate metrics with sparkline data.
### Get Logs
```
```http
GET /api/deployments/{deploymentId}/logs
```
@ -1116,7 +1120,7 @@ GET /api/deployments/{deploymentId}/logs
### Aggregated Metrics
```
```http
GET /api/monitoring
```
@ -1130,7 +1134,7 @@ Convert models to optimized formats like ONNX, TensorRT, CoreML, and TFLite for
### List Exports
```
```http
GET /api/exports
```
@ -1144,7 +1148,7 @@ GET /api/exports
### Create Export
```
```http
POST /api/exports
```
@ -1202,19 +1206,19 @@ POST /api/exports
### Get Export Status
```
```http
GET /api/exports/{exportId}
```
### Cancel Export
```
```http
DELETE /api/exports/{exportId}
```
### Track Export Download
```
```http
POST /api/exports/{exportId}/track-download
```
@ -1224,9 +1228,13 @@ POST /api/exports/{exportId}/track-download
View a feed of recent actions on your account — training runs, uploads, and more. See [Activity documentation](../account/activity.md).
!!! note "Browser Session Only"
The Activity routes are powered by browser-authenticated requests from the platform UI. They are not exposed as a public API, do not accept API-key authentication, and the route shapes below are documented only for reference. Use the Activity feed in the platform UI to view, mark, or archive events.
### List Activity
```
```http
GET /api/activity
```
@ -1241,7 +1249,7 @@ GET /api/activity
### Mark Events Seen
```
```http
POST /api/activity/mark-seen
```
@ -1263,7 +1271,7 @@ Or pass specific IDs:
### Archive Events
```
```http
POST /api/activity/archive
```
@ -1293,7 +1301,7 @@ View and restore deleted items. Items are permanently removed after 30 days. See
### List Trash
```
```http
GET /api/trash
```
@ -1308,7 +1316,7 @@ GET /api/trash
### Restore Item
```
```http
POST /api/trash
```
@ -1323,7 +1331,7 @@ POST /api/trash
### Permanently Delete Item
```
```http
DELETE /api/trash
```
@ -1342,12 +1350,16 @@ DELETE /api/trash
### Empty Trash
```
```http
DELETE /api/trash/empty
```
Permanently deletes all items in trash.
!!! note "Authentication"
`DELETE /api/trash/empty` requires an authenticated browser session and is not available via API key. Use the **Empty Trash** button in the UI instead.
---
## Billing API
@ -1360,7 +1372,7 @@ Check your credit balance, purchase credits, view transaction history, and confi
### Get Balance
```
```http
GET /api/billing/balance
```
@ -1385,7 +1397,7 @@ GET /api/billing/balance
### Get Usage Summary
```
```http
GET /api/billing/usage-summary
```
@ -1393,7 +1405,7 @@ Returns plan details, limits, and usage metrics.
### Get Transactions
```
```http
GET /api/billing/transactions
```
@ -1407,7 +1419,7 @@ Returns transaction history (most recent first).
### Create Checkout Session
```
```http
POST /api/billing/checkout-session
```
@ -1429,7 +1441,7 @@ Creates a checkout session for credit purchase.
### Create Subscription Checkout
```
```http
POST /api/billing/subscription-checkout
```
@ -1451,13 +1463,21 @@ Creates a checkout session for Pro subscription upgrade.
| `billingCycle` | string | No | Billing cycle: `monthly` (default) or `yearly` |
| `owner` | string | No | Team username for workspace upgrades (requires admin role) |
### Create Portal Session
### Cancel or Resume Subscription
```
POST /api/billing/portal-session
```http
DELETE /api/billing/subscription-checkout
```
Returns URL to billing portal for subscription management.
Cancels a Pro subscription at period end by default. Send `{"resume": true}` to resume an already scheduled cancellation before the billing period ends.
**Body:**
```json
{
"resume": true
}
```
### Auto Top-Up
@ -1465,7 +1485,7 @@ Automatically add credits when balance falls below a threshold.
#### Get Auto Top-Up Config
```
```http
GET /api/billing/auto-topup
```
@ -1477,7 +1497,7 @@ GET /api/billing/auto-topup
#### Update Auto Top-Up Config
```
```http
PATCH /api/billing/auto-topup
```
@ -1495,13 +1515,13 @@ PATCH /api/billing/auto-topup
#### List Payment Methods
```
```http
GET /api/billing/payment-methods
```
#### Create Setup Intent
```
```http
POST /api/billing/payment-methods/setup
```
@ -1509,7 +1529,7 @@ Returns a client secret for adding a new payment method.
#### Set Default Payment Method
```
```http
POST /api/billing/payment-methods/default
```
@ -1523,7 +1543,7 @@ POST /api/billing/payment-methods/default
#### Update Billing Info
```
```http
PATCH /api/billing/payment-methods
```
@ -1544,7 +1564,7 @@ PATCH /api/billing/payment-methods
#### Delete Payment Method
```
```http
DELETE /api/billing/payment-methods/{id}
```
@ -1556,7 +1576,7 @@ Check your storage usage breakdown by category (datasets, models, exports) and s
### Get Storage Info
```
```http
GET /api/storage
```
@ -1605,7 +1625,7 @@ GET /api/storage
### Recalculate Storage
```
```http
POST /api/storage
```
@ -1619,7 +1639,7 @@ Upload files directly to cloud storage using signed URLs for fast, reliable tran
### Get Signed Upload URL
```
```http
POST /api/upload/signed-url
```
@ -1659,7 +1679,7 @@ Request a signed URL for uploading a file directly to cloud storage. The signed
### Complete Upload
```
```http
POST /api/upload/complete
```
@ -1685,13 +1705,13 @@ Manage your API keys for programmatic access. See [API Keys documentation](../ac
### List API Keys
```
```http
GET /api/api-keys
```
### Create API Key
```
```http
POST /api/api-keys
```
@ -1705,7 +1725,7 @@ POST /api/api-keys
### Delete API Key
```
```http
DELETE /api/api-keys
```
@ -1731,13 +1751,13 @@ Create team workspaces, invite members, and manage roles for collaboration. See
### List Teams
```
```http
GET /api/teams
```
### Create Team
```
```http
POST /api/teams/create
```
@ -1752,7 +1772,7 @@ POST /api/teams/create
### List Members
```
```http
GET /api/members
```
@ -1760,7 +1780,7 @@ Returns members of the current workspace.
### Invite Member
```
```http
POST /api/members
```
@ -1785,19 +1805,19 @@ POST /api/members
### Update Member Role
```
```http
PATCH /api/members/{userId}
```
### Remove Member
```
```http
DELETE /api/members/{userId}
```
### Transfer Ownership
```
```http
POST /api/members/transfer-ownership
```
@ -1805,13 +1825,13 @@ POST /api/members/transfer-ownership
#### Accept Invite
```
```http
POST /api/invites/accept
```
#### Get Invite Info
```
```http
GET /api/invites/info
```
@ -1823,13 +1843,13 @@ GET /api/invites/info
#### Revoke Invite
```
```http
DELETE /api/invites/{inviteId}
```
#### Resend Invite
```
```http
POST /api/invites/{inviteId}/resend
```
@ -1841,7 +1861,7 @@ Search and browse public datasets and projects shared by the community. See [Exp
### Search Public Content
```
```http
GET /api/explore/search
```
@ -1856,7 +1876,7 @@ GET /api/explore/search
### Sidebar Data
```
```http
GET /api/explore/sidebar
```
@ -1870,7 +1890,7 @@ Manage your profile, API keys, storage usage, and data privacy settings. See [Se
### Get User by Username
```
```http
GET /api/users
```
@ -1882,7 +1902,7 @@ GET /api/users
### Follow or Unfollow User
```
```http
PATCH /api/users
```
@ -1897,7 +1917,7 @@ PATCH /api/users
### Check Username Availability
```
```http
GET /api/username/check
```
@ -1910,7 +1930,7 @@ GET /api/username/check
### Settings
```
```http
GET /api/settings
POST /api/settings
```
@ -1919,7 +1939,7 @@ Get or update user profile settings (display name, bio, social links, etc.).
### Profile Icon
```
```http
POST /api/settings/icon
DELETE /api/settings/icon
```
@ -1928,7 +1948,7 @@ Upload or remove profile avatar.
### Onboarding
```
```http
POST /api/onboarding
```
@ -1942,7 +1962,7 @@ Request an export of all your data or permanently delete your account. See [Sett
### Get GDPR Job Status
```
```http
GET /api/gdpr
```
@ -1956,7 +1976,7 @@ Returns job status. For completed export jobs, response includes a `downloadUrl`
### Start Export or Delete Flow
```
```http
POST /api/gdpr
```
@ -2022,7 +2042,7 @@ yolo check
!!! warning "Package Version Requirement"
Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform.
Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform.
### Authentication
@ -2163,7 +2183,7 @@ Webhooks notify your server of Platform events via HTTP POST callbacks:
**All plans**: Training webhooks via the Python SDK (real-time metrics, completion notifications) work automatically on every plan — no configuration required.
**Enterprise only**: Custom webhook endpoints that send HTTP POST callbacks to your own server URL require an Enterprise plan. [Contact sales](https://www.ultralytics.com/contact) for details.
**Enterprise only**: Custom webhook endpoints that send HTTP POST callbacks to your own server URL require an Enterprise plan. See [Ultralytics Licensing](https://www.ultralytics.com/license) for details.
---

View file

@ -243,10 +243,9 @@ Smart annotation adds model-assisted annotation to the editor. In Smart mode, yo
With a SAM model selected:
1. Enter edit mode and select `Smart` or press `S`
2. **Left-click** to add positive points (include this area)
3. **Right-click** to add negative points (exclude this area)
4. SAM generates a precise mask in real-time
5. Press `Enter` or `Escape` to save the annotation, or enable **auto-apply** for one-click workflows
2. Click on the object you want to annotate — SAM generates an initial mask in real-time
3. Refine the mask with additional clicks: click **outside** the current mask to add coverage, or click **inside** the current mask to subtract regions
4. Press `Enter` or `Escape` to save the annotation, or enable **auto-apply** for one-click workflows
![Ultralytics Platform Annotate Sam Positive Negative Points Mask](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-annotate-sam-positive-negative-points-mask.avif)
@ -269,14 +268,13 @@ graph LR
!!! tip "SAM Tips"
- Start with a positive click on the object center
- Add negative clicks to exclude background
- Hold `Alt`/`Option` to invert click behavior (left-click becomes negative, right-click becomes positive)
- Enable **auto-apply** (`A`) for one-click annotation — the mask saves automatically after each click
- Start with a click on the object center
- Click again outside the mask to expand coverage
- Click inside the mask to subtract unwanted regions
- Enable **auto-apply** (`A`) for one-click annotation
- Hold `Shift` while auto-apply is on to place multiple points before the mask is applied
- Positive and negative points appear as square markers with `+` and `−` symbols on the canvas
- Works best for distinct objects with clear edges
- Use 2-3 positive points for elongated objects
- Use a few refinement clicks for elongated or overlapping objects
SAM smart annotation can generate:
@ -292,11 +290,11 @@ SAM smart annotation can generate:
Auto-apply mode speeds up Smart annotation by automatically saving the SAM mask after each click — no need to press `Enter`. Toggle it with the auto-apply button in the toolbar or press `A`.
| Mode | Behavior |
| --------------------------- | ---------------------------------------------------- |
| **Auto-apply ON** (default) | Mask applies automatically after each click |
| **Auto-apply ON + `Shift`** | Place multiple points first, mask applies on release |
| **Auto-apply OFF** | Place points freely, press `Enter` to apply |
| Mode | Behavior |
| ---------------------------- | ---------------------------------------------------- |
| **Auto-apply ON** | Mask applies automatically after each click |
| **Auto-apply ON + `Shift`** | Place multiple points first, mask applies on release |
| **Auto-apply OFF** (default) | Place points freely, press `Enter` to apply |
![Ultralytics Platform Annotate Sam Auto Apply Toggle](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-annotate-sam-auto-apply-toggle.avif)
@ -308,13 +306,13 @@ Auto-apply mode speeds up Smart annotation by automatically saving the SAM mask
When Smart mode is active, a model picker appears in the toolbar. Five SAM models are available — choose based on the speed vs. accuracy trade-off that suits your dataset:
| Model | Size | Speed | Notes |
| ----------------- | ------- | -------- | ------------------------ |
| **SAM 2.1 Tiny** | 74.5 MB | Fastest | |
| **SAM 2.1 Small** | 88 MB | Fast | Default |
| **SAM 2.1 Base** | 154 MB | Moderate | |
| **SAM 2.1 Large** | 428 MB | Slower | Most accurate of SAM 2.1 |
| **SAM 3** | 3.45 GB | Slowest | Latest generation |
| Model | Size | Speed | Notes |
| ----------------- | ------- | -------- | -------------------------- |
| **SAM 2.1 Tiny** | 74.5 MB | Fastest | |
| **SAM 2.1 Small** | 88 MB | Fast | |
| **SAM 2.1 Base** | 154 MB | Moderate | |
| **SAM 2.1 Large** | 428 MB | Slower | Most accurate of SAM 2.1 |
| **SAM 3** | 3.45 GB | Slowest | Default, latest generation |
![Ultralytics Platform Annotate Sam Model Selector](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-annotate-sam-model-selector.avif)
@ -382,7 +380,7 @@ In edit mode, a crosshair overlay tracks the cursor position and displays pixel
## SAM Hover Preview
In Smart mode for **segment** tasks, SAM provides a real-time mask preview as you hover over the image — before clicking any points. This lets you see the predicted segmentation boundary and decide where to click. Once you add positive or negative points, the preview updates to reflect your refinements.
In Smart mode, SAM provides a real-time hover preview before you click any points. This preview is available for **detect**, **segment**, and **OBB** tasks. Once you add refinement clicks, the preview updates to reflect the current mask and the annotation type for the active task.
## Polygon Vertex Editing
@ -436,18 +434,22 @@ Efficient annotation with keyboard shortcuts:
=== "General"
| Shortcut | Action |
| ---------------------- | -------------------------- |
| `Cmd/Ctrl+S` | Save annotations |
| `Cmd/Ctrl+Z` | Undo |
| `Cmd/Ctrl+Shift+Z` | Redo |
| `Cmd/Ctrl+Y` | Redo (alternative) |
| `Escape` | Save / Deselect / Exit |
| `Delete` / `Backspace` | Delete selected annotation |
| `1-9` | Select class 1-9 |
| `Cmd/Ctrl+Scroll` | Zoom in/out |
| `Shift+Click` | Multi-select annotations |
| `Cmd/Ctrl+A` | Select all annotations |
| Shortcut | Action |
| ----------------------------- | ---------------------------- |
| `Cmd/Ctrl+S` | Save annotations |
| `Cmd/Ctrl+Z` | Undo |
| `Cmd/Ctrl+Shift+Z` | Redo |
| `Cmd/Ctrl+Y` | Redo (alternative) |
| `Escape` | Save / Deselect / Exit |
| `Delete` / `Backspace` | Delete selected annotation |
| `1-9` | Select class 1-9 |
| `Cmd/Ctrl+Scroll` | Zoom in/out |
| `Cmd/Ctrl++` or `Cmd/Ctrl+=` | Zoom in |
| `Cmd/Ctrl+-` | Zoom out |
| `Cmd/Ctrl+0` | Reset to fit |
| `Space+Drag` | Pan canvas when zoomed |
| `Shift+Click` | Multi-select annotations |
| `Cmd/Ctrl+A` | Select all annotations |
=== "Modes"
@ -458,15 +460,15 @@ Efficient annotation with keyboard shortcuts:
=== "Drawing"
| Shortcut | Action |
| -------------- | --------------------------------------------------------- |
| `Click+Drag` | Draw bounding box (detect/OBB) |
| `Click` | Add polygon point (segment) / Place skeleton (pose) |
| `Right-click` | Complete polygon / Add SAM negative point |
| `Shift` + `click`/`right-click` | Place multiple SAM points before applying (auto-apply on) |
| `A` | Toggle auto-apply (Smart mode) |
| `Enter` | Complete polygon / Confirm pose / Save SAM annotation |
| `Escape` | Cancel pose / Save SAM annotation / Deselect / Exit |
| Shortcut | Action |
| ------------------------------- | ----------------------------------------------------------- |
| `Click+Drag` | Draw bounding box (detect/OBB) |
| `Click` | Add polygon point (segment) / Place skeleton (pose) |
| `Right-click` | Complete polygon / Add SAM negative point |
| `Shift` + `click`/`right-click` | Place multiple SAM points before applying (auto-apply on) |
| `A` | Toggle auto-apply (Smart mode) |
| `Enter` | Complete polygon / Confirm pose / Save SAM annotation |
| `Escape` | Cancel pose / Save SAM annotation / Deselect / Exit |
=== "Arrange (Z-Order)"
@ -526,7 +528,7 @@ SAM provides high-quality masks for most objects. Accuracy depends on:
- Image quality and resolution
- Number of positive/negative points provided
For best results, start with a positive point on the object center and add negative points to exclude nearby objects.
For best results, start with a click on the object center, then use outside-mask clicks to add coverage and inside-mask clicks to subtract nearby objects or background.
### Can I import existing annotations?
@ -552,7 +554,7 @@ Yes, but for best results:
### Which SAM model should I use?
Start with **SAM 2.1 Small** (the default) — it's fast and accurate for most objects. Switch to **SAM 2.1 Large** when you need higher mask precision on complex shapes. Use **SAM 2.1 Tiny** for maximum speed on simple, high-contrast objects. **SAM 3** is the latest generation model and may produce better results on challenging images, but is significantly slower.
**SAM 3** is the default and the latest generation model — start there for the highest quality masks. Switch to **SAM 2.1 Small** for a faster interactive workflow on common objects, or **SAM 2.1 Large** when you need higher mask precision on complex shapes. Use **SAM 2.1 Tiny** for maximum speed on simple, high-contrast objects.
### Which tasks support SAM smart annotation?

View file

@ -6,7 +6,7 @@ keywords: Ultralytics Platform, datasets, dataset management, dataset versioning
# Datasets
[Ultralytics Platform](https://platform.ultralytics.com) datasets provide a streamlined solution for managing your training data. Once uploaded, datasets can be immediately used for model training, with automatic processing and statistics generation.
[Ultralytics Platform](https://platform.ultralytics.com) datasets provide a streamlined solution for managing your training data. After upload, the platform processes images, labels, and statistics automatically. A dataset is ready to train once processing has completed and it has at least one image in the `train` split, at least one image in either the `val` or `test` split, and at least one labeled image.
## Upload Dataset
@ -58,7 +58,7 @@ Ultralytics Platform accepts multiple upload formats for flexibility.
### Preparing Your Dataset
The Platform supports two annotation formats plus raw uploads: [Ultralytics YOLO](../../datasets/detect/index.md#ultralytics-yolo-format), [COCO](https://cocodataset.org/#format-data), and raw (unannotated images):
The Platform supports [Ultralytics YOLO](../../datasets/detect/index.md#ultralytics-yolo-format), [COCO](https://cocodataset.org/#format-data), [Ultralytics NDJSON](../../datasets/detect/index.md#ultralytics-ndjson-format), and raw (unannotated) uploads:
=== "YOLO Format"
@ -125,17 +125,43 @@ The Platform supports two annotation formats plus raw uploads: [Ultralytics YOLO
COCO annotations are automatically converted during upload. Detection (`bbox`), segmentation (`segmentation` polygons), and pose (`keypoints`) tasks are supported. Category IDs are remapped to a dense 0-indexed sequence across all annotation files. For converting between formats, see [format conversion tools](../../datasets/detect/index.md#port-or-convert-label-formats).
=== "Classification Layouts"
Classification uploads are auto-detected from common folder layouts:
```
split/class/image.jpg
class/split/image.jpg
class/image.jpg
```
Example:
```
my-classify-dataset/
├── train/
│ ├── cats/
│ └── dogs/
└── val/
├── cats/
└── dogs/
```
=== "NDJSON"
Ultralytics NDJSON exports can be uploaded directly back into Platform. This is useful for moving datasets between workspaces while preserving metadata, classes, splits, and annotations.
!!! tip "Raw Uploads"
**Raw**: Upload unannotated images (no labels). Useful when you plan to annotate directly on the platform using the [annotation editor](annotation.md).
!!! tip "Flat Directory Structure"
You can also upload images without the train/val folder structure. Images uploaded without split folders are assigned to the `train` split by default. You can reassign them later using the bulk move-to-split feature.
You can also upload images without explicit split folders. Platform respects the active split target during upload, and for non-classify datasets it may automatically create a validation split from part of the training set when no split information is provided. You can always reassign images later with bulk move-to-split or split redistribution.
!!! tip "Format Auto-Detection"
The format is detected automatically: datasets with a `data.yaml` containing `names`, `train`, or `val` keys are treated as YOLO. Datasets with COCO JSON files (containing `images`, `annotations`, and `categories` arrays) are treated as COCO. Datasets with only images and no annotations are treated as raw.
The format is detected automatically: datasets with a `data.yaml` containing `names`, `train`, or `val` keys are treated as YOLO. Datasets with COCO JSON files (containing `images`, `annotations`, and `categories` arrays) are treated as COCO. `.ndjson` exports are imported as Ultralytics NDJSON. Datasets with only images and no annotations are treated as raw.
For task-specific format details, see [supported tasks](index.md#supported-tasks) and the [Datasets Overview](../../datasets/index.md).
@ -242,7 +268,9 @@ Click any image to open the fullscreen viewer with:
- **Edit**: Enter annotation mode to add or modify labels
- **Download**: Download the original image file
- **Delete**: Delete the image from the dataset
- **Zoom**: `Cmd/Ctrl+Scroll` to zoom in/out
- **Zoom**: `Cmd/Ctrl+Scroll`, `Cmd/Ctrl++`, or `Cmd/Ctrl+=` to zoom in, and `Cmd/Ctrl+-` to zoom out
- **Reset view**: `Cmd/Ctrl+0` or the reset button to fit the image to the viewer
- **Pan**: Hold `Space` and drag to pan the canvas when zoomed
- **Pixel view**: Toggle pixelated rendering for close inspection
![Ultralytics Platform Datasets Fullscreen Viewer With Metadata Panel](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-datasets-fullscreen-viewer-with-metadata-panel.avif)
@ -259,7 +287,7 @@ Filter images by their dataset split:
## Dataset Tabs
Each dataset page has six tabs accessible from the tab bar:
Each dataset page can show up to six tabs, depending on the dataset state and your permissions:
### Images Tab
@ -267,6 +295,8 @@ The default view showing the image gallery with annotation overlays. Supports gr
### Classes Tab
This tab appears when the dataset has images.
Manage annotation classes for your dataset:
- **Class histogram**: Bar chart showing annotation count per class with linear/log scale toggle
@ -283,6 +313,8 @@ Manage annotation classes for your dataset:
### Charts Tab
This tab appears when the dataset has images.
Automatic statistics computed from your dataset:
| Chart | Description |
@ -324,6 +356,8 @@ View all models trained on this dataset in a searchable table:
### Errors Tab
This tab appears only when one or more files fail processing.
Images that failed processing are listed here with:
- **Error banner**: Total count of failed images and guidance
@ -360,10 +394,15 @@ To create a version:
1. Open the **Versions** tab
2. Optionally enter a description (e.g., "Added 500 training images" or "Fixed mislabeled classes")
3. Click **+ New Version**
4. The NDJSON snapshot is generated and downloads automatically
4. The new version appears in the table
5. Download the version separately from the table when needed
Each version is numbered sequentially (v1, v2, v3...) and stored permanently. You can download any previous version at any time from the versions table.
!!! note "Ready Datasets Only"
Version creation is available after the dataset reaches `ready` status.
!!! tip "When to Create Versions"
Create a version before and after major changes to your dataset — adding images, fixing annotations, or rebalancing splits. This lets you compare model performance across different dataset states.
@ -374,20 +413,13 @@ Each version is numbered sequentially (v1, v2, v3...) and stored permanently. Yo
## Export Dataset
Export your dataset for offline use. The Platform supports multiple export formats:
| Format | Description |
| -------------- | -------------------------------------------------- |
| **YOLO** | Standard YOLO format with images and `.txt` labels |
| **COCO** | COCO JSON format with annotation arrays |
| **Pascal VOC** | XML annotation files per image |
| **NDJSON** | One JSON object per line (lightweight metadata) |
Export your dataset for offline use with an NDJSON download from the dataset header or the Versions tab.
To export:
1. Click the **Export** button in the dataset header
2. Select the desired format
3. The export job runs asynchronously — you'll be notified when the download is ready
2. Download the current NDJSON snapshot directly
3. Use the **Versions** tab when you want an immutable numbered snapshot you can re-download later
![Ultralytics Platform Datasets Export Ndjson Download](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-datasets-export-ndjson-download.avif)
@ -633,7 +665,7 @@ Use the bulk move-to-split feature:
### What label formats are supported?
Ultralytics Platform supports two annotation formats for upload:
Ultralytics Platform supports YOLO labels, COCO JSON, Ultralytics NDJSON, and raw image uploads:
=== "YOLO Format"
@ -653,6 +685,10 @@ Ultralytics Platform supports two annotation formats for upload:
JSON files with `images`, `annotations`, and `categories` arrays. Supports detection (`bbox`), segmentation (polygon), and pose (`keypoints`) tasks. COCO uses absolute pixel coordinates which are automatically converted to normalized format during upload.
=== "NDJSON"
Ultralytics NDJSON exports can be re-imported into Platform. This is the most complete way to move dataset metadata, splits, and annotations between workspaces.
### Can I annotate the same dataset for multiple task types?
Yes. Each image stores annotations for all 5 task types (detect, segment, pose, OBB, classify) together. You can switch the dataset's active task type at any time without losing existing annotations. Only annotations matching the active task type are shown in the editor and included in exports and training — annotations for other tasks are preserved and reappear when you switch back.

View file

@ -65,7 +65,7 @@ Ultralytics Platform supports all 5 YOLO task types:
!!! info "Task Type Selection"
The task type is set when creating a dataset and determines which annotation tools are available. You can change it later from the dataset settings, but incompatible annotations won't be displayed after switching.
The task type is set when creating a dataset and determines which annotation tools are available. You can change it later from the dataset header task selector, but incompatible annotations won't be displayed after switching.
## Key Features
@ -102,7 +102,7 @@ Create immutable NDJSON snapshots of your dataset for reproducible training. Eac
### Dataset Tabs
Every dataset page provides six tabs:
Dataset pages can show up to six tabs, depending on the dataset state and your permissions:
| Tab | Description |
| ------------ | ---------------------------------------------------------------------------- |
@ -113,6 +113,8 @@ Every dataset page provides six tabs:
| **Versions** | Create and download immutable NDJSON snapshots for reproducible training |
| **Errors** | Images that failed processing with error details and fix guidance |
`Classes` and `Charts` appear when the dataset has images. `Errors` appears only when processing failures exist. `Versions` appears for owners, or for non-owners when versions already exist.
### Statistics and Visualization
The `Charts` tab provides automatic analysis including:

View file

@ -1,12 +1,12 @@
---
comments: true
description: Deploy YOLO models to dedicated endpoints in 43 global regions with auto-scaling and monitoring on Ultralytics Platform.
description: Deploy YOLO models to dedicated endpoints in 43 global regions with scale-to-zero behavior and monitoring on Ultralytics Platform.
keywords: Ultralytics Platform, deployment, endpoints, YOLO, production, scaling, global regions
---
# Dedicated Endpoints
[Ultralytics Platform](https://platform.ultralytics.com) enables deployment of YOLO models to dedicated endpoints in 43 global regions. Each endpoint is a single-tenant service with auto-scaling, a unique endpoint URL, and independent monitoring.
[Ultralytics Platform](https://platform.ultralytics.com) enables deployment of YOLO models to dedicated endpoints in 43 global regions. Each endpoint is a single-tenant service with scale-to-zero behavior, a unique endpoint URL, and independent monitoring.
![Ultralytics Platform Model Deploy Tab With Region Map And Table](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/model-deploy-tab-with-region-map-and-table.avif)
@ -30,7 +30,7 @@ Create a deployment from the global `Deploy` page in the sidebar:
1. Click **New Deployment**
2. Select a model from the model selector
3. Select a region from the map or table
4. Optionally customize the deployment name and resources
4. Review the auto-generated deployment name (editable) and the default resources
5. Click **Deploy Model**
![Ultralytics Platform New Deployment Dialog With Model Selector And Region Map](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/new-deployment-dialog-with-model-selector-and-region-map.avif)
@ -157,12 +157,12 @@ The `New Deployment` dialog provides:
| **Model** | Select from completed models | - |
| **Region** | Deployment region | - |
| **Deployment Name** | Auto-generated, editable | - |
| **CPU Cores** | CPU allocation (1-8) | 1 |
| **Memory (GB)** | Memory allocation (1-32 GB) | 2 |
| **CPU Cores** | Fixed default | 1 |
| **Memory (GB)** | Fixed default | 2 |
![Ultralytics Platform New Deployment Dialog Resources Panel Expanded](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/new-deployment-dialog-resources-panel-expanded.avif)
Resource settings are available under the collapsible **Resources** section. Deployments use scale-to-zero by default (min instances = 0, max instances = 1) — you only pay for active inference time.
Deployments use fixed defaults of `1 CPU`, `2 GiB` memory, `minInstances = 0`, and `maxInstances = 1`. They scale to zero when idle, so you only pay for active inference time.
!!! note "Auto-Generated Names"

View file

@ -26,7 +26,7 @@ The Deployment section helps you:
- **Test** models directly in the browser with the `Predict` tab
- **Deploy** to dedicated endpoints in 43 global regions
- **Monitor** request metrics, logs, and health checks
- **Scale** automatically with traffic (including scale-to-zero)
- **Scale to zero** when idle (deployments currently run a single active instance)
![Ultralytics Platform Deploy Page World Map With Overview Cards](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/deploy-page-world-map-with-overview-cards.avif)
@ -54,12 +54,12 @@ graph LR
style D fill:#9C27B0,color:#fff
```
| Stage | Description |
| ------------- | ------------------------------------------------------------------------ |
| **Test** | Validate model with the [`Predict` tab](inference.md) |
| **Configure** | Select region, resources, and deployment name |
| **Deploy** | Create a dedicated endpoint from the [`Deploy` tab](endpoints.md) |
| **Monitor** | Track requests, latency, errors, and logs in [Monitoring](monitoring.md) |
| Stage | Description |
| ------------- | --------------------------------------------------------------------------- |
| **Test** | Validate model with the [`Predict` tab](inference.md) |
| **Configure** | Select region and deployment name (deployments use fixed default resources) |
| **Deploy** | Create a dedicated endpoint from the [`Deploy` tab](endpoints.md) |
| **Monitor** | Track requests, latency, errors, and logs in [Monitoring](monitoring.md) |
## Architecture
@ -100,8 +100,8 @@ Deploy to 43 regions worldwide on Ultralytics Cloud:
Each endpoint is a single-tenant service with:
- Dedicated compute resources (configurable CPU and memory)
- Auto-scaling (scale-to-zero when idle)
- Default resources of `1 CPU`, `2 GiB` memory, `minInstances=0`, `maxInstances=1`
- Scale-to-zero when idle
- Unique endpoint URL
- Independent monitoring, logs, and health checks
@ -118,7 +118,7 @@ Access the global deployments page from the sidebar under `Deploy`. This page sh
!!! info "Automatic Polling"
The page polls every 30 seconds for metric updates. When deployments are in a transitional state (creating, deploying, stopping), polling increases to every 2-3 seconds for near-instant feedback.
The page polls every 15 seconds normally. When deployments are in a transitional state (`creating`, `deploying`, or `stopping`), polling increases to every 3 seconds for faster feedback.
## Key Features
@ -130,12 +130,12 @@ Deploy close to your users with 43 regions covering:
- Europe, Middle East, Africa
- Asia Pacific, Oceania
### Auto-Scaling
### Scaling Behavior
Endpoints scale automatically:
Endpoints currently behave as follows:
- **Scale to zero**: No cost when idle (default)
- **Scale up**: Handle traffic spikes automatically
- **Single active instance**: `maxInstances` is currently capped at `1` on all plans
!!! tip "Cost Savings"
@ -189,7 +189,7 @@ Deploy a model in under 2 minutes:
| ----------- | --------------- | ------------------------------------ |
| **Latency** | Variable | Consistent |
| **Cost** | Free (included) | Free (basic), usage-based (advanced) |
| **Scale** | Limited | Configurable |
| **Scale** | Limited | Scale-to-zero, single instance |
| **Regions** | 3 | 43 |
| **URL** | Generic | Custom |
| **Rate** | 20 req/min | Unlimited |
@ -204,7 +204,7 @@ Dedicated endpoint deployment typically takes 1-2 minutes:
### Can I deploy multiple models?
Yes, each model can have multiple endpoints in different regions. There's no limit on total endpoints (subject to your plan).
Yes, each model can have multiple endpoints in different regions. Deployment counts are limited by plan: Free `3`, Pro `10`, Enterprise `unlimited`.
### What happens when an endpoint is idle?

View file

@ -147,11 +147,11 @@ Authorization: Bearer YOUR_API_KEY
!!! warning "API Key Required"
To run inference from your own scripts, notebooks, or apps, include an API key. Generate one in [`Settings`](../account/api-keys.md) (API Keys section on the Profile tab).
To run inference from your own scripts, notebooks, or apps, include an API key. Generate one in [`Settings > API Keys`](../account/api-keys.md).
### Endpoint
```
```http
POST https://platform.ultralytics.com/api/models/{modelId}/predict
```

View file

@ -79,7 +79,7 @@ Below the overview cards, the deployments list shows all endpoints across your p
!!! tip "Real-Time Updates"
The dashboard polls every 30 seconds for deployment status updates. When deployments are in a transitional state (creating, deploying), polling increases to every 3 seconds. Metric charts refresh every 60 seconds. Click the refresh button for immediate updates.
The dashboard polls every 15 seconds for deployment status updates. When deployments are in a transitional state (`creating`, `deploying`, or `stopping`), polling increases to every 3 seconds. Metric charts refresh every 60 seconds. Click the refresh button for immediate updates.
## Per-Deployment Metrics
@ -230,7 +230,7 @@ The `Predict` tab on each deployment card provides an inline predict panel — t
### Monitoring Overview
```
```http
GET /api/monitoring
```
@ -238,7 +238,7 @@ Returns aggregated metrics for all deployments owned by the authenticated user.
### Deployment Metrics
```
```http
GET /api/deployments/{deploymentId}/metrics?sparkline=true&range=24h
```
@ -251,7 +251,7 @@ Returns sparkline data and summary metrics for a specific deployment. Refresh in
### Deployment Logs
```
```http
GET /api/deployments/{deploymentId}/logs?limit=50&severity=ERROR,WARNING
```
@ -265,7 +265,7 @@ Returns recent log entries with optional severity filter and pagination.
### Deployment Health
```
```http
GET /api/deployments/{deploymentId}/health
```

View file

@ -343,12 +343,12 @@ Contact creators for commercial licensing.
To report inappropriate content:
1. Navigate to the public page containing the content (project or dataset), if accessible
2. Click the **Feedback** button in the sidebar
2. Open the **Help** page from the sidebar
3. Select **General** as the feedback type
4. Describe the content and the issue, including a link to the page
5. Submit the report
If the content is no longer accessible, use the **Feedback** button from any page and include as much detail as possible (URL, username, or description).
If the content is no longer accessible, open the **Help** page from the sidebar on any page and include as much detail as possible (URL, username, or description).
Our team reviews reports within 24-48 hours.

View file

@ -68,7 +68,7 @@ graph LR
| **Annotate** | Manual tools for all 5 task types, plus [Smart Annotation](data/annotation.md#smart-annotation) with SAM and YOLO models for detect, segment, and OBB (see [supported tasks](data/index.md#supported-tasks)) |
| **Train** | Cloud GPUs (20 free + 3 Pro-exclusive), real-time metrics, project organization |
| **Export** | [17 deployment formats](../modes/export.md) (ONNX, TensorRT, CoreML, TFLite, etc.; see [supported formats](train/models.md#supported-formats)) |
| **Deploy** | 43 global regions with dedicated endpoints, auto-scaling, monitoring |
| **Deploy** | 43 global regions with dedicated endpoints, scale-to-zero behavior, and monitoring |
**What you can do:**
@ -146,7 +146,7 @@ You can train models either through the web UI (cloud training) or from your own
```bash
# Install ultralytics
pip install "ultralytics>=8.4.14"
pip install "ultralytics>=8.4.35"
# Set your API key
export ULTRALYTICS_API_KEY="YOUR_API_KEY"
@ -177,7 +177,7 @@ You can train models either through the web UI (cloud training) or from your own
### Deployment
- **Inference Testing**: Test models directly in the browser with custom images
- **Dedicated Endpoints**: Deploy to 43 global regions with auto-scaling
- **Dedicated Endpoints**: Deploy to 43 global regions with scale-to-zero behavior
- **Monitoring**: Real-time metrics, request logs, and performance dashboards
```mermaid
@ -315,10 +315,10 @@ You can train models on your own hardware and stream real-time metrics to the pl
!!! warning "Package Version Requirement"
Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform.
Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform.
```bash
pip install "ultralytics>=8.4.14"
pip install "ultralytics>=8.4.35"
```
=== "CLI"
@ -473,4 +473,4 @@ See [Models Export](train/models.md#export-model), the [Export mode guide](../mo
??? question "Can I use Platform models commercially?"
Free and Pro plans use AGPL license. For commercial use without AGPL requirements, contact sales@ultralytics.com for Enterprise licensing.
Free and Pro plans use the AGPL license. For commercial use without AGPL requirements, see [Ultralytics Licensing](https://www.ultralytics.com/license).

View file

@ -19,21 +19,20 @@ keywords: Ultralytics Platform, Quickstart, YOLO models, dataset upload, model t
<strong>Watch:</strong> Get Started with Ultralytics Platform - QuickStart
</p>
The following interactive diagram outlines the four primary stages of the Ultralytics Platform workflow. Click any stage or sub-step to access detailed instructions for that section.
```mermaid
journey
title Your First Model in 5 Minutes
section Sign Up
Create account: 5: User
Select region: 5: User
section Prepare Data
Upload dataset: 5: User
Review images: 4: User
section Train
Configure training: 5: User
Monitor progress: 3: Platform
section Deploy
Test model: 5: User
Deploy endpoint: 5: User
graph LR
A(Sign Up) --> B(Prepare Data) --> C(Train) --> D(Deploy)
A -.- A1["<a href='#get-started'>Create account</a><br/><a href='#region-selection'>Select region</a>"]
B -.- B1["<a href='#upload-your-first-dataset'>Upload dataset</a><br/><a href='#create-your-first-project'>Create Project</a>"]
C -.- C1["<a href='#training-configuration'>Configure training</a><br/><a href='#monitor-training'>Monitor progress</a>"]
D -.- D1["<a href='#test-your-model'>Test model</a><br/><a href='#deploy-to-production'>Deploy endpoint</a>"]
click A "#get-started"
click B "#upload-your-first-dataset"
click C "#train-your-first-model"
click D "#deploy-to-production"
```
## Get Started
@ -105,7 +104,7 @@ The sidebar provides access to all Platform sections:
| | Deploy | Your active deployments |
| **Bottom** | Trash | Deleted items (recoverable for 30 days) |
| | Settings | Account, billing, and preferences |
| | Feedback | Send feedback to Ultralytics |
| | Help | Open help, docs, and feedback tools |
### Welcome Card
@ -352,7 +351,7 @@ graph LR
Your endpoint will be ready in about a minute with:
- **Unique URL**: HTTPS endpoint for API calls
- **Auto-Scaling**: Scales with traffic automatically
- **Scale-to-zero behavior**: No idle compute cost (deployments currently run a single active instance)
- **Monitoring**: Request metrics and logs
!!! info "Deployment Lifecycle"
@ -367,7 +366,7 @@ Read more about [endpoints](deploy/endpoints.md).
If you prefer to train on your own hardware, you can stream metrics to the platform using your API key. This works like Weights & Biases — train anywhere, monitor on the platform.
1. Generate an API key in [`Settings > Profile`](account/api-keys.md) (API Keys section)
1. Generate an API key in [`Settings > API Keys`](account/api-keys.md)
2. Set the environment variable and train with a `project/name` format:
```bash
@ -384,7 +383,7 @@ Read more about [API keys](account/api-keys.md), [dataset URIs](data/datasets.md
## Feedback & Help
Use the **Help** page in the sidebar footer to send feedback directly to Ultralytics. You can rate your experience, choose a feedback type (bug report, feature request, or general), and attach screenshots.
The **Help** page in the sidebar footer includes an in-app feedback form. You can rate your experience, choose a feedback type (bug report, feature request, or general), and attach screenshots.
If you need more help:

View file

@ -62,12 +62,12 @@ Choose a dataset to train on (see [Datasets](../data/datasets.md)):
Set core training parameters:
| Parameter | Description | Default |
| -------------- | --------------------------------------------------------------------------- | ------- |
| **Epochs** | Number of training iterations | 100 |
| **Batch Size** | Samples per iteration | 16 |
| **Image Size** | Input resolution (320/416/512/640/1280 dropdown, or 32-4096 in YAML editor) | 640 |
| **Run Name** | Optional name for the training run | auto |
| Parameter | Description | Default |
| -------------- | --------------------------------------------------------------------------- | --------- |
| **Epochs** | Number of training iterations | 100 |
| **Batch Size** | Samples per iteration | -1 (auto) |
| **Image Size** | Input resolution (320/416/512/640/1280 dropdown, or 32-4096 in YAML editor) | 640 |
| **Run Name** | Optional name for the training run | auto |
### Step 4: Advanced Settings (Optional)
@ -169,11 +169,7 @@ Real-time GPU utilization, memory, temperature, CPU, and disk usage.
### Checkpoints
Checkpoints are saved automatically:
- **Every epoch**: Latest weights saved
- **Best model**: Highest mAP checkpoint preserved
- **Final model**: Weights at training completion
After training completes, the **best model** (`best.pt`, the highest-mAP checkpoint) is uploaded to the platform and made available for download, export, and deployment.
## Cancel Training
@ -181,7 +177,7 @@ Click **Cancel Training** on the model page to stop a running job:
- The compute instance is terminated
- Credits stop being charged
- Checkpoints saved up to that point are preserved
- The best checkpoint remains available if it was reached before cancellation
## Remote Training
@ -201,7 +197,7 @@ Train on your own hardware while streaming metrics to the platform.
!!! warning "Package Version Requirement"
Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform.
Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform.
```bash
pip install -U ultralytics
@ -209,7 +205,7 @@ Train on your own hardware while streaming metrics to the platform.
### Setup API Key
1. Go to [`Settings > Profile`](../account/api-keys.md) (API Keys section)
1. Go to [`Settings > API Keys`](../account/api-keys.md)
2. Create a new key (or the platform auto-creates one when you open the Local Training tab)
3. Set the environment variable:

View file

@ -105,7 +105,7 @@ graph LR
!!! info "Automatic Checkpoints"
The Platform automatically saves checkpoints at every epoch. The **best model** (highest mAP) and **final model** are always preserved.
For cloud training, the **best model** (`best.pt`, the highest-mAP checkpoint) is saved automatically and made available for download, export, and deployment after training completes.
## Quick Start

View file

@ -139,7 +139,7 @@ GPU and system metrics during training:
Run interactive inference directly in the browser:
- Upload an image, paste a URL, or use webcam
- Upload an image, choose an example image, or use your webcam
- Results display with bounding boxes, masks, or keypoints
- Auto-inference when an image is provided
- Supports all task types ([detect](../../tasks/detect.md), [segment](../../tasks/segment.md), [pose](../../tasks/pose.md), [OBB](../../tasks/obb.md), [classify](../../tasks/classify.md))
@ -269,11 +269,17 @@ Export jobs progress through the following statuses:
Some export formats have architecture or task restrictions:
| Format | Restriction |
| ---------------- | --------------------------------------------------------------- |
| **IMX500** | Only available for YOLOv8 and YOLO11 models |
| **Axelera** | Not available for YOLO26 segmentation models |
| **PaddlePaddle** | Not available for YOLO26 detection/segmentation/pose/OBB models |
| Format | Restriction |
| ---------------- | ------------------------------------------------------- |
| **IMX500** | Available only for `YOLOv8n` and `YOLO11n` |
| **Axelera** | Detect models only |
| **PaddlePaddle** | Not available for YOLO26 detect/segment/pose/OBB models |
!!! note "Additional Export Rules"
- Classification exports do not include NMS.
- CoreML exports with batch sizes greater than `1` use `dynamic=true`.
    - Unsupported format/model combinations are disabled in the export dialog before you launch the export.
## Clone Model

View file

@ -40,7 +40,7 @@ Enter your project details:
- **Name**: A descriptive name for your project (a random name is auto-generated)
- **Description**: Optional notes about the project purpose
- **Visibility**: Public (anyone can view) or Private (only you can access)
- **License**: Optional license for your project (AGPL-3.0, Apache-2.0, MIT, GPL-3.0, BSD-3-Clause, LGPL-3.0, MPL-2.0, EUPL-1.1, Unlicense, Ultralytics-Enterprise, and more). The **Ultralytics-Enterprise** license is for commercial use without AGPL requirements — contact [sales@ultralytics.com](mailto:sales@ultralytics.com) for details.
- **License**: Optional license for your project (AGPL-3.0, Apache-2.0, MIT, GPL-3.0, BSD-3-Clause, LGPL-3.0, MPL-2.0, EUPL-1.1, Unlicense, Ultralytics-Enterprise, and more). The **Ultralytics-Enterprise** license is for commercial use without AGPL requirements and is available with an Enterprise plan — see [Ultralytics Licensing](https://www.ultralytics.com/license).
![Ultralytics Platform New Project Dialog Name Visibility License](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-new-project-dialog-name-visibility-license.avif)

View file

@ -141,7 +141,7 @@ For more details, refer to the [`SolutionResults` class documentation](https://d
Most of the Solutions can be used directly through the command-line interface, including:
`Count`, `Crop`, `Blur`, `Workout`, `Heatmap`, `Isegment`, `Visioneye`, `Speed`, `Queue`, `Analytics`, `Inference`
`Count`, `Crop`, `Blur`, `Workout`, `Heatmap`, `Isegment`, `Visioneye`, `Speed`, `Queue`, `Analytics`, `Inference`, `Trackzone`
**Syntax**

View file

@ -66,7 +66,7 @@ Train YOLO26n-obb on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.
# Load a model
model = YOLO("yolo26n-obb.yaml") # build a new model from YAML
model = YOLO("yolo26n-obb.pt") # load a pretrained model (recommended for training)
model = YOLO("yolo26n-obb.yaml").load("yolo26n.pt") # build from YAML and transfer weights
model = YOLO("yolo26n-obb.yaml").load("yolo26n-obb.pt") # build from YAML and transfer weights
# Train the model
results = model.train(data="dota8.yaml", epochs=100, imgsz=640)

View file

@ -54,7 +54,7 @@ Train YOLO26n-seg on the COCO8-seg dataset for 100 [epochs](https://www.ultralyt
# Load a model
model = YOLO("yolo26n-seg.yaml") # build a new model from YAML
model = YOLO("yolo26n-seg.pt") # load a pretrained model (recommended for training)
model = YOLO("yolo26n-seg.yaml").load("yolo26n.pt") # build from YAML and transfer weights
model = YOLO("yolo26n-seg.yaml").load("yolo26n-seg.pt") # build from YAML and transfer weights
# Train the model
results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640)

View file

@ -87,22 +87,22 @@ Below are all the supported callbacks. For more details, refer to the callbacks
### Trainer Callbacks
| Callback | Description |
| --------------------------- | -------------------------------------------------------------------------------------------- |
| `on_pretrain_routine_start` | Triggered at the beginning of the pre-training routine. |
| `on_pretrain_routine_end` | Triggered at the end of the pre-training routine. |
| `on_train_start` | Triggered when the training starts. |
| `on_train_epoch_start` | Triggered at the start of each training [epoch](https://www.ultralytics.com/glossary/epoch). |
| `on_train_batch_start` | Triggered at the start of each training batch. |
| `optimizer_step` | Triggered during the optimizer step. |
| `on_before_zero_grad` | Triggered before gradients are zeroed. |
| `on_train_batch_end` | Triggered at the end of each training batch. |
| `on_train_epoch_end` | Triggered at the end of each training epoch. |
| `on_fit_epoch_end` | Triggered at the end of each fit epoch. |
| `on_model_save` | Triggered when the model is saved. |
| `on_train_end` | Triggered when the training process ends. |
| `on_params_update` | Triggered when model parameters are updated. |
| `teardown` | Triggered when the training process is being cleaned up. |
| Callback | Description |
| --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `on_pretrain_routine_start` | Triggered at the beginning of the pre-training routine, before data loading and model setup. |
| `on_pretrain_routine_end` | Triggered at the end of the pre-training routine, after data loading and model setup are complete. |
| `on_train_start` | Triggered when the training starts, before the first [epoch](https://www.ultralytics.com/glossary/epoch) begins. |
| `on_train_epoch_start` | Triggered at the start of each training [epoch](https://www.ultralytics.com/glossary/epoch), before batch iteration begins. |
| `on_train_batch_start` | Triggered at the start of each training batch, before the forward pass. |
| `optimizer_step` | Triggered during the optimizer step. Reserved for custom integrations; not called by the default training loop. |
| `on_before_zero_grad` | Triggered before gradients are zeroed. Reserved for custom integrations; not called by the default training loop. |
| `on_train_batch_end` | Triggered at the end of each training batch, after the backward pass. The optimizer step may be deferred due to gradient accumulation. |
| `on_train_epoch_end` | Triggered at the end of each training epoch, after all batches are processed but **before** validation. Validation metrics and fitness may not be available yet. |
| `on_model_save` | Triggered when the model checkpoint is saved, after validation. |
| `on_fit_epoch_end` | Triggered at the end of each fit epoch (train + val), **after** validation and any checkpoint save. Validation metrics are available, and fitness is available for the per-epoch training call. This callback is also called during final best-model evaluation, where no checkpoint save occurs and fitness may not be present. |
| `on_train_end` | Triggered when the training process ends, after final evaluation of the best model. |
| `on_params_update` | Triggered when model parameters are updated. Reserved for custom integrations; not called by the default training loop. |
| `teardown` | Triggered when the training process is being cleaned up. |
### Validator Callbacks

View file

@ -237,7 +237,7 @@ You can then pass this file as `cfg=default_copy.yaml` along with any additional
## Solutions Commands
Ultralytics provides ready-to-use solutions for common computer vision applications through the CLI. These solutions simplify the implementation of complex tasks like object counting, workout monitoring, and queue management.
Ultralytics provides ready-to-use solutions for common computer vision applications through the CLI. The `yolo solutions` command exposes object counting, cropping, blurring, workout monitoring, heatmaps, instance segmentation, VisionEye, speed estimation, queue management, analytics, Streamlit inference, and zone-based tracking — see the [Solutions](../solutions/index.md) page for the full catalog. Run `yolo solutions help` to list every supported solution and its arguments.
!!! example
@ -250,6 +250,26 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati
yolo solutions count source="path/to/video.mp4" # specify video file path
```
=== "Crop"
Crop detected objects and save them to disk:
```bash
yolo solutions crop show=True
yolo solutions crop source="path/to/video.mp4" # specify video file path
yolo solutions crop classes="[0, 2]" # crop only selected classes
```
=== "Blur"
Blur detected objects in a video for privacy or to highlight other regions:
```bash
yolo solutions blur show=True
yolo solutions blur source="path/to/video.mp4" # specify video file path
yolo solutions blur classes="[0, 5]" # blur only selected classes
```
=== "Workout"
Monitor workout exercises using a pose model:
@ -259,8 +279,49 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati
yolo solutions workout source="path/to/video.mp4" # specify video file path
# Use keypoints for ab-workouts
yolo solutions workout kpts=[5, 11, 13] # left side
yolo solutions workout kpts=[6, 12, 14] # right side
yolo solutions workout kpts="[5, 11, 13]" # left side
yolo solutions workout kpts="[6, 12, 14]" # right side
```
=== "Heatmap"
Generate a heatmap showing object density and movement patterns:
```bash
yolo solutions heatmap show=True
yolo solutions heatmap source="path/to/video.mp4" # specify video file path
yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO # customize colormap
yolo solutions heatmap region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" # restrict heatmap to a region
```
=== "Isegment"
Run instance segmentation with tracking on a video:
```bash
yolo solutions isegment show=True
yolo solutions isegment source="path/to/video.mp4" # specify video file path
yolo solutions isegment classes="[0, 5]" # segment only selected classes
```
=== "VisionEye"
Draw object-to-observer sightlines with VisionEye:
```bash
yolo solutions visioneye show=True
yolo solutions visioneye source="path/to/video.mp4" # specify video file path
yolo solutions visioneye classes="[0, 5]" # monitor only selected classes
```
=== "Speed"
Estimate the speed of moving objects in a video:
```bash
yolo solutions speed show=True
yolo solutions speed source="path/to/video.mp4" # specify video file path
yolo solutions speed meter_per_pixel=0.05 # set scale for real-world units
```
=== "Queue"
@ -273,6 +334,18 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati
yolo solutions queue region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" # configure queue coordinates
```
=== "Analytics"
Generate analytical charts (line, bar, area, or pie) from tracked detections:
```bash
yolo solutions analytics show=True
yolo solutions analytics source="path/to/video.mp4" # specify video file path
yolo solutions analytics analytics_type="pie" show=True
yolo solutions analytics analytics_type="bar" show=True
yolo solutions analytics analytics_type="area" show=True
```
=== "Inference"
Perform object detection, instance segmentation, or pose estimation in a web browser using Streamlit:
@ -282,6 +355,16 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati
yolo solutions inference model="path/to/model.pt" # use custom model
```
=== "TrackZone"
Track objects only inside a specified polygonal zone:
```bash
yolo solutions trackzone show=True
yolo solutions trackzone source="path/to/video.mp4" # specify video file path
yolo solutions trackzone region="[(150, 150), (1130, 150), (1130, 570), (150, 570)]" # configure zone coordinates
```
=== "Help"
View available solutions and their options:

View file

@ -8,7 +8,7 @@ keywords: YOLOv5, Google Cloud Platform, GCP, Deep Learning VM, object detection
Embarking on the journey of [artificial intelligence (AI)](https://www.ultralytics.com/glossary/artificial-intelligence-ai) and [machine learning (ML)](https://www.ultralytics.com/glossary/machine-learning-ml) can be exhilarating, especially when you leverage the power and flexibility of a [cloud computing](https://www.ultralytics.com/glossary/cloud-computing) platform. Google Cloud Platform (GCP) offers robust tools tailored for ML enthusiasts and professionals alike. One such tool is the Deep Learning VM, preconfigured for data science and ML tasks. In this tutorial, we will navigate the process of setting up [Ultralytics YOLOv5](../../models/yolov5.md) on a [GCP Deep Learning VM](https://docs.cloud.google.com/deep-learning-vm/docs). Whether you're taking your first steps in ML or you're a seasoned practitioner, this guide provides a clear pathway to implementing [object detection](https://www.ultralytics.com/glossary/object-detection) models powered by YOLOv5.
🆓 Plus, if you're a new GCP user, you're in luck with a [$300 free credit offer](https://cloud.google.com/free/docs/free-cloud-features#free-trial) to kickstart your projects.
🆓 Plus, if you're a new GCP user, you're in luck with a [$300 free credit offer](https://docs.cloud.google.com/free/docs/free-cloud-features) to kickstart your projects.
In addition to GCP, explore other accessible quickstart options for YOLOv5, like our [Google Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"> for a browser-based experience, or the scalability of [Amazon AWS](./aws_quickstart_tutorial.md). Furthermore, container aficionados can utilize our official Docker image available on [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) <img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"> for an encapsulated environment, following our [Docker Quickstart Guide](../../guides/docker-quickstart.md).

View file

@ -23,7 +23,7 @@ keywords: YOLOv5, Ultralytics, object detection, computer vision, deep learning,
# Comprehensive Guide to Ultralytics YOLOv5
Welcome to the Ultralytics [YOLOv5](https://github.com/ultralytics/yolov5)🚀 Documentation! Ultralytics YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" [object detection](https://www.ultralytics.com/glossary/object-detection) model, is designed to deliver high-speed, high-accuracy results in real-time. While YOLOv5 remains a powerful tool, consider exploring its successor, [Ultralytics YOLOv8](../models/yolov8.md), for the latest advancements.
Welcome to the Ultralytics [YOLOv5](https://github.com/ultralytics/yolov5)🚀 Documentation! Ultralytics YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" [object detection](https://www.ultralytics.com/glossary/object-detection) model, is designed to deliver high-speed, high-accuracy results in real-time. While YOLOv5 remains a powerful tool, consider exploring its successors, [Ultralytics YOLOv8](../models/yolov8.md), [YOLO11](../models/yolo11.md), and [YOLO26](../models/yolo26.md), for the latest advancements.
Built on [PyTorch](https://pytorch.org/), this powerful [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) framework has garnered immense popularity for its versatility, ease of use, and high performance. Our documentation guides you through the installation process, explains the architectural nuances of the model, showcases various use cases, and provides a series of detailed tutorials. These resources will help you harness the full potential of YOLOv5 for your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects. Let's get started!

View file

@ -41,7 +41,7 @@ Or create a `.comet.config` file in your working directory and set your credenti
**Comet Configuration File**
```
```ini
[comet]
api_key=YOUR_API_KEY
project_name=YOUR_COMET_PROJECT_NAME # This will default to 'yolov5'

View file

@ -32,7 +32,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half
Output:
```text
```
val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True
YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)
@ -76,7 +76,7 @@ You can list as many checkpoints as you would like, including custom weights suc
Output:
```text
```
val: data=./data/coco.yaml, weights=['yolov5x.pt', 'yolov5l6.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True
YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)
@ -117,7 +117,7 @@ python detect.py --weights yolov5x.pt yolov5l6.pt --img 640 --source data/images
Output:
```text
```
YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)
Fusing layers...

View file

@ -112,7 +112,7 @@ python export.py --weights yolov5s.pt --include torchscript onnx
Output:
```text
```
export: data=data/coco128.yaml, weights=['yolov5s.pt'], imgsz=[640, 640], batch_size=1, device=cpu, half=False, inplace=False, train=False, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=12, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['torchscript', 'onnx']
YOLOv5 🚀 v6.2-104-ge3e5122 Python-3.8.0 torch-1.12.1+cu113 CPU

View file

@ -39,7 +39,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half
Output:
```text
```
val: data=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False
YOLOv5 🚀 v6.0-224-g4c40933 torch 1.10.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)
@ -75,7 +75,7 @@ We can apply pruning to the model using the `torch_utils.prune()` command define
30% pruned output:
```text
```
val: data=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False
YOLOv5 🚀 v6.0-224-g4c40933 torch 1.10.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)

View file

@ -28,7 +28,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half
Output:
```text
```
val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True
YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)
@ -67,7 +67,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 832 --augment --half
Output:
```text
```
val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=832, conf_thres=0.001, iou_thres=0.6, task=val, device=, single_cls=False, augment=True, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True
YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)
@ -107,7 +107,7 @@ python detect.py --weights yolov5s.pt --img 832 --source data/images --augment
Output:
```text
```
YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)
Downloading https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5s.pt to yolov5s.pt...

View file

@ -39,7 +39,7 @@ Developing a custom [object detection](https://docs.ultralytics.com/tasks/detect
Ultralytics provides two licensing options to accommodate diverse usage scenarios:
- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is ideal for students, researchers, and enthusiasts passionate about open collaboration and knowledge sharing. It requires derived works to be shared under the same license. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details.
- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is ideal for students, researchers, and enthusiasts passionate about open collaboration and knowledge sharing. It requires derived works to be shared under the same license. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details.
- **Enterprise License**: Designed for commercial applications, this license permits the seamless integration of Ultralytics software and AI models into commercial products and services without the open-source stipulations of AGPL-3.0. If your project requires commercial deployment, request an [Enterprise License](https://www.ultralytics.com/license).
Explore our licensing options further on the [Ultralytics Licensing](https://www.ultralytics.com/license) page.

View file

@ -17,7 +17,7 @@
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `{{ model_name or "yolo26n" }}_paddle_model/` | ✅ | `imgsz`, `batch`, `device` |
| [MNN](../integrations/mnn.md) | `mnn` | `{{ model_name or "yolo26n" }}.mnn` | ✅ | `imgsz`, `batch`, `int8`, `half`, `device` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `{{ model_name or "yolo26n" }}_ncnn_model/` | ✅ | `imgsz`, `half`, `batch`, `device` |
| [IMX500](../integrations/sony-imx500.md){{ tip2 }} | `imx` | `{{ model_name or "yolo26n" }}_imx_model/` | ✅ | `imgsz`, `int8`, `data`, `fraction`, `device` |
| [IMX500](../integrations/sony-imx500.md){{ tip2 }} | `imx` | `{{ model_name or "yolo26n" }}_imx_model/` | ✅ | `imgsz`, `int8`, `data`, `fraction`, `nms`{{ tip1 }}, `device` |
| [RKNN](../integrations/rockchip-rknn.md) | `rknn` | `{{ model_name or "yolo26n" }}_rknn_model/` | ✅ | `imgsz`, `batch`, `name`, `device` |
| [ExecuTorch](../integrations/executorch.md) | `executorch` | `{{ model_name or "yolo26n" }}_executorch_model/` | ✅ | `imgsz`, `device` |
| [Axelera](../integrations/axelera.md) | `axelera` | `{{ model_name or "yolo26n" }}_axelera_model/` | ✅ | `imgsz`, `int8`, `data`, `fraction`, `device` |
| [ExecuTorch](../integrations/executorch.md) | `executorch` | `{{ model_name or "yolo26n" }}_executorch_model/` | ✅ | `imgsz`, `batch`, `device` |
| [Axelera](../integrations/axelera.md) | `axelera` | `{{ model_name or "yolo26n" }}_axelera_model/` | ✅ | `imgsz`, `batch`, `int8`, `data`, `fraction`, `device` |

View file

@ -1,25 +1,25 @@
| Argument | Type | Default | Description |
| --------------- | ---------------- | ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across [different types of input](https://docs.ultralytics.com/modes/predict/#inference-sources). |
| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. |
| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. |
| `imgsz` | `int` or `tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. |
| `rect` | `bool` | `True` | If enabled, minimally pads the shorter side of the image until it's divisible by stride to improve inference speed. If disabled, pads the image to a square during inference. |
| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. |
| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0`, `0`, `npu` or `npu:0`). Allows users to select between CPU, a specific GPU, Huawei Ascend NPU, or other compute devices for model execution. |
| `batch` | `int` | `1` | Specifies the batch size for inference (only works when the source is [a directory, video file, or `.txt` file](https://docs.ultralytics.com/modes/predict/#inference-sources)). A larger batch size can provide higher throughput, shortening the total amount of time required for inference. |
| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. |
| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. |
| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames get dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames get skipped, but will cause latency if inference FPS is lower than stream FPS. |
| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. |
| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. |
| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. For end-to-end models (YOLO26, YOLOv10), this only prevents the same detection from appearing with multiple class labels (IoU=1.0 duplicates) and does not perform IoU-threshold-based suppression between distinct boxes. |
| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. |
| `retina_masks` | `bool` | `False` | Returns high-resolution segmentation masks. The returned masks (`masks.data`) will match the original image size if enabled. If disabled, they have the image size used during inference. |
| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. |
| `project` | `str` | `None` | Name of the project directory where prediction outputs are saved if `save` is enabled. |
| `name` | `str` | `None` | Name of the prediction run. Used for creating a subdirectory within the project folder, where prediction outputs are stored if `save` is enabled. |
| `stream` | `bool` | `False` | Enables memory-efficient processing for long videos or numerous images by returning a generator of Results objects instead of loading all frames into memory at once. |
| `verbose` | `bool` | `True` | Controls whether to display detailed inference logs in the terminal, providing real-time feedback on the prediction process. |
| `compile` | `bool` or `str` | `False` | Enables PyTorch 2.x `torch.compile` graph compilation with `backend='inductor'`. Accepts `True``"default"`, `False` → disables, or a string mode such as `"default"`, `"reduce-overhead"`, `"max-autotune-no-cudagraphs"`. Falls back to eager with a warning if unsupported. |
| `end2end` | `bool` | `None` | Overrides the end-to-end mode in YOLO models that support NMS-free inference (YOLO26, YOLOv10). Setting it to `False` lets you run prediction using the traditional NMS pipeline, additionally allowing you to make use of the `iou` argument. See the [End-to-End Detection guide](../guides/end2end-detection.md) for details. |
| Argument | Type | Default | Description |
| --------------- | ------------------------ | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `source` | `str` or `int` or `None` | `None` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. If omitted, a warning is logged and the model falls back to the built-in demo assets (`ultralytics/assets`, or a demo URL for OBB). Supports a wide range of formats and sources, enabling flexible application across [different types of input](https://docs.ultralytics.com/modes/predict/#inference-sources). |
| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. |
| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. |
| `imgsz` | `int` or `tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. |
| `rect` | `bool` | `True` | If enabled, minimally pads the shorter side of the image until it's divisible by stride to improve inference speed. If disabled, pads the image to a square during inference. |
| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. |
| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0`, `0`, `npu` or `npu:0`). Allows users to select between CPU, a specific GPU, Huawei Ascend NPU, or other compute devices for model execution. |
| `batch` | `int` | `1` | Specifies the batch size for inference (only works when the source is [a directory, video file, or `.txt` file](https://docs.ultralytics.com/modes/predict/#inference-sources)). A larger batch size can provide higher throughput, shortening the total amount of time required for inference. |
| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. |
| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. |
| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames get dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames get skipped, but will cause latency if inference FPS is lower than stream FPS. |
| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. |
| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. |
| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. For end-to-end models (YOLO26, YOLOv10), this only prevents the same detection from appearing with multiple class labels (IoU=1.0 duplicates) and does not perform IoU-threshold-based suppression between distinct boxes. |
| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. |
| `retina_masks` | `bool` | `False` | Returns high-resolution segmentation masks. The returned masks (`masks.data`) will match the original image size if enabled. If disabled, they have the image size used during inference. |
| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. |
| `project` | `str` | `None` | Name of the project directory where prediction outputs are saved if `save` is enabled. |
| `name` | `str` | `None` | Name of the prediction run. Used for creating a subdirectory within the project folder, where prediction outputs are stored if `save` is enabled. |
| `stream` | `bool` | `False` | Enables memory-efficient processing for long videos or numerous images by returning a generator of Results objects instead of loading all frames into memory at once. |
| `verbose` | `bool` | `True` | Controls whether to display detailed inference logs in the terminal, providing real-time feedback on the prediction process. |
| `compile` | `bool` or `str` | `False` | Enables PyTorch 2.x `torch.compile` graph compilation with `backend='inductor'`. Accepts `True``"default"`, `False` → disables, or a string mode such as `"default"`, `"reduce-overhead"`, `"max-autotune-no-cudagraphs"`. Falls back to eager with a warning if unsupported. |
| `end2end` | `bool` | `None` | Overrides the end-to-end mode in YOLO models that support NMS-free inference (YOLO26, YOLOv10). Setting it to `False` lets you run prediction using the traditional NMS pipeline, additionally allowing you to make use of the `iou` argument. See the [End-to-End Detection guide](../guides/end2end-detection.md) for details. |

View file

@ -7,11 +7,11 @@
"show_in": ["bool", "True", "Flag to control whether to display the in counts on the video stream."],
"show_out": ["bool", "True", "Flag to control whether to display the out counts on the video stream."],
"analytics_type": ["str", "'line'", "Type of graph, i.e., `line`, `bar`, `area`, or `pie`."],
"colormap": ["int", "cv2.COLORMAP_JET", "Colormap to use for the heatmap."],
"colormap": ["int", "cv2.COLORMAP_DEEPGREEN", "Colormap to use for the heatmap."],
"json_file": ["str", "None", "Path to the JSON file that contains all parking coordinates data."],
"up_angle": ["float", "145.0", "Angle threshold for the 'up' pose."],
"kpts": ["list[int]", "'[6, 8, 10]'", "List of three keypoint indices used for monitoring workouts. These keypoints correspond to body joints or parts, such as shoulders, elbows, and wrists, for exercises like push-ups, pull-ups, squats, and ab-workouts."],
"down_angle": ["float", "90.0", "Angle threshold for the 'down' pose."],
"down_angle": ["int", "90", "Angle threshold for the 'down' pose."],
"blur_ratio": ["float", "0.5", "Adjusts percentage of blur intensity, with values in range `0.1 - 1.0`."],
"crop_dir": ["str", "'cropped-detections'", "Directory name for storing cropped detections."],
"records": ["int", "5", "Total detections count to trigger an email with security alarm system."],

View file

@ -40,6 +40,7 @@
| `warmup_bias_lr` | `float` | `0.1` | Learning rate for bias parameters during the warmup phase, helping stabilize model training in the initial epochs. |
| `box` | `float` | `7.5` | Weight of the box loss component in the [loss function](https://www.ultralytics.com/glossary/loss-function), influencing how much emphasis is placed on accurately predicting [bounding box](https://www.ultralytics.com/glossary/bounding-box) coordinates. |
| `cls` | `float` | `0.5` | Weight of the classification loss in the total loss function, affecting the importance of correct class prediction relative to other components. |
| `cls_pw` | `float` | `0.0` | Power for class weighting to handle class imbalance using inverse class frequency. `0.0` disables class weighting, `1.0` applies full inverse frequency weighting. Values between 0 and 1 provide partial weighting. |
| `dfl` | `float` | `1.5` | Weight of the distribution focal loss, used in certain YOLO versions for fine-grained classification. |
| `pose` | `float` | `12.0` | Weight of the pose loss in models trained for pose estimation, influencing the emphasis on accurately predicting pose keypoints. |
| `kobj` | `float` | `1.0` | Weight of the keypoint objectness loss in pose estimation models, balancing detection confidence with pose accuracy. |

View file

@ -217,6 +217,9 @@ davis.justin@mssm.org:
esat@ultralytics.com:
avatar: https://avatars.githubusercontent.com/u/43647848?v=4
username: artest08
faruk.gmstss@gmail.com:
avatar: null
username: null
fatih@ultralytics.com:
avatar: https://avatars.githubusercontent.com/u/34196005?v=4
username: fcakyon
@ -259,6 +262,9 @@ lakshantha@ultralytics.com:
lakshanthad@yahoo.com:
avatar: https://avatars.githubusercontent.com/u/20147381?v=4
username: lakshanthad
lli28@nd.edu:
avatar: https://avatars.githubusercontent.com/u/126812331?v=4
username: easyrider11
lukasbuligonantunes@gmail.com:
avatar: https://avatars.githubusercontent.com/u/48484445?v=4
username: Buligon

View file

@ -16,7 +16,7 @@ pip install -r requirements.txt
### Installing `onnxruntime-gpu` (Optional)
For accelerated inference using an NVIDIA GPU, install the **`onnxruntime-gpu`** package. Ensure you have the correct [NVIDIA drivers](https://www.nvidia.com/Download/index.aspx) and [CUDA toolkit](https://developer.nvidia.com/cuda-toolkit) installed first. Consult the official [ONNX Runtime GPU documentation](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility information and setup instructions.
For accelerated inference using an NVIDIA GPU, install the **`onnxruntime-gpu`** package. Ensure you have the correct [NVIDIA drivers](https://www.nvidia.com/Download/index.aspx) and [CUDA toolkit](https://developer.nvidia.com/cuda/toolkit) installed first. Consult the official [ONNX Runtime GPU documentation](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility information and setup instructions.
```bash
pip install onnxruntime-gpu

View file

@ -12,7 +12,7 @@ This repository provides a [Rust](https://rust-lang.org/) demo showcasing key [U
- **Extensive Model Compatibility**: Supports a wide range of YOLO versions including [YOLOv5](https://docs.ultralytics.com/models/yolov5/), [YOLOv6](https://docs.ultralytics.com/models/yolov6/), [YOLOv7](https://docs.ultralytics.com/models/yolov7/), [YOLOv8](https://docs.ultralytics.com/models/yolov8/), [YOLOv9](https://docs.ultralytics.com/models/yolov9/), [YOLOv10](https://docs.ultralytics.com/models/yolov10/), [YOLO11](https://docs.ultralytics.com/models/yolo11/), [YOLO-World](https://docs.ultralytics.com/models/yolo-world/), [RT-DETR](https://docs.ultralytics.com/models/rtdetr/), and others.
- **Versatile Task Coverage**: Includes examples for `Classification`, `Segmentation`, `Detection`, `Pose`, and `OBB`.
- **Precision Flexibility**: Works seamlessly with `FP16` and `FP32` precision [ONNX models](https://docs.ultralytics.com/integrations/onnx/).
- **Execution Providers**: Accelerated support for `CPU`, [CUDA](https://developer.nvidia.com/cuda-toolkit), [CoreML](https://developer.apple.com/documentation/coreml), and [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/).
- **Execution Providers**: Accelerated support for `CPU`, [CUDA](https://developer.nvidia.com/cuda/toolkit), [CoreML](https://developer.apple.com/documentation/coreml), and [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/).
- **Dynamic Input Shapes**: Dynamically adjusts to variable `batch`, `width`, and `height` dimensions for flexible model input.
- **Flexible Data Loading**: The `DataLoader` component handles images, folders, videos, and real-time video streams.
- **Real-Time Display and Video Export**: The `Viewer` provides real-time frame visualization and video export functions, similar to OpenCV's `imshow()` and `imwrite()`.
@ -45,7 +45,7 @@ This repository provides a [Rust](https://rust-lang.org/) demo showcasing key [U
### 2. [Optional] Install CUDA, CuDNN, and TensorRT
- The CUDA execution provider requires [NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) version `12.x`.
- The CUDA execution provider requires [NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda/toolkit) version `12.x`.
- The TensorRT execution provider requires both CUDA `12.x` and [NVIDIA TensorRT](https://developer.nvidia.com/tensorrt) `10.x`. Ensure [cuDNN](https://developer.nvidia.com/cudnn) is also correctly installed.
### 3. [Optional] Install ffmpeg

View file

@ -8,7 +8,7 @@ This example provides a practical guide on performing inference with [Ultralytic
- **Deployment-Friendly:** Well-suited for deployment in industrial and production environments.
- **Performance:** Offers faster [inference latency](https://www.ultralytics.com/glossary/inference-latency) compared to OpenCV's DNN module on both CPU and [GPU](https://www.ultralytics.com/glossary/gpu-graphics-processing-unit).
- **Acceleration:** Supports FP32 and [FP16 (Half Precision)](https://www.ultralytics.com/glossary/half-precision) inference acceleration using [NVIDIA CUDA](https://developer.nvidia.com/cuda-toolkit).
- **Acceleration:** Supports FP32 and [FP16 (Half Precision)](https://www.ultralytics.com/glossary/half-precision) inference acceleration using [NVIDIA CUDA](https://developer.nvidia.com/cuda/toolkit).
## ☕ Note
@ -85,7 +85,7 @@ Ensure you have the following dependencies installed:
| [OpenCV](https://opencv.org/releases/) | >=4.0.0 | Required for image loading and preprocessing. |
| C++ Compiler | C++17 Support | Needed for features like `<filesystem>`. ([GCC](https://gcc.gnu.org/), [Clang](https://clang.llvm.org/), [MSVC](https://visualstudio.microsoft.com/vs/features/cplusplus/)) |
| [CMake](https://cmake.org/download/) | >=3.18 | Cross-platform build system generator. Version 3.18+ recommended for better CUDA support discovery. |
| [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) (Optional) | >=11.4, <12.0 | Required for GPU acceleration via ONNX Runtime's CUDA Execution Provider. **Must be CUDA 11.x**. |
| [CUDA Toolkit](https://developer.nvidia.com/cuda/toolkit) (Optional) | >=11.4, <12.0 | Required for GPU acceleration via ONNX Runtime's CUDA Execution Provider. **Must be CUDA 11.x**. |
| [cuDNN](https://developer.nvidia.com/cudnn) (CUDA required) | =8.x | Required by CUDA Execution Provider. **Must be cuDNN 8.x** compatible with your CUDA 11.x version. |
**Important Notes:**

View file

@ -35,7 +35,7 @@ Please follow the official Rust installation guide: [https://www.rust-lang.org/t
### 3. [Optional] Install CUDA & CuDNN & TensorRT
- The CUDA execution provider requires [CUDA](https://developer.nvidia.com/cuda-toolkit) v11.6+.
- The CUDA execution provider requires [CUDA](https://developer.nvidia.com/cuda/toolkit) v11.6+.
- The TensorRT execution provider requires CUDA v11.4+ and [TensorRT](https://developer.nvidia.com/tensorrt) v8.4+. You may also need [cuDNN](https://developer.nvidia.com/cudnn).
## ▶️ Get Started

View file

@ -54,7 +54,7 @@ We welcome contributions to improve this demo! If you encounter bugs, have featu
## 📄 License
This project is licensed under the AGPL-3.0 License. For detailed information, please see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file or read the full [AGPL-3.0 license text](https://opensource.org/license/agpl-v3).
This project is licensed under the AGPL-3.0 License. For detailed information, please see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file or read the full [AGPL-3.0 license text](https://opensource.org/license/agpl-3.0).
## 🙏 Acknowledgments

View file

@ -3,6 +3,49 @@
import shutil
from pathlib import Path
import pytest
@pytest.fixture(scope="session")
def solution_assets():
    """Session-scoped fixture to cache solution test assets.

    Lazily downloads solution assets into a persistent directory (WEIGHTS_DIR/solution_assets) and returns a callable
    that resolves asset names to cached paths.
    """
    from ultralytics.utils import ASSETS_URL, WEIGHTS_DIR
    from ultralytics.utils.downloads import safe_download

    # Use persistent directory alongside weights
    cache_dir = WEIGHTS_DIR / "solution_assets"
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Define all assets needed for solution tests (logical name -> remote filename on ASSETS_URL)
    assets = {
        # Videos
        "demo_video": "solutions_ci_demo.mp4",
        "crop_video": "decelera_landscape_min.mov",
        "pose_video": "solution_ci_pose_demo.mp4",
        "parking_video": "solution_ci_parking_demo.mp4",
        "vertical_video": "solution_vertical_demo.mp4",
        # Parking manager files
        "parking_areas": "solution_ci_parking_areas.json",
        "parking_model": "solutions_ci_parking_model.pt",
    }

    # Memoizes resolved paths so each asset is downloaded at most once per session
    asset_paths = {}

    def get_asset(name):
        """Return the cached path for a named solution asset, downloading it on first use."""
        if name not in asset_paths:
            asset_path = cache_dir / assets[name]
            if not asset_path.exists():
                # Download only on cache miss; later sessions reuse the file on disk
                safe_download(url=f"{ASSETS_URL}/{asset_path.name}", dir=cache_dir)
            asset_paths[name] = asset_path
        return asset_paths[name]

    return get_asset
def pytest_addoption(parser):
"""Add custom command-line options to pytest."""
@ -55,5 +98,5 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
# Remove directories
models = [path for x in {"*.mlpackage", "*_openvino_model"} for path in WEIGHTS_DIR.rglob(x)]
for directory in [WEIGHTS_DIR / "path with spaces", *models]:
for directory in [WEIGHTS_DIR / "solution_assets", WEIGHTS_DIR / "path with spaces", *models]:
shutil.rmtree(directory, ignore_errors=True)

View file

@ -1,15 +1,18 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import sys
from types import SimpleNamespace
from unittest import mock
import pytest
import torch
from tests import MODEL, SOURCE
from tests import MODEL, SOURCE, TASK_MODEL_DATA
from ultralytics import YOLO
from ultralytics.cfg import get_cfg
from ultralytics.engine.exporter import Exporter
from ultralytics.models.yolo import classify, detect, segment
from ultralytics.models.yolo import classify, detect, obb, pose, segment
from ultralytics.nn.tasks import load_checkpoint
from ultralytics.utils import ASSETS, DEFAULT_CFG, WEIGHTS_DIR
@ -22,122 +25,124 @@ def test_export():
"""Test model exporting functionality by adding a callback and verifying its execution."""
exporter = Exporter()
exporter.add_callback("on_export_start", test_func)
assert test_func in exporter.callbacks["on_export_start"], "callback test failed"
assert test_func in exporter.callbacks["on_export_start"], "on_export_start callback not registered"
f = exporter(model=YOLO("yolo26n.yaml").model)
YOLO(f)(SOURCE) # exported model inference
def test_detect():
"""Test YOLO object detection training, validation, and prediction functionality."""
overrides = {"data": "coco8.yaml", "model": "yolo26n.yaml", "imgsz": 32, "epochs": 1, "save": False}
cfg = get_cfg(DEFAULT_CFG)
cfg.data = "coco8.yaml"
cfg.imgsz = 32
# Trainer
trainer = detect.DetectionTrainer(overrides=overrides)
trainer.add_callback("on_train_start", test_func)
assert test_func in trainer.callbacks["on_train_start"], "callback test failed"
trainer.train()
# Validator
val = detect.DetectionValidator(args=cfg)
val.add_callback("on_val_start", test_func)
assert test_func in val.callbacks["on_val_start"], "callback test failed"
val(model=trainer.best) # validate best.pt
# Predictor
pred = detect.DetectionPredictor(overrides={"imgsz": [64, 64]})
pred.add_callback("on_predict_start", test_func)
assert test_func in pred.callbacks["on_predict_start"], "callback test failed"
# Confirm there is no issue with sys.argv being empty
with mock.patch.object(sys, "argv", []):
result = pred(source=ASSETS, model=MODEL)
assert len(result), "predictor test failed"
# Test resume functionality
overrides["resume"] = trainer.last
trainer = detect.DetectionTrainer(overrides=overrides)
try:
trainer.train()
except Exception as e:
print(f"Expected exception caught: {e}")
return
raise Exception("Resume test failed!")
def test_segment():
"""Test image segmentation training, validation, and prediction pipelines using YOLO models."""
@pytest.mark.parametrize(
"trainer_cls,validator_cls,predictor_cls,data,model,weights",
[
(
detect.DetectionTrainer,
detect.DetectionValidator,
detect.DetectionPredictor,
"coco8.yaml",
"yolo26n.yaml",
MODEL,
),
(
segment.SegmentationTrainer,
segment.SegmentationValidator,
segment.SegmentationPredictor,
"coco8-seg.yaml",
"yolo26n-seg.yaml",
WEIGHTS_DIR / "yolo26n-seg.pt",
),
(
classify.ClassificationTrainer,
classify.ClassificationValidator,
classify.ClassificationPredictor,
"imagenet10",
"yolo26n-cls.yaml",
None,
),
(obb.OBBTrainer, obb.OBBValidator, obb.OBBPredictor, "dota8.yaml", "yolo26n-obb.yaml", None),
(pose.PoseTrainer, pose.PoseValidator, pose.PosePredictor, "coco8-pose.yaml", "yolo26n-pose.yaml", None),
],
)
def test_task(trainer_cls, validator_cls, predictor_cls, data, model, weights):
"""Test YOLO training, validation, and prediction for various tasks."""
overrides = {
"data": "coco8-seg.yaml",
"model": "yolo26n-seg.yaml",
"data": data,
"model": model,
"imgsz": 32,
"epochs": 1,
"save": False,
"mask_ratio": 1,
"overlap_mask": False,
}
cfg = get_cfg(DEFAULT_CFG)
cfg.data = "coco8-seg.yaml"
cfg.imgsz = 32
# Trainer
trainer = segment.SegmentationTrainer(overrides=overrides)
trainer = trainer_cls(overrides=overrides)
trainer.add_callback("on_train_start", test_func)
assert test_func in trainer.callbacks["on_train_start"], "callback test failed"
assert test_func in trainer.callbacks["on_train_start"], "on_train_start callback not registered"
trainer.train()
# Validator
val = segment.SegmentationValidator(args=cfg)
val.add_callback("on_val_start", test_func)
assert test_func in val.callbacks["on_val_start"], "callback test failed"
val(model=trainer.best) # validate best.pt
# Predictor
pred = segment.SegmentationPredictor(overrides={"imgsz": [64, 64]})
pred.add_callback("on_predict_start", test_func)
assert test_func in pred.callbacks["on_predict_start"], "callback test failed"
result = pred(source=ASSETS, model=WEIGHTS_DIR / "yolo26n-seg.pt")
assert len(result), "predictor test failed"
# Test resume functionality
overrides["resume"] = trainer.last
trainer = segment.SegmentationTrainer(overrides=overrides)
try:
trainer.train()
except Exception as e:
print(f"Expected exception caught: {e}")
return
raise Exception("Resume test failed!")
def test_classify():
"""Test image classification including training, validation, and prediction phases."""
overrides = {"data": "imagenet10", "model": "yolo26n-cls.yaml", "imgsz": 32, "epochs": 1, "save": False}
cfg = get_cfg(DEFAULT_CFG)
cfg.data = "imagenet10"
cfg.data = data
cfg.imgsz = 32
# Trainer
trainer = classify.ClassificationTrainer(overrides=overrides)
trainer.add_callback("on_train_start", test_func)
assert test_func in trainer.callbacks["on_train_start"], "callback test failed"
trainer.train()
# Validator
val = classify.ClassificationValidator(args=cfg)
val = validator_cls(args=cfg)
val.add_callback("on_val_start", test_func)
assert test_func in val.callbacks["on_val_start"], "callback test failed"
assert test_func in val.callbacks["on_val_start"], "on_val_start callback not registered"
val(model=trainer.best)
# Predictor
pred = classify.ClassificationPredictor(overrides={"imgsz": [64, 64]})
pred = predictor_cls(overrides={"imgsz": [64, 64]})
pred.add_callback("on_predict_start", test_func)
assert test_func in pred.callbacks["on_predict_start"], "callback test failed"
result = pred(source=ASSETS, model=trainer.best)
assert len(result), "predictor test failed"
assert test_func in pred.callbacks["on_predict_start"], "on_predict_start callback not registered"
# Determine model path for prediction
model_path = weights if weights else trainer.best
if model == "yolo26n.yaml": # only for detection
# Confirm there is no issue with sys.argv being empty
with mock.patch.object(sys, "argv", []):
result = pred(source=ASSETS, model=model_path)
assert len(result) > 0, f"Predictor returned no results for {model}"
else:
result = pred(source=ASSETS, model=model_path)
assert len(result) > 0, f"Predictor returned no results for {model}"
# Test resume functionality
with pytest.raises(AssertionError):
trainer_cls(overrides={**overrides, "resume": trainer.last}).train()
@pytest.mark.parametrize("task,weight,data", TASK_MODEL_DATA)
def test_resume_incomplete(task, weight, data, tmp_path):
    """Test training resumes from an incomplete checkpoint.

    Starts a 2-epoch run, stops it after the first epoch so last.pt records an
    unfinished run, then resumes from that checkpoint and verifies training
    continues at epoch 1.
    """
    train_args = {
        "data": data,
        "epochs": 2,
        "save": True,  # checkpoints must be saved so last.pt exists for the resume step
        "plots": False,
        "workers": 0,
        "project": tmp_path,
        "name": task,
        "imgsz": 32,
        "exist_ok": True,  # reuse the same run directory when resuming
    }

    def stop_after_first_epoch(trainer):
        """Request an early stop once the first epoch (epoch 0) finishes."""
        if trainer.epoch == 0:
            trainer.stop = True

    def disable_final_eval(trainer):
        """Disable the trainer's final evaluation so the stopped run ends quickly."""
        trainer.final_eval = lambda: None

    model = YOLO(weight)
    model.add_callback("on_train_start", disable_final_eval)
    model.add_callback("on_train_epoch_end", stop_after_first_epoch)
    model.train(**train_args)

    # The saved checkpoint should record epoch 0, i.e. an incomplete 2-epoch run
    last_path = model.trainer.last
    _, ckpt = load_checkpoint(last_path)
    assert ckpt["epoch"] == 0, "checkpoint should be resumable"

    # Resume training using the checkpoint
    resume_model = YOLO(last_path)
    resume_model.train(resume=True, **train_args)
    assert resume_model.trainer.start_epoch == resume_model.trainer.epoch == 1, "resume test failed"
def test_nan_recovery():
@ -155,3 +160,44 @@ def test_nan_recovery():
trainer.add_callback("on_train_batch_end", inject_nan)
trainer.train()
assert nan_injected[0], "NaN injection failed"
def test_train_reuses_loaded_checkpoint_model(monkeypatch):
    """Test training reuses an already-loaded checkpoint model instead of re-parsing the model source."""
    model = YOLO("yolo26n.yaml")
    # Simulate a model that was loaded from a checkpoint: ckpt/ckpt_path are set and
    # overrides["model"] points at a non-local source that must NOT be re-parsed
    model.ckpt = {"checkpoint": True}
    model.ckpt_path = "/tmp/fake.pt"
    model.overrides["model"] = "ul://glenn-jocher/m2/exp-14"
    original_model = model.model
    captured = {}  # records what the trainer receives, for the assertions below

    class FakeTrainer:
        # Stand-in trainer capturing the cfg/weights passed by Model.train()

        def __init__(self, overrides=None, _callbacks=None):
            self.overrides = overrides
            self.callbacks = _callbacks
            self.model = None
            self.validator = SimpleNamespace(metrics=None)
            self.best = MODEL.parent / "nonexistent-best.pt"  # nonexistent so post-train best.pt handling is skipped
            self.last = MODEL
            captured["trainer"] = self

        def get_model(self, cfg=None, weights=None, verbose=True):
            # Capture what the training pipeline would build the model from
            captured["cfg"] = cfg
            captured["weights"] = weights
            return original_model

        def train(self):
            # No-op: the test only inspects what was passed in, not real training
            return None

    # Avoid network access and force Model.train() to use the fake trainer
    monkeypatch.setattr("ultralytics.engine.model.checks.check_pip_update_available", lambda: None)
    monkeypatch.setattr(model, "_smart_load", lambda key: FakeTrainer)
    monkeypatch.setattr(
        "ultralytics.engine.model.load_checkpoint",
        lambda path: (original_model, {"checkpoint": True}),
    )

    model.train(data="coco8.yaml", epochs=1)

    # The already-loaded model object must be reused, not rebuilt from the source string
    assert captured["trainer"].model is original_model, "Trainer model does not match original"
    assert captured["cfg"] == original_model.yaml, f"Config mismatch: {captured['cfg']} != {original_model.yaml}"
    assert captured["weights"] is original_model, "Weights do not match original model"

View file

@ -64,8 +64,8 @@ def test_torch2onnx_serializes_concurrent_exports(monkeypatch, tmp_path):
for thread in threads:
thread.join()
assert not errors
assert max_active == 1
assert not errors, f"Concurrent export errors: {errors}"
assert max_active == 1, f"Expected max 1 concurrent export, got {max_active}"
@pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
@ -341,7 +341,7 @@ def test_export_executorch():
file = YOLO(MODEL).export(format="executorch", imgsz=32)
assert Path(file).exists(), f"ExecuTorch export failed, directory not found: {file}"
# Check that .pte file exists in the exported directory
pte_file = Path(file) / Path(MODEL).with_suffix(".pte").name
pte_file = Path(file) / "model.pte"
assert pte_file.exists(), f"ExecuTorch .pte file not found: {pte_file}"
# Check that metadata.yaml exists
metadata_file = Path(file) / "metadata.yaml"
@ -359,8 +359,7 @@ def test_export_executorch_matrix(task):
file = YOLO(TASK2MODEL[task]).export(format="executorch", imgsz=32)
assert Path(file).exists(), f"ExecuTorch export failed for task '{task}', directory not found: {file}"
# Check that .pte file exists in the exported directory
model_name = Path(TASK2MODEL[task]).with_suffix(".pte").name
pte_file = Path(file) / model_name
pte_file = Path(file) / "model.pte"
assert pte_file.exists(), f"ExecuTorch .pte file not found for task '{task}': {pte_file}"
# Check that metadata.yaml exists
metadata_file = Path(file) / "metadata.yaml"

View file

@ -3,6 +3,7 @@
import contextlib
import csv
import urllib
import zipfile
from copy import copy
from pathlib import Path
@ -35,7 +36,7 @@ from ultralytics.utils import (
checks,
is_github_action_running,
)
from ultralytics.utils.downloads import download
from ultralytics.utils.downloads import download, safe_download
from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13
@ -81,7 +82,7 @@ def test_predict_txt(tmp_path):
for src in SOURCES_LIST:
f.write(f"{src}\n")
results = YOLO(MODEL)(source=file, imgsz=32)
assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images
assert len(results) == 7, f"Expected 7 results from source list, got {len(results)}"
@pytest.mark.skipif(True, reason="disabled for testing")
@ -93,7 +94,7 @@ def test_predict_csv_multi_row(tmp_path):
writer.writerow(["source"])
writer.writerows([[src] for src in SOURCES_LIST])
results = YOLO(MODEL)(source=file, imgsz=32)
assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images
assert len(results) == 7, f"Expected 7 results from multi-row CSV, got {len(results)}"
@pytest.mark.skipif(True, reason="disabled for testing")
@ -104,7 +105,7 @@ def test_predict_csv_single_row(tmp_path):
writer = csv.writer(f)
writer.writerow(SOURCES_LIST)
results = YOLO(MODEL)(source=file, imgsz=32)
assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images
assert len(results) == 7, f"Expected 7 results from single-row CSV, got {len(results)}"
@pytest.mark.parametrize("model_name", MODELS)
@ -155,7 +156,7 @@ def test_predict_gray_and_4ch(tmp_path):
for f in source_rgba, source_grayscale, source_non_utf, source_spaces:
for source in Image.open(f), cv2.imread(str(f)), f:
results = model(source, save=True, verbose=True, imgsz=32)
assert len(results) == 1 # verify that an image was run
assert len(results) == 1, f"Expected 1 result for {f.name}, got {len(results)}"
f.unlink() # cleanup
@ -334,16 +335,21 @@ def test_labels_and_crops():
assert len(cls_idxs) >= 2, f"Expected at least 2 detections, got {len(cls_idxs)}"
# Check label path
labels = save_path / f"labels/{im_name}.txt"
assert labels.exists()
assert labels.exists(), f"Label file {labels} does not exist"
# Check detections match label count
assert len(r.boxes.data) == len([line for line in labels.read_text().splitlines() if line])
label_count = len([line for line in labels.read_text().splitlines() if line])
assert len(r.boxes.data) == label_count, f"Box count {len(r.boxes.data)} != label count {label_count}"
# Check crops path and files
crop_dirs = list((save_path / "crops").iterdir())
crop_files = [f for p in crop_dirs for f in p.glob("*")]
# Crop directories match detections
assert all(r.names.get(c) in {d.name for d in crop_dirs} for c in cls_idxs)
crop_dir_names = {d.name for d in crop_dirs}
assert all(r.names.get(c) in crop_dir_names for c in cls_idxs), (
f"Crop dirs {crop_dir_names} don't match classes {cls_idxs}"
)
# Same number of crops as detections
assert len([f for f in crop_files if im_name in f.name]) == len(r.boxes.data)
crop_count = len([f for f in crop_files if im_name in f.name])
assert crop_count == len(r.boxes.data), f"Crop count {crop_count} != detection count {len(r.boxes.data)}"
@pytest.mark.skipif(not ONLINE, reason="environment is offline")
@ -367,6 +373,27 @@ def test_data_utils(tmp_path):
zip_directory(tmp_path / "coco8/images/val") # zip
def test_safe_download_unzips_local_path_archive(tmp_path):
    """Test safe_download() unzips local archive paths without treating them like remote URLs."""
    # Build a minimal dataset tree (name contains a space to exercise path handling)
    dataset_dir = tmp_path / "coco8 local"
    archive = tmp_path / "coco8 local.zip"
    (dataset_dir / "images" / "train").mkdir(parents=True)
    (dataset_dir / "images" / "val").mkdir(parents=True)
    (dataset_dir / "labels" / "train").mkdir(parents=True)
    (dataset_dir / "labels" / "val").mkdir(parents=True)
    (dataset_dir / "data.yaml").write_text("path: .\ntrain: images/train\nval: images/val\nnames:\n  0: item\n")

    # Zip the tree with paths relative to tmp_path so the archive root is "coco8 local/"
    with zipfile.ZipFile(archive, "w") as zf:
        for path in dataset_dir.rglob("*"):
            zf.write(path, arcname=path.relative_to(tmp_path))

    # A local .zip path should be extracted in place, not fetched as a URL
    extracted = safe_download(archive, dir=tmp_path / "datasets", unzip=True, progress=False)

    expected_path = tmp_path / "datasets" / dataset_dir.name
    assert extracted == expected_path, f"Extracted path {extracted} != expected {expected_path}"
    assert (extracted / "data.yaml").is_file(), f"data.yaml not found in {extracted}"
    assert (extracted / "images" / "val").is_dir(), f"images/val not found in {extracted}"
@pytest.mark.skipif(not ONLINE, reason="environment is offline")
def test_data_converter(tmp_path):
"""Test dataset conversion functions from COCO to YOLO format and class mappings."""
@ -641,6 +668,9 @@ def test_classify_transforms_train(image, auto_augment, erasing, force_color_jit
@pytest.mark.skipif(not ONLINE, reason="environment is offline")
def test_model_tune():
"""Tune YOLO model for performance improvement."""
YOLO("yolo26n.pt").tune(
data=["coco8.yaml", "coco8-grayscale.yaml"], plots=False, imgsz=32, epochs=1, iterations=2, device="cpu"
)
YOLO("yolo26n-pose.pt").tune(data="coco8-pose.yaml", plots=False, imgsz=32, epochs=1, iterations=2, device="cpu")
YOLO("yolo26n-cls.pt").tune(data="imagenet10", plots=False, imgsz=32, epochs=1, iterations=2, device="cpu")

View file

@ -18,13 +18,6 @@ from ultralytics.utils.torch_utils import TORCH_2_4
# Predefined argument values
SHOW = False
DEMO_VIDEO = "solutions_ci_demo.mp4" # for all the solutions, except workout, object cropping and parking management
CROP_VIDEO = "decelera_landscape_min.mov" # for object cropping solution
POSE_VIDEO = "solution_ci_pose_demo.mp4" # only for workouts monitoring solution
PARKING_VIDEO = "solution_ci_parking_demo.mp4" # only for parking management solution
PARKING_AREAS_JSON = "solution_ci_parking_areas.json" # only for parking management solution
PARKING_MODEL = "solutions_ci_parking_model.pt" # only for parking management solution
VERTICAL_VIDEO = "solution_vertical_demo.mp4" # only for vertical line counting
REGION = [(10, 200), (540, 200), (540, 180), (10, 180)] # for object counting, speed estimation and queue management
HORIZONTAL_LINE = [(10, 200), (540, 200)] # for object counting
VERTICAL_LINE = [(320, 0), (320, 400)] # for object counting
@ -50,129 +43,129 @@ def process_video(solution, video_path: str, needs_frame_count: bool = False):
@pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled for testing due to --slow test errors after YOLOE PR.")
@pytest.mark.parametrize(
"name, solution_class, needs_frame_count, video, kwargs",
"name, solution_class, needs_frame_count, video_key, kwargs_update",
[
(
"ObjectCounter",
solutions.ObjectCounter,
False,
DEMO_VIDEO,
"demo_video",
{"region": REGION, "model": MODEL, "show": SHOW},
),
(
"ObjectCounter",
solutions.ObjectCounter,
False,
DEMO_VIDEO,
"demo_video",
{"region": HORIZONTAL_LINE, "model": MODEL, "show": SHOW},
),
(
"ObjectCounterVertical",
solutions.ObjectCounter,
False,
DEMO_VIDEO,
"vertical_video",
{"region": VERTICAL_LINE, "model": MODEL, "show": SHOW},
),
(
"ObjectCounterwithOBB",
solutions.ObjectCounter,
False,
DEMO_VIDEO,
"demo_video",
{"region": REGION, "model": "yolo26n-obb.pt", "show": SHOW},
),
(
"Heatmap",
solutions.Heatmap,
False,
DEMO_VIDEO,
"demo_video",
{"colormap": cv2.COLORMAP_PARULA, "model": MODEL, "show": SHOW, "region": None},
),
(
"HeatmapWithRegion",
solutions.Heatmap,
False,
DEMO_VIDEO,
"demo_video",
{"colormap": cv2.COLORMAP_PARULA, "region": REGION, "model": MODEL, "show": SHOW},
),
(
"SpeedEstimator",
solutions.SpeedEstimator,
False,
DEMO_VIDEO,
"demo_video",
{"region": REGION, "model": MODEL, "show": SHOW},
),
(
"QueueManager",
solutions.QueueManager,
False,
DEMO_VIDEO,
"demo_video",
{"region": REGION, "model": MODEL, "show": SHOW},
),
(
"LineAnalytics",
solutions.Analytics,
True,
DEMO_VIDEO,
"demo_video",
{"analytics_type": "line", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)},
),
(
"PieAnalytics",
solutions.Analytics,
True,
DEMO_VIDEO,
"demo_video",
{"analytics_type": "pie", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)},
),
(
"BarAnalytics",
solutions.Analytics,
True,
DEMO_VIDEO,
"demo_video",
{"analytics_type": "bar", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)},
),
(
"AreaAnalytics",
solutions.Analytics,
True,
DEMO_VIDEO,
"demo_video",
{"analytics_type": "area", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)},
),
("TrackZone", solutions.TrackZone, False, DEMO_VIDEO, {"region": REGION, "model": MODEL, "show": SHOW}),
("TrackZone", solutions.TrackZone, False, "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}),
(
"ObjectCropper",
solutions.ObjectCropper,
False,
CROP_VIDEO,
"crop_video",
{"temp_crop_dir": "cropped-detections", "model": MODEL, "show": SHOW},
),
(
"ObjectBlurrer",
solutions.ObjectBlurrer,
False,
DEMO_VIDEO,
"demo_video",
{"blur_ratio": 0.02, "model": MODEL, "show": SHOW},
),
(
"InstanceSegmentation",
solutions.InstanceSegmentation,
False,
DEMO_VIDEO,
"demo_video",
{"model": "yolo26n-seg.pt", "show": SHOW},
),
("VisionEye", solutions.VisionEye, False, DEMO_VIDEO, {"model": MODEL, "show": SHOW}),
("VisionEye", solutions.VisionEye, False, "demo_video", {"model": MODEL, "show": SHOW}),
(
"RegionCounter",
solutions.RegionCounter,
False,
DEMO_VIDEO,
"demo_video",
{"region": REGION, "model": MODEL, "show": SHOW},
),
("AIGym", solutions.AIGym, False, POSE_VIDEO, {"kpts": [6, 8, 10], "show": SHOW}),
("AIGym", solutions.AIGym, False, "pose_video", {"kpts": [6, 8, 10], "show": SHOW}),
(
"ParkingManager",
solutions.ParkingManagement,
False,
PARKING_VIDEO,
{"temp_model": str(PARKING_MODEL), "show": SHOW, "temp_json_file": str(PARKING_AREAS_JSON)},
"parking_video",
{"model": "parking_model", "show": SHOW, "json_file": "parking_areas"},
),
(
"StreamlitInference",
@ -183,34 +176,31 @@ def process_video(solution, video_path: str, needs_frame_count: bool = False):
),
],
)
def test_solution(name, solution_class, needs_frame_count, video, kwargs, tmp_path):
def test_solution(name, solution_class, needs_frame_count, video_key, kwargs_update, tmp_path, solution_assets):
"""Test individual Ultralytics solution with video processing and parameter validation."""
if video:
if name != "ObjectCounterVertical":
safe_download(url=f"{ASSETS_URL}/{video}", dir=tmp_path)
else:
safe_download(url=f"{ASSETS_URL}/{VERTICAL_VIDEO}", dir=tmp_path)
if name == "ParkingManager":
safe_download(url=f"{ASSETS_URL}/{PARKING_AREAS_JSON}", dir=tmp_path)
safe_download(url=f"{ASSETS_URL}/{PARKING_MODEL}", dir=tmp_path)
# Get video path from persistent cache (no copying needed, read-only access)
video_path = str(solution_assets(video_key)) if video_key else None
elif name == "StreamlitInference":
# Update kwargs to use cached paths for parking manager
kwargs = {}
for key, value in kwargs_update.items():
if key.startswith("temp_"):
kwargs[key.replace("temp_", "")] = str(tmp_path / value)
elif value == "parking_model":
kwargs[key] = str(solution_assets("parking_model"))
elif value == "parking_areas":
kwargs[key] = str(solution_assets("parking_areas"))
else:
kwargs[key] = value
if name == "StreamlitInference":
if checks.check_imshow(): # do not merge with elif above
solution_class(**kwargs).inference() # requires interactive GUI environment
return
# Update kwargs to use tmp_path
kwargs_updated = {}
for key in kwargs:
if key.startswith("temp_"):
kwargs_updated[key.replace("temp_", "")] = str(tmp_path / kwargs[key])
else:
kwargs_updated[key] = kwargs[key]
video = VERTICAL_VIDEO if name == "ObjectCounterVertical" else video
process_video(
solution=solution_class(**kwargs_updated),
video_path=str(tmp_path / video),
solution=solution_class(**kwargs),
video_path=video_path,
needs_frame_count=needs_frame_count,
)
@ -220,7 +210,7 @@ def test_left_click_selection():
dc = solutions.DistanceCalculation()
dc.boxes, dc.track_ids = [[10, 10, 50, 50]], [1]
dc.mouse_event_for_distance(cv2.EVENT_LBUTTONDOWN, 30, 30, None, None)
assert 1 in dc.selected_boxes
assert 1 in dc.selected_boxes, f"Expected track_id 1 in selected_boxes, got {dc.selected_boxes}"
def test_right_click_reset():
@ -228,8 +218,8 @@ def test_right_click_reset():
dc = solutions.DistanceCalculation()
dc.selected_boxes, dc.left_mouse_count = {1: [10, 10, 50, 50]}, 1
dc.mouse_event_for_distance(cv2.EVENT_RBUTTONDOWN, 0, 0, None, None)
assert not dc.selected_boxes
assert dc.left_mouse_count == 0
assert not dc.selected_boxes, f"Expected empty selected_boxes after reset, got {dc.selected_boxes}"
assert dc.left_mouse_count == 0, f"Expected left_mouse_count=0 after reset, got {dc.left_mouse_count}"
def test_parking_json_none():
@ -249,7 +239,7 @@ def test_analytics_graph_not_supported():
analytics.process(im0=np.zeros((640, 480, 3), dtype=np.uint8), frame_number=0)
assert False, "Expected ValueError for unsupported chart type"
except ValueError as e:
assert "Unsupported analytics_type" in str(e)
assert "Unsupported analytics_type" in str(e), f"Expected 'Unsupported analytics_type' in error, got: {e}"
def test_area_chart_padding():
@ -257,7 +247,7 @@ def test_area_chart_padding():
analytics = solutions.Analytics(analytics_type="area")
analytics.update_graph(frame_number=1, count_dict={"car": 2}, plot="area")
plot_im = analytics.update_graph(frame_number=2, count_dict={"car": 3, "person": 1}, plot="area")
assert plot_im is not None
assert plot_im is not None, "Area chart plot returned None"
def test_config_update_method_with_invalid_argument():
@ -267,7 +257,7 @@ def test_config_update_method_with_invalid_argument():
obj.update(invalid_key=123)
assert False, "Expected ValueError for invalid update argument"
except ValueError as e:
assert "is not a valid solution argument" in str(e)
assert "is not a valid solution argument" in str(e), f"Expected validation error message, got: {e}"
def test_plot_with_no_masks():
@ -275,7 +265,7 @@ def test_plot_with_no_masks():
im0 = np.zeros((640, 480, 3), dtype=np.uint8)
isegment = solutions.InstanceSegmentation(model="yolo26n-seg.pt")
results = isegment(im0)
assert results.plot_im is not None
assert results.plot_im is not None, "Instance segmentation plot returned None"
def test_streamlit_handle_video_upload_creates_file():
@ -291,10 +281,11 @@ def test_streamlit_handle_video_upload_creates_file():
output_path = "ultralytics.mp4"
else:
output_path = None
assert output_path == "ultralytics.mp4"
assert os.path.exists("ultralytics.mp4")
assert output_path == "ultralytics.mp4", f"Expected output_path 'ultralytics.mp4', got {output_path}"
assert os.path.exists("ultralytics.mp4"), "ultralytics.mp4 file not created"
with open("ultralytics.mp4", "rb") as f:
assert f.read() == b"fake video content"
content = f.read()
assert content == b"fake video content", f"File content mismatch: {content}"
os.remove("ultralytics.mp4")
@ -329,7 +320,7 @@ def test_similarity_search_complete(tmp_path):
img.save(image_dir / f"test_image_{i}.jpg")
searcher = solutions.VisualAISearch(data=str(image_dir))
results = searcher("a red and white object")
assert results
assert results, "Similarity search returned empty results"
def test_distance_calculation_process_method():
@ -347,9 +338,9 @@ def test_distance_calculation_process_method():
frame = np.zeros((480, 640, 3), dtype=np.uint8)
with patch.object(dc, "extract_tracks"), patch.object(dc, "display_output"), patch("cv2.setMouseCallback"):
result = dc.process(frame)
assert isinstance(result, SolutionResults)
assert result.total_tracks == 2
assert result.pixels_distance > 0
assert isinstance(result, SolutionResults), f"Expected SolutionResults, got {type(result)}"
assert result.total_tracks == 2, f"Expected 2 tracks, got {result.total_tracks}"
assert result.pixels_distance > 0, f"Expected positive distance, got {result.pixels_distance}"
def test_object_crop_with_show_True():

View file

@ -1,6 +1,6 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
__version__ = "8.4.33"
__version__ = "8.4.38"
import importlib
import os

View file

@ -170,6 +170,7 @@ CFG_FLOAT_KEYS = frozenset(
"warmup_epochs",
"box",
"cls",
"cls_pw",
"dfl",
"degrees",
"shear",

View file

@ -101,6 +101,7 @@ warmup_momentum: 0.8 # (float) initial momentum during warmup
warmup_bias_lr: 0.1 # (float) bias learning rate during warmup
box: 7.5 # (float) box loss gain
cls: 0.5 # (float) classification loss gain
cls_pw: 0.0 # (float) class weights power for handling class imbalance (0.0=disable, 1.0=full inverse frequency)
dfl: 1.5 # (float) distribution focal loss gain
pose: 12.0 # (float) pose loss gain (pose tasks)
kobj: 1.0 # (float) keypoint objectness loss gain (pose tasks)

View file

@ -843,7 +843,12 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
yaml_path = dataset_dir / "data.yaml"
if yaml_path.is_file():
try:
if YAML.load(yaml_path).get("hash") == _hash:
cached = YAML.load(yaml_path)
if cached.get("hash") == _hash and all(
(dataset_dir / cached[split]).is_dir() and (dataset_dir / "labels" / split).is_dir()
for split in ("train", "val", "test")
if split in cached
):
return yaml_path
except Exception:
pass

View file

@ -153,7 +153,8 @@ class YOLODataset(BaseDataset):
x["hash"] = get_hash(self.label_files + self.im_files)
x["results"] = nf, nm, ne, nc, len(self.im_files)
x["msgs"] = msgs # warnings
save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
if x["labels"]:
save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
return x
def get_labels(self) -> list[dict]:
@ -182,12 +183,11 @@ class YOLODataset(BaseDataset):
LOGGER.info("\n".join(cache["msgs"])) # display warnings
# Read cache
[cache.pop(k) for k in ("hash", "version", "msgs")] # remove items
labels = cache["labels"]
if not labels:
raise RuntimeError(
f"No valid images found in {cache_path}. Images with incorrectly formatted labels are ignored. {HELP_URL}"
)
issues = "\n ".join(sorted(set(cache["msgs"]))) or "no error details"
raise RuntimeError(f"No valid images found in {cache_path}.\n {issues}\n{HELP_URL}")
[cache.pop(k) for k in ("hash", "version", "msgs")] # remove items
self.im_files = [lb["im_file"] for lb in labels] # update im_files
# Check if the dataset is all boxes or all segments

View file

@ -417,7 +417,9 @@ def check_det_dataset(dataset: str, autodownload: bool = True) -> dict[str, Any]
Returns:
(dict[str, Any]): Parsed dataset information and paths.
"""
file = check_file(dataset)
file = Path(check_file(dataset))
if file.is_dir():
file = find_dataset_yaml(file)
# Download (optional)
extract_dir = ""

View file

@ -476,7 +476,7 @@ class Exporter:
m.agnostic_nms = self.args.agnostic_nms
m.xyxy = self.args.nms and fmt != "coreml"
m.shape = None # reset cached shape for new export input size
if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models
if hasattr(model, "pe") and hasattr(m, "fuse") and not hasattr(m, "lrpc"): # for YOLOE models
m.fuse(model.pe.to(self.device))
elif isinstance(m, C2f) and not is_tf_format:
# EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph
@ -601,9 +601,9 @@ class Exporter:
from ultralytics.utils.export.torchscript import torch2torchscript
return torch2torchscript(
NMSModel(self.model, self.args) if self.args.nms else self.model,
self.im,
self.file,
model=NMSModel(self.model, self.args) if self.args.nms else self.model,
im=self.im,
output_file=self.file.with_suffix(".torchscript"),
optimize=self.args.optimize,
metadata=self.metadata,
prefix=prefix,
@ -692,9 +692,9 @@ class Exporter:
@try_export
def export_openvino(self, prefix=colorstr("OpenVINO:")):
"""Export YOLO model to OpenVINO format."""
from ultralytics.utils.export import torch2openvino
from ultralytics.utils.export.openvino import torch2openvino
# OpenVINO <= 2025.1.0 error on macOS 15.4+: https://github.com/openvinotoolkit/openvino/issues/30023"
# OpenVINO <= 2025.1.0 error on macOS 15.4+: https://github.com/openvinotoolkit/openvino/issues/30023
check_requirements("openvino>=2025.2.0" if MACOS and MACOS_VERSION >= "15.4" else "openvino>=2024.0.0")
import openvino as ov
@ -757,16 +757,26 @@ class Exporter:
"""Export YOLO model to PaddlePaddle format."""
from ultralytics.utils.export.paddle import torch2paddle
return torch2paddle(self.model, self.im, self.file, self.metadata, prefix)
return torch2paddle(
model=self.model,
im=self.im,
output_dir=str(self.file).replace(self.file.suffix, f"_paddle_model{os.sep}"),
metadata=self.metadata,
prefix=prefix,
)
@try_export
def export_mnn(self, prefix=colorstr("MNN:")):
"""Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN."""
from ultralytics.utils.export.mnn import onnx2mnn
f_onnx = self.export_onnx()
return onnx2mnn(
f_onnx, self.file, half=self.args.half, int8=self.args.int8, metadata=self.metadata, prefix=prefix
onnx_file=self.export_onnx(),
output_file=self.file.with_suffix(".mnn"),
half=self.args.half,
int8=self.args.int8,
metadata=self.metadata,
prefix=prefix,
)
@try_export
@ -775,9 +785,9 @@ class Exporter:
from ultralytics.utils.export.ncnn import torch2ncnn
return torch2ncnn(
self.model,
self.im,
self.file,
model=self.model,
im=self.im,
output_dir=str(self.file).replace(self.file.suffix, "_ncnn_model/"),
half=self.args.half,
metadata=self.metadata,
device=self.device,
@ -986,9 +996,7 @@ class Exporter:
"""Export YOLO model to TensorFlow GraphDef *.pb format https://github.com/leimao/Frozen-Graph-TensorFlow."""
from ultralytics.utils.export.tensorflow import keras2pb
f = self.file.with_suffix(".pb")
keras2pb(keras_model, f, prefix)
return f
return keras2pb(keras_model, output_file=self.file.with_suffix(".pb"), prefix=prefix)
@try_export
def export_tflite(self, prefix=colorstr("TensorFlow Lite:")):
@ -1016,11 +1024,13 @@ class Exporter:
from ultralytics.utils.export.axelera import torch2axelera
output_dir = self.file.parent / f"{self.file.stem}_axelera_model"
return torch2axelera(
model=self.model,
file=self.file,
output_dir=output_dir,
calibration_dataset=self.get_int8_calibration_dataloader(prefix),
transform_fn=self._transform_fn,
model_name=self.file.stem,
metadata=self.metadata,
prefix=prefix,
)
@ -1032,7 +1042,13 @@ class Exporter:
check_executorch_requirements()
from ultralytics.utils.export.executorch import torch2executorch
return torch2executorch(self.model, self.file, self.im, metadata=self.metadata, prefix=prefix)
return torch2executorch(
model=self.model,
im=self.im,
output_dir=str(self.file).replace(self.file.suffix, "_executorch_model/"),
metadata=self.metadata,
prefix=prefix,
)
@try_export
def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
@ -1055,10 +1071,9 @@ class Exporter:
from ultralytics.utils.export.tensorflow import tflite2edgetpu
LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...")
tflite2edgetpu(tflite_file=tflite_model, output_dir=tflite_model.parent, prefix=prefix)
f = str(tflite_model).replace(".tflite", "_edgetpu.tflite") # Edge TPU model
self._add_tflite_metadata(f)
return f
output_file = tflite2edgetpu(tflite_file=tflite_model, output_dir=tflite_model.parent, prefix=prefix)
self._add_tflite_metadata(output_file)
return output_file
@try_export
def export_tfjs(self, prefix=colorstr("TensorFlow.js:")):
@ -1066,12 +1081,15 @@ class Exporter:
check_requirements("tensorflowjs")
from ultralytics.utils.export.tensorflow import pb2tfjs
f = str(self.file).replace(self.file.suffix, "_web_model") # js dir
f_pb = str(self.file.with_suffix(".pb")) # *.pb path
pb2tfjs(pb_file=f_pb, output_dir=f, half=self.args.half, int8=self.args.int8, prefix=prefix)
# Add metadata
YAML.save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
return f
output_dir = pb2tfjs(
pb_file=str(self.file.with_suffix(".pb")),
output_dir=str(self.file).replace(self.file.suffix, "_web_model/"),
half=self.args.half,
int8=self.args.int8,
prefix=prefix,
)
YAML.save(Path(output_dir) / "metadata.yaml", self.metadata)
return output_dir
@try_export
def export_rknn(self, prefix=colorstr("RKNN:")):
@ -1080,7 +1098,13 @@ class Exporter:
self.args.opset = min(self.args.opset or 19, 19) # rknn-toolkit expects opset<=19
f_onnx = self.export_onnx()
return onnx2rknn(f_onnx, name=self.args.name, metadata=self.metadata, prefix=prefix)
return onnx2rknn(
onnx_file=f_onnx,
output_dir=str(self.file).replace(self.file.suffix, f"_rknn_model{os.sep}"),
name=self.args.name,
metadata=self.metadata,
prefix=prefix,
)
@try_export
def export_imx(self, prefix=colorstr("IMX:")):
@ -1120,11 +1144,11 @@ class Exporter:
check_apt_requirements(["openjdk-17-jre"])
return torch2imx(
self.model,
self.file,
self.args.conf,
self.args.iou,
self.args.max_det,
model=self.model,
output_dir=str(self.file).replace(self.file.suffix, "_imx_model/"),
conf=self.args.conf,
iou=self.args.iou,
max_det=self.args.max_det,
metadata=self.metadata,
dataset=self.get_int8_calibration_dataloader(prefix),
prefix=prefix,

View file

@ -426,7 +426,7 @@ class Model(torch.nn.Module):
self._check_is_pytorch_model()
return self.model.info(detailed=detailed, verbose=verbose, imgsz=imgsz)
def fuse(self) -> None:
def fuse(self) -> Model:
"""Fuse Conv2d and BatchNorm2d layers in the model for optimized inference.
This method iterates through the model's modules and fuses consecutive Conv2d and BatchNorm2d layers into a
@ -444,6 +444,7 @@ class Model(torch.nn.Module):
"""
self._check_is_pytorch_model()
self.model.fuse()
return self
def embed(
self,
@ -756,8 +757,6 @@ class Model(torch.nn.Module):
checks.check_pip_update_available()
if isinstance(kwargs.get("pretrained", None), (str, Path)):
self.load(kwargs["pretrained"]) # load pretrained weights if provided
overrides = YAML.load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
custom = {
# NOTE: handle the case when 'cfg' includes 'data'.
@ -781,8 +780,9 @@ class Model(torch.nn.Module):
args["resume"] = False
self.trainer = (trainer or self._smart_load("trainer"))(overrides=args, _callbacks=self.callbacks)
if not args.get("resume"): # manually set model only if not resuming
self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
if not args.get("resume") and self.ckpt:
# Reuse the already-loaded checkpoint model to avoid re-resolving remote weight sources during trainer setup.
self.trainer.model = self.trainer.get_model(weights=self.model, cfg=self.model.yaml)
self.model = self.trainer.model
self.trainer.train()

View file

@ -335,14 +335,14 @@ class BaseTrainer:
self.scaler = (
torch.amp.GradScaler("cuda", enabled=self.amp) if TORCH_2_4 else torch.cuda.amp.GradScaler(enabled=self.amp)
)
if self.world_size > 1:
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True)
# Check imgsz
gs = max(int(self.model.stride.max() if hasattr(self.model, "stride") else 32), 32) # grid size (max stride)
self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
self.stride = gs # for multiscale training
if self.world_size > 1:
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True)
# Batch size
if self.batch_size < 1 and RANK == -1: # single-GPU only, estimate best batch size
self.args.batch = self.batch_size = self.auto_batch()
@ -350,6 +350,7 @@ class BaseTrainer:
self._build_train_pipeline()
self.validator = self.get_validator()
self.ema = ModelEMA(self.model)
self.set_class_weights() # compute class weights after dataloader is ready
if RANK in {-1, 0}:
metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val")
self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
@ -531,8 +532,7 @@ class BaseTrainer:
self.stop |= (time.time() - self.train_time_start) > (self.args.time * 3600)
# Save model
if self.args.save or final_epoch:
self.save_model()
if (self.args.save or final_epoch) and self.save_model():
self.run_callbacks("on_model_save")
# Scheduler
@ -630,6 +630,11 @@ class BaseTrainer:
"""Save model training checkpoints with additional metadata."""
import io
ema = deepcopy(unwrap_model(self.ema.ema)).half()
if not all(torch.isfinite(v).all() for v in ema.state_dict().values() if isinstance(v, torch.Tensor)):
LOGGER.warning(f"Skipping checkpoint save at epoch {self.epoch}: EMA contains NaN/Inf")
return False
# Serialize ckpt to a byte buffer once (faster than repeated torch.save() calls)
buffer = io.BytesIO()
torch.save(
@ -637,7 +642,7 @@ class BaseTrainer:
"epoch": self.epoch,
"best_fitness": self.best_fitness,
"model": None, # resume and final checkpoints derive from EMA
"ema": deepcopy(unwrap_model(self.ema.ema)).half(),
"ema": ema,
"updates": self.ema.updates,
"optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())),
"scaler": self.scaler.state_dict(),
@ -666,6 +671,7 @@ class BaseTrainer:
self.best.write_bytes(serialized_ckpt) # save best.pt
if (self.save_period > 0) and (self.epoch % self.save_period == 0):
(self.wdir / f"epoch{self.epoch}.pt").write_bytes(serialized_ckpt) # save epoch, i.e. 'epoch3.pt'
return True
def get_dataset(self):
"""Get train and validation datasets from data dictionary.
@ -720,7 +726,7 @@ class BaseTrainer:
cfg = weights.yaml
elif isinstance(self.args.pretrained, (str, Path)):
weights, _ = load_checkpoint(self.args.pretrained)
self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK == -1) # calls Model(cfg, weights)
self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK in {-1, 0}) # calls Model(cfg, weights)
return ckpt
def optimizer_step(self):
@ -785,6 +791,10 @@ class BaseTrainer:
"""Set or update model parameters before training."""
self.model.names = self.data["names"]
def set_class_weights(self):
"""Compute and set class weights for handling class imbalance. Override in subclasses."""
pass
def build_targets(self, preds, targets):
"""Build target tensors for training YOLO model."""
pass
@ -912,9 +922,11 @@ class BaseTrainer:
corrupted = broadcast_list[0]
if not corrupted:
return False
if epoch == self.start_epoch or not self.last.exists():
if epoch == self.start_epoch:
LOGGER.warning(f"{reason} detected but can not recover from last.pt...")
return False # Cannot recover on first epoch, let training continue
if not self.last.exists():
raise RuntimeError(f"{reason} detected but no valid last.pt is available for recovery")
self.nan_recovery_attempts += 1
if self.nan_recovery_attempts > 3:
raise RuntimeError(f"Training failed: NaN persisted for {self.nan_recovery_attempts} epochs")
@ -946,7 +958,7 @@ class BaseTrainer:
)
self.epochs += ckpt["epoch"] # finetune additional epochs
self._load_checkpoint_state(ckpt)
if unwrap_model(self.model).end2end:
if getattr(unwrap_model(self.model), "end2end", False):
# initialize loss and resume o2o and o2m args
unwrap_model(self.model).criterion = unwrap_model(self.model).init_criterion()
unwrap_model(self.model).criterion.updates = start_epoch - 1

View file

@ -17,11 +17,14 @@ Examples:
from __future__ import annotations
import gc
import json
import random
import shutil
import subprocess
import time
from collections import Counter
from datetime import datetime
from pathlib import Path
import numpy as np
import torch
@ -37,13 +40,13 @@ class Tuner:
"""A class for hyperparameter tuning of YOLO models.
The class evolves YOLO model hyperparameters over a given number of iterations by mutating them according to the
search space and retraining the model to evaluate their performance. Supports both local CSV storage and distributed
MongoDB Atlas coordination for multi-machine hyperparameter optimization.
search space and retraining the model to evaluate their performance. Supports both local NDJSON storage and
distributed MongoDB Atlas coordination for multi-machine hyperparameter optimization.
Attributes:
space (dict[str, tuple]): Hyperparameter search space containing bounds and scaling factors for mutation.
tune_dir (Path): Directory where evolution logs and results will be saved.
tune_csv (Path): Path to the CSV file where evolution logs are saved.
tune_file (Path): Path to the NDJSON file where evolution logs are saved.
args (SimpleNamespace): Configuration arguments for the tuning process.
callbacks (dict): Callback functions to be executed during tuning.
prefix (str): Prefix string for logging messages.
@ -98,6 +101,7 @@ class Tuner:
"warmup_momentum": (0.0, 0.95), # warmup initial momentum
"box": (1.0, 20.0), # box loss gain
"cls": (0.1, 4.0), # cls loss gain (scale with pixels)
"cls_pw": (0.0, 1.0), # cls power weight
"dfl": (0.4, 12.0), # dfl loss gain
"hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction)
"hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
@ -124,7 +128,7 @@ class Tuner:
self.args.exist_ok = self.args.resume # resume w/ same tune_dir
self.tune_dir = get_save_dir(self.args, name=self.args.name or "tune")
self.args.name, self.args.exist_ok, self.args.resume = (None, False, False) # reset to not affect training
self.tune_csv = self.tune_dir / "tune_results.csv"
self.tune_file = self.tune_dir / "tune_results.ndjson"
self.callbacks = _callbacks or callbacks.get_default_callbacks()
self.prefix = colorstr("Tuner: ")
callbacks.add_integration_callbacks(self)
@ -192,7 +196,7 @@ class Tuner:
Notes:
- Creates a fitness index for fast queries of top results
- Falls back to CSV-only mode if connection fails
- Falls back to local NDJSON mode if connection fails
- Uses connection pooling and retry logic for production reliability
"""
self.mongodb = self._connect(mongodb_uri)
@ -214,13 +218,45 @@ class Tuner:
except Exception:
return []
def _save_to_mongodb(self, fitness: float, hyperparameters: dict[str, float], metrics: dict, iteration: int):
@staticmethod
def _json_default(x):
"""Convert tensor-like values for JSON serialization."""
return x.item() if hasattr(x, "item") else str(x)
def _result_record(
self,
iteration: int,
fitness: float,
hyperparameters: dict[str, float],
datasets: dict[str, dict],
save_dirs: dict[str, str] | None = None,
) -> dict:
"""Build one local tuning result record."""
result = {
"iteration": iteration,
"fitness": round(fitness, 5),
"hyperparameters": hyperparameters,
"datasets": datasets,
}
if save_dirs:
result["save_dirs"] = save_dirs
return result
def _save_to_mongodb(
self,
fitness: float,
hyperparameters: dict[str, float],
metrics: dict,
datasets: dict[str, dict],
iteration: int,
):
"""Save results to MongoDB with proper type conversion.
Args:
fitness (float): Fitness score achieved with these hyperparameters.
hyperparameters (dict[str, float]): Dictionary of hyperparameter values.
metrics (dict): Complete training metrics dictionary (mAP, precision, recall, losses, etc.).
datasets (dict[str, dict]): Per-dataset metrics for the iteration.
iteration (int): Current iteration number.
"""
try:
@ -229,6 +265,7 @@ class Tuner:
"fitness": fitness,
"hyperparameters": {k: (v.item() if hasattr(v, "item") else v) for k, v in hyperparameters.items()},
"metrics": metrics,
"datasets": datasets,
"timestamp": datetime.now(),
"iteration": iteration,
}
@ -236,30 +273,85 @@ class Tuner:
except Exception as e:
LOGGER.warning(f"{self.prefix}MongoDB save failed: {e}")
def _sync_mongodb_to_csv(self):
"""Sync MongoDB results to CSV for plotting compatibility.
def _sync_mongodb_to_file(self):
"""Sync MongoDB results to the local NDJSON tuning log.
Downloads all results from MongoDB and writes them to the local CSV file in chronological order. This enables
the existing plotting functions to work seamlessly with distributed MongoDB data.
Downloads all results from MongoDB and writes them to the local NDJSON file in chronological order. This keeps
resume, mutation, and plotting on the same local source of truth when using distributed tuning.
"""
try:
# Get all results from MongoDB
all_results = list(self.collection.find().sort("iteration", 1))
if not all_results:
return
# Write to CSV
headers = ",".join(["fitness", *list(self.space.keys())]) + "\n"
with open(self.tune_csv, "w", encoding="utf-8") as f:
f.write(headers)
with open(self.tune_file, "w", encoding="utf-8") as f:
for result in all_results:
fitness = result["fitness"] or 0.0
hyp_values = [result["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
log_row = [round(fitness, 5), *hyp_values]
f.write(",".join(map(str, log_row)) + "\n")
f.write(
json.dumps(
self._result_record(
result["iteration"],
result["fitness"] or 0.0,
result.get("hyperparameters", {}),
result.get("datasets", {}),
result.get("save_dirs"),
),
default=self._json_default,
)
+ "\n"
)
except Exception as e:
LOGGER.warning(f"{self.prefix}MongoDB to CSV sync failed: {e}")
LOGGER.warning(f"{self.prefix}MongoDB to NDJSON sync failed: {e}")
def _load_local_results(self) -> list[dict]:
"""Load local tuning results from the NDJSON log."""
if not self.tune_file.exists():
return []
with open(self.tune_file, encoding="utf-8") as f:
return [json.loads(line) for line in f if line.strip()]
def _local_results_to_array(self, results: list[dict], n: int | None = None) -> np.ndarray | None:
"""Convert local NDJSON records to a fitness-plus-hyperparameters numpy array."""
if not results:
return None
x = np.array(
[
[r.get("fitness", 0.0)]
+ [r.get("hyperparameters", {}).get(k, getattr(self.args, k)) for k in self.space]
for r in results
],
dtype=float,
)
if n is None:
return x
order = np.argsort(-x[:, 0])
return x[order][:n]
def _save_local_result(self, result: dict):
"""Append one tuning result to the local NDJSON log."""
with open(self.tune_file, "a", encoding="utf-8") as f:
f.write(json.dumps(result, default=self._json_default) + "\n")
@staticmethod
def _best_metrics(result: dict) -> dict | None:
"""Summarize best-result metrics for logging."""
datasets = result.get("datasets", {})
if len(datasets) == 1:
return next(iter(datasets.values()))
if len(datasets) > 1:
return {k: round(v.get("fitness") or 0.0, 5) for k, v in datasets.items()}
return None
@staticmethod
def _dataset_names(data: list) -> list[str]:
"""Create stable unique dataset names for logging and per-run directories."""
stems = [Path(str(d)).stem for d in data]
totals, seen = Counter(stems), Counter()
names = []
for stem in stems:
seen[stem] += 1
names.append(f"{stem}-{seen[stem]}" if totals[stem] > 1 else stem)
return names
@staticmethod
def _crossover(x: np.ndarray, alpha: float = 0.2, k: int = 9) -> np.ndarray:
@ -308,13 +400,9 @@ class Tuner:
elif self.collection.name in self.collection.database.list_collection_names(): # Tuner started elsewhere
x = np.array([[0.0] + [getattr(self.args, k) for k in self.space.keys()]])
# Fall back to CSV if MongoDB unavailable or empty
if x is None and self.tune_csv.exists():
csv_data = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
if len(csv_data) > 0:
fitness = csv_data[:, 0] # first column
order = np.argsort(-fitness)
x = csv_data[order][:n] # top-n sorted by fitness DESC
# Fall back to local NDJSON if MongoDB unavailable or empty
if x is None:
x = self._local_results_to_array(self._load_local_results(), n=n)
# Mutate if we have data, otherwise use defaults
if x is not None:
@ -351,10 +439,10 @@ class Tuner:
"""Execute the hyperparameter evolution process when the Tuner instance is called.
This method iterates through the specified number of iterations, performing the following steps:
1. Sync MongoDB results to CSV (if using distributed mode)
1. Sync MongoDB results to local NDJSON (if using distributed mode)
2. Mutate hyperparameters using the best previous results or defaults
3. Train a YOLO model with the mutated hyperparameters
4. Log fitness scores and hyperparameters to MongoDB and/or CSV
4. Log fitness scores and hyperparameters to MongoDB and/or NDJSON
5. Track the best performing configuration across all iterations
Args:
@ -362,17 +450,17 @@ class Tuner:
cleanup (bool): Whether to delete iteration weights to reduce storage space during tuning.
"""
t0 = time.time()
best_save_dir, best_metrics = None, None
self.tune_dir.mkdir(parents=True, exist_ok=True)
(self.tune_dir / "weights").mkdir(parents=True, exist_ok=True)
best_save_dirs = {}
# Sync MongoDB to CSV at startup for proper resume logic
# Sync MongoDB to local NDJSON at startup for proper resume logic
if self.mongodb:
self._sync_mongodb_to_csv()
self._sync_mongodb_to_file()
start = 0
if self.tune_csv.exists():
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
start = x.shape[0]
if self.tune_file.exists():
start = len(self._load_local_results())
LOGGER.info(f"{self.prefix}Resuming tuning run {self.tune_dir} from iteration {start + 1}...")
for i in range(start, iterations):
# Linearly decay sigma from 0.2 → 0.1 over first 300 iterations
@ -383,69 +471,100 @@ class Tuner:
mutated_hyp = self._mutate(sigma=sigma_i)
LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}")
metrics = {}
train_args = {**vars(self.args), **mutated_hyp}
save_dir = get_save_dir(get_cfg(train_args))
train_args["save_dir"] = str(save_dir) # pass save_dir to subprocess to ensure same path is used
weights_dir = save_dir / "weights"
try:
# Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
launch = [__import__("sys").executable, "-m", "ultralytics.cfg.__init__"] # workaround yolo not found
cmd = [*launch, "train", *(f"{k}={v}" for k, v in train_args.items())]
return_code = subprocess.run(cmd, check=True).returncode
ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt")
metrics = torch_load(ckpt_file)["train_metrics"]
assert return_code == 0, "training failed"
data = train_args.pop("data")
if not isinstance(data, (list, tuple)):
data = [data]
dataset_names = self._dataset_names(data)
save_dir = (
[get_save_dir(get_cfg(train_args))]
if len(data) == 1
else [get_save_dir(get_cfg(train_args), name=name) for name in dataset_names]
)
weights_dir = [s / "weights" for s in save_dir]
metrics = {}
all_fitness = []
dataset_metrics = {}
for j, (d, dataset) in enumerate(zip(data, dataset_names)):
metrics_i = {}
try:
train_args["data"] = d
train_args["save_dir"] = str(save_dir[j]) # pass save_dir to subprocess to ensure same path is used
# Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
launch = [
__import__("sys").executable,
"-m",
"ultralytics.cfg.__init__",
] # workaround yolo not found
cmd = [*launch, "train", *(f"{k}={v}" for k, v in train_args.items())]
return_code = subprocess.run(cmd, check=True).returncode
ckpt_file = weights_dir[j] / ("best.pt" if (weights_dir[j] / "best.pt").exists() else "last.pt")
metrics_i = torch_load(ckpt_file)["train_metrics"]
metrics = metrics_i
assert return_code == 0, "training failed"
# Cleanup
time.sleep(1)
gc.collect()
torch.cuda.empty_cache()
# Cleanup
time.sleep(1)
gc.collect()
torch.cuda.empty_cache()
except Exception as e:
LOGGER.error(f"training failure for hyperparameter tuning iteration {i + 1}\n{e}")
except Exception as e:
LOGGER.error(f"training failure for hyperparameter tuning iteration {i + 1}\n{e}")
# Save results - MongoDB takes precedence
fitness = metrics.get("fitness") or 0.0
# Save results - MongoDB takes precedence
dataset_metrics[dataset] = metrics_i or {"fitness": 0.0}
all_fitness.append(dataset_metrics[dataset].get("fitness") or 0.0)
fitness = sum(all_fitness) / len(all_fitness)
result = self._result_record(
i + 1,
fitness,
mutated_hyp,
dataset_metrics,
{dataset: str(s) for dataset, s in zip(dataset_names, save_dir)},
)
stop_after_iteration = False
if self.mongodb:
self._save_to_mongodb(fitness, mutated_hyp, metrics, i + 1)
self._sync_mongodb_to_csv()
self._save_to_mongodb(fitness, mutated_hyp, metrics, dataset_metrics, i + 1)
self._sync_mongodb_to_file()
total_mongo_iterations = self.collection.count_documents({})
if total_mongo_iterations >= iterations:
LOGGER.info(
f"{self.prefix}Target iterations ({iterations}) reached in MongoDB ({total_mongo_iterations}). Stopping."
)
break
stop_after_iteration = True
else:
# Save to CSV only if no MongoDB
log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()]
headers = "" if self.tune_csv.exists() else (",".join(["fitness", *list(self.space.keys())]) + "\n")
with open(self.tune_csv, "a", encoding="utf-8") as f:
f.write(headers + ",".join(map(str, log_row)) + "\n")
self._save_local_result(result)
# Get best results
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
results = self._load_local_results()
x = self._local_results_to_array(results)
fitness = x[:, 0] # first column
best_idx = fitness.argmax()
best_result = results[best_idx]
current_best_save_dirs = best_result.get("save_dirs", {})
best_is_current = best_idx == i
if best_is_current:
best_save_dir = str(save_dir)
best_metrics = {k: round(v, 5) for k, v in metrics.items()}
for ckpt in weights_dir.glob("*.pt"):
shutil.copy2(ckpt, self.tune_dir / "weights")
elif cleanup and best_save_dir:
shutil.rmtree(best_save_dir, ignore_errors=True) # remove iteration dirs to reduce storage space
if cleanup:
for s in best_save_dirs.values():
if s not in current_best_save_dirs.values():
shutil.rmtree(s, ignore_errors=True)
if len(data) == 1:
for ckpt in weights_dir[0].glob("*.pt"):
shutil.copy2(ckpt, self.tune_dir / "weights")
best_save_dirs = current_best_save_dirs
elif cleanup:
for s in save_dir:
shutil.rmtree(s, ignore_errors=True) # remove iteration dirs to reduce storage space
best_save_dirs = current_best_save_dirs
# Plot tune results
plot_tune_results(str(self.tune_csv))
plot_tune_results(str(self.tune_file))
# Save and print tune results
header = (
f"{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n"
f"{self.prefix}Results saved to {colorstr('bold', self.tune_dir)}\n"
f"{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n"
f"{self.prefix}Best fitness metrics are {best_metrics}\n"
f"{self.prefix}Best fitness model is {best_save_dir}"
f"{self.prefix}Best fitness metrics are {self._best_metrics(best_result)}\n"
f"{self.prefix}Best fitness model is "
f"{self.tune_dir / 'weights' if len(best_result.get('datasets', {})) == 1 else 'not saved for multi-dataset tuning'}"
)
LOGGER.info("\n" + header)
data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
@ -455,3 +574,8 @@ class Tuner:
header=remove_colorstr(header.replace(self.prefix, "# ")) + "\n",
)
YAML.print(self.tune_dir / "best_hyperparameters.yaml")
if stop_after_iteration:
LOGGER.info(
f"{self.prefix}Target iterations ({iterations}) reached in MongoDB ({total_mongo_iterations}). Stopping."
)
break

View file

@ -2273,8 +2273,9 @@ class SAM3SemanticPredictor(SAM3Predictor):
"""Run inference on the extracted features with optional bounding boxes and labels."""
# NOTE: priority: bboxes > text > pre-set classes
nc = 1 if bboxes is not None else len(text) if text is not None else len(self.model.names)
geometric_prompt = self._get_dummy_prompt(nc)
geometric_prompt = None
if bboxes is not None:
geometric_prompt = self._get_dummy_prompt(nc)
for i in range(len(bboxes)):
geometric_prompt.append_boxes(bboxes[[i]], labels[[i]])
if text is None:

View file

@ -330,7 +330,7 @@ class TransformerDecoder(nn.Module):
# cache miss, will create compilation issue
# In case we're not compiling, we'll still rely on the dict-based cache
if feat_size not in self.coord_cache:
self.coord_cache[feat_size] = self._get_coords(H, W, reference_boxes.device)
self.coord_cache[feat_size] = self._get_coords(H, W, reference_boxes.device, reference_boxes.dtype)
coords_h, coords_w = self.coord_cache[feat_size]
assert coords_h.shape == (H,)
@ -522,7 +522,7 @@ class TransformerDecoder(nn.Module):
# clamp to mitigate numerical issues
if self.clamp_presence_logits:
intermediate_layer_presence_logits.clamp(
intermediate_layer_presence_logits.clamp_(
min=-self.clamp_presence_logit_max_val,
max=self.clamp_presence_logit_max_val,
)

View file

@ -290,15 +290,18 @@ class SAM3SemanticModel(torch.nn.Module):
self, backbone_out, batch=len(text_ids)
)
backbone_out.update({k: v for k, v in self.text_embeddings.items()})
with torch.profiler.record_function("SAM3Image._encode_prompt"):
prompt, prompt_mask = self._encode_prompt(img_feats, img_pos_embeds, vis_feat_sizes, geometric_prompt)
# index text features (note that regardless of early or late fusion, the batch size of
# `txt_feats` is always the number of *prompts* in the encoder)
txt_feats = backbone_out["language_features"][:, text_ids]
txt_masks = backbone_out["language_mask"][text_ids]
# encode text
prompt = torch.cat([txt_feats, prompt], dim=0)
prompt_mask = torch.cat([txt_masks, prompt_mask], dim=1)
if geometric_prompt is not None:
with torch.profiler.record_function("SAM3Image._encode_prompt"):
geo_prompt, geo_mask = self._encode_prompt(img_feats, img_pos_embeds, vis_feat_sizes, geometric_prompt)
prompt = torch.cat([txt_feats, geo_prompt], dim=0)
prompt_mask = torch.cat([txt_masks, geo_mask], dim=1)
else:
prompt = txt_feats
prompt_mask = txt_masks
# Run the encoder
with torch.profiler.record_function("SAM3Image._run_encoder"):

Some files were not shown because too many files have changed in this diff Show more