diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f0de04608f..c578eca8ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -74,19 +74,19 @@ jobs: uv pip list - name: Benchmark DetectionModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}.pt' imgsz=160 verbose=0.218 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}.pt' imgsz=160 verbose=0.216 - name: Benchmark ClassificationModel shell: bash run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-cls.pt' imgsz=160 verbose=0.249 - name: Benchmark YOLOWorld DetectionModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov8s-worldv2.pt' imgsz=160 verbose=0.337 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov8s-worldv2.pt' imgsz=160 verbose=0.335 - name: Benchmark SegmentationModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.230 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.229 - name: Benchmark PoseModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.194 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.185 - name: Benchmark OBBModel shell: bash run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model 
}}-obb.pt' imgsz=160 verbose=0.372 @@ -345,15 +345,15 @@ jobs: yolo checks uv pip list - name: Benchmark DetectionModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n.pt' imgsz=160 verbose=0.218 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n.pt' imgsz=160 verbose=0.216 - name: Benchmark ClassificationModel run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-cls.pt' imgsz=160 verbose=0.249 - name: Benchmark YOLOWorld DetectionModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolov8s-worldv2.pt' imgsz=160 verbose=0.337 + run: python -m ultralytics.cfg.__init__ benchmark model='yolov8s-worldv2.pt' imgsz=160 verbose=0.335 - name: Benchmark SegmentationModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-seg.pt' imgsz=160 verbose=0.230 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-seg.pt' imgsz=160 verbose=0.229 - name: Benchmark PoseModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-pose.pt' imgsz=160 verbose=0.194 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-pose.pt' imgsz=160 verbose=0.185 - name: Benchmark OBBModel run: python -m ultralytics.cfg.__init__ benchmark model='yolo26n-obb.pt' imgsz=160 verbose=0.372 - name: Benchmark Summary @@ -435,7 +435,7 @@ jobs: channel-priority: true activate-environment: anaconda-client-env - name: Install Ultralytics package from conda-forge - run: conda install -c pytorch -c conda-forge pytorch-cpu torchvision ultralytics "openvino!=2026.0.0" + run: conda install -c pytorch -c conda-forge pytorch-cpu torchvision ultralytics "openvino<2026" - name: Install pip packages run: uv pip install pytest - name: Check environment diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 231ad84087..f4116d20da 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -227,7 +227,7 @@ jobs: - name: Run Benchmarks if: (github.event_name == 'push' || 
github.event.inputs[matrix.dockerfile] == 'true') && (matrix.platforms == 'linux/amd64' || matrix.dockerfile == 'Dockerfile-arm64') && matrix.dockerfile != 'Dockerfile' && matrix.dockerfile != 'Dockerfile-conda' - run: docker run ultralytics/ultralytics:${{ (matrix.tags == 'latest-python' && 'latest-python-export') || (matrix.tags == 'latest' && 'latest-export') || matrix.tags }} yolo benchmark model=yolo26n.pt imgsz=160 verbose=0.218 + run: docker run ultralytics/ultralytics:${{ (matrix.tags == 'latest-python' && 'latest-python-export') || (matrix.tags == 'latest' && 'latest-export') || matrix.tags }} yolo benchmark model=yolo26n.pt imgsz=160 verbose=0.216 - name: Push All Images if: github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true') diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dacd46261d..4939441c29 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,9 +29,9 @@ First-time contributors are expected to submit small, well-scoped pull requests. #### Established Contributors -Pull requests from established contributors generally receive higher review priority. Actions and results are fundamental to the [Ultralytics Mission & Values](https://handbook.ultralytics.com/mission-vision-values/). There is no specific threshold to becoming an 'established contributor' as it's impossible to fit all individuals to the same standard. The Ultralytics Team notices those who make consistent, high-quality contributions that follow the Ultralytics standards. +Pull requests from established contributors generally receive higher review priority. Actions and results are fundamental to the [Ultralytics Mission & Values](https://handbook.ultralytics.com/mission-vision-values). There is no specific threshold to becoming an 'established contributor' as it's impossible to fit all individuals to the same standard. 
The Ultralytics Team notices those who make consistent, high-quality contributions that follow the Ultralytics standards. -Following our [contributing guidelines](./CONTRIBUTING.md) and [our Development Workflow](https://handbook.ultralytics.com/workflows/development/) is the best way to improve your chances for your work to be reviewed, accepted, and/or recognized; this is not a guarantee. In addition, contributors with a strong track record of meaningful contributions to notable open-source projects may be treated as established contributors, even if they are technically first-time contributors to Ultralytics. +Following our [contributing guidelines](./CONTRIBUTING.md) and [our Development Workflow](https://handbook.ultralytics.com/workflows/development) is the best way to improve your chances for your work to be reviewed, accepted, and/or recognized; this is not a guarantee. In addition, contributors with a strong track record of meaningful contributions to notable open-source projects may be treated as established contributors, even if they are technically first-time contributors to Ultralytics. #### Feature PRs @@ -156,11 +156,11 @@ We highly value bug reports as they help us improve the quality and reliability Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.ultralytics.com/legal/agpl-3-0-software-license) for its repositories. This license promotes [openness](https://en.wikipedia.org/wiki/Openness), [transparency](https://www.ultralytics.com/glossary/transparency-in-ai), and [collaborative improvement](https://en.wikipedia.org/wiki/Collaborative_software) in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation. 
-We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3) to contribute effectively and ethically to the Ultralytics open-source community. +We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) to contribute effectively and ethically to the Ultralytics open-source community. ## ๐ŸŒ Open-Sourcing Your YOLO Project Under AGPL-3.0 -Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-v3) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. +Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. ### Why AGPL-3.0 Compliance Matters @@ -179,7 +179,7 @@ Complying means making the **complete corresponding source code** of your projec - **Use Ultralytics Template:** Start with the [Ultralytics template repository](https://github.com/ultralytics/template) for a clean, modular setup integrating YOLO. 2. **License Your Project:** - - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3). + - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0). - Add a notice at the top of each source file indicating the license. 3. 
**Publish Your Source Code:** diff --git a/README.md b/README.md index f67fbdc864..7d6ddf4b1c 100644 --- a/README.md +++ b/README.md @@ -252,7 +252,7 @@ We look forward to your contributions to help make the Ultralytics ecosystem eve Ultralytics offers two licensing options to suit different needs: -- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is perfect for students, researchers, and enthusiasts. It encourages open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. +- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is perfect for students, researchers, and enthusiasts. It encourages open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. - **Ultralytics Enterprise License**: Designed for commercial use, this license allows for the seamless integration of Ultralytics software and AI models into commercial products and services, bypassing the open-source requirements of AGPL-3.0. If your use case involves commercial deployment, please contact us via [Ultralytics Licensing](https://www.ultralytics.com/license). 
## ๐Ÿ“ž Contact diff --git a/README.zh-CN.md b/README.zh-CN.md index a064cc07b5..8179b41271 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -252,7 +252,7 @@ Ultralytics ๆ”ฏๆŒๅนฟๆณ›็š„ YOLO ๆจกๅž‹๏ผŒไปŽๆ—ฉๆœŸ็š„็‰ˆๆœฌๅฆ‚ [YOLOv3](https:/ Ultralytics ๆไพ›ไธค็ง่ฎธๅฏ้€‰้กนไปฅๆปก่ถณไธๅŒ้œ€ๆฑ‚๏ผš -- **AGPL-3.0 ่ฎธๅฏ่ฏ**๏ผš่ฟ™็ง็ป [OSI ๆ‰นๅ‡†](https://opensource.org/license/agpl-v3)็š„ๅผ€ๆบ่ฎธๅฏ่ฏ้žๅธธ้€‚ๅˆๅญฆ็”Ÿใ€็ ”็ฉถไบบๅ‘˜ๅ’Œ็ˆฑๅฅฝ่€…ใ€‚ๅฎƒ้ผ“ๅŠฑๅผ€ๆ”พๅไฝœๅ’Œ็Ÿฅ่ฏ†ๅ…ฑไบซใ€‚ๆœ‰ๅ…ณๅฎŒๆ•ด่ฏฆ็ป†ไฟกๆฏ๏ผŒ่ฏทๅ‚้˜… [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ๆ–‡ไปถใ€‚ +- **AGPL-3.0 ่ฎธๅฏ่ฏ**๏ผš่ฟ™็ง็ป [OSI ๆ‰นๅ‡†](https://opensource.org/license/agpl-3.0)็š„ๅผ€ๆบ่ฎธๅฏ่ฏ้žๅธธ้€‚ๅˆๅญฆ็”Ÿใ€็ ”็ฉถไบบๅ‘˜ๅ’Œ็ˆฑๅฅฝ่€…ใ€‚ๅฎƒ้ผ“ๅŠฑๅผ€ๆ”พๅไฝœๅ’Œ็Ÿฅ่ฏ†ๅ…ฑไบซใ€‚ๆœ‰ๅ…ณๅฎŒๆ•ด่ฏฆ็ป†ไฟกๆฏ๏ผŒ่ฏทๅ‚้˜… [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ๆ–‡ไปถใ€‚ - **Ultralytics ไผไธš่ฎธๅฏ่ฏ**๏ผšไธ“ไธบๅ•†ไธš็”จ้€”่ฎพ่ฎก๏ผŒๆญค่ฎธๅฏ่ฏๅ…่ฎธๅฐ† Ultralytics ่ฝฏไปถๅ’Œ AI ๆจกๅž‹ๆ— ็ผ้›†ๆˆๅˆฐๅ•†ไธšไบงๅ“ๅ’ŒๆœๅŠกไธญ๏ผŒ็ป•่ฟ‡ AGPL-3.0 ็š„ๅผ€ๆบ่ฆๆฑ‚ใ€‚ๅฆ‚ๆžœๆ‚จ็š„ไฝฟ็”จๅœบๆ™ฏๆถ‰ๅŠๅ•†ไธš้ƒจ็ฝฒ๏ผŒ่ฏท้€š่ฟ‡ [Ultralytics ๆŽˆๆƒ่ฎธๅฏ](https://www.ultralytics.com/license)ไธŽๆˆ‘ไปฌ่”็ณปใ€‚ ## ๐Ÿ“ž ่”็ณปๆ–นๅผ diff --git a/docker/Dockerfile-jetson-jetpack5 b/docker/Dockerfile-jetson-jetpack5 index 1a0013d8d2..3ac12963fa 100644 --- a/docker/Dockerfile-jetson-jetpack5 +++ b/docker/Dockerfile-jetson-jetpack5 @@ -41,8 +41,8 @@ RUN sed -i 's/^\( *"tensorflowjs\)>=.*\(".*\)/\1>=3.9.0\2/' pyproject.toml && \ # Pip install onnxruntime-gpu, torch, torchvision and ultralytics, then remove build files RUN uv pip install --system \ https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.18.0-cp38-cp38-linux_aarch64.whl \ - https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.2.0-cp38-cp38-linux_aarch64.whl \ - 
https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.17.2+c1d70fe-cp38-cp38-linux_aarch64.whl && \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl && \ # Need lower version of 'numpy' for TensorRT export uv pip install --system numpy==1.23.5 && \ uv pip install --system -e ".[export]" && \ diff --git a/docs/en/datasets/detect/tt100k.md b/docs/en/datasets/detect/tt100k.md index 4fbeaf9860..f672af7c69 100644 --- a/docs/en/datasets/detect/tt100k.md +++ b/docs/en/datasets/detect/tt100k.md @@ -8,7 +8,7 @@ keywords: TT100K, Tsinghua-Tencent 100K, traffic sign detection, YOLO26, dataset The [Tsinghua-Tencent 100K (TT100K)](https://cg.cs.tsinghua.edu.cn/traffic-sign/) is a large-scale traffic sign benchmark dataset created from 100,000 Tencent Street View panoramas. This dataset is specifically designed for traffic sign detection and classification in real-world conditions, providing researchers and developers with a comprehensive resource for building robust traffic sign recognition systems. -The dataset contains **100,000 images** with over **30,000 traffic sign instances** across **221 different categories**. These images capture large variations in illuminance, weather conditions, viewing angles, and distances, making it ideal for training models that need to perform reliably in diverse real-world scenarios. +The dataset contains **100,000 images** with over **30,000 traffic sign instances** across **221 annotation categories**. The original paper applies a 100-instance threshold per class for supervised training, yielding a commonly used **45-class** subset; however, the provided Ultralytics dataset configuration retains all **221 annotated categories**, many of which are very sparse. 
These images capture large variations in illuminance, weather conditions, viewing angles, and distances, making it ideal for training models that need to perform reliably in diverse real-world scenarios. This dataset is particularly valuable for: diff --git a/docs/en/datasets/explorer/api.md b/docs/en/datasets/explorer/api.md index ca47635598..cc8796e678 100644 --- a/docs/en/datasets/explorer/api.md +++ b/docs/en/datasets/explorer/api.md @@ -8,7 +8,7 @@ keywords: Ultralytics, Explorer API, dataset exploration, SQL queries, similarit !!! warning "Community Note โš ๏ธ" - As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/). + As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/). ## Introduction @@ -331,13 +331,6 @@ Start creating your own CV dataset exploration reports using the Explorer API. F Try our [GUI Demo](dashboard.md) based on Explorer API -## Coming Soon - -- [ ] Merge specific labels from datasets. Example - Import all `person` labels from COCO and `car` labels from Cityscapes -- [ ] Remove images that have a higher similarity index than the given threshold -- [ ] Automatically persist new datasets after merging/removing entries -- [ ] Advanced Dataset Visualizations - ## FAQ ### What is the Ultralytics Explorer API used for? diff --git a/docs/en/datasets/explorer/dashboard.md b/docs/en/datasets/explorer/dashboard.md index 800333171a..bb393452c8 100644 --- a/docs/en/datasets/explorer/dashboard.md +++ b/docs/en/datasets/explorer/dashboard.md @@ -8,7 +8,7 @@ keywords: Ultralytics Explorer GUI, semantic search, vector similarity, SQL quer !!! 
warning "Community Note โš ๏ธ" - As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/). + As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/). Explorer GUI is built on the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries, and natural language queries using the Ask AI feature powered by LLMs. diff --git a/docs/en/datasets/explorer/explorer.md b/docs/en/datasets/explorer/explorer.md index 567d9ebd10..5e6fc3a516 100644 --- a/docs/en/datasets/explorer/explorer.md +++ b/docs/en/datasets/explorer/explorer.md @@ -45,7 +45,7 @@ Install `ultralytics` and run `yolo explorer` in your terminal to run custom que !!! warning "Community Note โš ๏ธ" - As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/). + As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/). ## Setup diff --git a/docs/en/datasets/explorer/index.md b/docs/en/datasets/explorer/index.md index 14b39166ef..7d119ba806 100644 --- a/docs/en/datasets/explorer/index.md +++ b/docs/en/datasets/explorer/index.md @@ -8,7 +8,7 @@ keywords: Ultralytics Explorer, CV datasets, semantic search, SQL queries, vecto !!! warning "Community Note โš ๏ธ" - As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support is deprecated. 
Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/). + As of **`ultralytics>=8.3.12`**, Ultralytics Explorer has been removed. To use Explorer, install `pip install ultralytics==8.3.11`. Similar (and expanded) dataset exploration features are available in [Ultralytics Platform](https://platform.ultralytics.com/).

Ultralytics Explorer dataset visualization GUI @@ -39,7 +39,7 @@ pip install ultralytics[explorer] !!! tip - Explorer works on embedding/semantic search & SQL querying and is powered by [LanceDB](https://lancedb.com/) serverless vector database. Unlike traditional in-memory DBs, it is persisted on disk without sacrificing performance, so you can scale locally to large datasets like COCO without running out of memory. + Explorer works on embedding/semantic search & SQL querying and is powered by [LanceDB](https://www.lancedb.com/) serverless vector database. Unlike traditional in-memory DBs, it is persisted on disk without sacrificing performance, so you can scale locally to large datasets like COCO without running out of memory. ## Explorer API @@ -68,7 +68,7 @@ yolo explorer ### What is Ultralytics Explorer and how can it help with CV datasets? -Ultralytics Explorer is a powerful tool designed for exploring [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. By leveraging technologies like [LanceDB](https://lancedb.com/), Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process. +Ultralytics Explorer is a powerful tool designed for exploring [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. 
By leveraging technologies like [LanceDB](https://www.lancedb.com/), Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process. Learn more about the [Explorer API](api.md). @@ -80,7 +80,7 @@ To manually install the optional dependencies needed for Ultralytics Explorer, y pip install ultralytics[explorer] ``` -These dependencies are essential for the full functionality of semantic search and SQL querying. By including libraries powered by [LanceDB](https://lancedb.com/), the installation ensures that the database operations remain efficient and scalable, even for large datasets like [COCO](../detect/coco.md). +These dependencies are essential for the full functionality of semantic search and SQL querying. By including libraries powered by [LanceDB](https://www.lancedb.com/), the installation ensures that the database operations remain efficient and scalable, even for large datasets like [COCO](../detect/coco.md). ### How can I use the GUI version of Ultralytics Explorer? 
diff --git a/docs/en/guides/deepstream-nvidia-jetson.md b/docs/en/guides/deepstream-nvidia-jetson.md index 32bd886728..f5448de160 100644 --- a/docs/en/guides/deepstream-nvidia-jetson.md +++ b/docs/en/guides/deepstream-nvidia-jetson.md @@ -36,9 +36,10 @@ Before you start to follow this guide: - Visit our documentation, [Quick Start Guide: NVIDIA Jetson with Ultralytics YOLO26](nvidia-jetson.md) to set up your NVIDIA Jetson device with Ultralytics YOLO26 - Install [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) according to the JetPack version - - For JetPack 4.6.4, install [DeepStream 6.0.1](https://docs.nvidia.com/metropolis/deepstream/6.0.1/dev-guide/text/DS_Quickstart.html) - - For JetPack 5.1.3, install [DeepStream 6.3](https://docs.nvidia.com/metropolis/deepstream/6.3/dev-guide/text/DS_Quickstart.html) - - For JetPack 6.1, install [DeepStream 7.1](https://docs.nvidia.com/metropolis/deepstream/7.0/dev-guide/text/DS_Overview.html) + - For JetPack 4.6.4, install [DeepStream 6.0.1](https://archive.docs.nvidia.com/metropolis/deepstream/6.0.1/dev-guide/text/DS_Quickstart.html) + - For JetPack 5.1.3, install [DeepStream 6.3](https://archive.docs.nvidia.com/metropolis/deepstream/6.3/dev-guide/text/DS_Quickstart.html) + - For JetPack 6.1, install [DeepStream 7.1](https://docs.nvidia.com/metropolis/deepstream/7.1/text/DS_Overview.html) + - For JetPack 7.1, install [DeepStream 9.0](https://docs.nvidia.com/metropolis/deepstream/9.0/text/DS_Overview.html) !!! tip diff --git a/docs/en/guides/heatmaps.md b/docs/en/guides/heatmaps.md index a3a27cecac..9c49d1b874 100644 --- a/docs/en/guides/heatmaps.md +++ b/docs/en/guides/heatmaps.md @@ -216,4 +216,4 @@ cv2.destroyAllWindows() ### Why should businesses choose Ultralytics YOLO26 for heatmap generation in data analysis? 
-Ultralytics YOLO26 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO26's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans). +Ultralytics YOLO26 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO26's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/pricing). diff --git a/docs/en/guides/hyperparameter-tuning.md b/docs/en/guides/hyperparameter-tuning.md index 4a01957700..a1d66861d7 100644 --- a/docs/en/guides/hyperparameter-tuning.md +++ b/docs/en/guides/hyperparameter-tuning.md @@ -70,7 +70,7 @@ Use metrics like AP50, F1-score, or custom metrics to evaluate the model's perfo ### Log Results -It's crucial to log both the performance metrics and the corresponding hyperparameters for future reference. Ultralytics YOLO automatically saves these results in CSV format. 
+It's crucial to log both the performance metrics and the corresponding hyperparameters for future reference. Ultralytics YOLO automatically saves these results in NDJSON format. ### Repeat @@ -90,6 +90,7 @@ The following table lists the default search space parameters for hyperparameter | `warmup_momentum` | `float` | `(0.0, 0.95)` | Initial momentum during warmup phase. Gradually increases to the final momentum value | | `box` | `float` | `(1.0, 20.0)` | Bounding box loss weight in the total loss function. Balances box regression vs classification | | `cls` | `float` | `(0.1, 4.0)` | Classification loss weight in the total loss function. Higher values emphasize correct class prediction | +| `cls_pw` | `float` | `(0.0, 1.0)` | Class weighting power for handling class imbalance. Higher values increase weight on rare classes | | `dfl` | `float` | `(0.4, 12.0)` | DFL (Distribution Focal Loss) weight in the total loss function. Higher values emphasize precise bounding box localization | | `hsv_h` | `float` | `(0.0, 0.1)` | Random hue augmentation range in HSV color space. Helps model generalize across color variations | | `hsv_s` | `float` | `(0.0, 0.9)` | Random saturation augmentation range in HSV space. Simulates different lighting conditions | @@ -186,8 +187,8 @@ runs/ โ”œโ”€โ”€ ... โ””โ”€โ”€ tune/ โ”œโ”€โ”€ best_hyperparameters.yaml - โ”œโ”€โ”€ best_fitness.png - โ”œโ”€โ”€ tune_results.csv + โ”œโ”€โ”€ tune_fitness.png + โ”œโ”€โ”€ tune_results.ndjson โ”œโ”€โ”€ tune_scatter_plots.png โ””โ”€โ”€ weights/ โ”œโ”€โ”€ last.pt @@ -236,7 +237,7 @@ This YAML file contains the best-performing hyperparameters found during the tun copy_paste: 0.0 ``` -#### best_fitness.png +#### tune_fitness.png This is a plot displaying fitness (typically a performance metric like AP50) against the number of iterations. It helps you visualize how well the genetic algorithm performed over time. 
@@ -247,23 +248,59 @@ This is a plot displaying fitness (typically a performance metric like AP50) aga Hyperparameter Tuning Fitness vs Iteration

-#### tune_results.csv +#### tune_results.ndjson -A CSV file containing detailed results of each iteration during the tuning. Each row in the file represents one iteration, and it includes metrics like fitness score, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), as well as the hyperparameters used. +An NDJSON file containing detailed results of each tuning iteration. Each line is one JSON object with the aggregate fitness, tuned hyperparameters, and per-dataset metrics. Single-dataset and multi-dataset tuning use the same file format. -- **Format**: CSV +- **Format**: NDJSON - **Usage**: Per-iteration results tracking. - **Example**: - ```csv - fitness,lr0,lrf,momentum,weight_decay,warmup_epochs,warmup_momentum,box,cls,dfl,hsv_h,hsv_s,hsv_v,degrees,translate,scale,shear,perspective,flipud,fliplr,mosaic,mixup,copy_paste - 0.05021,0.01,0.01,0.937,0.0005,3.0,0.8,7.5,0.5,1.5,0.015,0.7,0.4,0.0,0.1,0.5,0.0,0.0,0.0,0.5,1.0,0.0,0.0 - 0.07217,0.01003,0.00967,0.93897,0.00049,2.79757,0.81075,7.5,0.50746,1.44826,0.01503,0.72948,0.40658,0.0,0.0987,0.4922,0.0,0.0,0.0,0.49729,1.0,0.0,0.0 - 0.06584,0.01003,0.00855,0.91009,0.00073,3.42176,0.95,8.64301,0.54594,1.72261,0.01503,0.59179,0.40658,0.0,0.0987,0.46955,0.0,0.0,0.0,0.49729,0.80187,0.0,0.0 - ``` + +A pretty-printed example is shown below for readability. In the actual `.ndjson` file, each object is stored on a single line. 
+ +```json +{ + "iteration": 1, + "fitness": 0.23345, + "hyperparameters": { + "lr0": 0.01, + "lrf": 0.01, + "momentum": 0.937, + "weight_decay": 0.0005 + }, + "datasets": { + "coco8": { + "fitness": 0.28992 + }, + "coco8-grayscale": { + "fitness": 0.17697 + } + } +} + +{ + "iteration": 2, + "fitness": 0.23661, + "hyperparameters": { + "lr0": 0.0062, + "lrf": 0.01, + "momentum": 0.90058, + "weight_decay": 0.0 + }, + "datasets": { + "coco8": { + "fitness": 0.29561 + }, + "coco8-grayscale": { + "fitness": 0.1776 + } + } +} +``` #### tune_scatter_plots.png -This file contains scatter plots generated from `tune_results.csv`, helping you visualize relationships between different hyperparameters and performance metrics. Note that hyperparameters initialized to 0 will not be tuned, such as `degrees` and `shear` below. +This file contains scatter plots generated from `tune_results.ndjson`, helping you visualize relationships between different hyperparameters and performance metrics. Note that hyperparameters initialized to 0 will not be tuned, such as `degrees` and `shear` below. - **Format**: PNG - **Usage**: Exploratory data analysis diff --git a/docs/en/guides/nvidia-dgx-spark.md b/docs/en/guides/nvidia-dgx-spark.md index ead672942e..362e122d43 100644 --- a/docs/en/guides/nvidia-dgx-spark.md +++ b/docs/en/guides/nvidia-dgx-spark.md @@ -143,7 +143,7 @@ pip install torch torchvision --index-url https://download.pytorch.org/whl/cu130 When running PyTorch 2.9.1 on NVIDIA DGX Spark, you may encounter the following `UserWarning` when initializing CUDA (e.g. running `yolo checks`, `yolo predict`, etc.): - ```text + ``` UserWarning: Found GPU0 NVIDIA GB10 which is of cuda capability 12.1. 
Minimum and Maximum cuda capability supported by this version of PyTorch is (8.0) - (12.0) ``` diff --git a/docs/en/guides/nvidia-jetson.md b/docs/en/guides/nvidia-jetson.md index e26d2da032..f380069ca6 100644 --- a/docs/en/guides/nvidia-jetson.md +++ b/docs/en/guides/nvidia-jetson.md @@ -270,11 +270,11 @@ The above ultralytics installation will install Torch and Torchvision. However, pip uninstall torch torchvision ``` -2. Install `torch 2.2.0` and `torchvision 0.17.2` according to JP5.1.2 +2. Install `torch 2.1.0` and `torchvision 0.16.2` according to JP5.1.2 ```bash - pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.2.0-cp38-cp38-linux_aarch64.whl - pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.17.2+c1d70fe-cp38-cp38-linux_aarch64.whl + pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl + pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl ``` !!! note @@ -415,14 +415,14 @@ Even though all model exports work on NVIDIA Jetson, we have only included **PyT
Jetson Orin Nano Super Benchmarks -
Benchmarked with Ultralytics 8.3.157
+
Benchmarked with Ultralytics 8.4.33
#### NVIDIA Jetson Orin NX 16GB
Jetson Orin NX 16GB Benchmarks -
Benchmarked with Ultralytics 8.3.157
+
Benchmarked with Ultralytics 8.4.33
### Detailed Comparison Tables @@ -619,92 +619,92 @@ The below table represents the benchmark results for five different models (YOLO !!! tip "Performance" - === "YOLO11n" + === "YOLO26n" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 5.4 | 0.5101 | 13.70 | - | TorchScript | โœ… | 10.5 | 0.5082 | 13.69 | - | ONNX | โœ… | 10.2 | 0.5081 | 14.47 | - | OpenVINO | โœ… | 10.4 | 0.5058 | 56.66 | - | TensorRT (FP32) | โœ… | 12.0 | 0.5081 | 7.44 | - | TensorRT (FP16) | โœ… | 8.2 | 0.5061 | 4.53 | - | TensorRT (INT8) | โœ… | 5.4 | 0.4825 | 3.70 | - | TF SavedModel | โœ… | 25.9 | 0.5077 | 116.23 | - | TF GraphDef | โœ… | 10.3 | 0.5077 | 114.92 | - | TF Lite | โœ… | 10.3 | 0.5077 | 340.75 | - | MNN | โœ… | 10.1 | 0.5059 | 76.26 | - | NCNN | โœ… | 10.2 | 0.5031 | 45.03 | + | PyTorch | โœ… | 5.3 | 0.4790 | 15.60 | + | TorchScript | โœ… | 9.8 | 0.4770 | 12.60 | + | ONNX | โœ… | 9.5 | 0.4760 | 15.76 | + | OpenVINO | โœ… | 9.6 | 0.4820 | 56.23 | + | TensorRT (FP32) | โœ… | 11.3 | 0.4770 | 7.53 | + | TensorRT (FP16) | โœ… | 8.1 | 0.4800 | 4.57 | + | TensorRT (INT8) | โœ… | 5.3 | 0.4490 | 3.80 | + | TF SavedModel | โœ… | 24.6 | 0.4760 | 118.33 | + | TF GraphDef | โœ… | 9.5 | 0.4760 | 116.30 | + | TF Lite | โœ… | 9.9 | 0.4760 | 286.00 | + | MNN | โœ… | 9.4 | 0.4760 | 68.77 | + | NCNN | โœ… | 9.3 | 0.4810 | 47.50 | - === "YOLO11s" + === "YOLO26s" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 18.4 | 0.5790 | 20.90 | - | TorchScript | โœ… | 36.5 | 0.5781 | 21.22 | - | ONNX | โœ… | 36.3 | 0.5781 | 25.07 | - | OpenVINO | โœ… | 36.4 | 0.5810 | 122.98 | - | TensorRT (FP32) | โœ… | 37.9 | 0.5783 | 13.02 | - | TensorRT (FP16) | โœ… | 21.8 | 0.5779 | 6.93 | - | TensorRT (INT8) | โœ… | 12.2 | 0.5735 | 5.08 | - | TF 
SavedModel | โœ… | 91.0 | 0.5782 | 250.65 | - | TF GraphDef | โœ… | 36.4 | 0.5782 | 252.69 | - | TF Lite | โœ… | 36.3 | 0.5782 | 998.68 | - | MNN | โœ… | 36.2 | 0.5781 | 188.01 | - | NCNN | โœ… | 36.2 | 0.5784 | 101.37 | + | PyTorch | โœ… | 20.0 | 0.5730 | 22.83 | + | TorchScript | โœ… | 36.8 | 0.5670 | 21.83 | + | ONNX | โœ… | 36.5 | 0.5664 | 26.29 | + | OpenVINO | โœ… | 36.7 | 0.5653 | 127.09 | + | TensorRT (FP32) | โœ… | 38.2 | 0.5664 | 13.60 | + | TensorRT (FP16) | โœ… | 21.3 | 0.5649 | 7.17 | + | TensorRT (INT8) | โœ… | 12.7 | 0.5468 | 5.25 | + | TF SavedModel | โœ… | 92.2 | 0.5665 | 263.69 | + | TF GraphDef | โœ… | 36.5 | 0.5665 | 268.21 | + | TF Lite | โœ… | 36.9 | 0.5665 | 949.63 | + | MNN | โœ… | 36.4 | 0.5644 | 184.68 | + | NCNN | โœ… | 36.4 | 0.5697 | 107.48 | - === "YOLO11m" + === "YOLO26m" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 38.8 | 0.6266 | 46.50 | - | TorchScript | โœ… | 77.3 | 0.6307 | 47.95 | - | ONNX | โœ… | 76.9 | 0.6307 | 53.06 | - | OpenVINO | โœ… | 77.1 | 0.6284 | 301.63 | - | TensorRT (FP32) | โœ… | 78.8 | 0.6305 | 27.86 | - | TensorRT (FP16) | โœ… | 41.7 | 0.6309 | 13.50 | - | TensorRT (INT8) | โœ… | 23.2 | 0.6291 | 9.12 | - | TF SavedModel | โœ… | 192.7 | 0.6307 | 622.24 | - | TF GraphDef | โœ… | 77.1 | 0.6307 | 628.74 | - | TF Lite | โœ… | 77.0 | 0.6307 | 2997.93 | - | MNN | โœ… | 76.8 | 0.6299 | 509.96 | - | NCNN | โœ… | 76.8 | 0.6284 | 292.99 | + | PyTorch | โœ… | 43.0 | 0.6220 | 44.43 | + | TorchScript | โœ… | 78.5 | 0.6230 | 44.00 | + | ONNX | โœ… | 78.2 | 0.6225 | 53.44 | + | OpenVINO | โœ… | 78.3 | 0.6186 | 303.26 | + | TensorRT (FP32) | โœ… | 80.0 | 0.6217 | 28.19 | + | TensorRT (FP16) | โœ… | 42.6 | 0.6225 | 13.59 | + | TensorRT (INT8) | โœ… | 23.4 | 0.5817 | 9.30 | + | TF SavedModel | โœ… | 196.3 | 0.6229 | 636.03 | + | TF GraphDef | โœ… | 78.2 | 0.6229 | 659.57 | + | TF Lite | โœ… | 
78.5 | 0.6229 | 2905.17 | + | MNN | โœ… | 78.0 | 0.6168 | 500.09 | + | NCNN | โœ… | 78.0 | 0.6224 | 332.39 | - === "YOLO11l" + === "YOLO26l" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 49.0 | 0.6364 | 56.50 | - | TorchScript | โœ… | 97.6 | 0.6409 | 62.51 | - | ONNX | โœ… | 97.0 | 0.6399 | 68.35 | - | OpenVINO | โœ… | 97.3 | 0.6378 | 376.03 | - | TensorRT (FP32) | โœ… | 99.2 | 0.6396 | 35.59 | - | TensorRT (FP16) | โœ… | 52.1 | 0.6361 | 17.48 | - | TensorRT (INT8) | โœ… | 30.9 | 0.6207 | 11.87 | - | TF SavedModel | โœ… | 243.1 | 0.6409 | 807.47 | - | TF GraphDef | โœ… | 97.2 | 0.6409 | 822.88 | - | TF Lite | โœ… | 97.1 | 0.6409 | 3792.23 | - | MNN | โœ… | 96.9 | 0.6372 | 631.16 | - | NCNN | โœ… | 96.9 | 0.6364 | 350.46 | + | PyTorch | โœ… | 51.0 | 0.6230 | 60.97 | + | TorchScript | โœ… | 95.5 | 0.6250 | 56.20 | + | ONNX | โœ… | 95.0 | 0.6247 | 68.12 | + | OpenVINO | โœ… | 95.3 | 0.6238 | 397.84 | + | TensorRT (FP32) | โœ… | 97.1 | 0.6250 | 35.88 | + | TensorRT (FP16) | โœ… | 51.4 | 0.6225 | 17.42 | + | TensorRT (INT8) | โœ… | 30.0 | 0.5923 | 11.83 | + | TF SavedModel | โœ… | 238.4 | 0.6245 | 835.83 | + | TF GraphDef | โœ… | 95.0 | 0.6245 | 852.16 | + | TF Lite | โœ… | 95.4 | 0.6245 | 3650.85 | + | MNN | โœ… | 94.8 | 0.6257 | 612.37 | + | NCNN | โœ… | 94.8 | 0.6323 | 405.45 | - === "YOLO11x" + === "YOLO26x" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 109.3 | 0.7005 | 90.00 | - | TorchScript | โœ… | 218.1 | 0.6901 | 113.40 | - | ONNX | โœ… | 217.5 | 0.6901 | 122.94 | - | OpenVINO | โœ… | 217.8 | 0.6876 | 713.1 | - | TensorRT (FP32) | โœ… | 219.5 | 0.6904 | 66.93 | - | TensorRT (FP16) | โœ… | 112.2 | 0.6892 | 32.58 | - | TensorRT (INT8) | โœ… | 61.5 | 0.6612 | 19.90 | - | TF SavedModel | 
โœ… | 544.3 | 0.6900 | 1605.4 | - | TF GraphDef | โœ… | 217.8 | 0.6900 | 2961.8 | - | TF Lite | โœ… | 217.6 | 0.6900 | 8234.86 | - | MNN | โœ… | 217.3 | 0.6893 | 1254.18 | - | NCNN | โœ… | 217.3 | 0.6849 | 725.50 | + | PyTorch | โœ… | 113.2 | 0.6561 | 98.44 | + | TorchScript | โœ… | 214.0 | 0.6593 | 98.0 | + | ONNX | โœ… | 212.9 | 0.6595 | 122.43 | + | OpenVINO | โœ… | 213.2 | 0.6592 | 760.72 | + | TensorRT (FP32) | โœ… | 215.1 | 0.6593 | 67.17 | + | TensorRT (FP16) | โœ… | 110.2 | 0.6637 | 32.60 | + | TensorRT (INT8) | โœ… | 59.9 | 0.6170 | 19.99 | + | TF SavedModel | โœ… | 533.3 | 0.6593 | 1647.06 | + | TF GraphDef | โœ… | 212.9 | 0.6593 | 1670.30 | + | TF Lite | โœ… | 213.3 | 0.6590 | 8066.30 | + | MNN | โœ… | 212.8 | 0.6600 | 1227.90 | + | NCNN | โœ… | 212.8 | 0.6666 | 782.24 | - Benchmarked with Ultralytics 8.3.157 + Benchmarked with Ultralytics 8.4.33 !!! note @@ -714,92 +714,92 @@ The below table represents the benchmark results for five different models (YOLO !!! tip "Performance" - === "YOLO11n" + === "YOLO26n" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 5.4 | 0.5101 | 12.90 | - | TorchScript | โœ… | 10.5 | 0.5082 | 13.17 | - | ONNX | โœ… | 10.2 | 0.5081 | 15.43 | - | OpenVINO | โœ… | 10.4 | 0.5058 | 39.80 | - | TensorRT (FP32) | โœ… | 11.8 | 0.5081 | 7.94 | - | TensorRT (FP16) | โœ… | 8.1 | 0.5085 | 4.73 | - | TensorRT (INT8) | โœ… | 5.4 | 0.4786 | 3.90 | - | TF SavedModel | โœ… | 25.9 | 0.5077 | 88.48 | - | TF GraphDef | โœ… | 10.3 | 0.5077 | 86.67 | - | TF Lite | โœ… | 10.3 | 0.5077 | 302.55 | - | MNN | โœ… | 10.1 | 0.5059 | 52.73 | - | NCNN | โœ… | 10.2 | 0.5031 | 32.04 | + | PyTorch | โœ… | 5.3 | 0.4799 | 13.90 | + | TorchScript | โœ… | 9.8 | 0.4787 | 11.60 | + | ONNX | โœ… | 9.5 | 0.4763 | 14.18 | + | OpenVINO | โœ… | 9.6 | 0.4819 | 40.19 | + | TensorRT (FP32) | โœ… | 11.4 | 0.4770 | 7.01 | + | TensorRT (FP16) 
| โœ… | 8.0 | 0.4789 | 4.13 | + | TensorRT (INT8) | โœ… | 5.5 | 0.4489 | 3.49 | + | TF SavedModel | โœ… | 24.6 | 0.4764 | 92.34 | + | TF GraphDef | โœ… | 9.5 | 0.4764 | 92.06 | + | TF Lite | โœ… | 9.9 | 0.4764 | 254.43 | + | MNN | โœ… | 9.4 | 0.4760 | 48.55 | + | NCNN | โœ… | 9.3 | 0.4805 | 34.31 | - === "YOLO11s" + === "YOLO26s" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 18.4 | 0.5790 | 21.70 | - | TorchScript | โœ… | 36.5 | 0.5781 | 22.71 | - | ONNX | โœ… | 36.3 | 0.5781 | 26.49 | - | OpenVINO | โœ… | 36.4 | 0.5810 | 84.73 | - | TensorRT (FP32) | โœ… | 37.8 | 0.5783 | 13.77 | - | TensorRT (FP16) | โœ… | 21.2 | 0.5796 | 7.31 | - | TensorRT (INT8) | โœ… | 12.0 | 0.5735 | 5.33 | - | TF SavedModel | โœ… | 91.0 | 0.5782 | 185.06 | - | TF GraphDef | โœ… | 36.4 | 0.5782 | 186.45 | - | TF Lite | โœ… | 36.3 | 0.5782 | 882.58 | - | MNN | โœ… | 36.2 | 0.5775 | 126.36 | - | NCNN | โœ… | 36.2 | 0.5784 | 66.73 | + | PyTorch | โœ… | 19.5 | 0.5738 | 20.40 | + | TorchScript | โœ… | 36.8 | 0.5664 | 19.20 | + | ONNX | โœ… | 36.5 | 0.5664 | 24.35 | + | OpenVINO | โœ… | 36.7 | 0.5653 | 88.18 | + | TensorRT (FP32) | โœ… | 38.5 | 0.5664 | 12.62 | + | TensorRT (FP16) | โœ… | 21.5 | 0.5652 | 6.41 | + | TensorRT (INT8) | โœ… | 12.6 | 0.5468 | 4.78 | + | TF SavedModel | โœ… | 92.2 | 0.5665 | 195.16 | + | TF GraphDef | โœ… | 36.5 | 0.5665 | 197.57 | + | TF Lite | โœ… | 36.9 | 0.5665 | 827.48 | + | MNN | โœ… | 36.4 | 0.5649 | 123.47 | + | NCNN | โœ… | 36.4 | 0.5697 | 74.04 | - === "YOLO11m" + === "YOLO26m" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 38.8 | 0.6266 | 45.00 | - | TorchScript | โœ… | 77.3 | 0.6307 | 51.87 | - | ONNX | โœ… | 76.9 | 0.6307 | 56.00 | - | OpenVINO | โœ… | 77.1 | 0.6284 | 202.69 | - | 
TensorRT (FP32) | โœ… | 78.7 | 0.6305 | 30.38 | - | TensorRT (FP16) | โœ… | 41.8 | 0.6302 | 14.48 | - | TensorRT (INT8) | โœ… | 23.2 | 0.6291 | 9.74 | - | TF SavedModel | โœ… | 192.7 | 0.6307 | 445.58 | - | TF GraphDef | โœ… | 77.1 | 0.6307 | 460.94 | - | TF Lite | โœ… | 77.0 | 0.6307 | 2653.65 | - | MNN | โœ… | 76.8 | 0.6308 | 339.38 | - | NCNN | โœ… | 76.8 | 0.6284 | 187.64 | + | PyTorch | โœ… | 42.2 | 0.6237 | 38.60 | + | TorchScript | โœ… | 78.5 | 0.6227 | 40.50 | + | ONNX | โœ… | 78.2 | 0.6225 | 48.87 | + | OpenVINO | โœ… | 78.3 | 0.6186 | 205.69 | + | TensorRT (FP32) | โœ… | 80.1 | 0.6217 | 24.69 | + | TensorRT (FP16) | โœ… | 42.6 | 0.6225 | 11.66 | + | TensorRT (INT8) | โœ… | 23.4 | 0.5817 | 8.22 | + | TF SavedModel | โœ… | 196.3 | 0.6229 | 451.48 | + | TF GraphDef | โœ… | 78.2 | 0.6229 | 460.94 | + | TF Lite | โœ… | 78.5 | 0.6229 | 2555.53 | + | MNN | โœ… | 78.0 | 0.6217 | 333.33 | + | NCNN | โœ… | 78.0 | 0.6224 | 214.60 | - === "YOLO11l" + === "YOLO26l" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 49.0 | 0.6364 | 56.60 | - | TorchScript | โœ… | 97.6 | 0.6409 | 66.72 | - | ONNX | โœ… | 97.0 | 0.6399 | 71.92 | - | OpenVINO | โœ… | 97.3 | 0.6378 | 254.17 | - | TensorRT (FP32) | โœ… | 99.2 | 0.6406 | 38.89 | - | TensorRT (FP16) | โœ… | 51.9 | 0.6363 | 18.59 | - | TensorRT (INT8) | โœ… | 30.9 | 0.6207 | 12.60 | - | TF SavedModel | โœ… | 243.1 | 0.6409 | 575.98 | - | TF GraphDef | โœ… | 97.2 | 0.6409 | 583.79 | - | TF Lite | โœ… | 97.1 | 0.6409 | 3353.41 | - | MNN | โœ… | 96.9 | 0.6367 | 421.33 | - | NCNN | โœ… | 96.9 | 0.6364 | 228.26 | + | PyTorch | โœ… | 50.7 | 0.6258 | 48.60 | + | TorchScript | โœ… | 95.5 | 0.6249 | 51.60 | + | ONNX | โœ… | 95.0 | 0.6247 | 61.95 | + | OpenVINO | โœ… | 95.3 | 0.6238 | 272.47 | + | TensorRT (FP32) | โœ… | 97.1 | 0.6250 | 31.64 | + | TensorRT (FP16) | โœ… | 51.4 | 0.6225 | 14.77 | + | 
TensorRT (INT8) | โœ… | 30.0 | 0.5923 | 10.49 | + | TF SavedModel | โœ… | 238.4 | 0.6245 | 596.46 | + | TF GraphDef | โœ… | 95.0 | 0.6245 | 606.10 | + | TF Lite | โœ… | 95.4 | 0.6245 | 3275.55 | + | MNN | โœ… | 94.8 | 0.6247 | 408.15 | + | NCNN | โœ… | 94.8 | 0.6323 | 262.99 | - === "YOLO11x" + === "YOLO26x" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 109.3 | 0.7005 | 98.50 | - | TorchScript | โœ… | 218.1 | 0.6901 | 123.03 | - | ONNX | โœ… | 217.5 | 0.6901 | 129.55 | - | OpenVINO | โœ… | 217.8 | 0.6876 | 483.44 | - | TensorRT (FP32) | โœ… | 219.6 | 0.6904 | 75.92 | - | TensorRT (FP16) | โœ… | 112.1 | 0.6885 | 35.78 | - | TensorRT (INT8) | โœ… | 61.6 | 0.6592 | 21.60 | - | TF SavedModel | โœ… | 544.3 | 0.6900 | 1120.43 | - | TF GraphDef | โœ… | 217.7 | 0.6900 | 1172.35 | - | TF Lite | โœ… | 217.6 | 0.6900 | 7283.63 | - | MNN | โœ… | 217.3 | 0.6877 | 840.16 | - | NCNN | โœ… | 217.3 | 0.6849 | 474.41 | + | PyTorch | โœ… | 113.2 | 0.6561 | 84.40 | + | TorchScript | โœ… | 213.5 | 0.6594 | 91.20 | + | ONNX | โœ… | 212.9 | 0.6595 | 109.34 | + | OpenVINO | โœ… | 213.2 | 0.6592 | 520.88 | + | TensorRT (FP32) | โœ… | 215.1 | 0.6593 | 57.18 | + | TensorRT (FP16) | โœ… | 109.7 | 0.6632 | 26.76 | + | TensorRT (INT8) | โœ… | 60.0 | 0.6170 | 17.32 | + | TF SavedModel | โœ… | 533.3 | 0.6593 | 1170.50 | + | TF GraphDef | โœ… | 212.9 | 0.6593 | 1217.87 | + | TF Lite | โœ… | 213.3 | 0.6593 | 7247.11 | + | MNN | โœ… | 212.8 | 0.6591 | 820.90 | + | NCNN | โœ… | 212.8 | 0.6666 | 534.30 | - Benchmarked with Ultralytics 8.3.157 + Benchmarked with Ultralytics 8.4.33 !!! 
note diff --git a/docs/en/guides/ros-quickstart.md b/docs/en/guides/ros-quickstart.md index f2f4594c16..aa6eb76096 100644 --- a/docs/en/guides/ros-quickstart.md +++ b/docs/en/guides/ros-quickstart.md @@ -243,7 +243,7 @@ Using YOLO, it is possible to extract and combine information from both RGB and !!! warning "RGB-D Cameras" - When working with depth images, it is essential to ensure that the RGB and depth images are correctly aligned. RGB-D cameras, such as the [Intel RealSense](https://realsenseai.com/) series, provide synchronized RGB and depth images, making it easier to combine information from both sources. If using separate RGB and depth cameras, it is crucial to calibrate them to ensure accurate alignment. + When working with depth images, it is essential to ensure that the RGB and depth images are correctly aligned. RGB-D cameras, such as the [Intel RealSense](https://www.realsenseai.com/) series, provide synchronized RGB and depth images, making it easier to combine information from both sources. If using separate RGB and depth cameras, it is crucial to calibrate them to ensure accurate alignment. 
#### Depth Step-by-Step Usage diff --git a/docs/en/guides/sahi-tiled-inference.md b/docs/en/guides/sahi-tiled-inference.md index 0662006842..02a0c762a0 100644 --- a/docs/en/guides/sahi-tiled-inference.md +++ b/docs/en/guides/sahi-tiled-inference.md @@ -70,15 +70,10 @@ pip install -U ultralytics sahi ### Import Modules and Download Resources -Here's how to import the necessary modules and download a YOLO26 model and some test images: +Here's how to download some test images: ```python from sahi.utils.file import download_from_url -from sahi.utils.ultralytics import download_yolo26n_model - -# Download YOLO26 model -model_path = "models/yolo26n.pt" -download_yolo26n_model(model_path) # Download test images download_from_url( @@ -102,7 +97,7 @@ from sahi import AutoDetectionModel detection_model = AutoDetectionModel.from_pretrained( model_type="ultralytics", - model_path=model_path, + model_path="yolo26n.pt", confidence_threshold=0.3, device="cpu", # or 'cuda:0' ) @@ -110,17 +105,14 @@ detection_model = AutoDetectionModel.from_pretrained( ### Perform Standard Prediction -Perform standard inference using an image path or a numpy image. +Perform standard inference using an image path. 
```python from sahi.predict import get_prediction -from sahi.utils.cv import read_image -# With an image path result = get_prediction("demo_data/small-vehicles1.jpeg", detection_model) -# With a numpy image -result_with_np_image = get_prediction(read_image("demo_data/small-vehicles1.jpeg"), detection_model) +result.export_visuals(export_dir="demo_data/", hide_conf=True) ``` ### Visualize Results @@ -128,10 +120,13 @@ result_with_np_image = get_prediction(read_image("demo_data/small-vehicles1.jpeg Export and visualize the predicted bounding boxes and masks: ```python -from IPython.display import Image +from PIL import Image -result.export_visuals(export_dir="demo_data/") -Image("demo_data/prediction_visual.png") +# Open the predicted image +processed_image = Image.open("demo_data/prediction_visual.png") + +# Display the predicted image +processed_image.show() ``` ## Sliced Inference with YOLO26 @@ -139,6 +134,7 @@ Image("demo_data/prediction_visual.png") Perform sliced inference by specifying the slice dimensions and overlap ratios: ```python +from PIL import Image from sahi.predict import get_sliced_prediction result = get_sliced_prediction( @@ -149,6 +145,15 @@ result = get_sliced_prediction( overlap_height_ratio=0.2, overlap_width_ratio=0.2, ) + +# Export results +result.export_visuals(export_dir="demo_data/", hide_conf=True) + +# Open the predicted image +processed_image = Image.open("demo_data/prediction_visual.png") + +# Display the predicted image +processed_image.show() ``` ## Handling Prediction Results @@ -175,7 +180,7 @@ from sahi.predict import predict predict( model_type="ultralytics", - model_path="path/to/yolo26n.pt", + model_path="yolo26n.pt", model_device="cpu", # or 'cuda:0' model_confidence_threshold=0.4, source="path/to/dir", @@ -219,21 +224,20 @@ Integrating Ultralytics YOLO26 with SAHI (Slicing Aided Hyper Inference) for sli pip install -U ultralytics sahi ``` -Then, download a YOLO26 model and test images: +Then, download test images: 
```python from sahi.utils.file import download_from_url -from sahi.utils.ultralytics import download_yolo26n_model - -# Download YOLO26 model -model_path = "models/yolo26n.pt" -download_yolo26n_model(model_path) # Download test images download_from_url( "https://raw.githubusercontent.com/obss/sahi/main/demo/demo_data/small-vehicles1.jpeg", "demo_data/small-vehicles1.jpeg", ) +download_from_url( + "https://raw.githubusercontent.com/obss/sahi/main/demo/demo_data/terrain2.png", + "demo_data/terrain2.png", +) ``` For more detailed instructions, refer to our [Sliced Inference guide](#sliced-inference-with-yolo26). @@ -253,10 +257,13 @@ Learn more about the [benefits of sliced inference](#benefits-of-sliced-inferenc Yes, you can visualize prediction results when using YOLO26 with SAHI. Here's how you can export and visualize the results: ```python -from IPython.display import Image +from PIL import Image -result.export_visuals(export_dir="demo_data/") -Image("demo_data/prediction_visual.png") +result.export_visuals(export_dir="demo_data/", hide_conf=True) + +processed_image = Image.open("demo_data/prediction_visual.png") + +processed_image.show() ``` This command will save the visualized predictions to the specified directory, and you can then load the image to view it in your notebook or application. For a detailed guide, check out the [Standard Inference section](#visualize-results). diff --git a/docs/en/guides/vertex-ai-deployment-with-docker.md b/docs/en/guides/vertex-ai-deployment-with-docker.md index 7352616722..8612e93c22 100644 --- a/docs/en/guides/vertex-ai-deployment-with-docker.md +++ b/docs/en/guides/vertex-ai-deployment-with-docker.md @@ -27,7 +27,7 @@ Before we start, you will need to create a Google Cloud Platform (GCP) project. ## Prerequisites 1. Install [Docker](https://docs.docker.com/engine/install/) on your machine. -2. 
Install the [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) and [authenticate for using the gcloud CLI](https://cloud.google.com/docs/authentication/gcloud). +2. Install the [Google Cloud SDK](https://docs.cloud.google.com/sdk/docs/install-sdk) and [authenticate for using the gcloud CLI](https://docs.cloud.google.com/docs/authentication/gcloud). 3. It is highly recommended that you go through the [Docker Quickstart Guide for Ultralytics](https://docs.ultralytics.com/guides/docker-quickstart/), because you will need to extend one of the official Ultralytics Docker images while following this guide. ## 1. Create an inference backend with FastAPI @@ -507,7 +507,7 @@ docker push YOUR_REGION-docker.pkg.dev/YOUR_PROJECT_ID/YOUR_REPOSITORY_NAME/IMAG Wait for the process to complete. You should now see the image in your Artifact Registry repository. -For more specific instructions on how to work with images in Artifact Registry, see the Artifact Registry documentation: [Push and pull images](https://cloud.google.com/artifact-registry/docs/docker/pushing-and-pulling). +For more specific instructions on how to work with images in Artifact Registry, see the Artifact Registry documentation: [Push and pull images](https://docs.cloud.google.com/artifact-registry/docs/docker/pushing-and-pulling). ## 4. Import your model in Vertex AI diff --git a/docs/en/help/contributing.md b/docs/en/help/contributing.md index 75795dba87..143af3d1a0 100644 --- a/docs/en/help/contributing.md +++ b/docs/en/help/contributing.md @@ -42,7 +42,7 @@ Before we can merge your pull request, you must sign our [Contributor License Ag After submitting your pull request, the CLA bot will guide you through the signing process. 
To sign the CLA, simply add a comment in your PR stating: -```text +``` I have read the CLA Document and I sign the CLA ``` @@ -207,11 +207,11 @@ We highly value bug reports as they help us improve the quality and reliability Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.ultralytics.com/legal/agpl-3-0-software-license) for its repositories. This license promotes [openness](https://en.wikipedia.org/wiki/Openness), [transparency](https://www.ultralytics.com/glossary/transparency-in-ai), and [collaborative improvement](https://en.wikipedia.org/wiki/Collaborative_software) in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation. -We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3) to contribute effectively and ethically to the Ultralytics open-source community. +We encourage all contributors to familiarize themselves with the terms of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) to contribute effectively and ethically to the Ultralytics open-source community. ## ๐ŸŒ Open-Sourcing Your YOLO Project Under AGPL-3.0 -Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-v3) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. +Using Ultralytics YOLO models or code in your project? The [AGPL-3.0 license](https://opensource.org/license/agpl-3.0) requires that your entire derivative work also be open-sourced under AGPL-3.0. This ensures modifications and larger projects built upon open-source foundations remain open. 
### Why AGPL-3.0 Compliance Matters @@ -230,7 +230,7 @@ Complying means making the **complete corresponding source code** of your projec - **Use Ultralytics Template:** Start with the [Ultralytics template repository](https://github.com/ultralytics/template) for a clean, modular setup integrating YOLO. 2. **License Your Project:** - - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-v3). + - Add a `LICENSE` file containing the full text of the [AGPL-3.0 license](https://opensource.org/license/agpl-3.0). - Add a notice at the top of each source file indicating the license. 3. **Publish Your Source Code:** @@ -295,7 +295,7 @@ Contributing to Ultralytics YOLO open-source repositories improves the software, To sign the Contributor License Agreement (CLA), follow the instructions provided by the CLA bot after submitting your pull request. This process ensures that your contributions are properly licensed under the AGPL-3.0 license, maintaining the legal integrity of the open-source project. Add a comment in your pull request stating: -```text +``` I have read the CLA Document and I sign the CLA ``` diff --git a/docs/en/help/security.md b/docs/en/help/security.md index 90edac058a..bdac9f49c7 100644 --- a/docs/en/help/security.md +++ b/docs/en/help/security.md @@ -9,9 +9,9 @@ At [Ultralytics](https://www.ultralytics.com/), the security of our users' data ## Snyk Scanning -We utilize [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct comprehensive security scans on Ultralytics repositories. Snyk's robust scanning capabilities extend beyond dependency checks; it also examines our code and Dockerfiles for various vulnerabilities. By identifying and addressing these issues proactively, we ensure a higher level of security and reliability for our users. +We utilize [Snyk](https://security.snyk.io/package/pip/ultralytics) to conduct comprehensive security scans on Ultralytics repositories. 
Snyk's robust scanning capabilities extend beyond dependency checks; it also examines our code and Dockerfiles for various vulnerabilities. By identifying and addressing these issues proactively, we ensure a higher level of security and reliability for our users. -[![ultralytics](https://snyk.io/advisor/python/ultralytics/badge.svg)](https://snyk.io/advisor/python/ultralytics) +[![ultralytics](https://img.shields.io/badge/Snyk_security-monitored-8A2BE2)](https://security.snyk.io/package/pip/ultralytics) ## GitHub CodeQL Scanning @@ -51,7 +51,7 @@ These tools ensure proactive identification and resolution of security issues, e ### How does Ultralytics use Snyk for security scanning? -Ultralytics utilizes [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct thorough security scans on its repositories. Snyk extends beyond basic dependency checks, examining the code and Dockerfiles for various vulnerabilities. By proactively identifying and resolving potential security issues, Snyk helps ensure that Ultralytics' open-source projects remain secure and reliable. +Ultralytics utilizes [Snyk](https://security.snyk.io/package/pip/ultralytics) to conduct thorough security scans on its repositories. Snyk extends beyond basic dependency checks, examining the code and Dockerfiles for various vulnerabilities. By proactively identifying and resolving potential security issues, Snyk helps ensure that Ultralytics' open-source projects remain secure and reliable. To see the Snyk badge and learn more about its deployment, check the [Snyk Scanning section](#snyk-scanning). 
diff --git a/docs/en/index.md b/docs/en/index.md index dc085171a3..862712974b 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -164,7 +164,7 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde Ultralytics offers two licensing options to accommodate diverse use cases: -- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details. +- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details. - **Enterprise License**: Designed for commercial use, this license permits seamless integration of Ultralytics software and AI models into commercial goods and services, bypassing the open-source requirements of AGPL-3.0. If your scenario involves embedding our solutions into a commercial offering, reach out through [Ultralytics Licensing](https://www.ultralytics.com/license). Our licensing strategy is designed to ensure that any improvements to our open-source projects are returned to the community. We believe in open source, and our mission is to ensure that our contributions can be used and expanded in ways that benefit everyone. 
diff --git a/docs/en/integrations/axelera.md b/docs/en/integrations/axelera.md index 408880e722..d58a6ac356 100644 --- a/docs/en/integrations/axelera.md +++ b/docs/en/integrations/axelera.md @@ -6,7 +6,7 @@ keywords: Axelera AI, Metis AIPU, Voyager SDK, Edge AI, YOLOv8, YOLO11, YOLO26, # Axelera AI Export and Deployment -Ultralytics partners with [Axelera AI](https://www.axelera.ai/) to enable high-performance, energy-efficient inference on [Edge AI](https://www.ultralytics.com/glossary/edge-ai) devices. Export and deploy **Ultralytics YOLO models** directly to the **Metis® AIPU** using the **Voyager SDK**. +Ultralytics partners with [Axelera AI](https://axelera.ai/) to enable high-performance, energy-efficient inference on [Edge AI](https://www.ultralytics.com/glossary/edge-ai) devices. Export and deploy **Ultralytics YOLO models** directly to the **Metis® AIPU** using the **Voyager SDK**. ![Axelera AI edge deployment ecosystem for YOLO](https://github.com/user-attachments/assets/c97a0297-390d-47df-bb13-ff1aa499f34a) @@ -148,20 +148,21 @@ Export your trained YOLO models using the standard Ultralytics export command. 
### Export Arguments -| Argument | Type | Default | Description | -| :--------- | :--------------- | :--------------- | :------------------------------------------------------------------------------------------- | -| `format` | `str` | `'axelera'` | Target format for Axelera Metis AIPU hardware | -| `imgsz` | `int` or `tuple` | `640` | Image size for model input | -| `int8` | `bool` | `True` | Enable [INT8 quantization](https://www.ultralytics.com/glossary/model-quantization) for AIPU | -| `data` | `str` | `'coco128.yaml'` | [Dataset](https://docs.ultralytics.com/datasets/) config for quantization calibration | -| `fraction` | `float` | `1.0` | Fraction of dataset for calibration (100-400 images recommended) | -| `device` | `str` | `None` | Export device: GPU (`device=0`) or CPU (`device=cpu`) | +| Argument | Type | Default | Description | +| :--------- | :--------------- | :--------------- | :-------------------------------------------------------------------------------------------------------------------------------------- | +| `format` | `str` | `'axelera'` | Target format for Axelera Metis AIPU hardware. | +| `imgsz` | `int` or `tuple` | `640` | Image size for model input. | +| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. | +| `int8` | `bool` | `True` | Enable [INT8 quantization](https://www.ultralytics.com/glossary/model-quantization) for AIPU. | +| `data` | `str` | `'coco128.yaml'` | [Dataset](https://docs.ultralytics.com/datasets/) config for quantization calibration. | +| `fraction` | `float` | `1.0` | Fraction of dataset for calibration (100-400 images recommended). | +| `device` | `str` | `None` | Export device: GPU (`device=0`) or CPU (`device=cpu`). | For all export options, see the [Export Mode documentation](https://docs.ultralytics.com/modes/export/). 
### Output Structure -```text +``` yolo26n_axelera_model/ โ”œโ”€โ”€ yolo26n.axm # Axelera model file โ””โ”€โ”€ metadata.yaml # Model metadata (classes, image size, etc.) diff --git a/docs/en/integrations/coreml.md b/docs/en/integrations/coreml.md index adb56d6294..a5aca92670 100644 --- a/docs/en/integrations/coreml.md +++ b/docs/en/integrations/coreml.md @@ -117,15 +117,16 @@ Before diving into the usage instructions, be sure to check out the range of [YO ### Export Arguments -| Argument | Type | Default | Description | -| -------- | ---------------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `format` | `str` | `'coreml'` | Target format for the exported model, defining compatibility with various deployment environments. | -| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. | -| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. | -| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal [accuracy](https://www.ultralytics.com/glossary/accuracy) loss, primarily for edge devices. | -| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS), essential for accurate and efficient detection post-processing. | -| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. | -| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`). 
| +| Argument | Type | Default | Description | +| --------- | ---------------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `format` | `str` | `'coreml'` | Target format for the exported model, defining compatibility with various deployment environments. | +| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. | +| `dynamic` | `bool` | `False` | Allows dynamic input sizes, enhancing flexibility in handling varying image dimensions. | +| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. | +| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal [accuracy](https://www.ultralytics.com/glossary/accuracy) loss, primarily for edge devices. | +| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS), essential for accurate and efficient detection post-processing. | +| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. | +| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`). | !!! 
tip diff --git a/docs/en/integrations/executorch.md b/docs/en/integrations/executorch.md index 640f4bdce0..ca06cefe19 100644 --- a/docs/en/integrations/executorch.md +++ b/docs/en/integrations/executorch.md @@ -80,8 +80,10 @@ Exporting YOLO26 models to ExecuTorch is straightforward: # Export the model to ExecuTorch format model.export(format="executorch") # creates 'yolo26n_executorch_model' directory + # Load the exported ExecuTorch model executorch_model = YOLO("yolo26n_executorch_model") + # Run inference on a single image results = executorch_model.predict("https://ultralytics.com/images/bus.jpg") ``` @@ -101,16 +103,18 @@ Exporting YOLO26 models to ExecuTorch is straightforward: When exporting to ExecuTorch format, you can specify the following arguments: -| Argument | Type | Default | Description | -| -------- | --------------- | ------- | ------------------------------------------ | -| `imgsz` | `int` or `list` | `640` | Image size for model input (height, width) | -| `device` | `str` | `'cpu'` | Device to use for export (`'cpu'`) | +| Argument | Type | Default | Description | +| -------- | ---------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| `format` | `str` | `'executorch'` | Target format for the exported model, defining compatibility with various deployment environments. | +| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. | +| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. | +| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`). 
| ### Output Structure The ExecuTorch export creates a directory containing the model and metadata: -```text +``` yolo26n_executorch_model/ โ”œโ”€โ”€ yolo26n.pte # ExecuTorch model file โ””โ”€โ”€ metadata.yaml # Model metadata (classes, image size, etc.) diff --git a/docs/en/integrations/google-colab.md b/docs/en/integrations/google-colab.md index c5f758576a..af3269d3c7 100644 --- a/docs/en/integrations/google-colab.md +++ b/docs/en/integrations/google-colab.md @@ -103,7 +103,7 @@ If you'd like to dive deeper into Google Colab, here are a few resources to guid - **[Image Segmentation with Ultralytics YOLO26 on Google Colab](https://www.ultralytics.com/blog/image-segmentation-with-ultralytics-yolo11-on-google-colab)**: Explore how to perform image segmentation tasks using YOLO26 in the Google Colab environment, with practical examples using datasets like the Roboflow Carparts Segmentation Dataset. -- **[Curated Notebooks](https://colab.google/notebooks/)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas. +- **[Curated Notebooks](https://developers.google.com/colab)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas. - **[Google Colab's Medium Page](https://medium.com/google-colab)**: You can find tutorials, updates, and community contributions here that can help you better understand and utilize this tool. @@ -130,7 +130,7 @@ Google Colab offers several advantages for training YOLO26 models: - **Integration with Google Drive:** Easily store and access datasets and models. - **Collaboration:** Share notebooks with others and collaborate in real-time. -For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://colab.google/notebooks/). 
+For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://developers.google.com/colab). ### How can I handle Google Colab session timeouts during YOLO26 training? diff --git a/docs/en/integrations/ibm-watsonx.md b/docs/en/integrations/ibm-watsonx.md index 0df5e21ea1..518aaeda7d 100644 --- a/docs/en/integrations/ibm-watsonx.md +++ b/docs/en/integrations/ibm-watsonx.md @@ -87,7 +87,7 @@ Then, you can import the needed packages. For this tutorial, we will use a [marine litter dataset](https://www.kaggle.com/datasets/atiqishrak/trash-dataset-icra19) available on Kaggle. With this dataset, we will custom-train a YOLO26 model to detect and classify litter and biological objects in underwater images. -We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials". +We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials". Copy and paste your Kaggle username and API key into the following code. Then run the code to install the API and load the dataset into Watsonx. diff --git a/docs/en/integrations/openvino.md b/docs/en/integrations/openvino.md index 53f5f2cf91..2a3efa1899 100644 --- a/docs/en/integrations/openvino.md +++ b/docs/en/integrations/openvino.md @@ -99,7 +99,7 @@ For more details about the export process, visit the [Ultralytics documentation 1.
**Performance**: OpenVINO delivers high-performance inference by utilizing the power of Intel CPUs, integrated and discrete GPUs, and FPGAs. 2. **Support for Heterogeneous Execution**: OpenVINO provides an API to write once and deploy on any supported Intel hardware (CPU, GPU, FPGA, VPU, etc.). 3. **Model Optimizer**: OpenVINO provides a Model Optimizer that imports, converts, and optimizes models from popular [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) frameworks such as PyTorch, [TensorFlow](https://www.ultralytics.com/glossary/tensorflow), TensorFlow Lite, Keras, ONNX, PaddlePaddle, and Caffe. -4. **Ease of Use**: The toolkit comes with more than [80 tutorial notebooks](https://github.com/openvinotoolkit/openvino_notebooks) (including [YOLOv8 optimization](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov8-optimization)) teaching different aspects of the toolkit. +4. **Ease of Use**: The toolkit comes with more than [80 tutorial notebooks](https://github.com/openvinotoolkit/openvino_notebooks) (including [YOLO26 optimization](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov26-optimization)) teaching different aspects of the toolkit. ## OpenVINO Export Structure @@ -136,7 +136,7 @@ This approach is ideal for fast prototyping or deployment when you don't need fu ### Inference with OpenVINO Runtime -The OpenVINO Runtime provides a unified API for inference across all supported Intel hardware. It also provides advanced capabilities like load balancing across Intel hardware and asynchronous execution. For more information on running inference, refer to the [YOLO26 notebooks](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov11-optimization). +The OpenVINO Runtime provides a unified API for inference across all supported Intel hardware. It also provides advanced capabilities like load balancing across Intel hardware and asynchronous execution. 
For more information on running inference, refer to the [YOLO26 notebooks](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/yolov26-optimization). Remember, you'll need the XML and BIN files as well as any application-specific settings like input size, scale factor for normalization, etc., to correctly set up and use the model with the Runtime. diff --git a/docs/en/integrations/sony-imx500.md b/docs/en/integrations/sony-imx500.md index a44ae7e4a3..40a65ab822 100644 --- a/docs/en/integrations/sony-imx500.md +++ b/docs/en/integrations/sony-imx500.md @@ -185,6 +185,7 @@ Export an Ultralytics YOLO11 model to IMX500 format and run inference with the e | `int8` | `bool` | `True` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal [accuracy](https://www.ultralytics.com/glossary/accuracy) loss, primarily for edge devices. | | `data` | `str` | `'coco8.yaml'` | Path to the [dataset](https://docs.ultralytics.com/datasets/) configuration file (default: `coco8.yaml`), essential for quantization. | | `fraction` | `float` | `1.0` | Specifies the fraction of the dataset to use for INT8 quantization calibration. Allows for calibrating on a subset of the full dataset, useful for experiments or when resources are limited. If not specified with INT8 enabled, the full dataset will be used. | +| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the exported model. When `True`, `conf`, `iou`, and `agnostic_nms` are also accepted. | | `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`). | !!! 
tip @@ -204,9 +205,9 @@ The export process will create an ONNX model for quantization validation, along โ”œโ”€โ”€ dnnParams.xml โ”œโ”€โ”€ labels.txt โ”œโ”€โ”€ packerOut.zip - โ”œโ”€โ”€ yolo11n_imx.onnx - โ”œโ”€โ”€ yolo11n_imx_MemoryReport.json - โ””โ”€โ”€ yolo11n_imx.pbtxt + โ”œโ”€โ”€ model_imx.onnx + โ”œโ”€โ”€ model_imx_MemoryReport.json + โ””โ”€โ”€ model_imx.pbtxt ``` === "Pose Estimation" @@ -216,9 +217,9 @@ The export process will create an ONNX model for quantization validation, along โ”œโ”€โ”€ dnnParams.xml โ”œโ”€โ”€ labels.txt โ”œโ”€โ”€ packerOut.zip - โ”œโ”€โ”€ yolo11n-pose_imx.onnx - โ”œโ”€โ”€ yolo11n-pose_imx_MemoryReport.json - โ””โ”€โ”€ yolo11n-pose_imx.pbtxt + โ”œโ”€โ”€ model_imx.onnx + โ”œโ”€โ”€ model_imx_MemoryReport.json + โ””โ”€โ”€ model_imx.pbtxt ``` === "Classification" @@ -228,9 +229,9 @@ The export process will create an ONNX model for quantization validation, along โ”œโ”€โ”€ dnnParams.xml โ”œโ”€โ”€ labels.txt โ”œโ”€โ”€ packerOut.zip - โ”œโ”€โ”€ yolo11n-cls_imx.onnx - โ”œโ”€โ”€ yolo11n-cls_imx_MemoryReport.json - โ””โ”€โ”€ yolo11n-cls_imx.pbtxt + โ”œโ”€โ”€ model_imx.onnx + โ”œโ”€โ”€ model_imx_MemoryReport.json + โ””โ”€โ”€ model_imx.pbtxt ``` === "Instance Segmentation" @@ -240,9 +241,9 @@ The export process will create an ONNX model for quantization validation, along โ”œโ”€โ”€ dnnParams.xml โ”œโ”€โ”€ labels.txt โ”œโ”€โ”€ packerOut.zip - โ”œโ”€โ”€ yolo11n-seg_imx.onnx - โ”œโ”€โ”€ yolo11n-seg_imx_MemoryReport.json - โ””โ”€โ”€ yolo11n-seg_imx.pbtxt + โ”œโ”€โ”€ model_imx.onnx + โ”œโ”€โ”€ model_imx_MemoryReport.json + โ””โ”€โ”€ model_imx.pbtxt ``` ## Using IMX500 Export in Deployment diff --git a/docs/en/integrations/torchscript.md b/docs/en/integrations/torchscript.md index b125cc7efc..6a9c48d700 100644 --- a/docs/en/integrations/torchscript.md +++ b/docs/en/integrations/torchscript.md @@ -112,6 +112,7 @@ All [Ultralytics YOLO26 models](../models/index.md) are designed to support expo | `format` | `str` | `'torchscript'` | Target format for the 
exported model, defining compatibility with various deployment environments. | | `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. | | `dynamic` | `bool` | `False` | Allows dynamic input sizes, enhancing flexibility in handling varying image dimensions. | +| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. | | `optimize` | `bool` | `False` | Applies optimization for mobile devices, potentially reducing model size and improving performance. | | `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS), essential for accurate and efficient detection post-processing. | | `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. | diff --git a/docs/en/integrations/weights-biases.md b/docs/en/integrations/weights-biases.md index 6b4ac5dc56..1b0534b1da 100644 --- a/docs/en/integrations/weights-biases.md +++ b/docs/en/integrations/weights-biases.md @@ -214,7 +214,7 @@ These features help in tracking experiments, optimizing models, and collaboratin After running your training script with W&B integration: 1. A link to your W&B dashboard will be provided in the console output. -2. Click on the link or go to [wandb.ai](https://wandb.ai/) and log in to your account. +2. Click on the link or go to [wandb.ai](https://wandb.ai/site) and log in to your account. 3. Navigate to your project to view detailed metrics, visualizations, and model performance data. The dashboard offers insights into your model's training process, allowing you to analyze and improve your YOLO26 models effectively. 
diff --git a/docs/en/models/fast-sam.md b/docs/en/models/fast-sam.md index ff7655f592..9aa1a65ac4 100644 --- a/docs/en/models/fast-sam.md +++ b/docs/en/models/fast-sam.md @@ -1,7 +1,7 @@ --- comments: true description: Discover FastSAM, a real-time CNN-based solution for segmenting any object in an image. Efficient, competitive, and ideal for various vision tasks. -keywords: FastSAM, Fast Segment Anything Model, Ultralytics, real-time segmentation, CNN, YOLOv8-seg, object segmentation, image processing, computer vision +keywords: FastSAM, Fast Segment Anything Model, Ultralytics, real-time segmentation, instance segmentation, FastSAM vs YOLO, FastSAM vs SAM, YOLOv8-seg, YOLO26-seg, zero-shot segmentation, object segmentation, Meta --- # Fast Segment Anything Model (FastSAM) @@ -54,21 +54,22 @@ This table presents the available models with their specific pretrained weights, ## FastSAM Comparison vs YOLO -Here we compare Meta's SAM 2 models, including the smallest SAM2-t variant, with Ultralytics smallest segmentation model, [YOLO11n-seg](../tasks/segment.md): +Here we compare Meta's SAM 2 models, including the smallest SAM2-t variant, with Ultralytics segmentation models including [YOLO26n-seg](yolo26.md): | Model | Size
(MB) | Parameters
(M) | Speed (CPU)
(ms/im) | | ---------------------------------------------------------------------------------------------- | ----------------------- | ---------------------------- | --------------------------------- | -| [Meta SAM-b](sam.md) | 375 | 93.7 | 49401 | -| [Meta SAM2-b](sam-2.md) | 162 | 80.8 | 31901 | -| [Meta SAM2-t](sam-2.md) | 78.1 | 38.9 | 25997 | -| [MobileSAM](mobile-sam.md) | 40.7 | 10.1 | 25381 | -| [FastSAM-s](fast-sam.md) with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.7 | 11.8 | 55.9 | -| Ultralytics [YOLOv8n-seg](yolov8.md) | **6.7** (11.7x smaller) | **3.4** (11.4x less) | **24.5** (1061x faster) | -| Ultralytics [YOLO11n-seg](yolo11.md) | **5.9** (13.2x smaller) | **2.9** (13.4x less) | **30.1** (864x faster) | +| [Meta SAM-b](sam.md) | 375 | 93.7 | 41703 | +| [Meta SAM2-b](sam-2.md) | 162 | 80.8 | 28867 | +| [Meta SAM2-t](sam-2.md) | 78.1 | 38.9 | 23430 | +| [MobileSAM](mobile-sam.md) | 40.7 | 10.1 | 23802 | +| [FastSAM-s](fast-sam.md) with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.9 | 11.8 | 58.0 | +| Ultralytics [YOLOv8n-seg](yolov8.md) | **7.1** (11.0x smaller) | **3.4** (11.4x less) | **24.8** (945x faster) | +| Ultralytics [YOLO11n-seg](yolo11.md) | **6.2** (12.6x smaller) | **2.9** (13.4x less) | **24.3** (964x faster) | +| Ultralytics [YOLO26n-seg](yolo26.md) | **6.7** (11.7x smaller) | **2.7** (14.4x less) | **25.2** (930x faster) | -This comparison demonstrates the substantial differences in model sizes and speeds between SAM variants and YOLO segmentation models. While SAM provides unique automatic segmentation capabilities, YOLO models, particularly YOLOv8n-seg and YOLO11n-seg, are significantly smaller, faster, and more computationally efficient. +This comparison demonstrates the substantial differences in model sizes and speeds between SAM variants and YOLO segmentation models. 
While SAM provides unique automatic segmentation capabilities, YOLO models, particularly YOLOv8n-seg, YOLO11n-seg, and YOLO26n-seg, are significantly smaller, faster, and more computationally efficient. -Tests run on a 2025 Apple M4 Pro with 24GB of RAM using `torch==2.6.0` and `ultralytics==8.3.90`. To reproduce this test: +SAM speeds measured with PyTorch, YOLO speeds measured with ONNX Runtime. Tests run on a 2025 Apple MacBook Air (M4) with 16GB of RAM using `torch==2.10.0`, `ultralytics==8.4.31`, and `onnxruntime==1.24.4`. To reproduce this test: !!! example @@ -88,10 +89,12 @@ Tests run on a 2025 Apple M4 Pro with 24GB of RAM using `torch==2.6.0` and `ultr model.info() model(ASSETS) - # Profile YOLO models - for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt"]: + # Profile YOLO models (ONNX) + for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt", "yolo26n-seg.pt"]: model = YOLO(file_name) model.info() + onnx_path = model.export(format="onnx", dynamic=True) + model = YOLO(onnx_path) model(ASSETS) ``` diff --git a/docs/en/models/mobile-sam.md b/docs/en/models/mobile-sam.md index 69a6629ad7..6d6b0ba4d7 100644 --- a/docs/en/models/mobile-sam.md +++ b/docs/en/models/mobile-sam.md @@ -1,7 +1,7 @@ --- comments: true description: Discover MobileSAM, a lightweight and fast image segmentation model for mobile and edge applications. Compare its performance with SAM and YOLO models.
-keywords: MobileSAM, image segmentation, lightweight model, fast segmentation, mobile applications, SAM, Tiny-ViT, YOLO, Ultralytics +keywords: MobileSAM, image segmentation, lightweight segmentation, mobile segmentation, MobileSAM vs SAM, MobileSAM vs YOLO, Tiny-ViT, YOLO26-seg, edge AI segmentation, Ultralytics, Meta --- ![MobileSAM lightweight image segmentation model logo](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/logo2.png) @@ -35,21 +35,22 @@ The table below outlines the available MobileSAM model, its pretrained weights, ## MobileSAM Comparison vs YOLO -The following comparison highlights the differences between Meta's SAM variants, MobileSAM, and Ultralytics' smallest segmentation models, including [YOLO11n-seg](../models/yolo11.md): +The following comparison highlights the differences between Meta's SAM variants, MobileSAM, and Ultralytics segmentation models including [YOLO26n-seg](yolo26.md): | Model | Size
(MB) | Parameters
(M) | Speed (CPU)
(ms/im) | | ------------------------------------------------------------------------------- | ----------------------- | ---------------------------- | --------------------------------- | -| Meta SAM-b | 375 | 93.7 | 49401 | -| Meta SAM2-b | 162 | 80.8 | 31901 | -| Meta SAM2-t | 78.1 | 38.9 | 25997 | -| MobileSAM | 40.7 | 10.1 | 25381 | -| FastSAM-s with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.7 | 11.8 | 55.9 | -| Ultralytics YOLOv8n-seg | **6.7** (11.7x smaller) | **3.4** (11.4x less) | **24.5** (1061x faster) | -| Ultralytics YOLO11n-seg | **5.9** (13.2x smaller) | **2.9** (13.4x less) | **30.1** (864x faster) | +| Meta SAM-b | 375 | 93.7 | 41703 | +| Meta SAM2-b | 162 | 80.8 | 28867 | +| Meta SAM2-t | 78.1 | 38.9 | 23430 | +| MobileSAM | 40.7 | 10.1 | 23802 | +| FastSAM-s with YOLOv8 [backbone](https://www.ultralytics.com/glossary/backbone) | 23.9 | 11.8 | 58.0 | +| Ultralytics YOLOv8n-seg | **7.1** (11.0x smaller) | **3.4** (11.4x less) | **24.8** (945x faster) | +| Ultralytics YOLO11n-seg | **6.2** (12.6x smaller) | **2.9** (13.4x less) | **24.3** (964x faster) | +| Ultralytics YOLO26n-seg | **6.7** (11.7x smaller) | **2.7** (14.4x less) | **25.2** (930x faster) | -This comparison demonstrates the substantial differences in model size and speed between SAM variants and YOLO segmentation models. While SAM models offer unique automatic segmentation capabilities, YOLO modelsโ€”especially YOLOv8n-seg and YOLO11n-segโ€”are significantly smaller, faster, and more computationally efficient. +This comparison demonstrates the substantial differences in model size and speed between SAM variants and YOLO segmentation models. While SAM models offer unique automatic segmentation capabilities, YOLO modelsโ€”especially YOLOv8n-seg, YOLO11n-seg and YOLO26n-segโ€”are significantly smaller, faster, and more computationally efficient. -Tests were conducted on a 2025 Apple M4 Pro with 24GB RAM using `torch==2.6.0` and `ultralytics==8.3.90`. 
To reproduce these results: +SAM speeds measured with PyTorch, YOLO speeds measured with ONNX Runtime. Tests run on a 2025 Apple MacBook Air (M4) with 16GB of RAM using `torch==2.10.0`, `ultralytics==8.4.31`, and `onnxruntime==1.24.4`. To reproduce these results: !!! example @@ -69,10 +70,12 @@ Tests were conducted on a 2025 Apple M4 Pro with 24GB RAM using `torch==2.6.0` a model.info() model(ASSETS) - # Profile YOLO models - for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt"]: + # Profile YOLO models (ONNX) + for file_name in ["yolov8n-seg.pt", "yolo11n-seg.pt", "yolo26n-seg.pt"]: model = YOLO(file_name) model.info() + onnx_path = model.export(format="onnx", dynamic=True) + model = YOLO(onnx_path) model(ASSETS) ``` @@ -182,7 +185,7 @@ To automatically annotate your dataset with the Ultralytics framework, use the ` ```python from ultralytics.data.annotator import auto_annotate - auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="mobile_sam.pt") + auto_annotate(data="path/to/images", det_model="yolo26x.pt", sam_model="mobile_sam.pt") ``` {% include "macros/sam-auto-annotate.md" %} diff --git a/docs/en/models/yolo12.md b/docs/en/models/yolo12.md index f259e97298..e7ad84e6f3 100644 --- a/docs/en/models/yolo12.md +++ b/docs/en/models/yolo12.md @@ -48,13 +48,19 @@ YOLO12, released in early 2025, introduces an attention-centric architecture tha YOLO12 supports a variety of computer vision tasks.
The table below shows task support and the operational modes (Inference, Validation, Training, and Export) enabled for each: -| Model Type | Task | Inference | Validation | Training | Export | -| -------------------------------------------------------------------------------------------------------------- | -------------------------------------- | --------- | ---------- | -------- | ------ | -| [YOLO12](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12.yaml) | [Detection](../tasks/detect.md) | โœ… | โœ… | โœ… | โœ… | -| [YOLO12-seg](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-seg.yaml) | [Segmentation](../tasks/segment.md) | โœ… | โœ… | โœ… | โœ… | -| [YOLO12-pose](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-pose.yaml) | [Pose](../tasks/pose.md) | โœ… | โœ… | โœ… | โœ… | -| [YOLO12-cls](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-cls.yaml) | [Classification](../tasks/classify.md) | โœ… | โœ… | โœ… | โœ… | -| [YOLO12-obb](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-obb.yaml) | [OBB](../tasks/obb.md) | โœ… | โœ… | โœ… | โœ… | +!!! warning "Pretrained weights availability" + + Only detection weights (`yolo12n.pt`, `yolo12s.pt`, `yolo12m.pt`, `yolo12l.pt`, `yolo12x.pt`) are released on [ultralytics/assets](https://github.com/ultralytics/assets/releases). Segmentation, classification, pose, and OBB architectures are defined in [ultralytics/cfg/models/12/](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/12), so those variants support training from scratch from the `.yaml` config, but no pretrained `.pt` files are currently available for them. For pretrained segmentation, pose, classification, or OBB checkpoints, Ultralytics recommends [YOLO11](yolo11.md) or [YOLO26](yolo26.md). 
+ +| Model Type | Task | Pretrained Weights | Inference | Validation | Training | Export | +| -------------------------------------------------------------------------------------------------------------- | -------------------------------------- | ------------------ | --------- | ---------- | -------- | ------ | +| [YOLO12](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12.yaml) | [Detection](../tasks/detect.md) | โœ… | โœ… | โœ… | โœ… | โœ… | +| [YOLO12-seg](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-seg.yaml) | [Segmentation](../tasks/segment.md) | โŒ | โœ… | โœ… | โœ… | โœ… | +| [YOLO12-pose](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-pose.yaml) | [Pose](../tasks/pose.md) | โŒ | โœ… | โœ… | โœ… | โœ… | +| [YOLO12-cls](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-cls.yaml) | [Classification](../tasks/classify.md) | โŒ | โœ… | โœ… | โœ… | โœ… | +| [YOLO12-obb](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/12/yolo12-obb.yaml) | [OBB](../tasks/obb.md) | โŒ | โœ… | โœ… | โœ… | โœ… | + +All YOLO12 architectures support every mode once a trained checkpoint is available. The `Pretrained Weights` column indicates only whether Ultralytics publishes an official pretrained `.pt` on [ultralytics/assets](https://github.com/ultralytics/assets/releases): for segmentation, pose, classification, and OBB, you must train your own checkpoint from the corresponding `.yaml` before running inference, validation, or export. ## Performance Metrics @@ -173,7 +179,7 @@ YOLO12 incorporates several key innovations to balance speed and accuracy. The A ### What [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks does YOLO12 support? -YOLO12 is a versatile model that supports a wide range of core computer vision tasks. 
It excels in object [detection](../tasks/detect.md), instance [segmentation](../tasks/segment.md), image [classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and oriented object detection (OBB) ([see details](../tasks/obb.md)). This comprehensive task support makes YOLO12 a powerful tool for diverse applications, from [robotics](https://www.ultralytics.com/glossary/robotics) and autonomous driving to medical imaging and industrial inspection. Each of these tasks can be performed in Inference, Validation, Training, and Export modes. +YOLO12 is a versatile model that supports a wide range of core computer vision tasks. It excels in object [detection](../tasks/detect.md), instance [segmentation](../tasks/segment.md), image [classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and oriented object detection (OBB) ([see details](../tasks/obb.md)). This comprehensive task support makes YOLO12 a powerful tool for diverse applications, from [robotics](https://www.ultralytics.com/glossary/robotics) and autonomous driving to medical imaging and industrial inspection. Note that pretrained `.pt` weights are currently published for detection only; the segmentation, pose, classification, and OBB architectures are provided as `.yaml` configs for training from scratch. ### How does YOLO12 compare to other YOLO models and competitors like RT-DETR? diff --git a/docs/en/models/yolov4.md b/docs/en/models/yolov4.md index 04bbb2c433..83e17f2405 100644 --- a/docs/en/models/yolov4.md +++ b/docs/en/models/yolov4.md @@ -50,7 +50,7 @@ We regret any inconvenience this may cause and will strive to update this docume YOLOv4 is a powerful and efficient object detection model that strikes a balance between speed and accuracy. Its use of unique features and bag of freebies techniques during training allows it to perform excellently in real-time object detection tasks. 
YOLOv4 can be trained and used by anyone with a conventional GPU, making it accessible and practical for a wide range of applications including [surveillance systems](https://www.ultralytics.com/blog/shattering-the-surveillance-status-quo-with-vision-ai), [autonomous vehicles](https://www.ultralytics.com/solutions/ai-in-automotive), and [industrial automation](https://www.ultralytics.com/blog/improving-manufacturing-with-computer-vision). -For those looking to implement object detection in their projects, YOLOv4 remains a strong contender, especially when real-time performance is a priority. While Ultralytics currently focuses on supporting newer YOLO versions like [YOLOv8](https://docs.ultralytics.com/models/yolov8/) and [YOLO11](https://docs.ultralytics.com/models/yolo11/), the architectural innovations introduced in YOLOv4 have influenced the development of these later models. +For those looking to implement object detection in their projects, YOLOv4 remains a strong contender, especially when real-time performance is a priority. While Ultralytics currently focuses on supporting newer YOLO versions like [YOLO11](yolo11.md) and [YOLO26](yolo26.md), the architectural innovations introduced in YOLOv4 have influenced the development of these later models. 
## Citations and Acknowledgments diff --git a/docs/en/modes/predict.md b/docs/en/modes/predict.md index efd333c34e..3ebf0a942d 100644 --- a/docs/en/modes/predict.md +++ b/docs/en/modes/predict.md @@ -367,7 +367,7 @@ Below are code examples for using each source type: Example `.streams` text file: - ```text + ``` rtsp://example.com/media1.mp4 rtsp://example.com/media2.mp4 rtmp://example2.com/live diff --git a/docs/en/modes/train.md b/docs/en/modes/train.md index 7e952ed5f3..e5e2a4fd34 100644 --- a/docs/en/modes/train.md +++ b/docs/en/modes/train.md @@ -415,6 +415,49 @@ To resume training from an interrupted session, set the `resume` argument to `Tr Check the section on [Resuming Interrupted Trainings](#resuming-interrupted-trainings) for more information. +### How do I train a model on an imbalanced dataset? + +Class imbalance occurs when some classes have significantly fewer examples than others in your training data. This can cause the model to perform poorly on rare classes. Ultralytics YOLO supports class weighting through the `cls_pw` argument to address this issue. + +The `cls_pw` argument controls class weighting power based on inverse class frequency: + +- `cls_pw=0.0` (default): Disables class weighting +- `cls_pw=1.0`: Applies full inverse frequency weighting +- Values between `0.0` and `1.0`: Provide partial weighting for moderate imbalance + +The class weights are computed as `(1.0 / class_counts) ^ cls_pw` and normalized so their mean equals 1.0. + +!!! 
example "Training on Imbalanced Dataset" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a pretrained model + model = YOLO("yolo26n.pt") + + # Train with full class weighting for severely imbalanced data + results = model.train(data="custom.yaml", epochs=100, imgsz=640, cls_pw=1.0) + + # Or use partial weighting (0.25) for moderate imbalance + results = model.train(data="custom.yaml", epochs=100, imgsz=640, cls_pw=0.25) + ``` + + === "CLI" + + ```bash + # Train with full inverse frequency weighting + yolo detect train data=custom.yaml model=yolo26n.pt epochs=100 imgsz=640 cls_pw=1.0 + + # Train with partial weighting for moderate imbalance + yolo detect train data=custom.yaml model=yolo26n.pt epochs=100 imgsz=640 cls_pw=0.25 + ``` + +!!! tip + + Start with `cls_pw=0.25` for moderately imbalanced datasets and increase to `1.0` if the rare classes still underperform. You can check the computed class weights in the training logs to verify the weight distribution. + ### Can I train YOLO26 models on Apple silicon chips? Yes, Ultralytics YOLO26 supports training on Apple silicon chips utilizing the Metal Performance Shaders (MPS) framework. Specify 'mps' as your training device. diff --git a/docs/en/platform/account/activity.md b/docs/en/platform/account/activity.md index 9504e099a3..93ddd7f480 100644 --- a/docs/en/platform/account/activity.md +++ b/docs/en/platform/account/activity.md @@ -103,7 +103,7 @@ Use the search bar to find events by resource name or event description. 
Filter by time period using the date range picker: - Select a start and end date -- No default date filter (shows all events) +- The page defaults to the last 30 days - Custom date ranges supported ![Ultralytics Platform Activity Page Date Range Picker Expanded](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/activity-page-date-range-picker-expanded.avif) @@ -123,7 +123,7 @@ Each event displays: Some actions support undo directly from the Activity feed: -- **Settings changes**: Click **Undo** next to a settings update event to revert the change +- **Settings changes**: Click **Undo** next to a recent settings update event to revert the change - Undo is available for a short time window after the action ## Pagination @@ -134,9 +134,13 @@ The Activity feed supports pagination: - Navigate between pages using the pagination controls - Page size is configurable via URL query parameter -## API Access +## API Routes -Access activity programmatically via the [REST API](../api/index.md#activity-api): +The Activity feed is powered by browser-authenticated routes โ€” it is not exposed as a public API and cannot be accessed with an API key. The route shapes are listed below for reference; to view, mark, or archive activity, use the Activity feed in the platform UI. + +!!! note "Browser Session Only" + + The routes shown below require an active platform browser session. The `Authorization: Bearer YOUR_API_KEY` header in the examples will not authenticate these routes โ€” they are documented only to describe how the in-app feed talks to the server. === "List Activity" diff --git a/docs/en/platform/account/api-keys.md b/docs/en/platform/account/api-keys.md index 4ef9e8cc59..434cf7faff 100644 --- a/docs/en/platform/account/api-keys.md +++ b/docs/en/platform/account/api-keys.md @@ -14,11 +14,10 @@ keywords: Ultralytics Platform, API keys, authentication, remote training, secur Create a new API key: -1. Go to **Settings > Profile** -2. 
Scroll to the **API Keys** section -3. Click **Create Key** -4. Enter a name for the key (e.g., "Training Server") -5. Click **Create Key** +1. Go to **Settings > API Keys** +2. Click **Create Key** +3. Enter a name for the key (e.g., "Training Server") +4. Click **Create Key** ![Ultralytics Platform Settings Profile Tab Create Api Key Dialog](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/settings-profile-tab-create-api-key-dialog.avif) @@ -115,10 +114,10 @@ Enable metric streaming with your key. !!! warning "Package Version Requirement" - Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform. + Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform. ```bash - pip install "ultralytics>=8.4.14" + pip install "ultralytics>=8.4.35" ``` ```bash @@ -132,7 +131,7 @@ See [Cloud Training](../train/cloud-training.md#remote-training) for the complet ### View Keys -All keys are listed in `Settings > Profile` under the API Keys section: +All keys are listed on the `Settings > API Keys` tab: Each key card shows the key name, the full decrypted key value (copyable), relative creation time, and a revoke button. @@ -208,7 +207,7 @@ Solutions: 1. Verify key is copied correctly (including the `ul_` prefix) 2. Check key hasn't been revoked 3. Confirm environment variable is set -4. Ensure you're using `ultralytics>=8.4.14` +4. Ensure you're using `ultralytics>=8.4.35` ### Permission Denied @@ -246,7 +245,7 @@ Keys don't expire automatically. They remain valid until revoked. Consider imple ### Can I see my key after creation? -Yes, full key values are visible in the key list on `Settings > Profile`. The Platform decrypts and displays your keys so you can copy them anytime. +Yes, full key values are visible in the key list on `Settings > API Keys`. The Platform decrypts and displays your keys so you can copy them anytime. ### Are keys region-specific? 
diff --git a/docs/en/platform/account/billing.md b/docs/en/platform/account/billing.md index fb1b438d8f..7564546386 100644 --- a/docs/en/platform/account/billing.md +++ b/docs/en/platform/account/billing.md @@ -84,7 +84,7 @@ For organizations with advanced needs: - SLA guarantees (coming soon) - Enterprise support -Contact [sales@ultralytics.com](mailto:sales@ultralytics.com) for Enterprise pricing. +See [Ultralytics Licensing](https://www.ultralytics.com/license) for Enterprise plan details. ## Credits diff --git a/docs/en/platform/account/index.md b/docs/en/platform/account/index.md index 7871d349be..d26e0df5f3 100644 --- a/docs/en/platform/account/index.md +++ b/docs/en/platform/account/index.md @@ -129,7 +129,7 @@ Yes, Ultralytics Platform implements: No, data region is selected during signup and cannot be changed. To use a different region: 1. Export your data -2. Create a new account in desired region +2. Create a new account in the desired region 3. Re-upload your data This ensures data residency compliance. diff --git a/docs/en/platform/account/settings.md b/docs/en/platform/account/settings.md index ad396537d3..0de67bd8e8 100644 --- a/docs/en/platform/account/settings.md +++ b/docs/en/platform/account/settings.md @@ -8,11 +8,11 @@ keywords: Ultralytics Platform, settings, profile, preferences, GDPR, data expor [Ultralytics Platform](https://platform.ultralytics.com) settings allow you to configure your profile, social links, workspace preferences, and manage your data with GDPR-compliant export and deletion options. -Settings is organized into five tabs: `Profile`, `Plans`, `Billing`, `Teams`, and `Trash`. +Settings is organized into six tabs: `Profile`, `API Keys`, `Plans`, `Billing`, `Teams`, and `Trash`. ## Profile Tab -The `Profile` tab contains your profile information, social links, API keys, data region, and account management options. +The `Profile` tab contains your profile information, social links, data region, and account management options. 
### Profile Information @@ -95,10 +95,6 @@ Manage email addresses linked to your account in the `Profile` tab: Your primary email is used for notifications and account recovery. Only verified emails can be set as primary. -### API Keys - -API keys are managed directly on the `Profile` tab. See [API Keys](api-keys.md) for full documentation. - ### Data Region View your data region on the `Profile` tab: @@ -221,6 +217,10 @@ Permanently delete your account: - Server logs (90 days) - Legal compliance records +## API Keys Tab + +The `API Keys` tab lets you create and manage API keys for remote training and inference. See [API Keys](api-keys.md) for full documentation. + ## Plans Tab The `Plans` tab lets you compare available plans and upgrade or downgrade your subscription. @@ -237,7 +237,7 @@ From this tab you can: - **Compare features** across Free, Pro, and Enterprise tiers - **Upgrade to Pro** to unlock more storage, models, team collaboration, and priority GPU access -- **Contact Sales** for Enterprise needs including SSO/SAML and commercial licensing +- **Review Enterprise** capabilities including SSO/SAML, RBAC, and commercial licensing โ€” see [Ultralytics Licensing](https://www.ultralytics.com/license) See [Billing](billing.md) for detailed plan information, pricing, and upgrade instructions. diff --git a/docs/en/platform/account/teams.md b/docs/en/platform/account/teams.md index 13baa905f6..52a25d45e3 100644 --- a/docs/en/platform/account/teams.md +++ b/docs/en/platform/account/teams.md @@ -21,7 +21,7 @@ Teams allow multiple users to work together under a shared workspace: !!! note "Plan Requirement" - Creating a team requires a [Pro or Enterprise plan](billing.md#plans). You can upgrade from Settings or when clicking **+ Create Team** in the workspace switcher. + Team workspaces require a [Pro or Enterprise plan](billing.md#plans). You can start team setup before upgrading, but the workspace must be on a Pro or Enterprise plan to use team features. 
## Creating a Team @@ -87,7 +87,7 @@ Admins and Owners can invite new members to the team: ![Ultralytics Platform Teams Invite Member Dialog](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/settings-teams-invite-member-dialog.avif) -The invitee receives an email invitation with a link to accept and join the team. Invitations expire after 7 days. Once accepted, the team workspace appears in the invitee's workspace switcher. +The invitee receives an email invitation with a link to accept and join the team. Invitations expire after 7 days. Once accepted, the team workspace appears in the invitee's workspace switcher. If an invite is missed, resend or cancel it from the Teams tab and send a fresh invite. !!! note "Admin Invites" @@ -101,17 +101,11 @@ Enterprise plans include additional capabilities for organizations with advanced !!! warning "License Expiration" - If your Enterprise license expires, workspace access is blocked until renewed. Contact [sales@ultralytics.com](mailto:sales@ultralytics.com) to renew. + If your Enterprise license expires, workspace access is blocked until the license is renewed. See [Ultralytics Licensing](https://www.ultralytics.com/license) for details. ### Getting Started with Enterprise -Enterprise plans are provisioned by the Ultralytics team: - -1. Contact [sales@ultralytics.com](mailto:sales@ultralytics.com) -2. Discuss your team size, credit needs, and compliance requirements -3. Receive a provisioning invite with your enterprise configuration -4. Accept the invite to become the team Owner -5. Invite your team members +Enterprise plans are provisioned by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/license) for plan details. Once your enterprise configuration is set up, you'll receive a provisioning invite to accept as the team Owner, after which you can invite your team members. ## FAQ @@ -129,4 +123,4 @@ All team members share a single credit balance. 
The Owner and Admins can top up ### How do I upgrade from Pro to Enterprise? -Contact [sales@ultralytics.com](mailto:sales@ultralytics.com) to discuss Enterprise pricing and provisioning. The Ultralytics team will handle the upgrade and configuration. +Enterprise pricing and provisioning are handled directly by the Ultralytics team. See [Ultralytics Licensing](https://www.ultralytics.com/license) for plan details. diff --git a/docs/en/platform/account/trash.md b/docs/en/platform/account/trash.md index 97d70a89e4..d8a320bf91 100644 --- a/docs/en/platform/account/trash.md +++ b/docs/en/platform/account/trash.md @@ -87,6 +87,8 @@ Recover a deleted item: The item returns to its original location with all data intact. +If the original slug is already taken, the platform restores the item with a unique available slug so you can access it immediately. + ### Restore Behavior | Resource | Restore Behavior | @@ -115,7 +117,7 @@ Permanently delete all items immediately: !!! warning "Irreversible Action" - Emptying Trash permanently deletes all items immediately. This action cannot be undone and all data will be lost. + Emptying Trash permanently deletes all items immediately. This action cannot be undone and all data will be lost, including attached deployments, export jobs, and stored files tied to the trashed resources. ### Delete Single Item Permanently @@ -125,6 +127,8 @@ To permanently delete one item without waiting: 2. Click the **Delete** button 3. Confirm deletion +For projects, permanent deletion also removes related deployments and export files that belong to the deleted workspace resources. + ## Storage and Trash Items in Trash still count toward your storage quota: diff --git a/docs/en/platform/api/index.md b/docs/en/platform/api/index.md index 517294c70e..b5662ef636 100644 --- a/docs/en/platform/api/index.md +++ b/docs/en/platform/api/index.md @@ -52,11 +52,11 @@ graph LR ## Authentication -Most API requests require authentication via API key. 
Public endpoints (listing public datasets, projects, and models) support anonymous read access without a key. +Resource APIs such as datasets, projects, models, training, exports, and predictions use API-key authentication. Public endpoints (listing public datasets, projects, and models) support anonymous read access without a key. Account-oriented routes โ€” including activity, settings, teams, billing, and GDPR flows โ€” currently require an authenticated browser session and are not available via API key. ### Get API Key -1. Go to `Settings` > `Profile` (API Keys section) +1. Go to `Settings` > `API Keys` 2. Click `Create Key` 3. Copy the generated key @@ -66,7 +66,7 @@ See [API Keys](../account/api-keys.md) for detailed instructions. Include your API key in all requests: -``` +```http Authorization: Bearer YOUR_API_KEY ``` @@ -122,7 +122,7 @@ The API uses a two-layer rate limiting system to protect against abuse while kee When throttled, the API returns `429` with retry metadata: -``` +```http Retry-After: 12 X-RateLimit-Reset: 2026-02-21T12:34:56.000Z ``` @@ -145,7 +145,7 @@ Each category has an independent counter per API key. For example, making 20 pre ### Dedicated Endpoints (Unlimited) -[Dedicated endpoints](../deploy/endpoints.md) are **not subject to API key rate limits**. When you deploy a model to a dedicated endpoint, requests to that endpoint URL (e.g., `https://predict-abc123.run.app/predict`) go directly to your dedicated service with no rate limiting from the Platform. You're paying for the compute, so you get unlimited throughput up to your endpoint's scaling configuration. +[Dedicated endpoints](../deploy/endpoints.md) are **not subject to API key rate limits**. When you deploy a model to a dedicated endpoint, requests to that endpoint URL (e.g., `https://predict-abc123.run.app/predict`) go directly to your dedicated service with no rate limiting from the Platform. 
You're paying for the compute, so you get throughput from your dedicated service configuration rather than the shared API limits. !!! tip "Handling Rate Limits" @@ -192,7 +192,7 @@ Create, browse, and manage labeled image datasets for training YOLO models. See ### List Datasets -``` +```http GET /api/datasets ``` @@ -262,7 +262,7 @@ GET /api/datasets ### Get Dataset -``` +```http GET /api/datasets/{datasetId} ``` @@ -270,7 +270,7 @@ Returns full dataset details including metadata, class names, and split counts. ### Create Dataset -``` +```http POST /api/datasets ``` @@ -293,7 +293,7 @@ POST /api/datasets ### Update Dataset -``` +```http PATCH /api/datasets/{datasetId} ``` @@ -309,7 +309,7 @@ PATCH /api/datasets/{datasetId} ### Delete Dataset -``` +```http DELETE /api/datasets/{datasetId} ``` @@ -317,7 +317,7 @@ Soft-deletes the dataset (moved to [trash](../account/trash.md), recoverable for ### Clone Dataset -``` +```http POST /api/datasets/{datasetId}/clone ``` @@ -336,7 +336,7 @@ Creates a copy of the dataset with all images and labels. Only public datasets c ### Export Dataset -``` +```http GET /api/datasets/{datasetId}/export ``` @@ -359,7 +359,7 @@ Returns a JSON response with a signed download URL for the latest dataset export ### Create Dataset Version -``` +```http POST /api/datasets/{datasetId}/export ``` @@ -386,7 +386,7 @@ All fields are optional. The `description` field is a user-provided label for th ### Update Version Description -``` +```http PATCH /api/datasets/{datasetId}/export ``` @@ -411,7 +411,7 @@ Update the description of an existing version. Owner-only. ### Get Class Statistics -``` +```http GET /api/datasets/{datasetId}/class-stats ``` @@ -454,7 +454,7 @@ Returns class distribution, location heatmap, and dimension statistics. Results ### Get Models Trained on Dataset -``` +```http GET /api/datasets/{datasetId}/models ``` @@ -495,7 +495,7 @@ Returns models that were trained using this dataset. 
### Auto-Annotate Dataset -``` +```http POST /api/datasets/{datasetId}/predict ``` @@ -512,7 +512,7 @@ Run YOLO inference on dataset images to auto-generate annotations. Uses a select ### Dataset Ingest -``` +```http POST /api/datasets/ingest ``` @@ -532,7 +532,7 @@ graph LR #### List Images -``` +```http GET /api/datasets/{datasetId}/images ``` @@ -552,7 +552,7 @@ GET /api/datasets/{datasetId}/images #### Get Signed Image URLs -``` +```http POST /api/datasets/{datasetId}/images/urls ``` @@ -560,13 +560,13 @@ Get signed URLs for a batch of image hashes (for display in the browser). #### Delete Image -``` +```http DELETE /api/datasets/{datasetId}/images/{hash} ``` #### Get Image Labels -``` +```http GET /api/datasets/{datasetId}/images/{hash}/labels ``` @@ -574,7 +574,7 @@ Returns annotations and class names for a specific image. #### Update Image Labels -``` +```http PUT /api/datasets/{datasetId}/images/{hash}/labels ``` @@ -594,13 +594,13 @@ PUT /api/datasets/{datasetId}/images/{hash}/labels Move images between splits (train/val/test) within a dataset: -``` +```http PATCH /api/datasets/{datasetId}/images/bulk ``` Bulk delete images: -``` +```http DELETE /api/datasets/{datasetId}/images/bulk ``` @@ -612,7 +612,7 @@ Organize your models into projects. Each model belongs to one project. See [Proj ### List Projects -``` +```http GET /api/projects ``` @@ -626,13 +626,13 @@ GET /api/projects ### Get Project -``` +```http GET /api/projects/{projectId} ``` ### Create Project -``` +```http POST /api/projects ``` @@ -659,13 +659,13 @@ POST /api/projects ### Update Project -``` +```http PATCH /api/projects/{projectId} ``` ### Delete Project -``` +```http DELETE /api/projects/{projectId} ``` @@ -673,7 +673,7 @@ Soft-deletes the project (moved to [trash](../account/trash.md)). 
### Clone Project -``` +```http POST /api/projects/{projectId}/clone ``` @@ -681,13 +681,13 @@ POST /api/projects/{projectId}/clone Upload a project icon (multipart form with image file): -``` +```http POST /api/projects/{projectId}/icon ``` Remove the project icon: -``` +```http DELETE /api/projects/{projectId}/icon ``` @@ -699,7 +699,7 @@ Manage trained YOLO models โ€” view metrics, download weights, run inference, an ### List Models -``` +```http GET /api/models ``` @@ -714,7 +714,7 @@ GET /api/models ### List Completed Models -``` +```http GET /api/models/completed ``` @@ -722,13 +722,13 @@ Returns models that have finished training (for use in model selectors and deplo ### Get Model -``` +```http GET /api/models/{modelId} ``` ### Create Model -``` +```http POST /api/models ``` @@ -748,19 +748,19 @@ POST /api/models ### Update Model -``` +```http PATCH /api/models/{modelId} ``` ### Delete Model -``` +```http DELETE /api/models/{modelId} ``` ### Download Model Files -``` +```http GET /api/models/{modelId}/files ``` @@ -768,7 +768,7 @@ Returns signed download URLs for model files. ### Clone Model -``` +```http POST /api/models/{modelId}/clone ``` @@ -794,7 +794,7 @@ Clone a public model to one of your projects. ### Track Download -``` +```http POST /api/models/{modelId}/track-download ``` @@ -802,7 +802,7 @@ Track model download analytics. ### Run Inference -``` +```http POST /api/models/{modelId}/predict ``` @@ -863,7 +863,7 @@ POST /api/models/{modelId}/predict ### Get Predict Token -``` +```http POST /api/models/{modelId}/predict/token ``` @@ -871,7 +871,7 @@ Get a short-lived token for direct prediction requests. 
The token bypasses the A ### Warmup Model -``` +```http POST /api/models/{modelId}/predict/warmup ``` @@ -895,7 +895,7 @@ graph LR ### Start Training -``` +```http POST /api/training/start ``` @@ -947,7 +947,7 @@ POST /api/training/start ### Get Training Status -``` +```http GET /api/models/{modelId}/training ``` @@ -955,7 +955,7 @@ Returns the current training job status, metrics, and progress for a model. ### Cancel Training -``` +```http DELETE /api/models/{modelId}/training ``` @@ -965,7 +965,7 @@ Terminates the running compute instance and marks the job as cancelled. ## Deployments API -Deploy models to dedicated inference endpoints with auto-scaling, health checks, and monitoring. See [Endpoints documentation](../deploy/endpoints.md). +Deploy models to dedicated inference endpoints with health checks and monitoring. New deployments use scale-to-zero by default, and the API accepts an optional `resources` object. See [Endpoints documentation](../deploy/endpoints.md). ```mermaid graph LR @@ -980,7 +980,7 @@ graph LR ### List Deployments -``` +```http GET /api/deployments ``` @@ -995,7 +995,7 @@ GET /api/deployments ### Create Deployment -``` +```http POST /api/deployments ``` @@ -1015,34 +1015,38 @@ POST /api/deployments } ``` -| Field | Type | Required | Description | -| ----------- | ------ | -------- | ------------------------------------------------------------------ | -| `modelId` | string | Yes | Model ID to deploy | -| `name` | string | Yes | Deployment name | -| `region` | string | Yes | Deployment region | -| `resources` | object | No | Resource configuration (cpu, memoryGi, minInstances, maxInstances) | +| Field | Type | Required | Description | +| ----------- | ------ | -------- | -------------------------------------------------------------------------- | +| `modelId` | string | Yes | Model ID to deploy | +| `name` | string | Yes | Deployment name | +| `region` | string | Yes | Deployment region | +| `resources` | object | No | Resource 
configuration (`cpu`, `memoryGi`, `minInstances`, `maxInstances`) | Creates a dedicated inference endpoint in the specified region. The endpoint is globally accessible via a unique URL. +!!! note "Default Resources" + + The deployment dialog currently submits fixed defaults of `cpu=1`, `memoryGi=2`, `minInstances=0`, and `maxInstances=1`. The API route accepts a `resources` object, but plan limits cap `minInstances` at `0` and `maxInstances` at `1`. + !!! tip "Region Selection" Choose a region close to your users for lowest latency. The platform UI shows latency estimates for all 43 available regions. ### Get Deployment -``` +```http GET /api/deployments/{deploymentId} ``` ### Delete Deployment -``` +```http DELETE /api/deployments/{deploymentId} ``` ### Start Deployment -``` +```http POST /api/deployments/{deploymentId}/start ``` @@ -1050,7 +1054,7 @@ Resume a stopped deployment. ### Stop Deployment -``` +```http POST /api/deployments/{deploymentId}/stop ``` @@ -1058,7 +1062,7 @@ Pause a running deployment (stops billing). ### Health Check -``` +```http GET /api/deployments/{deploymentId}/health ``` @@ -1066,7 +1070,7 @@ Returns the health status of the deployment endpoint. ### Run Inference on Deployment -``` +```http POST /api/deployments/{deploymentId}/predict ``` @@ -1083,7 +1087,7 @@ Send an image directly to a deployment endpoint for inference. Functionally equi ### Get Metrics -``` +```http GET /api/deployments/{deploymentId}/metrics ``` @@ -1098,7 +1102,7 @@ Returns request counts, latency, and error rate metrics with sparkline data. 
### Get Logs -``` +```http GET /api/deployments/{deploymentId}/logs ``` @@ -1116,7 +1120,7 @@ GET /api/deployments/{deploymentId}/logs ### Aggregated Metrics -``` +```http GET /api/monitoring ``` @@ -1130,7 +1134,7 @@ Convert models to optimized formats like ONNX, TensorRT, CoreML, and TFLite for ### List Exports -``` +```http GET /api/exports ``` @@ -1144,7 +1148,7 @@ GET /api/exports ### Create Export -``` +```http POST /api/exports ``` @@ -1202,19 +1206,19 @@ POST /api/exports ### Get Export Status -``` +```http GET /api/exports/{exportId} ``` ### Cancel Export -``` +```http DELETE /api/exports/{exportId} ``` ### Track Export Download -``` +```http POST /api/exports/{exportId}/track-download ``` @@ -1224,9 +1228,13 @@ POST /api/exports/{exportId}/track-download View a feed of recent actions on your account โ€” training runs, uploads, and more. See [Activity documentation](../account/activity.md). +!!! note "Browser Session Only" + + The Activity routes are powered by browser-authenticated requests from the platform UI. They are not exposed as a public API, do not accept API-key authentication, and the route shapes below are documented only for reference. Use the Activity feed in the platform UI to view, mark, or archive events. + ### List Activity -``` +```http GET /api/activity ``` @@ -1241,7 +1249,7 @@ GET /api/activity ### Mark Events Seen -``` +```http POST /api/activity/mark-seen ``` @@ -1263,7 +1271,7 @@ Or pass specific IDs: ### Archive Events -``` +```http POST /api/activity/archive ``` @@ -1293,7 +1301,7 @@ View and restore deleted items. Items are permanently removed after 30 days. 
See ### List Trash -``` +```http GET /api/trash ``` @@ -1308,7 +1316,7 @@ GET /api/trash ### Restore Item -``` +```http POST /api/trash ``` @@ -1323,7 +1331,7 @@ POST /api/trash ### Permanently Delete Item -``` +```http DELETE /api/trash ``` @@ -1342,12 +1350,16 @@ DELETE /api/trash ### Empty Trash -``` +```http DELETE /api/trash/empty ``` Permanently deletes all items in trash. +!!! note "Authentication" + + `DELETE /api/trash/empty` requires an authenticated browser session and is not available via API key. Use the **Empty Trash** button in the UI instead. + --- ## Billing API @@ -1360,7 +1372,7 @@ Check your credit balance, purchase credits, view transaction history, and confi ### Get Balance -``` +```http GET /api/billing/balance ``` @@ -1385,7 +1397,7 @@ GET /api/billing/balance ### Get Usage Summary -``` +```http GET /api/billing/usage-summary ``` @@ -1393,7 +1405,7 @@ Returns plan details, limits, and usage metrics. ### Get Transactions -``` +```http GET /api/billing/transactions ``` @@ -1407,7 +1419,7 @@ Returns transaction history (most recent first). ### Create Checkout Session -``` +```http POST /api/billing/checkout-session ``` @@ -1429,7 +1441,7 @@ Creates a checkout session for credit purchase. ### Create Subscription Checkout -``` +```http POST /api/billing/subscription-checkout ``` @@ -1451,13 +1463,21 @@ Creates a checkout session for Pro subscription upgrade. | `billingCycle` | string | No | Billing cycle: `monthly` (default) or `yearly` | | `owner` | string | No | Team username for workspace upgrades (requires admin role) | -### Create Portal Session +### Cancel or Resume Subscription -``` -POST /api/billing/portal-session +```http +DELETE /api/billing/subscription-checkout ``` -Returns URL to billing portal for subscription management. +Cancels a Pro subscription at period end by default. Send `{"resume": true}` to resume an already scheduled cancellation before the billing period ends. 
+ +**Body:** + +```json +{ + "resume": true +} +``` ### Auto Top-Up @@ -1465,7 +1485,7 @@ Automatically add credits when balance falls below a threshold. #### Get Auto Top-Up Config -``` +```http GET /api/billing/auto-topup ``` @@ -1477,7 +1497,7 @@ GET /api/billing/auto-topup #### Update Auto Top-Up Config -``` +```http PATCH /api/billing/auto-topup ``` @@ -1495,13 +1515,13 @@ PATCH /api/billing/auto-topup #### List Payment Methods -``` +```http GET /api/billing/payment-methods ``` #### Create Setup Intent -``` +```http POST /api/billing/payment-methods/setup ``` @@ -1509,7 +1529,7 @@ Returns a client secret for adding a new payment method. #### Set Default Payment Method -``` +```http POST /api/billing/payment-methods/default ``` @@ -1523,7 +1543,7 @@ POST /api/billing/payment-methods/default #### Update Billing Info -``` +```http PATCH /api/billing/payment-methods ``` @@ -1544,7 +1564,7 @@ PATCH /api/billing/payment-methods #### Delete Payment Method -``` +```http DELETE /api/billing/payment-methods/{id} ``` @@ -1556,7 +1576,7 @@ Check your storage usage breakdown by category (datasets, models, exports) and s ### Get Storage Info -``` +```http GET /api/storage ``` @@ -1605,7 +1625,7 @@ GET /api/storage ### Recalculate Storage -``` +```http POST /api/storage ``` @@ -1619,7 +1639,7 @@ Upload files directly to cloud storage using signed URLs for fast, reliable tran ### Get Signed Upload URL -``` +```http POST /api/upload/signed-url ``` @@ -1659,7 +1679,7 @@ Request a signed URL for uploading a file directly to cloud storage. The signed ### Complete Upload -``` +```http POST /api/upload/complete ``` @@ -1685,13 +1705,13 @@ Manage your API keys for programmatic access. 
See [API Keys documentation](../ac ### List API Keys -``` +```http GET /api/api-keys ``` ### Create API Key -``` +```http POST /api/api-keys ``` @@ -1705,7 +1725,7 @@ POST /api/api-keys ### Delete API Key -``` +```http DELETE /api/api-keys ``` @@ -1731,13 +1751,13 @@ Create team workspaces, invite members, and manage roles for collaboration. See ### List Teams -``` +```http GET /api/teams ``` ### Create Team -``` +```http POST /api/teams/create ``` @@ -1752,7 +1772,7 @@ POST /api/teams/create ### List Members -``` +```http GET /api/members ``` @@ -1760,7 +1780,7 @@ Returns members of the current workspace. ### Invite Member -``` +```http POST /api/members ``` @@ -1785,19 +1805,19 @@ POST /api/members ### Update Member Role -``` +```http PATCH /api/members/{userId} ``` ### Remove Member -``` +```http DELETE /api/members/{userId} ``` ### Transfer Ownership -``` +```http POST /api/members/transfer-ownership ``` @@ -1805,13 +1825,13 @@ POST /api/members/transfer-ownership #### Accept Invite -``` +```http POST /api/invites/accept ``` #### Get Invite Info -``` +```http GET /api/invites/info ``` @@ -1823,13 +1843,13 @@ GET /api/invites/info #### Revoke Invite -``` +```http DELETE /api/invites/{inviteId} ``` #### Resend Invite -``` +```http POST /api/invites/{inviteId}/resend ``` @@ -1841,7 +1861,7 @@ Search and browse public datasets and projects shared by the community. See [Exp ### Search Public Content -``` +```http GET /api/explore/search ``` @@ -1856,7 +1876,7 @@ GET /api/explore/search ### Sidebar Data -``` +```http GET /api/explore/sidebar ``` @@ -1870,7 +1890,7 @@ Manage your profile, API keys, storage usage, and data privacy settings. 
See [Se ### Get User by Username -``` +```http GET /api/users ``` @@ -1882,7 +1902,7 @@ GET /api/users ### Follow or Unfollow User -``` +```http PATCH /api/users ``` @@ -1897,7 +1917,7 @@ PATCH /api/users ### Check Username Availability -``` +```http GET /api/username/check ``` @@ -1910,7 +1930,7 @@ GET /api/username/check ### Settings -``` +```http GET /api/settings POST /api/settings ``` @@ -1919,7 +1939,7 @@ Get or update user profile settings (display name, bio, social links, etc.). ### Profile Icon -``` +```http POST /api/settings/icon DELETE /api/settings/icon ``` @@ -1928,7 +1948,7 @@ Upload or remove profile avatar. ### Onboarding -``` +```http POST /api/onboarding ``` @@ -1942,7 +1962,7 @@ Request an export of all your data or permanently delete your account. See [Sett ### Get GDPR Job Status -``` +```http GET /api/gdpr ``` @@ -1956,7 +1976,7 @@ Returns job status. For completed export jobs, response includes a `downloadUrl` ### Start Export or Delete Flow -``` +```http POST /api/gdpr ``` @@ -2022,7 +2042,7 @@ yolo check !!! warning "Package Version Requirement" - Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform. + Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform. ### Authentication @@ -2163,7 +2183,7 @@ Webhooks notify your server of Platform events via HTTP POST callbacks: **All plans**: Training webhooks via the Python SDK (real-time metrics, completion notifications) work automatically on every plan -- no configuration required. - **Enterprise only**: Custom webhook endpoints that send HTTP POST callbacks to your own server URL require an Enterprise plan. [Contact sales](https://www.ultralytics.com/contact) for details. + **Enterprise only**: Custom webhook endpoints that send HTTP POST callbacks to your own server URL require an Enterprise plan. See [Ultralytics Licensing](https://www.ultralytics.com/license) for details. 
--- diff --git a/docs/en/platform/data/annotation.md b/docs/en/platform/data/annotation.md index d9eea4cbee..b30ac869f9 100644 --- a/docs/en/platform/data/annotation.md +++ b/docs/en/platform/data/annotation.md @@ -243,10 +243,9 @@ Smart annotation adds model-assisted annotation to the editor. In Smart mode, yo With a SAM model selected: 1. Enter edit mode and select `Smart` or press `S` -2. **Left-click** to add positive points (include this area) -3. **Right-click** to add negative points (exclude this area) -4. SAM generates a precise mask in real-time -5. Press `Enter` or `Escape` to save the annotation, or enable **auto-apply** for one-click workflows +2. Click on the object you want to annotate โ€” SAM generates an initial mask in real-time +3. Refine the mask with additional clicks: click **outside** the current mask to add coverage, or click **inside** the current mask to subtract regions +4. Press `Enter` or `Escape` to save the annotation, or enable **auto-apply** for one-click workflows ![Ultralytics Platform Annotate Sam Positive Negative Points Mask](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-annotate-sam-positive-negative-points-mask.avif) @@ -269,14 +268,13 @@ graph LR !!! 
tip "SAM Tips" - - Start with a positive click on the object center - - Add negative clicks to exclude background - - Hold `Alt`/`Option` to invert click behavior (left-click becomes negative, right-click becomes positive) - - Enable **auto-apply** (`A`) for one-click annotation โ€” the mask saves automatically after each click + - Start with a click on the object center + - Click again outside the mask to expand coverage + - Click inside the mask to subtract unwanted regions + - Enable **auto-apply** (`A`) for one-click annotation - Hold `Shift` while auto-apply is on to place multiple points before the mask is applied - - Positive and negative points appear as square markers with `+` and `โˆ’` symbols on the canvas - Works best for distinct objects with clear edges - - Use 2-3 positive points for elongated objects + - Use a few refinement clicks for elongated or overlapping objects SAM smart annotation can generate: @@ -292,11 +290,11 @@ SAM smart annotation can generate: Auto-apply mode speeds up Smart annotation by automatically saving the SAM mask after each click โ€” no need to press `Enter`. Toggle it with the auto-apply button in the toolbar or press `A`. 
-| Mode | Behavior | -| --------------------------- | ---------------------------------------------------- | -| **Auto-apply ON** (default) | Mask applies automatically after each click | -| **Auto-apply ON + `Shift`** | Place multiple points first, mask applies on release | -| **Auto-apply OFF** | Place points freely, press `Enter` to apply | +| Mode | Behavior | +| ---------------------------- | ---------------------------------------------------- | +| **Auto-apply ON** | Mask applies automatically after each click | +| **Auto-apply ON + `Shift`** | Place multiple points first, mask applies on release | +| **Auto-apply OFF** (default) | Place points freely, press `Enter` to apply | ![Ultralytics Platform Annotate Sam Auto Apply Toggle](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-annotate-sam-auto-apply-toggle.avif) @@ -308,13 +306,13 @@ Auto-apply mode speeds up Smart annotation by automatically saving the SAM mask When Smart mode is active, a model picker appears in the toolbar. Five SAM models are available โ€” choose based on the speed vs. 
accuracy trade-off that suits your dataset: -| Model | Size | Speed | Notes | -| ----------------- | ------- | -------- | ------------------------ | -| **SAM 2.1 Tiny** | 74.5 MB | Fastest | | -| **SAM 2.1 Small** | 88 MB | Fast | Default | -| **SAM 2.1 Base** | 154 MB | Moderate | | -| **SAM 2.1 Large** | 428 MB | Slower | Most accurate of SAM 2.1 | -| **SAM 3** | 3.45 GB | Slowest | Latest generation | +| Model | Size | Speed | Notes | +| ----------------- | ------- | -------- | -------------------------- | +| **SAM 2.1 Tiny** | 74.5 MB | Fastest | | +| **SAM 2.1 Small** | 88 MB | Fast | | +| **SAM 2.1 Base** | 154 MB | Moderate | | +| **SAM 2.1 Large** | 428 MB | Slower | Most accurate of SAM 2.1 | +| **SAM 3** | 3.45 GB | Slowest | Default, latest generation | ![Ultralytics Platform Annotate Sam Model Selector](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-annotate-sam-model-selector.avif) @@ -382,7 +380,7 @@ In edit mode, a crosshair overlay tracks the cursor position and displays pixel ## SAM Hover Preview -In Smart mode for **segment** tasks, SAM provides a real-time mask preview as you hover over the image โ€” before clicking any points. This lets you see the predicted segmentation boundary and decide where to click. Once you add positive or negative points, the preview updates to reflect your refinements. +In Smart mode, SAM provides a real-time hover preview before you click any points. This preview is available for **detect**, **segment**, and **OBB** tasks. Once you add refinement clicks, the preview updates to reflect the current mask and the annotation type for the active task. 
## Polygon Vertex Editing @@ -436,18 +434,22 @@ Efficient annotation with keyboard shortcuts: === "General" - | Shortcut | Action | - | ---------------------- | -------------------------- | - | `Cmd/Ctrl+S` | Save annotations | - | `Cmd/Ctrl+Z` | Undo | - | `Cmd/Ctrl+Shift+Z` | Redo | - | `Cmd/Ctrl+Y` | Redo (alternative) | - | `Escape` | Save / Deselect / Exit | - | `Delete` / `Backspace` | Delete selected annotation | - | `1-9` | Select class 1-9 | - | `Cmd/Ctrl+Scroll` | Zoom in/out | - | `Shift+Click` | Multi-select annotations | - | `Cmd/Ctrl+A` | Select all annotations | + | Shortcut | Action | + | ----------------------------- | ---------------------------- | + | `Cmd/Ctrl+S` | Save annotations | + | `Cmd/Ctrl+Z` | Undo | + | `Cmd/Ctrl+Shift+Z` | Redo | + | `Cmd/Ctrl+Y` | Redo (alternative) | + | `Escape` | Save / Deselect / Exit | + | `Delete` / `Backspace` | Delete selected annotation | + | `1-9` | Select class 1-9 | + | `Cmd/Ctrl+Scroll` | Zoom in/out | + | `Cmd/Ctrl++` or `Cmd/Ctrl+=` | Zoom in | + | `Cmd/Ctrl+-` | Zoom out | + | `Cmd/Ctrl+0` | Reset to fit | + | `Space+Drag` | Pan canvas when zoomed | + | `Shift+Click` | Multi-select annotations | + | `Cmd/Ctrl+A` | Select all annotations | === "Modes" @@ -458,15 +460,15 @@ Efficient annotation with keyboard shortcuts: === "Drawing" - | Shortcut | Action | - | -------------- | --------------------------------------------------------- | - | `Click+Drag` | Draw bounding box (detect/OBB) | - | `Click` | Add polygon point (segment) / Place skeleton (pose) | - | `Right-click` | Complete polygon / Add SAM negative point | - | `Shift` + `click`/`right-click` | Place multiple SAM points before applying (auto-apply on) | - | `A` | Toggle auto-apply (Smart mode) | - | `Enter` | Complete polygon / Confirm pose / Save SAM annotation | - | `Escape` | Cancel pose / Save SAM annotation / Deselect / Exit | + | Shortcut | Action | + | ------------------------------- | 
----------------------------------------------------------- | + | `Click+Drag` | Draw bounding box (detect/OBB) | + | `Click` | Add polygon point (segment) / Place skeleton (pose) | + | `Right-click` | Complete polygon / Add SAM negative point | + | `Shift` + `click`/`right-click` | Place multiple SAM points before applying (auto-apply on) | + | `A` | Toggle auto-apply (Smart mode) | + | `Enter` | Complete polygon / Confirm pose / Save SAM annotation | + | `Escape` | Cancel pose / Save SAM annotation / Deselect / Exit | === "Arrange (Z-Order)" @@ -526,7 +528,7 @@ SAM provides high-quality masks for most objects. Accuracy depends on: - Image quality and resolution - Number of positive/negative points provided -For best results, start with a positive point on the object center and add negative points to exclude nearby objects. +For best results, start with a click on the object center, then use outside-mask clicks to add coverage and inside-mask clicks to subtract nearby objects or background. ### Can I import existing annotations? @@ -552,7 +554,7 @@ Yes, but for best results: ### Which SAM model should I use? -Start with **SAM 2.1 Small** (the default) โ€” it's fast and accurate for most objects. Switch to **SAM 2.1 Large** when you need higher mask precision on complex shapes. Use **SAM 2.1 Tiny** for maximum speed on simple, high-contrast objects. **SAM 3** is the latest generation model and may produce better results on challenging images, but is significantly slower. +**SAM 3** is the default and the latest generation model โ€” start there for the highest quality masks. Switch to **SAM 2.1 Small** for a faster interactive workflow on common objects, or **SAM 2.1 Large** when you need higher mask precision on complex shapes. Use **SAM 2.1 Tiny** for maximum speed on simple, high-contrast objects. ### Which tasks support SAM smart annotation? 
diff --git a/docs/en/platform/data/datasets.md b/docs/en/platform/data/datasets.md index ff4abae718..9241b7e479 100644 --- a/docs/en/platform/data/datasets.md +++ b/docs/en/platform/data/datasets.md @@ -6,7 +6,7 @@ keywords: Ultralytics Platform, datasets, dataset management, dataset versioning # Datasets -[Ultralytics Platform](https://platform.ultralytics.com) datasets provide a streamlined solution for managing your training data. Once uploaded, datasets can be immediately used for model training, with automatic processing and statistics generation. +[Ultralytics Platform](https://platform.ultralytics.com) datasets provide a streamlined solution for managing your training data. After upload, the platform processes images, labels, and statistics automatically. A dataset is ready to train once processing has completed and it has at least one image in the `train` split, at least one image in either the `val` or `test` split, and at least one labeled image. ## Upload Dataset @@ -58,7 +58,7 @@ Ultralytics Platform accepts multiple upload formats for flexibility. ### Preparing Your Dataset -The Platform supports two annotation formats plus raw uploads: [Ultralytics YOLO](../../datasets/detect/index.md#ultralytics-yolo-format), [COCO](https://cocodataset.org/#format-data), and raw (unannotated images): +The Platform supports [Ultralytics YOLO](../../datasets/detect/index.md#ultralytics-yolo-format), [COCO](https://cocodataset.org/#format-data), [Ultralytics NDJSON](../../datasets/detect/index.md#ultralytics-ndjson-format), and raw (unannotated) uploads: === "YOLO Format" @@ -125,17 +125,43 @@ The Platform supports two annotation formats plus raw uploads: [Ultralytics YOLO COCO annotations are automatically converted during upload. Detection (`bbox`), segmentation (`segmentation` polygons), and pose (`keypoints`) tasks are supported. Category IDs are remapped to a dense 0-indexed sequence across all annotation files. 
For converting between formats, see [format conversion tools](../../datasets/detect/index.md#port-or-convert-label-formats). +=== "Classification Layouts" + + Classification uploads are auto-detected from common folder layouts: + + ``` + split/class/image.jpg + class/split/image.jpg + class/image.jpg + ``` + + Example: + + ``` + my-classify-dataset/ + โ”œโ”€โ”€ train/ + โ”‚ โ”œโ”€โ”€ cats/ + โ”‚ โ””โ”€โ”€ dogs/ + โ””โ”€โ”€ val/ + โ”œโ”€โ”€ cats/ + โ””โ”€โ”€ dogs/ + ``` + +=== "NDJSON" + + Ultralytics NDJSON exports can be uploaded directly back into Platform. This is useful for moving datasets between workspaces while preserving metadata, classes, splits, and annotations. + !!! tip "Raw Uploads" **Raw**: Upload unannotated images (no labels). Useful when you plan to annotate directly on the platform using the [annotation editor](annotation.md). !!! tip "Flat Directory Structure" - You can also upload images without the train/val folder structure. Images uploaded without split folders are assigned to the `train` split by default. You can reassign them later using the bulk move-to-split feature. + You can also upload images without explicit split folders. Platform respects the active split target during upload, and for non-classify datasets it may automatically create a validation split from part of the training set when no split information is provided. You can always reassign images later with bulk move-to-split or split redistribution. !!! tip "Format Auto-Detection" - The format is detected automatically: datasets with a `data.yaml` containing `names`, `train`, or `val` keys are treated as YOLO. Datasets with COCO JSON files (containing `images`, `annotations`, and `categories` arrays) are treated as COCO. Datasets with only images and no annotations are treated as raw. + The format is detected automatically: datasets with a `data.yaml` containing `names`, `train`, or `val` keys are treated as YOLO. 
Datasets with COCO JSON files (containing `images`, `annotations`, and `categories` arrays) are treated as COCO. `.ndjson` exports are imported as Ultralytics NDJSON. Datasets with only images and no annotations are treated as raw. For task-specific format details, see [supported tasks](index.md#supported-tasks) and the [Datasets Overview](../../datasets/index.md). @@ -242,7 +268,9 @@ Click any image to open the fullscreen viewer with: - **Edit**: Enter annotation mode to add or modify labels - **Download**: Download the original image file - **Delete**: Delete the image from the dataset -- **Zoom**: `Cmd/Ctrl+Scroll` to zoom in/out +- **Zoom**: `Cmd/Ctrl+Scroll`, `Cmd/Ctrl++`, or `Cmd/Ctrl+=` to zoom in, and `Cmd/Ctrl+-` to zoom out +- **Reset view**: `Cmd/Ctrl + 0` or the reset button to fit the image to the viewer +- **Pan**: Hold `Space` and drag to pan the canvas when zoomed - **Pixel view**: Toggle pixelated rendering for close inspection ![Ultralytics Platform Datasets Fullscreen Viewer With Metadata Panel](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-datasets-fullscreen-viewer-with-metadata-panel.avif) @@ -259,7 +287,7 @@ Filter images by their dataset split: ## Dataset Tabs -Each dataset page has six tabs accessible from the tab bar: +Each dataset page can show up to six tabs, depending on the dataset state and your permissions: ### Images Tab @@ -267,6 +295,8 @@ The default view showing the image gallery with annotation overlays. Supports gr ### Classes Tab +This tab appears when the dataset has images. + Manage annotation classes for your dataset: - **Class histogram**: Bar chart showing annotation count per class with linear/log scale toggle @@ -283,6 +313,8 @@ Manage annotation classes for your dataset: ### Charts Tab +This tab appears when the dataset has images. 
+ Automatic statistics computed from your dataset: | Chart | Description | @@ -324,6 +356,8 @@ View all models trained on this dataset in a searchable table: ### Errors Tab +This tab appears only when one or more files fail processing. + Images that failed processing are listed here with: - **Error banner**: Total count of failed images and guidance @@ -360,10 +394,15 @@ To create a version: 1. Open the **Versions** tab 2. Optionally enter a description (e.g., "Added 500 training images" or "Fixed mislabeled classes") 3. Click **+ New Version** -4. The NDJSON snapshot is generated and downloads automatically +4. The new version appears in the table +5. Download the version separately from the table when needed Each version is numbered sequentially (v1, v2, v3...) and stored permanently. You can download any previous version at any time from the versions table. +!!! note "Ready Datasets Only" + + Version creation is available after the dataset reaches `ready` status. + !!! tip "When to Create Versions" Create a version before and after major changes to your dataset โ€” adding images, fixing annotations, or rebalancing splits. This lets you compare model performance across different dataset states. @@ -374,20 +413,13 @@ Each version is numbered sequentially (v1, v2, v3...) and stored permanently. Yo ## Export Dataset -Export your dataset for offline use. The Platform supports multiple export formats: - -| Format | Description | -| -------------- | -------------------------------------------------- | -| **YOLO** | Standard YOLO format with images and `.txt` labels | -| **COCO** | COCO JSON format with annotation arrays | -| **Pascal VOC** | XML annotation files per image | -| **NDJSON** | One JSON object per line (lightweight metadata) | +Export your dataset for offline use with an NDJSON download from the dataset header or the Versions tab. To export: 1. Click the **Export** button in the dataset header -2. Select the desired format -3. 
The export job runs asynchronously โ€” you'll be notified when the download is ready +2. Download the current NDJSON snapshot directly +3. Use the **Versions** tab when you want an immutable numbered snapshot you can re-download later ![Ultralytics Platform Datasets Export Ndjson Download](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-datasets-export-ndjson-download.avif) @@ -633,7 +665,7 @@ Use the bulk move-to-split feature: ### What label formats are supported? -Ultralytics Platform supports two annotation formats for upload: +Ultralytics Platform supports YOLO labels, COCO JSON, Ultralytics NDJSON, and raw image uploads: === "YOLO Format" @@ -653,6 +685,10 @@ Ultralytics Platform supports two annotation formats for upload: JSON files with `images`, `annotations`, and `categories` arrays. Supports detection (`bbox`), segmentation (polygon), and pose (`keypoints`) tasks. COCO uses absolute pixel coordinates which are automatically converted to normalized format during upload. +=== "NDJSON" + + Ultralytics NDJSON exports can be re-imported into Platform. This is the most complete way to move dataset metadata, splits, and annotations between workspaces. + ### Can I annotate the same dataset for multiple task types? Yes. Each image stores annotations for all 5 task types (detect, segment, pose, OBB, classify) together. You can switch the dataset's active task type at any time without losing existing annotations. Only annotations matching the active task type are shown in the editor and included in exports and training โ€” annotations for other tasks are preserved and reappear when you switch back. diff --git a/docs/en/platform/data/index.md b/docs/en/platform/data/index.md index 0b1ab78b38..abfef237cd 100644 --- a/docs/en/platform/data/index.md +++ b/docs/en/platform/data/index.md @@ -65,7 +65,7 @@ Ultralytics Platform supports all 5 YOLO task types: !!! 
info "Task Type Selection" - The task type is set when creating a dataset and determines which annotation tools are available. You can change it later from the dataset settings, but incompatible annotations won't be displayed after switching. + The task type is set when creating a dataset and determines which annotation tools are available. You can change it later from the dataset header task selector, but incompatible annotations won't be displayed after switching. ## Key Features @@ -102,7 +102,7 @@ Create immutable NDJSON snapshots of your dataset for reproducible training. Eac ### Dataset Tabs -Every dataset page provides six tabs: +Dataset pages can show up to six tabs, depending on the dataset state and your permissions: | Tab | Description | | ------------ | ---------------------------------------------------------------------------- | @@ -113,6 +113,8 @@ Every dataset page provides six tabs: | **Versions** | Create and download immutable NDJSON snapshots for reproducible training | | **Errors** | Images that failed processing with error details and fix guidance | +`Classes` and `Charts` appear when the dataset has images. `Errors` appears only when processing failures exist. `Versions` appears for owners, or for non-owners when versions already exist. + ### Statistics and Visualization The `Charts` tab provides automatic analysis including: diff --git a/docs/en/platform/deploy/endpoints.md b/docs/en/platform/deploy/endpoints.md index d88b904737..c6a4afeacb 100644 --- a/docs/en/platform/deploy/endpoints.md +++ b/docs/en/platform/deploy/endpoints.md @@ -1,12 +1,12 @@ --- comments: true -description: Deploy YOLO models to dedicated endpoints in 43 global regions with auto-scaling and monitoring on Ultralytics Platform. +description: Deploy YOLO models to dedicated endpoints in 43 global regions with scale-to-zero behavior and monitoring on Ultralytics Platform. 
keywords: Ultralytics Platform, deployment, endpoints, YOLO, production, scaling, global regions --- # Dedicated Endpoints -[Ultralytics Platform](https://platform.ultralytics.com) enables deployment of YOLO models to dedicated endpoints in 43 global regions. Each endpoint is a single-tenant service with auto-scaling, a unique endpoint URL, and independent monitoring. +[Ultralytics Platform](https://platform.ultralytics.com) enables deployment of YOLO models to dedicated endpoints in 43 global regions. Each endpoint is a single-tenant service with scale-to-zero behavior, a unique endpoint URL, and independent monitoring. ![Ultralytics Platform Model Deploy Tab With Region Map And Table](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/model-deploy-tab-with-region-map-and-table.avif) @@ -30,7 +30,7 @@ Create a deployment from the global `Deploy` page in the sidebar: 1. Click **New Deployment** 2. Select a model from the model selector 3. Select a region from the map or table -4. Optionally customize the deployment name and resources +4. Review the auto-generated deployment name (editable) and the default resources 5. 
Click **Deploy Model** ![Ultralytics Platform New Deployment Dialog With Model Selector And Region Map](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/new-deployment-dialog-with-model-selector-and-region-map.avif) @@ -157,12 +157,12 @@ The `New Deployment` dialog provides: | **Model** | Select from completed models | - | | **Region** | Deployment region | - | | **Deployment Name** | Auto-generated, editable | - | -| **CPU Cores** | CPU allocation (1-8) | 1 | -| **Memory (GB)** | Memory allocation (1-32 GB) | 2 | +| **CPU Cores** | Fixed default | 1 | +| **Memory (GB)** | Fixed default | 2 | ![Ultralytics Platform New Deployment Dialog Resources Panel Expanded](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/new-deployment-dialog-resources-panel-expanded.avif) -Resource settings are available under the collapsible **Resources** section. Deployments use scale-to-zero by default (min instances = 0, max instances = 1) โ€” you only pay for active inference time. +Deployments use fixed defaults of `1 CPU`, `2 GiB` memory, `minInstances = 0`, and `maxInstances = 1`. They scale to zero when idle, so you only pay for active inference time. !!! 
note "Auto-Generated Names" diff --git a/docs/en/platform/deploy/index.md b/docs/en/platform/deploy/index.md index 8045f6be65..a3807c2e64 100644 --- a/docs/en/platform/deploy/index.md +++ b/docs/en/platform/deploy/index.md @@ -26,7 +26,7 @@ The Deployment section helps you: - **Test** models directly in the browser with the `Predict` tab - **Deploy** to dedicated endpoints in 43 global regions - **Monitor** request metrics, logs, and health checks -- **Scale** automatically with traffic (including scale-to-zero) +- **Scale to zero** when idle (deployments currently run a single active instance) ![Ultralytics Platform Deploy Page World Map With Overview Cards](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/deploy-page-world-map-with-overview-cards.avif) @@ -54,12 +54,12 @@ graph LR style D fill:#9C27B0,color:#fff ``` -| Stage | Description | -| ------------- | ------------------------------------------------------------------------ | -| **Test** | Validate model with the [`Predict` tab](inference.md) | -| **Configure** | Select region, resources, and deployment name | -| **Deploy** | Create a dedicated endpoint from the [`Deploy` tab](endpoints.md) | -| **Monitor** | Track requests, latency, errors, and logs in [Monitoring](monitoring.md) | +| Stage | Description | +| ------------- | --------------------------------------------------------------------------- | +| **Test** | Validate model with the [`Predict` tab](inference.md) | +| **Configure** | Select region and deployment name (deployments use fixed default resources) | +| **Deploy** | Create a dedicated endpoint from the [`Deploy` tab](endpoints.md) | +| **Monitor** | Track requests, latency, errors, and logs in [Monitoring](monitoring.md) | ## Architecture @@ -100,8 +100,8 @@ Deploy to 43 regions worldwide on Ultralytics Cloud: Each endpoint is a single-tenant service with: -- Dedicated compute resources (configurable CPU and memory) -- Auto-scaling (scale-to-zero when idle) +- Default 
resources of `1 CPU`, `2 GiB` memory, `minInstances=0`, `maxInstances=1` +- Scale-to-zero when idle - Unique endpoint URL - Independent monitoring, logs, and health checks @@ -118,7 +118,7 @@ Access the global deployments page from the sidebar under `Deploy`. This page sh !!! info "Automatic Polling" - The page polls every 30 seconds for metric updates. When deployments are in a transitional state (creating, deploying, stopping), polling increases to every 2-3 seconds for near-instant feedback. + The page polls every 15 seconds normally. When deployments are in a transitional state (`creating`, `deploying`, or `stopping`), polling increases to every 3 seconds for faster feedback. ## Key Features @@ -130,12 +130,12 @@ Deploy close to your users with 43 regions covering: - Europe, Middle East, Africa - Asia Pacific, Oceania -### Auto-Scaling +### Scaling Behavior -Endpoints scale automatically: +Endpoints currently behave as follows: - **Scale to zero**: No cost when idle (default) -- **Scale up**: Handle traffic spikes automatically +- **Single active instance**: `maxInstances` is currently capped at `1` on all plans !!! tip "Cost Savings" @@ -189,7 +189,7 @@ Deploy a model in under 2 minutes: | ----------- | --------------- | ------------------------------------ | | **Latency** | Variable | Consistent | | **Cost** | Free (included) | Free (basic), usage-based (advanced) | -| **Scale** | Limited | Configurable | +| **Scale** | Limited | Scale-to-zero, single instance | | **Regions** | 3 | 43 | | **URL** | Generic | Custom | | **Rate** | 20 req/min | Unlimited | @@ -204,7 +204,7 @@ Dedicated endpoint deployment typically takes 1-2 minutes: ### Can I deploy multiple models? -Yes, each model can have multiple endpoints in different regions. There's no limit on total endpoints (subject to your plan). +Yes, each model can have multiple endpoints in different regions. Deployment counts are limited by plan: Free `3`, Pro `10`, Enterprise `unlimited`. 
### What happens when an endpoint is idle? diff --git a/docs/en/platform/deploy/inference.md b/docs/en/platform/deploy/inference.md index c4729b2c84..94fb381a70 100644 --- a/docs/en/platform/deploy/inference.md +++ b/docs/en/platform/deploy/inference.md @@ -147,11 +147,11 @@ Authorization: Bearer YOUR_API_KEY !!! warning "API Key Required" - To run inference from your own scripts, notebooks, or apps, include an API key. Generate one in [`Settings`](../account/api-keys.md) (API Keys section on the Profile tab). + To run inference from your own scripts, notebooks, or apps, include an API key. Generate one in [`Settings > API Keys`](../account/api-keys.md). ### Endpoint -``` +```http POST https://platform.ultralytics.com/api/models/{modelId}/predict ``` diff --git a/docs/en/platform/deploy/monitoring.md b/docs/en/platform/deploy/monitoring.md index ec27cd1c16..e580034e3e 100644 --- a/docs/en/platform/deploy/monitoring.md +++ b/docs/en/platform/deploy/monitoring.md @@ -79,7 +79,7 @@ Below the overview cards, the deployments list shows all endpoints across your p !!! tip "Real-Time Updates" - The dashboard polls every 30 seconds for deployment status updates. When deployments are in a transitional state (creating, deploying), polling increases to every 3 seconds. Metric charts refresh every 60 seconds. Click the refresh button for immediate updates. + The dashboard polls every 15 seconds for deployment status updates. When deployments are in a transitional state (`creating`, `deploying`, or `stopping`), polling increases to every 3 seconds. Metric charts refresh every 60 seconds. Click the refresh button for immediate updates. ## Per-Deployment Metrics @@ -230,7 +230,7 @@ The `Predict` tab on each deployment card provides an inline predict panel โ€” t ### Monitoring Overview -``` +```http GET /api/monitoring ``` @@ -238,7 +238,7 @@ Returns aggregated metrics for all deployments owned by the authenticated user. 
### Deployment Metrics -``` +```http GET /api/deployments/{deploymentId}/metrics?sparkline=true&range=24h ``` @@ -251,7 +251,7 @@ Returns sparkline data and summary metrics for a specific deployment. Refresh in ### Deployment Logs -``` +```http GET /api/deployments/{deploymentId}/logs?limit=50&severity=ERROR,WARNING ``` @@ -265,7 +265,7 @@ Returns recent log entries with optional severity filter and pagination. ### Deployment Health -``` +```http GET /api/deployments/{deploymentId}/health ``` diff --git a/docs/en/platform/explore.md b/docs/en/platform/explore.md index ea08a03d94..2176e5cfa0 100644 --- a/docs/en/platform/explore.md +++ b/docs/en/platform/explore.md @@ -343,12 +343,12 @@ Contact creators for commercial licensing. To report inappropriate content: 1. Navigate to the public page containing the content (project or dataset), if accessible -2. Click the **Feedback** button in the sidebar +2. Open the **Help** page from the sidebar 3. Select **General** as the feedback type 4. Describe the content and the issue, including a link to the page 5. Submit the report -If the content is no longer accessible, use the **Feedback** button from any page and include as much detail as possible (URL, username, or description). +If the content is no longer accessible, use the **Help** page from any page and include as much detail as possible (URL, username, or description). Our team reviews reports within 24-48 hours. 
diff --git a/docs/en/platform/index.md b/docs/en/platform/index.md index 91778b90e5..eb8bbb1dbc 100644 --- a/docs/en/platform/index.md +++ b/docs/en/platform/index.md @@ -68,7 +68,7 @@ graph LR | **Annotate** | Manual tools for all 5 task types, plus [Smart Annotation](data/annotation.md#smart-annotation) with SAM and YOLO models for detect, segment, and OBB (see [supported tasks](data/index.md#supported-tasks)) | | **Train** | Cloud GPUs (20 free + 3 Pro-exclusive), real-time metrics, project organization | | **Export** | [17 deployment formats](../modes/export.md) (ONNX, TensorRT, CoreML, TFLite, etc.; see [supported formats](train/models.md#supported-formats)) | -| **Deploy** | 43 global regions with dedicated endpoints, auto-scaling, monitoring | +| **Deploy** | 43 global regions with dedicated endpoints, scale-to-zero behavior, and monitoring | **What you can do:** @@ -146,7 +146,7 @@ You can train models either through the web UI (cloud training) or from your own ```bash # Install ultralytics - pip install "ultralytics>=8.4.14" + pip install "ultralytics>=8.4.35" # Set your API key export ULTRALYTICS_API_KEY="YOUR_API_KEY" @@ -177,7 +177,7 @@ You can train models either through the web UI (cloud training) or from your own ### Deployment - **Inference Testing**: Test models directly in the browser with custom images -- **Dedicated Endpoints**: Deploy to 43 global regions with auto-scaling +- **Dedicated Endpoints**: Deploy to 43 global regions with scale-to-zero behavior - **Monitoring**: Real-time metrics, request logs, and performance dashboards ```mermaid @@ -315,10 +315,10 @@ You can train models on your own hardware and stream real-time metrics to the pl !!! warning "Package Version Requirement" - Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform. + Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform. 
```bash - pip install "ultralytics>=8.4.14" + pip install "ultralytics>=8.4.35" ``` === "CLI" @@ -473,4 +473,4 @@ See [Models Export](train/models.md#export-model), the [Export mode guide](../mo ??? question "Can I use Platform models commercially?" - Free and Pro plans use AGPL license. For commercial use without AGPL requirements, contact sales@ultralytics.com for Enterprise licensing. + Free and Pro plans use the AGPL license. For commercial use without AGPL requirements, see [Ultralytics Licensing](https://www.ultralytics.com/license). diff --git a/docs/en/platform/quickstart.md b/docs/en/platform/quickstart.md index bcd8dd99dd..441c10affd 100644 --- a/docs/en/platform/quickstart.md +++ b/docs/en/platform/quickstart.md @@ -19,21 +19,20 @@ keywords: Ultralytics Platform, Quickstart, YOLO models, dataset upload, model t Watch: Get Started with Ultralytics Platform - QuickStart

+The following interactive diagram outlines the four primary stages of the Ultralytics Platform workflow. Click any stage or sub-step to access detailed instructions for that section. + ```mermaid -journey - title Your First Model in 5 Minutes - section Sign Up - Create account: 5: User - Select region: 5: User - section Prepare Data - Upload dataset: 5: User - Review images: 4: User - section Train - Configure training: 5: User - Monitor progress: 3: Platform - section Deploy - Test model: 5: User - Deploy endpoint: 5: User +graph LR + A(Sign Up) --> B(Prepare Data) --> C(Train) --> D(Deploy) + A -.- A1["Create account
Select region"] + B -.- B1["Upload dataset
Create Project"] + C -.- C1["Configure training
Monitor progress"] + D -.- D1["Test model
Deploy endpoint"] + + click A "#get-started" + click B "#upload-your-first-dataset" + click C "#train-your-first-model" + click D "#deploy-to-production" ``` ## Get Started @@ -105,7 +104,7 @@ The sidebar provides access to all Platform sections: | | Deploy | Your active deployments | | **Bottom** | Trash | Deleted items (recoverable for 30 days) | | | Settings | Account, billing, and preferences | -| | Feedback | Send feedback to Ultralytics | +| | Help | Open help, docs, and feedback tools | ### Welcome Card @@ -352,7 +351,7 @@ graph LR Your endpoint will be ready in about a minute with: - **Unique URL**: HTTPS endpoint for API calls -- **Auto-Scaling**: Scales with traffic automatically +- **Scale-to-zero behavior**: No idle compute cost (deployments currently run a single active instance) - **Monitoring**: Request metrics and logs !!! info "Deployment Lifecycle" @@ -367,7 +366,7 @@ Read more about [endpoints](deploy/endpoints.md). If you prefer to train on your own hardware, you can stream metrics to the platform using your API key. This works like Weights & Biases โ€” train anywhere, monitor on the platform. -1. Generate an API key in [`Settings > Profile`](account/api-keys.md) (API Keys section) +1. Generate an API key in [`Settings > API Keys`](account/api-keys.md) 2. Set the environment variable and train with a `project/name` format: ```bash @@ -384,7 +383,7 @@ Read more about [API keys](account/api-keys.md), [dataset URIs](data/datasets.md ## Feedback & Help -Use the **Help** page in the sidebar footer to send feedback directly to Ultralytics. You can rate your experience, choose a feedback type (bug report, feature request, or general), and attach screenshots. +The **Help** page in the sidebar footer includes an in-app feedback form. You can rate your experience, choose a feedback type (bug, feature request, or general), and attach screenshots. 
If you need more help: diff --git a/docs/en/platform/train/cloud-training.md b/docs/en/platform/train/cloud-training.md index fd75d80620..6dc078d404 100644 --- a/docs/en/platform/train/cloud-training.md +++ b/docs/en/platform/train/cloud-training.md @@ -62,12 +62,12 @@ Choose a dataset to train on (see [Datasets](../data/datasets.md)): Set core training parameters: -| Parameter | Description | Default | -| -------------- | --------------------------------------------------------------------------- | ------- | -| **Epochs** | Number of training iterations | 100 | -| **Batch Size** | Samples per iteration | 16 | -| **Image Size** | Input resolution (320/416/512/640/1280 dropdown, or 32-4096 in YAML editor) | 640 | -| **Run Name** | Optional name for the training run | auto | +| Parameter | Description | Default | +| -------------- | --------------------------------------------------------------------------- | --------- | +| **Epochs** | Number of training iterations | 100 | +| **Batch Size** | Samples per iteration | -1 (auto) | +| **Image Size** | Input resolution (320/416/512/640/1280 dropdown, or 32-4096 in YAML editor) | 640 | +| **Run Name** | Optional name for the training run | auto | ### Step 4: Advanced Settings (Optional) @@ -169,11 +169,7 @@ Real-time GPU utilization, memory, temperature, CPU, and disk usage. ### Checkpoints -Checkpoints are saved automatically: - -- **Every epoch**: Latest weights saved -- **Best model**: Highest mAP checkpoint preserved -- **Final model**: Weights at training completion +After training completes, the **best model** (`best.pt`, the highest-mAP checkpoint) is uploaded to the platform and made available for download, export, and deployment. 
## Cancel Training @@ -181,7 +177,7 @@ Click **Cancel Training** on the model page to stop a running job: - The compute instance is terminated - Credits stop being charged -- Checkpoints saved up to that point are preserved +- The best checkpoint remains available if it was reached before cancellation ## Remote Training @@ -201,7 +197,7 @@ Train on your own hardware while streaming metrics to the platform. !!! warning "Package Version Requirement" - Platform integration requires **ultralytics>=8.4.14**. Lower versions will NOT work with Platform. + Platform integration requires **ultralytics>=8.4.35**. Lower versions will NOT work with Platform. ```bash pip install -U ultralytics @@ -209,7 +205,7 @@ Train on your own hardware while streaming metrics to the platform. ### Setup API Key -1. Go to [`Settings > Profile`](../account/api-keys.md) (API Keys section) +1. Go to [`Settings > API Keys`](../account/api-keys.md) 2. Create a new key (or the platform auto-creates one when you open the Local Training tab) 3. Set the environment variable: diff --git a/docs/en/platform/train/index.md b/docs/en/platform/train/index.md index 33ef59cdbc..5bb557eb93 100644 --- a/docs/en/platform/train/index.md +++ b/docs/en/platform/train/index.md @@ -105,7 +105,7 @@ graph LR !!! info "Automatic Checkpoints" - The Platform automatically saves checkpoints at every epoch. The **best model** (highest mAP) and **final model** are always preserved. + For cloud training, the **best model** (`best.pt`, the highest-mAP checkpoint) is saved automatically and made available for download, export, and deployment after training completes. 
## Quick Start diff --git a/docs/en/platform/train/models.md b/docs/en/platform/train/models.md index 8df7d14d7e..787a53e384 100644 --- a/docs/en/platform/train/models.md +++ b/docs/en/platform/train/models.md @@ -139,7 +139,7 @@ GPU and system metrics during training: Run interactive inference directly in the browser: -- Upload an image, paste a URL, or use webcam +- Upload an image, use example images, or use webcam - Results display with bounding boxes, masks, or keypoints - Auto-inference when an image is provided - Supports all task types ([detect](../../tasks/detect.md), [segment](../../tasks/segment.md), [pose](../../tasks/pose.md), [OBB](../../tasks/obb.md), [classify](../../tasks/classify.md)) @@ -269,11 +269,17 @@ Export jobs progress through the following statuses: Some export formats have architecture or task restrictions: -| Format | Restriction | -| ---------------- | --------------------------------------------------------------- | -| **IMX500** | Only available for YOLOv8 and YOLO11 models | -| **Axelera** | Not available for YOLO26 segmentation models | -| **PaddlePaddle** | Not available for YOLO26 detection/segmentation/pose/OBB models | +| Format | Restriction | +| ---------------- | ------------------------------------------------------- | +| **IMX500** | Available only for `YOLOv8n` and `YOLO11n` | +| **Axelera** | Detect models only | +| **PaddlePaddle** | Not available for YOLO26 detect/segment/pose/OBB models | + +!!! note "Additional Export Rules" + + - Classification exports do not include NMS. + - CoreML exports with batch sizes greater than `1` use `dynamic=true`. + - Unsupported format/model combinations are disabled in the export dialog before you launch. 
## Clone Model diff --git a/docs/en/platform/train/projects.md b/docs/en/platform/train/projects.md index 061c191c9e..8c75f48c95 100644 --- a/docs/en/platform/train/projects.md +++ b/docs/en/platform/train/projects.md @@ -40,7 +40,7 @@ Enter your project details: - **Name**: A descriptive name for your project (a random name is auto-generated) - **Description**: Optional notes about the project purpose - **Visibility**: Public (anyone can view) or Private (only you can access) -- **License**: Optional license for your project (AGPL-3.0, Apache-2.0, MIT, GPL-3.0, BSD-3-Clause, LGPL-3.0, MPL-2.0, EUPL-1.1, Unlicense, Ultralytics-Enterprise, and more). The **Ultralytics-Enterprise** license is for commercial use without AGPL requirements โ€” contact [sales@ultralytics.com](mailto:sales@ultralytics.com) for details. +- **License**: Optional license for your project (AGPL-3.0, Apache-2.0, MIT, GPL-3.0, BSD-3-Clause, LGPL-3.0, MPL-2.0, EUPL-1.1, Unlicense, Ultralytics-Enterprise, and more). The **Ultralytics-Enterprise** license is for commercial use without AGPL requirements and is available with an Enterprise plan โ€” see [Ultralytics Licensing](https://www.ultralytics.com/license). 
![Ultralytics Platform New Project Dialog Name Visibility License](https://cdn.jsdelivr.net/gh/ultralytics/assets@main/docs/platform/platform-new-project-dialog-name-visibility-license.avif) diff --git a/docs/en/solutions/index.md b/docs/en/solutions/index.md index 40c730b8ca..a53bf9e32e 100644 --- a/docs/en/solutions/index.md +++ b/docs/en/solutions/index.md @@ -141,7 +141,7 @@ For more details, refer to the [`SolutionResults` class documentation](https://d Most of the Solutions can be used directly through the command-line interface, including: - `Count`, `Crop`, `Blur`, `Workout`, `Heatmap`, `Isegment`, `Visioneye`, `Speed`, `Queue`, `Analytics`, `Inference` + `Count`, `Crop`, `Blur`, `Workout`, `Heatmap`, `Isegment`, `Visioneye`, `Speed`, `Queue`, `Analytics`, `Inference`, `Trackzone` **Syntax** diff --git a/docs/en/tasks/obb.md b/docs/en/tasks/obb.md index bf8706e405..aa71d9136f 100644 --- a/docs/en/tasks/obb.md +++ b/docs/en/tasks/obb.md @@ -66,7 +66,7 @@ Train YOLO26n-obb on the DOTA8 dataset for 100 [epochs](https://www.ultralytics. 
# Load a model model = YOLO("yolo26n-obb.yaml") # build a new model from YAML model = YOLO("yolo26n-obb.pt") # load a pretrained model (recommended for training) - model = YOLO("yolo26n-obb.yaml").load("yolo26n.pt") # build from YAML and transfer weights + model = YOLO("yolo26n-obb.yaml").load("yolo26n-obb.pt") # build from YAML and transfer weights # Train the model results = model.train(data="dota8.yaml", epochs=100, imgsz=640) diff --git a/docs/en/tasks/segment.md b/docs/en/tasks/segment.md index 54ea14b443..795ba4440f 100644 --- a/docs/en/tasks/segment.md +++ b/docs/en/tasks/segment.md @@ -54,7 +54,7 @@ Train YOLO26n-seg on the COCO8-seg dataset for 100 [epochs](https://www.ultralyt # Load a model model = YOLO("yolo26n-seg.yaml") # build a new model from YAML model = YOLO("yolo26n-seg.pt") # load a pretrained model (recommended for training) - model = YOLO("yolo26n-seg.yaml").load("yolo26n.pt") # build from YAML and transfer weights + model = YOLO("yolo26n-seg.yaml").load("yolo26n-seg.pt") # build from YAML and transfer weights # Train the model results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640) diff --git a/docs/en/usage/callbacks.md b/docs/en/usage/callbacks.md index 76fff4bf3b..59489132be 100644 --- a/docs/en/usage/callbacks.md +++ b/docs/en/usage/callbacks.md @@ -87,22 +87,22 @@ Below are all the supported callbacks. For more details, refer to the callbacks ### Trainer Callbacks -| Callback | Description | -| --------------------------- | -------------------------------------------------------------------------------------------- | -| `on_pretrain_routine_start` | Triggered at the beginning of the pre-training routine. | -| `on_pretrain_routine_end` | Triggered at the end of the pre-training routine. | -| `on_train_start` | Triggered when the training starts. | -| `on_train_epoch_start` | Triggered at the start of each training [epoch](https://www.ultralytics.com/glossary/epoch). 
| -| `on_train_batch_start` | Triggered at the start of each training batch. | -| `optimizer_step` | Triggered during the optimizer step. | -| `on_before_zero_grad` | Triggered before gradients are zeroed. | -| `on_train_batch_end` | Triggered at the end of each training batch. | -| `on_train_epoch_end` | Triggered at the end of each training epoch. | -| `on_fit_epoch_end` | Triggered at the end of each fit epoch. | -| `on_model_save` | Triggered when the model is saved. | -| `on_train_end` | Triggered when the training process ends. | -| `on_params_update` | Triggered when model parameters are updated. | -| `teardown` | Triggered when the training process is being cleaned up. | +| Callback | Description | +| --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `on_pretrain_routine_start` | Triggered at the beginning of the pre-training routine, before data loading and model setup. | +| `on_pretrain_routine_end` | Triggered at the end of the pre-training routine, after data loading and model setup are complete. | +| `on_train_start` | Triggered when the training starts, before the first [epoch](https://www.ultralytics.com/glossary/epoch) begins. | +| `on_train_epoch_start` | Triggered at the start of each training [epoch](https://www.ultralytics.com/glossary/epoch), before batch iteration begins. | +| `on_train_batch_start` | Triggered at the start of each training batch, before the forward pass. | +| `optimizer_step` | Triggered during the optimizer step. Reserved for custom integrations; not called by the default training loop. | +| `on_before_zero_grad` | Triggered before gradients are zeroed. 
Reserved for custom integrations; not called by the default training loop. | +| `on_train_batch_end` | Triggered at the end of each training batch, after the backward pass. The optimizer step may be deferred due to gradient accumulation. | +| `on_train_epoch_end` | Triggered at the end of each training epoch, after all batches are processed but **before** validation. Validation metrics and fitness may not be available yet. | +| `on_model_save` | Triggered when the model checkpoint is saved, after validation. | +| `on_fit_epoch_end` | Triggered at the end of each fit epoch (train + val), **after** validation and any checkpoint save. Validation metrics are available, and fitness is available for the per-epoch training call. This callback is also called during final best-model evaluation, where no checkpoint save occurs and fitness may not be present. | +| `on_train_end` | Triggered when the training process ends, after final evaluation of the best model. | +| `on_params_update` | Triggered when model parameters are updated. Reserved for custom integrations; not called by the default training loop. | +| `teardown` | Triggered when the training process is being cleaned up. | ### Validator Callbacks diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md index b503342654..12f2828a71 100644 --- a/docs/en/usage/cli.md +++ b/docs/en/usage/cli.md @@ -237,7 +237,7 @@ You can then pass this file as `cfg=default_copy.yaml` along with any additional ## Solutions Commands -Ultralytics provides ready-to-use solutions for common computer vision applications through the CLI. These solutions simplify the implementation of complex tasks like object counting, workout monitoring, and queue management. +Ultralytics provides ready-to-use solutions for common computer vision applications through the CLI. 
The `yolo solutions` command exposes object counting, cropping, blurring, workout monitoring, heatmaps, instance segmentation, VisionEye, speed estimation, queue management, analytics, Streamlit inference, and zone-based tracking โ€” see the [Solutions](../solutions/index.md) page for the full catalog. Run `yolo solutions help` to list every supported solution and its arguments. !!! example @@ -250,6 +250,26 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions count source="path/to/video.mp4" # specify video file path ``` + === "Crop" + + Crop detected objects and save them to disk: + + ```bash + yolo solutions crop show=True + yolo solutions crop source="path/to/video.mp4" # specify video file path + yolo solutions crop classes="[0, 2]" # crop only selected classes + ``` + + === "Blur" + + Blur detected objects in a video for privacy or to highlight other regions: + + ```bash + yolo solutions blur show=True + yolo solutions blur source="path/to/video.mp4" # specify video file path + yolo solutions blur classes="[0, 5]" # blur only selected classes + ``` + === "Workout" Monitor workout exercises using a pose model: @@ -259,8 +279,49 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions workout source="path/to/video.mp4" # specify video file path # Use keypoints for ab-workouts - yolo solutions workout kpts=[5, 11, 13] # left side - yolo solutions workout kpts=[6, 12, 14] # right side + yolo solutions workout kpts="[5, 11, 13]" # left side + yolo solutions workout kpts="[6, 12, 14]" # right side + ``` + + === "Heatmap" + + Generate a heatmap showing object density and movement patterns: + + ```bash + yolo solutions heatmap show=True + yolo solutions heatmap source="path/to/video.mp4" # specify video file path + yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO # customize colormap + yolo solutions heatmap region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" # restrict 
heatmap to a region + ``` + + === "Isegment" + + Run instance segmentation with tracking on a video: + + ```bash + yolo solutions isegment show=True + yolo solutions isegment source="path/to/video.mp4" # specify video file path + yolo solutions isegment classes="[0, 5]" # segment only selected classes + ``` + + === "VisionEye" + + Draw object-to-observer sightlines with VisionEye: + + ```bash + yolo solutions visioneye show=True + yolo solutions visioneye source="path/to/video.mp4" # specify video file path + yolo solutions visioneye classes="[0, 5]" # monitor only selected classes + ``` + + === "Speed" + + Estimate the speed of moving objects in a video: + + ```bash + yolo solutions speed show=True + yolo solutions speed source="path/to/video.mp4" # specify video file path + yolo solutions speed meter_per_pixel=0.05 # set scale for real-world units ``` === "Queue" @@ -273,6 +334,18 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions queue region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" # configure queue coordinates ``` + === "Analytics" + + Generate analytical charts (line, bar, area, or pie) from tracked detections: + + ```bash + yolo solutions analytics show=True + yolo solutions analytics source="path/to/video.mp4" # specify video file path + yolo solutions analytics analytics_type="pie" show=True + yolo solutions analytics analytics_type="bar" show=True + yolo solutions analytics analytics_type="area" show=True + ``` + === "Inference" Perform object detection, instance segmentation, or pose estimation in a web browser using Streamlit: @@ -282,6 +355,16 @@ Ultralytics provides ready-to-use solutions for common computer vision applicati yolo solutions inference model="path/to/model.pt" # use custom model ``` + === "TrackZone" + + Track objects only inside a specified polygonal zone: + + ```bash + yolo solutions trackzone show=True + yolo solutions trackzone source="path/to/video.mp4" # specify video file 
path + yolo solutions trackzone region="[(150, 150), (1130, 150), (1130, 570), (150, 570)]" # configure zone coordinates + ``` + === "Help" View available solutions and their options: diff --git a/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md index f507986bc8..8c591409e2 100644 --- a/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md +++ b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md @@ -8,7 +8,7 @@ keywords: YOLOv5, Google Cloud Platform, GCP, Deep Learning VM, object detection Embarking on the journey of [artificial intelligence (AI)](https://www.ultralytics.com/glossary/artificial-intelligence-ai) and [machine learning (ML)](https://www.ultralytics.com/glossary/machine-learning-ml) can be exhilarating, especially when you leverage the power and flexibility of a [cloud computing](https://www.ultralytics.com/glossary/cloud-computing) platform. Google Cloud Platform (GCP) offers robust tools tailored for ML enthusiasts and professionals alike. One such tool is the Deep Learning VM, preconfigured for data science and ML tasks. In this tutorial, we will navigate the process of setting up [Ultralytics YOLOv5](../../models/yolov5.md) on a [GCP Deep Learning VM](https://docs.cloud.google.com/deep-learning-vm/docs). Whether you're taking your first steps in ML or you're a seasoned practitioner, this guide provides a clear pathway to implementing [object detection](https://www.ultralytics.com/glossary/object-detection) models powered by YOLOv5. -๐Ÿ†“ Plus, if you're a new GCP user, you're in luck with a [$300 free credit offer](https://cloud.google.com/free/docs/free-cloud-features#free-trial) to kickstart your projects. +๐Ÿ†“ Plus, if you're a new GCP user, you're in luck with a [$300 free credit offer](https://docs.cloud.google.com/free/docs/free-cloud-features) to kickstart your projects. 
In addition to GCP, explore other accessible quickstart options for YOLOv5, like our [Google Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab for a browser-based experience, or the scalability of [Amazon AWS](./aws_quickstart_tutorial.md). Furthermore, container aficionados can utilize our official Docker image available on [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) Docker Pulls for an encapsulated environment, following our [Docker Quickstart Guide](../../guides/docker-quickstart.md). diff --git a/docs/en/yolov5/index.md b/docs/en/yolov5/index.md index 8d803d1b9d..21f6f42508 100644 --- a/docs/en/yolov5/index.md +++ b/docs/en/yolov5/index.md @@ -23,7 +23,7 @@ keywords: YOLOv5, Ultralytics, object detection, computer vision, deep learning, # Comprehensive Guide to Ultralytics YOLOv5 -Welcome to the Ultralytics [YOLOv5](https://github.com/ultralytics/yolov5)๐Ÿš€ Documentation! Ultralytics YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" [object detection](https://www.ultralytics.com/glossary/object-detection) model, is designed to deliver high-speed, high-accuracy results in real-time. While YOLOv5 remains a powerful tool, consider exploring its successor, [Ultralytics YOLOv8](../models/yolov8.md), for the latest advancements. +Welcome to the Ultralytics [YOLOv5](https://github.com/ultralytics/yolov5)๐Ÿš€ Documentation! Ultralytics YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" [object detection](https://www.ultralytics.com/glossary/object-detection) model, is designed to deliver high-speed, high-accuracy results in real-time. While YOLOv5 remains a powerful tool, consider exploring its successors, [Ultralytics YOLOv8](../models/yolov8.md), [YOLO11](../models/yolo11.md), and [YOLO26](../models/yolo26.md), for the latest advancements. 
Built on [PyTorch](https://pytorch.org/), this powerful [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) framework has garnered immense popularity for its versatility, ease of use, and high performance. Our documentation guides you through the installation process, explains the architectural nuances of the model, showcases various use cases, and provides a series of detailed tutorials. These resources will help you harness the full potential of YOLOv5 for your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects. Let's get started! diff --git a/docs/en/yolov5/tutorials/comet_logging_integration.md b/docs/en/yolov5/tutorials/comet_logging_integration.md index 67da10590f..e9a68aa178 100644 --- a/docs/en/yolov5/tutorials/comet_logging_integration.md +++ b/docs/en/yolov5/tutorials/comet_logging_integration.md @@ -41,7 +41,7 @@ Or create a `.comet.config` file in your working directory and set your credenti **Comet Configuration File** -``` +```ini [comet] api_key=YOUR_API_KEY project_name=YOUR_COMET_PROJECT_NAME # This will default to 'yolov5' diff --git a/docs/en/yolov5/tutorials/model_ensembling.md b/docs/en/yolov5/tutorials/model_ensembling.md index f2418b95aa..d1cc654f18 100644 --- a/docs/en/yolov5/tutorials/model_ensembling.md +++ b/docs/en/yolov5/tutorials/model_ensembling.md @@ -32,7 +32,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half Output: -```text +``` val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True YOLOv5 ๐Ÿš€ v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) @@ -76,7 +76,7 @@ You can list as many checkpoints as you would like, including custom weights suc Output: -```text +``` val: data=./data/coco.yaml, 
weights=['yolov5x.pt', 'yolov5l6.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True YOLOv5 ๐Ÿš€ v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) @@ -117,7 +117,7 @@ python detect.py --weights yolov5x.pt yolov5l6.pt --img 640 --source data/images Output: -```text +``` YOLOv5 ๐Ÿš€ v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) Fusing layers... diff --git a/docs/en/yolov5/tutorials/model_export.md b/docs/en/yolov5/tutorials/model_export.md index 39c3010d97..c1e2a1ca7a 100644 --- a/docs/en/yolov5/tutorials/model_export.md +++ b/docs/en/yolov5/tutorials/model_export.md @@ -112,7 +112,7 @@ python export.py --weights yolov5s.pt --include torchscript onnx Output: -```text +``` export: data=data/coco128.yaml, weights=['yolov5s.pt'], imgsz=[640, 640], batch_size=1, device=cpu, half=False, inplace=False, train=False, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=12, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['torchscript', 'onnx'] YOLOv5 ๐Ÿš€ v6.2-104-ge3e5122 Python-3.8.0 torch-1.12.1+cu113 CPU diff --git a/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md index 4ec6b885bb..0c12305d9b 100644 --- a/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md +++ b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md @@ -39,7 +39,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half Output: -```text +``` val: data=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, 
save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False YOLOv5 ๐Ÿš€ v6.0-224-g4c40933 torch 1.10.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB) @@ -75,7 +75,7 @@ We can apply pruning to the model using the `torch_utils.prune()` command define 30% pruned output: -```text +``` val: data=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False YOLOv5 ๐Ÿš€ v6.0-224-g4c40933 torch 1.10.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB) diff --git a/docs/en/yolov5/tutorials/test_time_augmentation.md b/docs/en/yolov5/tutorials/test_time_augmentation.md index 971174832e..d9f7357868 100644 --- a/docs/en/yolov5/tutorials/test_time_augmentation.md +++ b/docs/en/yolov5/tutorials/test_time_augmentation.md @@ -28,7 +28,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half Output: -```text +``` val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True YOLOv5 ๐Ÿš€ v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) @@ -67,7 +67,7 @@ python val.py --weights yolov5x.pt --data coco.yaml --img 832 --augment --half Output: -```text +``` val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=832, conf_thres=0.001, iou_thres=0.6, task=val, device=, single_cls=False, augment=True, verbose=False, save_txt=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True YOLOv5 ๐Ÿš€ v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) @@ -107,7 +107,7 @@ python detect.py --weights 
yolov5s.pt --img 832 --source data/images --augment Output: -```text +``` YOLOv5 ๐Ÿš€ v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) Downloading https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5s.pt to yolov5s.pt... diff --git a/docs/en/yolov5/tutorials/train_custom_data.md b/docs/en/yolov5/tutorials/train_custom_data.md index 426a73fc18..6d4048ff83 100644 --- a/docs/en/yolov5/tutorials/train_custom_data.md +++ b/docs/en/yolov5/tutorials/train_custom_data.md @@ -39,7 +39,7 @@ Developing a custom [object detection](https://docs.ultralytics.com/tasks/detect Ultralytics provides two licensing options to accommodate diverse usage scenarios: - - **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-v3) open-source license is ideal for students, researchers, and enthusiasts passionate about open collaboration and knowledge sharing. It requires derived works to be shared under the same license. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. + - **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license/agpl-3.0) open-source license is ideal for students, researchers, and enthusiasts passionate about open collaboration and knowledge sharing. It requires derived works to be shared under the same license. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for full details. - **Enterprise License**: Designed for commercial applications, this license permits the seamless integration of Ultralytics software and AI models into commercial products and services without the open-source stipulations of AGPL-3.0. If your project requires commercial deployment, request an [Enterprise License](https://www.ultralytics.com/license). Explore our licensing options further on the [Ultralytics Licensing](https://www.ultralytics.com/license) page. 
diff --git a/docs/macros/export-table.md b/docs/macros/export-table.md index a7b4e0aecb..4084476823 100644 --- a/docs/macros/export-table.md +++ b/docs/macros/export-table.md @@ -17,7 +17,7 @@ | [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `{{ model_name or "yolo26n" }}_paddle_model/` | โœ… | `imgsz`, `batch`, `device` | | [MNN](../integrations/mnn.md) | `mnn` | `{{ model_name or "yolo26n" }}.mnn` | โœ… | `imgsz`, `batch`, `int8`, `half`, `device` | | [NCNN](../integrations/ncnn.md) | `ncnn` | `{{ model_name or "yolo26n" }}_ncnn_model/` | โœ… | `imgsz`, `half`, `batch`, `device` | -| [IMX500](../integrations/sony-imx500.md){{ tip2 }} | `imx` | `{{ model_name or "yolo26n" }}_imx_model/` | โœ… | `imgsz`, `int8`, `data`, `fraction`, `device` | +| [IMX500](../integrations/sony-imx500.md){{ tip2 }} | `imx` | `{{ model_name or "yolo26n" }}_imx_model/` | โœ… | `imgsz`, `int8`, `data`, `fraction`, `nms`{{ tip1 }}, `device` | | [RKNN](../integrations/rockchip-rknn.md) | `rknn` | `{{ model_name or "yolo26n" }}_rknn_model/` | โœ… | `imgsz`, `batch`, `name`, `device` | -| [ExecuTorch](../integrations/executorch.md) | `executorch` | `{{ model_name or "yolo26n" }}_executorch_model/` | โœ… | `imgsz`, `device` | -| [Axelera](../integrations/axelera.md) | `axelera` | `{{ model_name or "yolo26n" }}_axelera_model/` | โœ… | `imgsz`, `int8`, `data`, `fraction`, `device` | +| [ExecuTorch](../integrations/executorch.md) | `executorch` | `{{ model_name or "yolo26n" }}_executorch_model/` | โœ… | `imgsz`, `batch`, `device` | +| [Axelera](../integrations/axelera.md) | `axelera` | `{{ model_name or "yolo26n" }}_axelera_model/` | โœ… | `imgsz`, `batch`, `int8`, `data`, `fraction`, `device` | diff --git a/docs/macros/predict-args.md b/docs/macros/predict-args.md index 2eaaa5ba7b..c13a132dea 100644 --- a/docs/macros/predict-args.md +++ b/docs/macros/predict-args.md @@ -1,25 +1,25 @@ -| Argument | Type | Default | Description | -| --------------- | ---------------- | 
---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across [different types of input](https://docs.ultralytics.com/modes/predict/#inference-sources). | -| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. | -| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | -| `imgsz` | `int` or `tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. | -| `rect` | `bool` | `True` | If enabled, minimally pads the shorter side of the image until it's divisible by stride to improve inference speed. If disabled, pads the image to a square during inference. | -| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. 
| -| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0`, `0`, `npu` or `npu:0`). Allows users to select between CPU, a specific GPU, Huawei Ascend NPU, or other compute devices for model execution. | -| `batch` | `int` | `1` | Specifies the batch size for inference (only works when the source is [a directory, video file, or `.txt` file](https://docs.ultralytics.com/modes/predict/#inference-sources)). A larger batch size can provide higher throughput, shortening the total amount of time required for inference. | -| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. | -| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. | -| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames get dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames get skipped, but will cause latency if inference FPS is lower than stream FPS. | -| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. | -| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. | -| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. 
For end-to-end models (YOLO26, YOLOv10), this only prevents the same detection from appearing with multiple class labels (IoU=1.0 duplicates) and does not perform IoU-threshold-based suppression between distinct boxes. | -| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. | -| `retina_masks` | `bool` | `False` | Returns high-resolution segmentation masks. The returned masks (`masks.data`) will match the original image size if enabled. If disabled, they have the image size used during inference. | -| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. | -| `project` | `str` | `None` | Name of the project directory where prediction outputs are saved if `save` is enabled. | -| `name` | `str` | `None` | Name of the prediction run. Used for creating a subdirectory within the project folder, where prediction outputs are stored if `save` is enabled. | -| `stream` | `bool` | `False` | Enables memory-efficient processing for long videos or numerous images by returning a generator of Results objects instead of loading all frames into memory at once. | -| `verbose` | `bool` | `True` | Controls whether to display detailed inference logs in the terminal, providing real-time feedback on the prediction process. | -| `compile` | `bool` or `str` | `False` | Enables PyTorch 2.x `torch.compile` graph compilation with `backend='inductor'`. Accepts `True` โ†’ `"default"`, `False` โ†’ disables, or a string mode such as `"default"`, `"reduce-overhead"`, `"max-autotune-no-cudagraphs"`. Falls back to eager with a warning if unsupported. 
| -| `end2end` | `bool` | `None` | Overrides the end-to-end mode in YOLO models that support NMS-free inference (YOLO26, YOLOv10). Setting it to `False` lets you run prediction using the traditional NMS pipeline, additionally allowing you to make use of the `iou` argument. See the [End-to-End Detection guide](../guides/end2end-detection.md) for details. | +| Argument | Type | Default | Description | +| --------------- | ------------------------ | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `source` | `str` or `int` or `None` | `None` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. If omitted, a warning is logged and the model falls back to the built-in demo assets (`ultralytics/assets`, or a demo URL for OBB). Supports a wide range of formats and sources, enabling flexible application across [different types of input](https://docs.ultralytics.com/modes/predict/#inference-sources). | +| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. | +| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | +| `imgsz` | `int` or `tuple` | `640` | Defines the image size for inference. 
Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. | +| `rect` | `bool` | `True` | If enabled, minimally pads the shorter side of the image until it's divisible by stride to improve inference speed. If disabled, pads the image to a square during inference. | +| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. | +| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0`, `0`, `npu` or `npu:0`). Allows users to select between CPU, a specific GPU, Huawei Ascend NPU, or other compute devices for model execution. | +| `batch` | `int` | `1` | Specifies the batch size for inference (only works when the source is [a directory, video file, or `.txt` file](https://docs.ultralytics.com/modes/predict/#inference-sources)). A larger batch size can provide higher throughput, shortening the total amount of time required for inference. | +| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. | +| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. | +| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames get dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames get skipped, but will cause latency if inference FPS is lower than stream FPS. 
| +| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. | +| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. | +| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. For end-to-end models (YOLO26, YOLOv10), this only prevents the same detection from appearing with multiple class labels (IoU=1.0 duplicates) and does not perform IoU-threshold-based suppression between distinct boxes. | +| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. | +| `retina_masks` | `bool` | `False` | Returns high-resolution segmentation masks. The returned masks (`masks.data`) will match the original image size if enabled. If disabled, they have the image size used during inference. | +| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. | +| `project` | `str` | `None` | Name of the project directory where prediction outputs are saved if `save` is enabled. | +| `name` | `str` | `None` | Name of the prediction run. Used for creating a subdirectory within the project folder, where prediction outputs are stored if `save` is enabled. | +| `stream` | `bool` | `False` | Enables memory-efficient processing for long videos or numerous images by returning a generator of Results objects instead of loading all frames into memory at once. 
| +| `verbose` | `bool` | `True` | Controls whether to display detailed inference logs in the terminal, providing real-time feedback on the prediction process. | +| `compile` | `bool` or `str` | `False` | Enables PyTorch 2.x `torch.compile` graph compilation with `backend='inductor'`. Accepts `True` โ†’ `"default"`, `False` โ†’ disables, or a string mode such as `"default"`, `"reduce-overhead"`, `"max-autotune-no-cudagraphs"`. Falls back to eager with a warning if unsupported. | +| `end2end` | `bool` | `None` | Overrides the end-to-end mode in YOLO models that support NMS-free inference (YOLO26, YOLOv10). Setting it to `False` lets you run prediction using the traditional NMS pipeline, additionally allowing you to make use of the `iou` argument. See the [End-to-End Detection guide](../guides/end2end-detection.md) for details. | diff --git a/docs/macros/solutions-args.md b/docs/macros/solutions-args.md index f40c80533b..b479496eed 100644 --- a/docs/macros/solutions-args.md +++ b/docs/macros/solutions-args.md @@ -7,11 +7,11 @@ "show_in": ["bool", "True", "Flag to control whether to display the in counts on the video stream."], "show_out": ["bool", "True", "Flag to control whether to display the out counts on the video stream."], "analytics_type": ["str", "'line'", "Type of graph, i.e., `line`, `bar`, `area`, or `pie`."], - "colormap": ["int", "cv2.COLORMAP_JET", "Colormap to use for the heatmap."], + "colormap": ["int", "cv2.COLORMAP_DEEPGREEN", "Colormap to use for the heatmap."], "json_file": ["str", "None", "Path to the JSON file that contains all parking coordinates data."], "up_angle": ["float", "145.0", "Angle threshold for the 'up' pose."], "kpts": ["list[int]", "'[6, 8, 10]'", "List of three keypoint indices used for monitoring workouts. 
These keypoints correspond to body joints or parts, such as shoulders, elbows, and wrists, for exercises like push-ups, pull-ups, squats, and ab-workouts."], - "down_angle": ["float", "90.0", "Angle threshold for the 'down' pose."], + "down_angle": ["int", "90", "Angle threshold for the 'down' pose."], "blur_ratio": ["float", "0.5", "Adjusts percentage of blur intensity, with values in range `0.1 - 1.0`."], "crop_dir": ["str", "'cropped-detections'", "Directory name for storing cropped detections."], "records": ["int", "5", "Total detections count to trigger an email with security alarm system."], diff --git a/docs/macros/train-args.md b/docs/macros/train-args.md index 453f001721..674bf02d97 100644 --- a/docs/macros/train-args.md +++ b/docs/macros/train-args.md @@ -40,6 +40,7 @@ | `warmup_bias_lr` | `float` | `0.1` | Learning rate for bias parameters during the warmup phase, helping stabilize model training in the initial epochs. | | `box` | `float` | `7.5` | Weight of the box loss component in the [loss function](https://www.ultralytics.com/glossary/loss-function), influencing how much emphasis is placed on accurately predicting [bounding box](https://www.ultralytics.com/glossary/bounding-box) coordinates. | | `cls` | `float` | `0.5` | Weight of the classification loss in the total loss function, affecting the importance of correct class prediction relative to other components. | +| `cls_pw` | `float` | `0.0` | Power for class weighting to handle class imbalance using inverse class frequency. `0.0` disables class weighting, `1.0` applies full inverse frequency weighting. Values between 0 and 1 provide partial weighting. | | `dfl` | `float` | `1.5` | Weight of the distribution focal loss, used in certain YOLO versions for fine-grained classification. | | `pose` | `float` | `12.0` | Weight of the pose loss in models trained for pose estimation, influencing the emphasis on accurately predicting pose keypoints. 
| | `kobj` | `float` | `1.0` | Weight of the keypoint objectness loss in pose estimation models, balancing detection confidence with pose accuracy. | diff --git a/docs/mkdocs_github_authors.yaml b/docs/mkdocs_github_authors.yaml index d27aca9292..ecd553fdb2 100644 --- a/docs/mkdocs_github_authors.yaml +++ b/docs/mkdocs_github_authors.yaml @@ -217,6 +217,9 @@ davis.justin@mssm.org: esat@ultralytics.com: avatar: https://avatars.githubusercontent.com/u/43647848?v=4 username: artest08 +faruk.gmstss@gmail.com: + avatar: null + username: null fatih@ultralytics.com: avatar: https://avatars.githubusercontent.com/u/34196005?v=4 username: fcakyon @@ -259,6 +262,9 @@ lakshantha@ultralytics.com: lakshanthad@yahoo.com: avatar: https://avatars.githubusercontent.com/u/20147381?v=4 username: lakshanthad +lli28@nd.edu: + avatar: https://avatars.githubusercontent.com/u/126812331?v=4 + username: easyrider11 lukasbuligonantunes@gmail.com: avatar: https://avatars.githubusercontent.com/u/48484445?v=4 username: Buligon diff --git a/examples/RTDETR-ONNXRuntime-Python/README.md b/examples/RTDETR-ONNXRuntime-Python/README.md index 398a662586..b558b6f092 100644 --- a/examples/RTDETR-ONNXRuntime-Python/README.md +++ b/examples/RTDETR-ONNXRuntime-Python/README.md @@ -16,7 +16,7 @@ pip install -r requirements.txt ### Installing `onnxruntime-gpu` (Optional) -For accelerated inference using an NVIDIA GPU, install the **`onnxruntime-gpu`** package. Ensure you have the correct [NVIDIA drivers](https://www.nvidia.com/Download/index.aspx) and [CUDA toolkit](https://developer.nvidia.com/cuda-toolkit) installed first. Consult the official [ONNX Runtime GPU documentation](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility information and setup instructions. +For accelerated inference using an NVIDIA GPU, install the **`onnxruntime-gpu`** package. 
Ensure you have the correct [NVIDIA drivers](https://www.nvidia.com/Download/index.aspx) and [CUDA toolkit](https://developer.nvidia.com/cuda-toolkit) installed first. Consult the official [ONNX Runtime GPU documentation](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility information and setup instructions. ```bash pip install onnxruntime-gpu diff --git a/examples/YOLO-Series-ONNXRuntime-Rust/README.md b/examples/YOLO-Series-ONNXRuntime-Rust/README.md index cbb8839158..40292746f4 100644 --- a/examples/YOLO-Series-ONNXRuntime-Rust/README.md +++ b/examples/YOLO-Series-ONNXRuntime-Rust/README.md @@ -12,7 +12,7 @@ This repository provides a [Rust](https://rust-lang.org/) demo showcasing key [U - **Extensive Model Compatibility**: Supports a wide range of YOLO versions including [YOLOv5](https://docs.ultralytics.com/models/yolov5/), [YOLOv6](https://docs.ultralytics.com/models/yolov6/), [YOLOv7](https://docs.ultralytics.com/models/yolov7/), [YOLOv8](https://docs.ultralytics.com/models/yolov8/), [YOLOv9](https://docs.ultralytics.com/models/yolov9/), [YOLOv10](https://docs.ultralytics.com/models/yolov10/), [YOLO11](https://docs.ultralytics.com/models/yolo11/), [YOLO-World](https://docs.ultralytics.com/models/yolo-world/), [RT-DETR](https://docs.ultralytics.com/models/rtdetr/), and others. - **Versatile Task Coverage**: Includes examples for `Classification`, `Segmentation`, `Detection`, `Pose`, and `OBB`. - **Precision Flexibility**: Works seamlessly with `FP16` and `FP32` precision [ONNX models](https://docs.ultralytics.com/integrations/onnx/). -- **Execution Providers**: Accelerated support for `CPU`, [CUDA](https://developer.nvidia.com/cuda-toolkit), [CoreML](https://developer.apple.com/documentation/coreml), and [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/).
+- **Execution Providers**: Accelerated support for `CPU`, [CUDA](https://developer.nvidia.com/cuda-toolkit), [CoreML](https://developer.apple.com/documentation/coreml), and [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/). - **Dynamic Input Shapes**: Dynamically adjusts to variable `batch`, `width`, and `height` dimensions for flexible model input. - **Flexible Data Loading**: The `DataLoader` component handles images, folders, videos, and real-time video streams. - **Real-Time Display and Video Export**: The `Viewer` provides real-time frame visualization and video export functions, similar to OpenCV’s `imshow()` and `imwrite()`. @@ -45,7 +45,7 @@ This repository provides a [Rust](https://rust-lang.org/) demo showcasing key [U ### 2. [Optional] Install CUDA, CuDNN, and TensorRT -- The CUDA execution provider requires [NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) version `12.x`.
-- **Acceleration:** Supports FP32 and [FP16 (Half Precision)](https://www.ultralytics.com/glossary/half-precision) inference acceleration using [NVIDIA CUDA](https://developer.nvidia.com/cuda-toolkit). +- **Acceleration:** Supports FP32 and [FP16 (Half Precision)](https://www.ultralytics.com/glossary/half-precision) inference acceleration using [NVIDIA CUDA](https://developer.nvidia.com/cuda-toolkit). ## ☕ Note @@ -85,7 +85,7 @@ Ensure you have the following dependencies installed: | [OpenCV](https://opencv.org/releases/) | >=4.0.0 | Required for image loading and preprocessing. | | C++ Compiler | C++17 Support | Needed for features like ``. ([GCC](https://gcc.gnu.org/), [Clang](https://clang.llvm.org/), [MSVC](https://visualstudio.microsoft.com/vs/features/cplusplus/)) | | [CMake](https://cmake.org/download/) | >=3.18 | Cross-platform build system generator. Version 3.18+ recommended for better CUDA support discovery. | -| [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) (Optional) | >=11.4, <12.0 | Required for GPU acceleration via ONNX Runtime's CUDA Execution Provider. **Must be CUDA 11.x**. | +| [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) (Optional) | >=11.4, <12.0 | Required for GPU acceleration via ONNX Runtime's CUDA Execution Provider. **Must be CUDA 11.x**. | | [cuDNN](https://developer.nvidia.com/cudnn) (CUDA required) | =8.x | Required by CUDA Execution Provider. **Must be cuDNN 8.x** compatible with your CUDA 11.x version. | **Important Notes:** diff --git a/examples/YOLOv8-ONNXRuntime-Rust/README.md b/examples/YOLOv8-ONNXRuntime-Rust/README.md index 6a9b6552f5..cf99fc1536 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/README.md +++ b/examples/YOLOv8-ONNXRuntime-Rust/README.md @@ -35,7 +35,7 @@ Please follow the official Rust installation guide: [https://www.rust-lang.org/t ### 3. [Optional] Install CUDA & CuDNN & TensorRT -- The CUDA execution provider requires [CUDA](https://developer.nvidia.com/cuda-toolkit) v11.6+.
+- The CUDA execution provider requires [CUDA](https://developer.nvidia.com/cuda-toolkit) v11.6+. - The TensorRT execution provider requires CUDA v11.4+ and [TensorRT](https://developer.nvidia.com/tensorrt) v8.4+. You may also need [cuDNN](https://developer.nvidia.com/cudnn). ## ▶️ Get Started diff --git a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md index c5fed39006..280f7bc5be 100644 --- a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md +++ b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/README.md @@ -54,7 +54,7 @@ We welcome contributions to improve this demo! If you encounter bugs, have featu ## 📄 License -This project is licensed under the AGPL-3.0 License. For detailed information, please see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file or read the full [AGPL-3.0 license text](https://opensource.org/license/agpl-v3). +This project is licensed under the AGPL-3.0 License. For detailed information, please see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file or read the full [AGPL-3.0 license text](https://opensource.org/license/agpl-3.0). ## 🙏 Acknowledgments diff --git a/tests/conftest.py b/tests/conftest.py index 6a6b644d6d..d53768aff4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,49 @@ import shutil from pathlib import Path +import pytest + + +@pytest.fixture(scope="session") +def solution_assets(): + """Session-scoped fixture to cache solution test assets. + + Lazily downloads solution assets into a persistent directory (WEIGHTS_DIR/solution_assets) and returns a callable + that resolves asset names to cached paths.
+ """ + from ultralytics.utils import ASSETS_URL, WEIGHTS_DIR + from ultralytics.utils.downloads import safe_download + + # Use persistent directory alongside weights + cache_dir = WEIGHTS_DIR / "solution_assets" + cache_dir.mkdir(parents=True, exist_ok=True) + + # Define all assets needed for solution tests + assets = { + # Videos + "demo_video": "solutions_ci_demo.mp4", + "crop_video": "decelera_landscape_min.mov", + "pose_video": "solution_ci_pose_demo.mp4", + "parking_video": "solution_ci_parking_demo.mp4", + "vertical_video": "solution_vertical_demo.mp4", + # Parking manager files + "parking_areas": "solution_ci_parking_areas.json", + "parking_model": "solutions_ci_parking_model.pt", + } + + asset_paths = {} + + def get_asset(name): + """Return the cached path for a named solution asset, downloading it on first use.""" + if name not in asset_paths: + asset_path = cache_dir / assets[name] + if not asset_path.exists(): + safe_download(url=f"{ASSETS_URL}/{asset_path.name}", dir=cache_dir) + asset_paths[name] = asset_path + return asset_paths[name] + + return get_asset + def pytest_addoption(parser): """Add custom command-line options to pytest.""" @@ -55,5 +98,5 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): # Remove directories models = [path for x in {"*.mlpackage", "*_openvino_model"} for path in WEIGHTS_DIR.rglob(x)] - for directory in [WEIGHTS_DIR / "path with spaces", *models]: + for directory in [WEIGHTS_DIR / "solution_assets", WEIGHTS_DIR / "path with spaces", *models]: shutil.rmtree(directory, ignore_errors=True) diff --git a/tests/test_engine.py b/tests/test_engine.py index f7f05b0f8e..b7ef006e14 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -1,15 +1,18 @@ # Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import sys +from types import SimpleNamespace from unittest import mock +import pytest import torch -from tests import MODEL, SOURCE +from tests import MODEL, SOURCE, TASK_MODEL_DATA from 
ultralytics import YOLO from ultralytics.cfg import get_cfg from ultralytics.engine.exporter import Exporter -from ultralytics.models.yolo import classify, detect, segment +from ultralytics.models.yolo import classify, detect, obb, pose, segment +from ultralytics.nn.tasks import load_checkpoint from ultralytics.utils import ASSETS, DEFAULT_CFG, WEIGHTS_DIR @@ -22,122 +25,124 @@ def test_export(): """Test model exporting functionality by adding a callback and verifying its execution.""" exporter = Exporter() exporter.add_callback("on_export_start", test_func) - assert test_func in exporter.callbacks["on_export_start"], "callback test failed" + assert test_func in exporter.callbacks["on_export_start"], "on_export_start callback not registered" f = exporter(model=YOLO("yolo26n.yaml").model) YOLO(f)(SOURCE) # exported model inference -def test_detect(): - """Test YOLO object detection training, validation, and prediction functionality.""" - overrides = {"data": "coco8.yaml", "model": "yolo26n.yaml", "imgsz": 32, "epochs": 1, "save": False} - cfg = get_cfg(DEFAULT_CFG) - cfg.data = "coco8.yaml" - cfg.imgsz = 32 - - # Trainer - trainer = detect.DetectionTrainer(overrides=overrides) - trainer.add_callback("on_train_start", test_func) - assert test_func in trainer.callbacks["on_train_start"], "callback test failed" - trainer.train() - - # Validator - val = detect.DetectionValidator(args=cfg) - val.add_callback("on_val_start", test_func) - assert test_func in val.callbacks["on_val_start"], "callback test failed" - val(model=trainer.best) # validate best.pt - - # Predictor - pred = detect.DetectionPredictor(overrides={"imgsz": [64, 64]}) - pred.add_callback("on_predict_start", test_func) - assert test_func in pred.callbacks["on_predict_start"], "callback test failed" - # Confirm there is no issue with sys.argv being empty - with mock.patch.object(sys, "argv", []): - result = pred(source=ASSETS, model=MODEL) - assert len(result), "predictor test failed" - - # Test resume 
functionality - overrides["resume"] = trainer.last - trainer = detect.DetectionTrainer(overrides=overrides) - try: - trainer.train() - except Exception as e: - print(f"Expected exception caught: {e}") - return - - raise Exception("Resume test failed!") - - -def test_segment(): - """Test image segmentation training, validation, and prediction pipelines using YOLO models.""" +@pytest.mark.parametrize( + "trainer_cls,validator_cls,predictor_cls,data,model,weights", + [ + ( + detect.DetectionTrainer, + detect.DetectionValidator, + detect.DetectionPredictor, + "coco8.yaml", + "yolo26n.yaml", + MODEL, + ), + ( + segment.SegmentationTrainer, + segment.SegmentationValidator, + segment.SegmentationPredictor, + "coco8-seg.yaml", + "yolo26n-seg.yaml", + WEIGHTS_DIR / "yolo26n-seg.pt", + ), + ( + classify.ClassificationTrainer, + classify.ClassificationValidator, + classify.ClassificationPredictor, + "imagenet10", + "yolo26n-cls.yaml", + None, + ), + (obb.OBBTrainer, obb.OBBValidator, obb.OBBPredictor, "dota8.yaml", "yolo26n-obb.yaml", None), + (pose.PoseTrainer, pose.PoseValidator, pose.PosePredictor, "coco8-pose.yaml", "yolo26n-pose.yaml", None), + ], +) +def test_task(trainer_cls, validator_cls, predictor_cls, data, model, weights): + """Test YOLO training, validation, and prediction for various tasks.""" overrides = { - "data": "coco8-seg.yaml", - "model": "yolo26n-seg.yaml", + "data": data, + "model": model, "imgsz": 32, "epochs": 1, "save": False, "mask_ratio": 1, "overlap_mask": False, } - cfg = get_cfg(DEFAULT_CFG) - cfg.data = "coco8-seg.yaml" - cfg.imgsz = 32 # Trainer - trainer = segment.SegmentationTrainer(overrides=overrides) + trainer = trainer_cls(overrides=overrides) trainer.add_callback("on_train_start", test_func) - assert test_func in trainer.callbacks["on_train_start"], "callback test failed" + assert test_func in trainer.callbacks["on_train_start"], "on_train_start callback not registered" trainer.train() # Validator - val = 
segment.SegmentationValidator(args=cfg) - val.add_callback("on_val_start", test_func) - assert test_func in val.callbacks["on_val_start"], "callback test failed" - val(model=trainer.best) # validate best.pt - - # Predictor - pred = segment.SegmentationPredictor(overrides={"imgsz": [64, 64]}) - pred.add_callback("on_predict_start", test_func) - assert test_func in pred.callbacks["on_predict_start"], "callback test failed" - result = pred(source=ASSETS, model=WEIGHTS_DIR / "yolo26n-seg.pt") - assert len(result), "predictor test failed" - - # Test resume functionality - overrides["resume"] = trainer.last - trainer = segment.SegmentationTrainer(overrides=overrides) - try: - trainer.train() - except Exception as e: - print(f"Expected exception caught: {e}") - return - - raise Exception("Resume test failed!") - - -def test_classify(): - """Test image classification including training, validation, and prediction phases.""" - overrides = {"data": "imagenet10", "model": "yolo26n-cls.yaml", "imgsz": 32, "epochs": 1, "save": False} cfg = get_cfg(DEFAULT_CFG) - cfg.data = "imagenet10" + cfg.data = data cfg.imgsz = 32 - - # Trainer - trainer = classify.ClassificationTrainer(overrides=overrides) - trainer.add_callback("on_train_start", test_func) - assert test_func in trainer.callbacks["on_train_start"], "callback test failed" - trainer.train() - - # Validator - val = classify.ClassificationValidator(args=cfg) + val = validator_cls(args=cfg) val.add_callback("on_val_start", test_func) - assert test_func in val.callbacks["on_val_start"], "callback test failed" + assert test_func in val.callbacks["on_val_start"], "on_val_start callback not registered" val(model=trainer.best) # Predictor - pred = classify.ClassificationPredictor(overrides={"imgsz": [64, 64]}) + pred = predictor_cls(overrides={"imgsz": [64, 64]}) pred.add_callback("on_predict_start", test_func) - assert test_func in pred.callbacks["on_predict_start"], "callback test failed" - result = pred(source=ASSETS, 
model=trainer.best) - assert len(result), "predictor test failed" + assert test_func in pred.callbacks["on_predict_start"], "on_predict_start callback not registered" + + # Determine model path for prediction + model_path = weights if weights else trainer.best + if model == "yolo26n.yaml": # only for detection + # Confirm there is no issue with sys.argv being empty + with mock.patch.object(sys, "argv", []): + result = pred(source=ASSETS, model=model_path) + assert len(result) > 0, f"Predictor returned no results for {model}" + else: + result = pred(source=ASSETS, model=model_path) + assert len(result) > 0, f"Predictor returned no results for {model}" + + # Test resume functionality + with pytest.raises(AssertionError): + trainer_cls(overrides={**overrides, "resume": trainer.last}).train() + + +@pytest.mark.parametrize("task,weight,data", TASK_MODEL_DATA) +def test_resume_incomplete(task, weight, data, tmp_path): + """Test training resumes from an incomplete checkpoint.""" + train_args = { + "data": data, + "epochs": 2, + "save": True, + "plots": False, + "workers": 0, + "project": tmp_path, + "name": task, + "imgsz": 32, + "exist_ok": True, + } + + def stop_after_first_epoch(trainer): + if trainer.epoch == 0: + trainer.stop = True + + def disable_final_eval(trainer): + trainer.final_eval = lambda: None + + model = YOLO(weight) + model.add_callback("on_train_start", disable_final_eval) + model.add_callback("on_train_epoch_end", stop_after_first_epoch) + model.train(**train_args) + last_path = model.trainer.last + _, ckpt = load_checkpoint(last_path) + assert ckpt["epoch"] == 0, "checkpoint should be resumable" + + # Resume training using the checkpoint + resume_model = YOLO(last_path) + resume_model.train(resume=True, **train_args) + assert resume_model.trainer.start_epoch == resume_model.trainer.epoch == 1, "resume test failed" def test_nan_recovery(): @@ -155,3 +160,44 @@ def test_nan_recovery(): trainer.add_callback("on_train_batch_end", inject_nan) 
trainer.train() assert nan_injected[0], "NaN injection failed" + + +def test_train_reuses_loaded_checkpoint_model(monkeypatch): + """Test training reuses an already-loaded checkpoint model instead of re-parsing the model source.""" + model = YOLO("yolo26n.yaml") + model.ckpt = {"checkpoint": True} + model.ckpt_path = "/tmp/fake.pt" + model.overrides["model"] = "ul://glenn-jocher/m2/exp-14" + original_model = model.model + captured = {} + + class FakeTrainer: + def __init__(self, overrides=None, _callbacks=None): + self.overrides = overrides + self.callbacks = _callbacks + self.model = None + self.validator = SimpleNamespace(metrics=None) + self.best = MODEL.parent / "nonexistent-best.pt" + self.last = MODEL + captured["trainer"] = self + + def get_model(self, cfg=None, weights=None, verbose=True): + captured["cfg"] = cfg + captured["weights"] = weights + return original_model + + def train(self): + return None + + monkeypatch.setattr("ultralytics.engine.model.checks.check_pip_update_available", lambda: None) + monkeypatch.setattr(model, "_smart_load", lambda key: FakeTrainer) + monkeypatch.setattr( + "ultralytics.engine.model.load_checkpoint", + lambda path: (original_model, {"checkpoint": True}), + ) + + model.train(data="coco8.yaml", epochs=1) + + assert captured["trainer"].model is original_model, "Trainer model does not match original" + assert captured["cfg"] == original_model.yaml, f"Config mismatch: {captured['cfg']} != {original_model.yaml}" + assert captured["weights"] is original_model, "Weights do not match original model" diff --git a/tests/test_exports.py b/tests/test_exports.py index 0eb11dee5d..51b73040d5 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -64,8 +64,8 @@ def test_torch2onnx_serializes_concurrent_exports(monkeypatch, tmp_path): for thread in threads: thread.join() - assert not errors - assert max_active == 1 + assert not errors, f"Concurrent export errors: {errors}" + assert max_active == 1, f"Expected max 1 concurrent 
export, got {max_active}" @pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1") @@ -341,7 +341,7 @@ def test_export_executorch(): file = YOLO(MODEL).export(format="executorch", imgsz=32) assert Path(file).exists(), f"ExecuTorch export failed, directory not found: {file}" # Check that .pte file exists in the exported directory - pte_file = Path(file) / Path(MODEL).with_suffix(".pte").name + pte_file = Path(file) / "model.pte" assert pte_file.exists(), f"ExecuTorch .pte file not found: {pte_file}" # Check that metadata.yaml exists metadata_file = Path(file) / "metadata.yaml" @@ -359,8 +359,7 @@ def test_export_executorch_matrix(task): file = YOLO(TASK2MODEL[task]).export(format="executorch", imgsz=32) assert Path(file).exists(), f"ExecuTorch export failed for task '{task}', directory not found: {file}" # Check that .pte file exists in the exported directory - model_name = Path(TASK2MODEL[task]).with_suffix(".pte").name - pte_file = Path(file) / model_name + pte_file = Path(file) / "model.pte" assert pte_file.exists(), f"ExecuTorch .pte file not found for task '{task}': {pte_file}" # Check that metadata.yaml exists metadata_file = Path(file) / "metadata.yaml" diff --git a/tests/test_python.py b/tests/test_python.py index 4daa1758cb..871c7bb8af 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -3,6 +3,7 @@ import contextlib import csv import urllib +import zipfile from copy import copy from pathlib import Path @@ -35,7 +36,7 @@ from ultralytics.utils import ( checks, is_github_action_running, ) -from ultralytics.utils.downloads import download +from ultralytics.utils.downloads import download, safe_download from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13 @@ -81,7 +82,7 @@ def test_predict_txt(tmp_path): for src in SOURCES_LIST: f.write(f"{src}\n") results = YOLO(MODEL)(source=file, imgsz=32) - assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images + assert len(results) == 7, f"Expected 7 results from source list, got 
{len(results)}" @pytest.mark.skipif(True, reason="disabled for testing") @@ -93,7 +94,7 @@ def test_predict_csv_multi_row(tmp_path): writer.writerow(["source"]) writer.writerows([[src] for src in SOURCES_LIST]) results = YOLO(MODEL)(source=file, imgsz=32) - assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images + assert len(results) == 7, f"Expected 7 results from multi-row CSV, got {len(results)}" @pytest.mark.skipif(True, reason="disabled for testing") @@ -104,7 +105,7 @@ def test_predict_csv_single_row(tmp_path): writer = csv.writer(f) writer.writerow(SOURCES_LIST) results = YOLO(MODEL)(source=file, imgsz=32) - assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images + assert len(results) == 7, f"Expected 7 results from single-row CSV, got {len(results)}" @pytest.mark.parametrize("model_name", MODELS) @@ -155,7 +156,7 @@ def test_predict_gray_and_4ch(tmp_path): for f in source_rgba, source_grayscale, source_non_utf, source_spaces: for source in Image.open(f), cv2.imread(str(f)), f: results = model(source, save=True, verbose=True, imgsz=32) - assert len(results) == 1 # verify that an image was run + assert len(results) == 1, f"Expected 1 result for {f.name}, got {len(results)}" f.unlink() # cleanup @@ -334,16 +335,21 @@ def test_labels_and_crops(): assert len(cls_idxs) >= 2, f"Expected at least 2 detections, got {len(cls_idxs)}" # Check label path labels = save_path / f"labels/{im_name}.txt" - assert labels.exists() + assert labels.exists(), f"Label file {labels} does not exist" # Check detections match label count - assert len(r.boxes.data) == len([line for line in labels.read_text().splitlines() if line]) + label_count = len([line for line in labels.read_text().splitlines() if line]) + assert len(r.boxes.data) == label_count, f"Box count {len(r.boxes.data)} != label count {label_count}" # Check crops path and files crop_dirs = list((save_path / "crops").iterdir()) crop_files = [f for p in crop_dirs for f in p.glob("*")] # Crop directories match detections - assert 
all(r.names.get(c) in {d.name for d in crop_dirs} for c in cls_idxs) + crop_dir_names = {d.name for d in crop_dirs} + assert all(r.names.get(c) in crop_dir_names for c in cls_idxs), ( + f"Crop dirs {crop_dir_names} don't match classes {cls_idxs}" + ) # Same number of crops as detections - assert len([f for f in crop_files if im_name in f.name]) == len(r.boxes.data) + crop_count = len([f for f in crop_files if im_name in f.name]) + assert crop_count == len(r.boxes.data), f"Crop count {crop_count} != detection count {len(r.boxes.data)}" @pytest.mark.skipif(not ONLINE, reason="environment is offline") @@ -367,6 +373,27 @@ def test_data_utils(tmp_path): zip_directory(tmp_path / "coco8/images/val") # zip +def test_safe_download_unzips_local_path_archive(tmp_path): + """Test safe_download() unzips local archive paths without treating them like remote URLs.""" + dataset_dir = tmp_path / "coco8 local" + archive = tmp_path / "coco8 local.zip" + (dataset_dir / "images" / "train").mkdir(parents=True) + (dataset_dir / "images" / "val").mkdir(parents=True) + (dataset_dir / "labels" / "train").mkdir(parents=True) + (dataset_dir / "labels" / "val").mkdir(parents=True) + (dataset_dir / "data.yaml").write_text("path: .\ntrain: images/train\nval: images/val\nnames:\n 0: item\n") + + with zipfile.ZipFile(archive, "w") as zf: + for path in dataset_dir.rglob("*"): + zf.write(path, arcname=path.relative_to(tmp_path)) + + extracted = safe_download(archive, dir=tmp_path / "datasets", unzip=True, progress=False) + expected_path = tmp_path / "datasets" / dataset_dir.name + assert extracted == expected_path, f"Extracted path {extracted} != expected {expected_path}" + assert (extracted / "data.yaml").is_file(), f"data.yaml not found in {extracted}" + assert (extracted / "images" / "val").is_dir(), f"images/val not found in {extracted}" + + @pytest.mark.skipif(not ONLINE, reason="environment is offline") def test_data_converter(tmp_path): """Test dataset conversion functions from COCO to YOLO 
format and class mappings.""" @@ -641,6 +668,9 @@ def test_classify_transforms_train(image, auto_augment, erasing, force_color_jit @pytest.mark.skipif(not ONLINE, reason="environment is offline") def test_model_tune(): """Tune YOLO model for performance improvement.""" + YOLO("yolo26n.pt").tune( + data=["coco8.yaml", "coco8-grayscale.yaml"], plots=False, imgsz=32, epochs=1, iterations=2, device="cpu" + ) YOLO("yolo26n-pose.pt").tune(data="coco8-pose.yaml", plots=False, imgsz=32, epochs=1, iterations=2, device="cpu") YOLO("yolo26n-cls.pt").tune(data="imagenet10", plots=False, imgsz=32, epochs=1, iterations=2, device="cpu") diff --git a/tests/test_solutions.py b/tests/test_solutions.py index dd05a7267e..5892efc7a5 100644 --- a/tests/test_solutions.py +++ b/tests/test_solutions.py @@ -18,13 +18,6 @@ from ultralytics.utils.torch_utils import TORCH_2_4 # Predefined argument values SHOW = False -DEMO_VIDEO = "solutions_ci_demo.mp4" # for all the solutions, except workout, object cropping and parking management -CROP_VIDEO = "decelera_landscape_min.mov" # for object cropping solution -POSE_VIDEO = "solution_ci_pose_demo.mp4" # only for workouts monitoring solution -PARKING_VIDEO = "solution_ci_parking_demo.mp4" # only for parking management solution -PARKING_AREAS_JSON = "solution_ci_parking_areas.json" # only for parking management solution -PARKING_MODEL = "solutions_ci_parking_model.pt" # only for parking management solution -VERTICAL_VIDEO = "solution_vertical_demo.mp4" # only for vertical line counting REGION = [(10, 200), (540, 200), (540, 180), (10, 180)] # for object counting, speed estimation and queue management HORIZONTAL_LINE = [(10, 200), (540, 200)] # for object counting VERTICAL_LINE = [(320, 0), (320, 400)] # for object counting @@ -50,129 +43,129 @@ def process_video(solution, video_path: str, needs_frame_count: bool = False): @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled for testing due to --slow test errors after YOLOE PR.") 
@pytest.mark.parametrize( - "name, solution_class, needs_frame_count, video, kwargs", + "name, solution_class, needs_frame_count, video_key, kwargs_update", [ ( "ObjectCounter", solutions.ObjectCounter, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), ( "ObjectCounter", solutions.ObjectCounter, False, - DEMO_VIDEO, + "demo_video", {"region": HORIZONTAL_LINE, "model": MODEL, "show": SHOW}, ), ( "ObjectCounterVertical", solutions.ObjectCounter, False, - DEMO_VIDEO, + "vertical_video", {"region": VERTICAL_LINE, "model": MODEL, "show": SHOW}, ), ( "ObjectCounterwithOBB", solutions.ObjectCounter, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": "yolo26n-obb.pt", "show": SHOW}, ), ( "Heatmap", solutions.Heatmap, False, - DEMO_VIDEO, + "demo_video", {"colormap": cv2.COLORMAP_PARULA, "model": MODEL, "show": SHOW, "region": None}, ), ( "HeatmapWithRegion", solutions.Heatmap, False, - DEMO_VIDEO, + "demo_video", {"colormap": cv2.COLORMAP_PARULA, "region": REGION, "model": MODEL, "show": SHOW}, ), ( "SpeedEstimator", solutions.SpeedEstimator, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), ( "QueueManager", solutions.QueueManager, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), ( "LineAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "line", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), ( "PieAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "pie", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), ( "BarAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "bar", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), ( "AreaAnalytics", solutions.Analytics, True, - DEMO_VIDEO, + "demo_video", {"analytics_type": "area", "model": MODEL, "show": SHOW, "figsize": (6.4, 3.2)}, ), - ("TrackZone", solutions.TrackZone, 
False, DEMO_VIDEO, {"region": REGION, "model": MODEL, "show": SHOW}), + ("TrackZone", solutions.TrackZone, False, "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}), ( "ObjectCropper", solutions.ObjectCropper, False, - CROP_VIDEO, + "crop_video", {"temp_crop_dir": "cropped-detections", "model": MODEL, "show": SHOW}, ), ( "ObjectBlurrer", solutions.ObjectBlurrer, False, - DEMO_VIDEO, + "demo_video", {"blur_ratio": 0.02, "model": MODEL, "show": SHOW}, ), ( "InstanceSegmentation", solutions.InstanceSegmentation, False, - DEMO_VIDEO, + "demo_video", {"model": "yolo26n-seg.pt", "show": SHOW}, ), - ("VisionEye", solutions.VisionEye, False, DEMO_VIDEO, {"model": MODEL, "show": SHOW}), + ("VisionEye", solutions.VisionEye, False, "demo_video", {"model": MODEL, "show": SHOW}), ( "RegionCounter", solutions.RegionCounter, False, - DEMO_VIDEO, + "demo_video", {"region": REGION, "model": MODEL, "show": SHOW}, ), - ("AIGym", solutions.AIGym, False, POSE_VIDEO, {"kpts": [6, 8, 10], "show": SHOW}), + ("AIGym", solutions.AIGym, False, "pose_video", {"kpts": [6, 8, 10], "show": SHOW}), ( "ParkingManager", solutions.ParkingManagement, False, - PARKING_VIDEO, - {"temp_model": str(PARKING_MODEL), "show": SHOW, "temp_json_file": str(PARKING_AREAS_JSON)}, + "parking_video", + {"model": "parking_model", "show": SHOW, "json_file": "parking_areas"}, ), ( "StreamlitInference", @@ -183,34 +176,31 @@ def process_video(solution, video_path: str, needs_frame_count: bool = False): ), ], ) -def test_solution(name, solution_class, needs_frame_count, video, kwargs, tmp_path): +def test_solution(name, solution_class, needs_frame_count, video_key, kwargs_update, tmp_path, solution_assets): """Test individual Ultralytics solution with video processing and parameter validation.""" - if video: - if name != "ObjectCounterVertical": - safe_download(url=f"{ASSETS_URL}/{video}", dir=tmp_path) - else: - safe_download(url=f"{ASSETS_URL}/{VERTICAL_VIDEO}", dir=tmp_path) - if name == 
"ParkingManager": - safe_download(url=f"{ASSETS_URL}/{PARKING_AREAS_JSON}", dir=tmp_path) - safe_download(url=f"{ASSETS_URL}/{PARKING_MODEL}", dir=tmp_path) + # Get video path from persistent cache (no copying needed, read-only access) + video_path = str(solution_assets(video_key)) if video_key else None - elif name == "StreamlitInference": + # Update kwargs to use cached paths for parking manager + kwargs = {} + for key, value in kwargs_update.items(): + if key.startswith("temp_"): + kwargs[key.replace("temp_", "")] = str(tmp_path / value) + elif value == "parking_model": + kwargs[key] = str(solution_assets("parking_model")) + elif value == "parking_areas": + kwargs[key] = str(solution_assets("parking_areas")) + else: + kwargs[key] = value + + if name == "StreamlitInference": if checks.check_imshow(): # do not merge with elif above solution_class(**kwargs).inference() # requires interactive GUI environment return - # Update kwargs to use tmp_path - kwargs_updated = {} - for key in kwargs: - if key.startswith("temp_"): - kwargs_updated[key.replace("temp_", "")] = str(tmp_path / kwargs[key]) - else: - kwargs_updated[key] = kwargs[key] - - video = VERTICAL_VIDEO if name == "ObjectCounterVertical" else video process_video( - solution=solution_class(**kwargs_updated), - video_path=str(tmp_path / video), + solution=solution_class(**kwargs), + video_path=video_path, needs_frame_count=needs_frame_count, ) @@ -220,7 +210,7 @@ def test_left_click_selection(): dc = solutions.DistanceCalculation() dc.boxes, dc.track_ids = [[10, 10, 50, 50]], [1] dc.mouse_event_for_distance(cv2.EVENT_LBUTTONDOWN, 30, 30, None, None) - assert 1 in dc.selected_boxes + assert 1 in dc.selected_boxes, f"Expected track_id 1 in selected_boxes, got {dc.selected_boxes}" def test_right_click_reset(): @@ -228,8 +218,8 @@ def test_right_click_reset(): dc = solutions.DistanceCalculation() dc.selected_boxes, dc.left_mouse_count = {1: [10, 10, 50, 50]}, 1 dc.mouse_event_for_distance(cv2.EVENT_RBUTTONDOWN, 0, 
0, None, None) - assert not dc.selected_boxes - assert dc.left_mouse_count == 0 + assert not dc.selected_boxes, f"Expected empty selected_boxes after reset, got {dc.selected_boxes}" + assert dc.left_mouse_count == 0, f"Expected left_mouse_count=0 after reset, got {dc.left_mouse_count}" def test_parking_json_none(): @@ -249,7 +239,7 @@ def test_analytics_graph_not_supported(): analytics.process(im0=np.zeros((640, 480, 3), dtype=np.uint8), frame_number=0) assert False, "Expected ValueError for unsupported chart type" except ValueError as e: - assert "Unsupported analytics_type" in str(e) + assert "Unsupported analytics_type" in str(e), f"Expected 'Unsupported analytics_type' in error, got: {e}" def test_area_chart_padding(): @@ -257,7 +247,7 @@ def test_area_chart_padding(): analytics = solutions.Analytics(analytics_type="area") analytics.update_graph(frame_number=1, count_dict={"car": 2}, plot="area") plot_im = analytics.update_graph(frame_number=2, count_dict={"car": 3, "person": 1}, plot="area") - assert plot_im is not None + assert plot_im is not None, "Area chart plot returned None" def test_config_update_method_with_invalid_argument(): @@ -267,7 +257,7 @@ def test_config_update_method_with_invalid_argument(): obj.update(invalid_key=123) assert False, "Expected ValueError for invalid update argument" except ValueError as e: - assert "is not a valid solution argument" in str(e) + assert "is not a valid solution argument" in str(e), f"Expected validation error message, got: {e}" def test_plot_with_no_masks(): @@ -275,7 +265,7 @@ def test_plot_with_no_masks(): im0 = np.zeros((640, 480, 3), dtype=np.uint8) isegment = solutions.InstanceSegmentation(model="yolo26n-seg.pt") results = isegment(im0) - assert results.plot_im is not None + assert results.plot_im is not None, "Instance segmentation plot returned None" def test_streamlit_handle_video_upload_creates_file(): @@ -291,10 +281,11 @@ def test_streamlit_handle_video_upload_creates_file(): output_path = 
"ultralytics.mp4" else: output_path = None - assert output_path == "ultralytics.mp4" - assert os.path.exists("ultralytics.mp4") + assert output_path == "ultralytics.mp4", f"Expected output_path 'ultralytics.mp4', got {output_path}" + assert os.path.exists("ultralytics.mp4"), "ultralytics.mp4 file not created" with open("ultralytics.mp4", "rb") as f: - assert f.read() == b"fake video content" + content = f.read() + assert content == b"fake video content", f"File content mismatch: {content}" os.remove("ultralytics.mp4") @@ -329,7 +320,7 @@ def test_similarity_search_complete(tmp_path): img.save(image_dir / f"test_image_{i}.jpg") searcher = solutions.VisualAISearch(data=str(image_dir)) results = searcher("a red and white object") - assert results + assert results, "Similarity search returned empty results" def test_distance_calculation_process_method(): @@ -347,9 +338,9 @@ def test_distance_calculation_process_method(): frame = np.zeros((480, 640, 3), dtype=np.uint8) with patch.object(dc, "extract_tracks"), patch.object(dc, "display_output"), patch("cv2.setMouseCallback"): result = dc.process(frame) - assert isinstance(result, SolutionResults) - assert result.total_tracks == 2 - assert result.pixels_distance > 0 + assert isinstance(result, SolutionResults), f"Expected SolutionResults, got {type(result)}" + assert result.total_tracks == 2, f"Expected 2 tracks, got {result.total_tracks}" + assert result.pixels_distance > 0, f"Expected positive distance, got {result.pixels_distance}" def test_object_crop_with_show_True(): diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 00ea4cb047..1184e63c07 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -__version__ = "8.4.33" +__version__ = "8.4.38" import importlib import os diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index 9d5d52aa23..86f7255623 100644 --- 
a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -170,6 +170,7 @@ CFG_FLOAT_KEYS = frozenset( "warmup_epochs", "box", "cls", + "cls_pw", "dfl", "degrees", "shear", diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index 84ddd1ae41..14c9214f5c 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -101,6 +101,7 @@ warmup_momentum: 0.8 # (float) initial momentum during warmup warmup_bias_lr: 0.1 # (float) bias learning rate during warmup box: 7.5 # (float) box loss gain cls: 0.5 # (float) classification loss gain +cls_pw: 0.0 # (float) class weights power for handling class imbalance (0.0=disable, 1.0=full inverse frequency) dfl: 1.5 # (float) distribution focal loss gain pose: 12.0 # (float) pose loss gain (pose tasks) kobj: 1.0 # (float) keypoint objectness loss gain (pose tasks) diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index 4c89764771..5f97e602de 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -843,7 +843,12 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat yaml_path = dataset_dir / "data.yaml" if yaml_path.is_file(): try: - if YAML.load(yaml_path).get("hash") == _hash: + cached = YAML.load(yaml_path) + if cached.get("hash") == _hash and all( + (dataset_dir / cached[split]).is_dir() and (dataset_dir / "labels" / split).is_dir() + for split in ("train", "val", "test") + if split in cached + ): return yaml_path except Exception: pass diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py index a4046d2454..4b588d7f1a 100644 --- a/ultralytics/data/dataset.py +++ b/ultralytics/data/dataset.py @@ -153,7 +153,8 @@ class YOLODataset(BaseDataset): x["hash"] = get_hash(self.label_files + self.im_files) x["results"] = nf, nm, ne, nc, len(self.im_files) x["msgs"] = msgs # warnings - save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION) + if x["labels"]: + 
save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION) return x def get_labels(self) -> list[dict]: @@ -182,12 +183,11 @@ class YOLODataset(BaseDataset): LOGGER.info("\n".join(cache["msgs"])) # display warnings # Read cache - [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items labels = cache["labels"] if not labels: - raise RuntimeError( - f"No valid images found in {cache_path}. Images with incorrectly formatted labels are ignored. {HELP_URL}" - ) + issues = "\n ".join(sorted(set(cache["msgs"]))) or "no error details" + raise RuntimeError(f"No valid images found in {cache_path}.\n {issues}\n{HELP_URL}") + [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items self.im_files = [lb["im_file"] for lb in labels] # update im_files # Check if the dataset is all boxes or all segments diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 0ccbcfed0f..423de2ac4f 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -417,7 +417,9 @@ def check_det_dataset(dataset: str, autodownload: bool = True) -> dict[str, Any] Returns: (dict[str, Any]): Parsed dataset information and paths. 
""" - file = check_file(dataset) + file = Path(check_file(dataset)) + if file.is_dir(): + file = find_dataset_yaml(file) # Download (optional) extract_dir = "" diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index d0c241ba18..32ee9d544f 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -476,7 +476,7 @@ class Exporter: m.agnostic_nms = self.args.agnostic_nms m.xyxy = self.args.nms and fmt != "coreml" m.shape = None # reset cached shape for new export input size - if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models + if hasattr(model, "pe") and hasattr(m, "fuse") and not hasattr(m, "lrpc"): # for YOLOE models m.fuse(model.pe.to(self.device)) elif isinstance(m, C2f) and not is_tf_format: # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph @@ -601,9 +601,9 @@ class Exporter: from ultralytics.utils.export.torchscript import torch2torchscript return torch2torchscript( - NMSModel(self.model, self.args) if self.args.nms else self.model, - self.im, - self.file, + model=NMSModel(self.model, self.args) if self.args.nms else self.model, + im=self.im, + output_file=self.file.with_suffix(".torchscript"), optimize=self.args.optimize, metadata=self.metadata, prefix=prefix, @@ -692,9 +692,9 @@ class Exporter: @try_export def export_openvino(self, prefix=colorstr("OpenVINO:")): """Export YOLO model to OpenVINO format.""" - from ultralytics.utils.export import torch2openvino + from ultralytics.utils.export.openvino import torch2openvino - # OpenVINO <= 2025.1.0 error on macOS 15.4+: https://github.com/openvinotoolkit/openvino/issues/30023" + # OpenVINO <= 2025.1.0 error on macOS 15.4+: https://github.com/openvinotoolkit/openvino/issues/30023 check_requirements("openvino>=2025.2.0" if MACOS and MACOS_VERSION >= "15.4" else "openvino>=2024.0.0") import openvino as ov @@ -757,16 +757,26 @@ class Exporter: """Export YOLO model to PaddlePaddle format.""" from 
ultralytics.utils.export.paddle import torch2paddle - return torch2paddle(self.model, self.im, self.file, self.metadata, prefix) + return torch2paddle( + model=self.model, + im=self.im, + output_dir=str(self.file).replace(self.file.suffix, f"_paddle_model{os.sep}"), + metadata=self.metadata, + prefix=prefix, + ) @try_export def export_mnn(self, prefix=colorstr("MNN:")): """Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN.""" from ultralytics.utils.export.mnn import onnx2mnn - f_onnx = self.export_onnx() return onnx2mnn( - f_onnx, self.file, half=self.args.half, int8=self.args.int8, metadata=self.metadata, prefix=prefix + onnx_file=self.export_onnx(), + output_file=self.file.with_suffix(".mnn"), + half=self.args.half, + int8=self.args.int8, + metadata=self.metadata, + prefix=prefix, ) @try_export @@ -775,9 +785,9 @@ class Exporter: from ultralytics.utils.export.ncnn import torch2ncnn return torch2ncnn( - self.model, - self.im, - self.file, + model=self.model, + im=self.im, + output_dir=str(self.file).replace(self.file.suffix, "_ncnn_model/"), half=self.args.half, metadata=self.metadata, device=self.device, @@ -986,9 +996,7 @@ class Exporter: """Export YOLO model to TensorFlow GraphDef *.pb format https://github.com/leimao/Frozen-Graph-TensorFlow.""" from ultralytics.utils.export.tensorflow import keras2pb - f = self.file.with_suffix(".pb") - keras2pb(keras_model, f, prefix) - return f + return keras2pb(keras_model, output_file=self.file.with_suffix(".pb"), prefix=prefix) @try_export def export_tflite(self, prefix=colorstr("TensorFlow Lite:")): @@ -1016,11 +1024,13 @@ class Exporter: from ultralytics.utils.export.axelera import torch2axelera + output_dir = self.file.parent / f"{self.file.stem}_axelera_model" return torch2axelera( model=self.model, - file=self.file, + output_dir=output_dir, calibration_dataset=self.get_int8_calibration_dataloader(prefix), transform_fn=self._transform_fn, + model_name=self.file.stem, metadata=self.metadata, 
prefix=prefix, ) @@ -1032,7 +1042,13 @@ class Exporter: check_executorch_requirements() from ultralytics.utils.export.executorch import torch2executorch - return torch2executorch(self.model, self.file, self.im, metadata=self.metadata, prefix=prefix) + return torch2executorch( + model=self.model, + im=self.im, + output_dir=str(self.file).replace(self.file.suffix, "_executorch_model/"), + metadata=self.metadata, + prefix=prefix, + ) @try_export def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")): @@ -1055,10 +1071,9 @@ class Exporter: from ultralytics.utils.export.tensorflow import tflite2edgetpu LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...") - tflite2edgetpu(tflite_file=tflite_model, output_dir=tflite_model.parent, prefix=prefix) - f = str(tflite_model).replace(".tflite", "_edgetpu.tflite") # Edge TPU model - self._add_tflite_metadata(f) - return f + output_file = tflite2edgetpu(tflite_file=tflite_model, output_dir=tflite_model.parent, prefix=prefix) + self._add_tflite_metadata(output_file) + return output_file @try_export def export_tfjs(self, prefix=colorstr("TensorFlow.js:")): @@ -1066,12 +1081,15 @@ class Exporter: check_requirements("tensorflowjs") from ultralytics.utils.export.tensorflow import pb2tfjs - f = str(self.file).replace(self.file.suffix, "_web_model") # js dir - f_pb = str(self.file.with_suffix(".pb")) # *.pb path - pb2tfjs(pb_file=f_pb, output_dir=f, half=self.args.half, int8=self.args.int8, prefix=prefix) - # Add metadata - YAML.save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml - return f + output_dir = pb2tfjs( + pb_file=str(self.file.with_suffix(".pb")), + output_dir=str(self.file).replace(self.file.suffix, "_web_model/"), + half=self.args.half, + int8=self.args.int8, + prefix=prefix, + ) + YAML.save(Path(output_dir) / "metadata.yaml", self.metadata) + return output_dir @try_export def export_rknn(self, prefix=colorstr("RKNN:")): @@ -1080,7 +1098,13 @@ class Exporter: 
self.args.opset = min(self.args.opset or 19, 19) # rknn-toolkit expects opset<=19 f_onnx = self.export_onnx() - return onnx2rknn(f_onnx, name=self.args.name, metadata=self.metadata, prefix=prefix) + return onnx2rknn( + onnx_file=f_onnx, + output_dir=str(self.file).replace(self.file.suffix, f"_rknn_model{os.sep}"), + name=self.args.name, + metadata=self.metadata, + prefix=prefix, + ) @try_export def export_imx(self, prefix=colorstr("IMX:")): @@ -1120,11 +1144,11 @@ class Exporter: check_apt_requirements(["openjdk-17-jre"]) return torch2imx( - self.model, - self.file, - self.args.conf, - self.args.iou, - self.args.max_det, + model=self.model, + output_dir=str(self.file).replace(self.file.suffix, "_imx_model/"), + conf=self.args.conf, + iou=self.args.iou, + max_det=self.args.max_det, metadata=self.metadata, dataset=self.get_int8_calibration_dataloader(prefix), prefix=prefix, diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 6cc678b9c0..ee953fbe1f 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -426,7 +426,7 @@ class Model(torch.nn.Module): self._check_is_pytorch_model() return self.model.info(detailed=detailed, verbose=verbose, imgsz=imgsz) - def fuse(self) -> None: + def fuse(self) -> Model: """Fuse Conv2d and BatchNorm2d layers in the model for optimized inference. This method iterates through the model's modules and fuses consecutive Conv2d and BatchNorm2d layers into a @@ -444,6 +444,7 @@ class Model(torch.nn.Module): """ self._check_is_pytorch_model() self.model.fuse() + return self def embed( self, @@ -756,8 +757,6 @@ class Model(torch.nn.Module): checks.check_pip_update_available() - if isinstance(kwargs.get("pretrained", None), (str, Path)): - self.load(kwargs["pretrained"]) # load pretrained weights if provided overrides = YAML.load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides custom = { # NOTE: handle the case when 'cfg' includes 'data'. 
@@ -781,8 +780,9 @@ class Model(torch.nn.Module): args["resume"] = False self.trainer = (trainer or self._smart_load("trainer"))(overrides=args, _callbacks=self.callbacks) - if not args.get("resume"): # manually set model only if not resuming - self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml) + if not args.get("resume") and self.ckpt: + # Reuse the already-loaded checkpoint model to avoid re-resolving remote weight sources during trainer setup. + self.trainer.model = self.trainer.get_model(weights=self.model, cfg=self.model.yaml) self.model = self.trainer.model self.trainer.train() diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 4aa94dad9a..53c0fe3771 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -335,14 +335,14 @@ class BaseTrainer: self.scaler = ( torch.amp.GradScaler("cuda", enabled=self.amp) if TORCH_2_4 else torch.cuda.amp.GradScaler(enabled=self.amp) ) - if self.world_size > 1: - self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True) - # Check imgsz gs = max(int(self.model.stride.max() if hasattr(self.model, "stride") else 32), 32) # grid size (max stride) self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1) self.stride = gs # for multiscale training + if self.world_size > 1: + self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True) + # Batch size if self.batch_size < 1 and RANK == -1: # single-GPU only, estimate best batch size self.args.batch = self.batch_size = self.auto_batch() @@ -350,6 +350,7 @@ class BaseTrainer: self._build_train_pipeline() self.validator = self.get_validator() self.ema = ModelEMA(self.model) + self.set_class_weights() # compute class weights after dataloader is ready if RANK in {-1, 0}: metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val") self.metrics 
= dict(zip(metric_keys, [0] * len(metric_keys))) @@ -531,8 +532,7 @@ class BaseTrainer: self.stop |= (time.time() - self.train_time_start) > (self.args.time * 3600) # Save model - if self.args.save or final_epoch: - self.save_model() + if (self.args.save or final_epoch) and self.save_model(): self.run_callbacks("on_model_save") # Scheduler @@ -630,6 +630,11 @@ class BaseTrainer: """Save model training checkpoints with additional metadata.""" import io + ema = deepcopy(unwrap_model(self.ema.ema)).half() + if not all(torch.isfinite(v).all() for v in ema.state_dict().values() if isinstance(v, torch.Tensor)): + LOGGER.warning(f"Skipping checkpoint save at epoch {self.epoch}: EMA contains NaN/Inf") + return False + # Serialize ckpt to a byte buffer once (faster than repeated torch.save() calls) buffer = io.BytesIO() torch.save( @@ -637,7 +642,7 @@ class BaseTrainer: "epoch": self.epoch, "best_fitness": self.best_fitness, "model": None, # resume and final checkpoints derive from EMA - "ema": deepcopy(unwrap_model(self.ema.ema)).half(), + "ema": ema, "updates": self.ema.updates, "optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())), "scaler": self.scaler.state_dict(), @@ -666,6 +671,7 @@ class BaseTrainer: self.best.write_bytes(serialized_ckpt) # save best.pt if (self.save_period > 0) and (self.epoch % self.save_period == 0): (self.wdir / f"epoch{self.epoch}.pt").write_bytes(serialized_ckpt) # save epoch, i.e. 'epoch3.pt' + return True def get_dataset(self): """Get train and validation datasets from data dictionary. 
@@ -720,7 +726,7 @@ class BaseTrainer: cfg = weights.yaml elif isinstance(self.args.pretrained, (str, Path)): weights, _ = load_checkpoint(self.args.pretrained) - self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK == -1) # calls Model(cfg, weights) + self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK in {-1, 0}) # calls Model(cfg, weights) return ckpt def optimizer_step(self): @@ -785,6 +791,10 @@ class BaseTrainer: """Set or update model parameters before training.""" self.model.names = self.data["names"] + def set_class_weights(self): + """Compute and set class weights for handling class imbalance. Override in subclasses.""" + pass + def build_targets(self, preds, targets): """Build target tensors for training YOLO model.""" pass @@ -912,9 +922,11 @@ class BaseTrainer: corrupted = broadcast_list[0] if not corrupted: return False - if epoch == self.start_epoch or not self.last.exists(): + if epoch == self.start_epoch: LOGGER.warning(f"{reason} detected but can not recover from last.pt...") return False # Cannot recover on first epoch, let training continue + if not self.last.exists(): + raise RuntimeError(f"{reason} detected but no valid last.pt is available for recovery") self.nan_recovery_attempts += 1 if self.nan_recovery_attempts > 3: raise RuntimeError(f"Training failed: NaN persisted for {self.nan_recovery_attempts} epochs") @@ -946,7 +958,7 @@ class BaseTrainer: ) self.epochs += ckpt["epoch"] # finetune additional epochs self._load_checkpoint_state(ckpt) - if unwrap_model(self.model).end2end: + if getattr(unwrap_model(self.model), "end2end", False): # initialize loss and resume o2o and o2m args unwrap_model(self.model).criterion = unwrap_model(self.model).init_criterion() unwrap_model(self.model).criterion.updates = start_epoch - 1 diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py index c3f3327572..c8136cf9ed 100644 --- a/ultralytics/engine/tuner.py +++ b/ultralytics/engine/tuner.py @@ -17,11 +17,14 @@ 
Examples: from __future__ import annotations import gc +import json import random import shutil import subprocess import time +from collections import Counter from datetime import datetime +from pathlib import Path import numpy as np import torch @@ -37,13 +40,13 @@ class Tuner: """A class for hyperparameter tuning of YOLO models. The class evolves YOLO model hyperparameters over a given number of iterations by mutating them according to the - search space and retraining the model to evaluate their performance. Supports both local CSV storage and distributed - MongoDB Atlas coordination for multi-machine hyperparameter optimization. + search space and retraining the model to evaluate their performance. Supports both local NDJSON storage and + distributed MongoDB Atlas coordination for multi-machine hyperparameter optimization. Attributes: space (dict[str, tuple]): Hyperparameter search space containing bounds and scaling factors for mutation. tune_dir (Path): Directory where evolution logs and results will be saved. - tune_csv (Path): Path to the CSV file where evolution logs are saved. + tune_file (Path): Path to the NDJSON file where evolution logs are saved. args (SimpleNamespace): Configuration arguments for the tuning process. callbacks (dict): Callback functions to be executed during tuning. prefix (str): Prefix string for logging messages. 
@@ -98,6 +101,7 @@ class Tuner: "warmup_momentum": (0.0, 0.95), # warmup initial momentum "box": (1.0, 20.0), # box loss gain "cls": (0.1, 4.0), # cls loss gain (scale with pixels) + "cls_pw": (0.0, 1.0), # cls power weight "dfl": (0.4, 12.0), # dfl loss gain "hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction) "hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction) @@ -124,7 +128,7 @@ class Tuner: self.args.exist_ok = self.args.resume # resume w/ same tune_dir self.tune_dir = get_save_dir(self.args, name=self.args.name or "tune") self.args.name, self.args.exist_ok, self.args.resume = (None, False, False) # reset to not affect training - self.tune_csv = self.tune_dir / "tune_results.csv" + self.tune_file = self.tune_dir / "tune_results.ndjson" self.callbacks = _callbacks or callbacks.get_default_callbacks() self.prefix = colorstr("Tuner: ") callbacks.add_integration_callbacks(self) @@ -192,7 +196,7 @@ class Tuner: Notes: - Creates a fitness index for fast queries of top results - - Falls back to CSV-only mode if connection fails + - Falls back to local NDJSON mode if connection fails - Uses connection pooling and retry logic for production reliability """ self.mongodb = self._connect(mongodb_uri) @@ -214,13 +218,45 @@ class Tuner: except Exception: return [] - def _save_to_mongodb(self, fitness: float, hyperparameters: dict[str, float], metrics: dict, iteration: int): + @staticmethod + def _json_default(x): + """Convert tensor-like values for JSON serialization.""" + return x.item() if hasattr(x, "item") else str(x) + + def _result_record( + self, + iteration: int, + fitness: float, + hyperparameters: dict[str, float], + datasets: dict[str, dict], + save_dirs: dict[str, str] | None = None, + ) -> dict: + """Build one local tuning result record.""" + result = { + "iteration": iteration, + "fitness": round(fitness, 5), + "hyperparameters": hyperparameters, + "datasets": datasets, + } + if save_dirs: + result["save_dirs"] = save_dirs + return 
result + + def _save_to_mongodb( + self, + fitness: float, + hyperparameters: dict[str, float], + metrics: dict, + datasets: dict[str, dict], + iteration: int, + ): """Save results to MongoDB with proper type conversion. Args: fitness (float): Fitness score achieved with these hyperparameters. hyperparameters (dict[str, float]): Dictionary of hyperparameter values. metrics (dict): Complete training metrics dictionary (mAP, precision, recall, losses, etc.). + datasets (dict[str, dict]): Per-dataset metrics for the iteration. iteration (int): Current iteration number. """ try: @@ -229,6 +265,7 @@ class Tuner: "fitness": fitness, "hyperparameters": {k: (v.item() if hasattr(v, "item") else v) for k, v in hyperparameters.items()}, "metrics": metrics, + "datasets": datasets, "timestamp": datetime.now(), "iteration": iteration, } @@ -236,30 +273,85 @@ class Tuner: except Exception as e: LOGGER.warning(f"{self.prefix}MongoDB save failed: {e}") - def _sync_mongodb_to_csv(self): - """Sync MongoDB results to CSV for plotting compatibility. + def _sync_mongodb_to_file(self): + """Sync MongoDB results to the local NDJSON tuning log. - Downloads all results from MongoDB and writes them to the local CSV file in chronological order. This enables - the existing plotting functions to work seamlessly with distributed MongoDB data. + Downloads all results from MongoDB and writes them to the local NDJSON file in chronological order. This keeps + resume, mutation, and plotting on the same local source of truth when using distributed tuning. 
""" try: - # Get all results from MongoDB all_results = list(self.collection.find().sort("iteration", 1)) if not all_results: return - # Write to CSV - headers = ",".join(["fitness", *list(self.space.keys())]) + "\n" - with open(self.tune_csv, "w", encoding="utf-8") as f: - f.write(headers) + with open(self.tune_file, "w", encoding="utf-8") as f: for result in all_results: - fitness = result["fitness"] or 0.0 - hyp_values = [result["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()] - log_row = [round(fitness, 5), *hyp_values] - f.write(",".join(map(str, log_row)) + "\n") + f.write( + json.dumps( + self._result_record( + result["iteration"], + result["fitness"] or 0.0, + result.get("hyperparameters", {}), + result.get("datasets", {}), + result.get("save_dirs"), + ), + default=self._json_default, + ) + + "\n" + ) except Exception as e: - LOGGER.warning(f"{self.prefix}MongoDB to CSV sync failed: {e}") + LOGGER.warning(f"{self.prefix}MongoDB to NDJSON sync failed: {e}") + + def _load_local_results(self) -> list[dict]: + """Load local tuning results from the NDJSON log.""" + if not self.tune_file.exists(): + return [] + with open(self.tune_file, encoding="utf-8") as f: + return [json.loads(line) for line in f if line.strip()] + + def _local_results_to_array(self, results: list[dict], n: int | None = None) -> np.ndarray | None: + """Convert local NDJSON records to a fitness-plus-hyperparameters numpy array.""" + if not results: + return None + x = np.array( + [ + [r.get("fitness", 0.0)] + + [r.get("hyperparameters", {}).get(k, getattr(self.args, k)) for k in self.space] + for r in results + ], + dtype=float, + ) + if n is None: + return x + order = np.argsort(-x[:, 0]) + return x[order][:n] + + def _save_local_result(self, result: dict): + """Append one tuning result to the local NDJSON log.""" + with open(self.tune_file, "a", encoding="utf-8") as f: + f.write(json.dumps(result, default=self._json_default) + "\n") + + @staticmethod + def 
_best_metrics(result: dict) -> dict | None: + """Summarize best-result metrics for logging.""" + datasets = result.get("datasets", {}) + if len(datasets) == 1: + return next(iter(datasets.values())) + if len(datasets) > 1: + return {k: round(v.get("fitness") or 0.0, 5) for k, v in datasets.items()} + return None + + @staticmethod + def _dataset_names(data: list) -> list[str]: + """Create stable unique dataset names for logging and per-run directories.""" + stems = [Path(str(d)).stem for d in data] + totals, seen = Counter(stems), Counter() + names = [] + for stem in stems: + seen[stem] += 1 + names.append(f"{stem}-{seen[stem]}" if totals[stem] > 1 else stem) + return names @staticmethod def _crossover(x: np.ndarray, alpha: float = 0.2, k: int = 9) -> np.ndarray: @@ -308,13 +400,9 @@ class Tuner: elif self.collection.name in self.collection.database.list_collection_names(): # Tuner started elsewhere x = np.array([[0.0] + [getattr(self.args, k) for k in self.space.keys()]]) - # Fall back to CSV if MongoDB unavailable or empty - if x is None and self.tune_csv.exists(): - csv_data = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1) - if len(csv_data) > 0: - fitness = csv_data[:, 0] # first column - order = np.argsort(-fitness) - x = csv_data[order][:n] # top-n sorted by fitness DESC + # Fall back to local NDJSON if MongoDB unavailable or empty + if x is None: + x = self._local_results_to_array(self._load_local_results(), n=n) # Mutate if we have data, otherwise use defaults if x is not None: @@ -351,10 +439,10 @@ class Tuner: """Execute the hyperparameter evolution process when the Tuner instance is called. This method iterates through the specified number of iterations, performing the following steps: - 1. Sync MongoDB results to CSV (if using distributed mode) + 1. Sync MongoDB results to local NDJSON (if using distributed mode) 2. Mutate hyperparameters using the best previous results or defaults 3. 
Train a YOLO model with the mutated hyperparameters - 4. Log fitness scores and hyperparameters to MongoDB and/or CSV + 4. Log fitness scores and hyperparameters to MongoDB and/or NDJSON 5. Track the best performing configuration across all iterations Args: @@ -362,17 +450,17 @@ class Tuner: cleanup (bool): Whether to delete iteration weights to reduce storage space during tuning. """ t0 = time.time() - best_save_dir, best_metrics = None, None + self.tune_dir.mkdir(parents=True, exist_ok=True) (self.tune_dir / "weights").mkdir(parents=True, exist_ok=True) + best_save_dirs = {} - # Sync MongoDB to CSV at startup for proper resume logic + # Sync MongoDB to local NDJSON at startup for proper resume logic if self.mongodb: - self._sync_mongodb_to_csv() + self._sync_mongodb_to_file() start = 0 - if self.tune_csv.exists(): - x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1) - start = x.shape[0] + if self.tune_file.exists(): + start = len(self._load_local_results()) LOGGER.info(f"{self.prefix}Resuming tuning run {self.tune_dir} from iteration {start + 1}...") for i in range(start, iterations): # Linearly decay sigma from 0.2 โ†’ 0.1 over first 300 iterations @@ -383,69 +471,100 @@ class Tuner: mutated_hyp = self._mutate(sigma=sigma_i) LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}") - metrics = {} train_args = {**vars(self.args), **mutated_hyp} - save_dir = get_save_dir(get_cfg(train_args)) - train_args["save_dir"] = str(save_dir) # pass save_dir to subprocess to ensure same path is used - weights_dir = save_dir / "weights" - try: - # Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang) - launch = [__import__("sys").executable, "-m", "ultralytics.cfg.__init__"] # workaround yolo not found - cmd = [*launch, "train", *(f"{k}={v}" for k, v in train_args.items())] - return_code = subprocess.run(cmd, check=True).returncode - ckpt_file = weights_dir / ("best.pt" if 
(weights_dir / "best.pt").exists() else "last.pt") - metrics = torch_load(ckpt_file)["train_metrics"] - assert return_code == 0, "training failed" + data = train_args.pop("data") + if not isinstance(data, (list, tuple)): + data = [data] + dataset_names = self._dataset_names(data) + save_dir = ( + [get_save_dir(get_cfg(train_args))] + if len(data) == 1 + else [get_save_dir(get_cfg(train_args), name=name) for name in dataset_names] + ) + weights_dir = [s / "weights" for s in save_dir] + metrics = {} + all_fitness = [] + dataset_metrics = {} + for j, (d, dataset) in enumerate(zip(data, dataset_names)): + metrics_i = {} + try: + train_args["data"] = d + train_args["save_dir"] = str(save_dir[j]) # pass save_dir to subprocess to ensure same path is used + # Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang) + launch = [ + __import__("sys").executable, + "-m", + "ultralytics.cfg.__init__", + ] # workaround yolo not found + cmd = [*launch, "train", *(f"{k}={v}" for k, v in train_args.items())] + return_code = subprocess.run(cmd, check=True).returncode + ckpt_file = weights_dir[j] / ("best.pt" if (weights_dir[j] / "best.pt").exists() else "last.pt") + metrics_i = torch_load(ckpt_file)["train_metrics"] + metrics = metrics_i + assert return_code == 0, "training failed" - # Cleanup - time.sleep(1) - gc.collect() - torch.cuda.empty_cache() + # Cleanup + time.sleep(1) + gc.collect() + torch.cuda.empty_cache() - except Exception as e: - LOGGER.error(f"training failure for hyperparameter tuning iteration {i + 1}\n{e}") + except Exception as e: + LOGGER.error(f"training failure for hyperparameter tuning iteration {i + 1}\n{e}") - # Save results - MongoDB takes precedence - fitness = metrics.get("fitness") or 0.0 + # Save results - MongoDB takes precedence + dataset_metrics[dataset] = metrics_i or {"fitness": 0.0} + all_fitness.append(dataset_metrics[dataset].get("fitness") or 0.0) + fitness = sum(all_fitness) / len(all_fitness) + result = 
self._result_record( + i + 1, + fitness, + mutated_hyp, + dataset_metrics, + {dataset: str(s) for dataset, s in zip(dataset_names, save_dir)}, + ) + stop_after_iteration = False if self.mongodb: - self._save_to_mongodb(fitness, mutated_hyp, metrics, i + 1) - self._sync_mongodb_to_csv() + self._save_to_mongodb(fitness, mutated_hyp, metrics, dataset_metrics, i + 1) + self._sync_mongodb_to_file() total_mongo_iterations = self.collection.count_documents({}) if total_mongo_iterations >= iterations: - LOGGER.info( - f"{self.prefix}Target iterations ({iterations}) reached in MongoDB ({total_mongo_iterations}). Stopping." - ) - break + stop_after_iteration = True else: - # Save to CSV only if no MongoDB - log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()] - headers = "" if self.tune_csv.exists() else (",".join(["fitness", *list(self.space.keys())]) + "\n") - with open(self.tune_csv, "a", encoding="utf-8") as f: - f.write(headers + ",".join(map(str, log_row)) + "\n") + self._save_local_result(result) # Get best results - x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1) + results = self._load_local_results() + x = self._local_results_to_array(results) fitness = x[:, 0] # first column best_idx = fitness.argmax() + best_result = results[best_idx] + current_best_save_dirs = best_result.get("save_dirs", {}) best_is_current = best_idx == i if best_is_current: - best_save_dir = str(save_dir) - best_metrics = {k: round(v, 5) for k, v in metrics.items()} - for ckpt in weights_dir.glob("*.pt"): - shutil.copy2(ckpt, self.tune_dir / "weights") - elif cleanup and best_save_dir: - shutil.rmtree(best_save_dir, ignore_errors=True) # remove iteration dirs to reduce storage space + if cleanup: + for s in best_save_dirs.values(): + if s not in current_best_save_dirs.values(): + shutil.rmtree(s, ignore_errors=True) + if len(data) == 1: + for ckpt in weights_dir[0].glob("*.pt"): + shutil.copy2(ckpt, self.tune_dir / "weights") + best_save_dirs = 
current_best_save_dirs + elif cleanup: + for s in save_dir: + shutil.rmtree(s, ignore_errors=True) # remove iteration dirs to reduce storage space + best_save_dirs = current_best_save_dirs # Plot tune results - plot_tune_results(str(self.tune_csv)) + plot_tune_results(str(self.tune_file)) # Save and print tune results header = ( f"{self.prefix}{i + 1}/{iterations} iterations complete โœ… ({time.time() - t0:.2f}s)\n" f"{self.prefix}Results saved to {colorstr('bold', self.tune_dir)}\n" f"{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n" - f"{self.prefix}Best fitness metrics are {best_metrics}\n" - f"{self.prefix}Best fitness model is {best_save_dir}" + f"{self.prefix}Best fitness metrics are {self._best_metrics(best_result)}\n" + f"{self.prefix}Best fitness model is " + f"{self.tune_dir / 'weights' if len(best_result.get('datasets', {})) == 1 else 'not saved for multi-dataset tuning'}" ) LOGGER.info("\n" + header) data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])} @@ -455,3 +574,8 @@ class Tuner: header=remove_colorstr(header.replace(self.prefix, "# ")) + "\n", ) YAML.print(self.tune_dir / "best_hyperparameters.yaml") + if stop_after_iteration: + LOGGER.info( + f"{self.prefix}Target iterations ({iterations}) reached in MongoDB ({total_mongo_iterations}). Stopping." 
+ ) + break diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 700a9a1513..b3b9831a23 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -2273,8 +2273,9 @@ class SAM3SemanticPredictor(SAM3Predictor): """Run inference on the extracted features with optional bounding boxes and labels.""" # NOTE: priority: bboxes > text > pre-set classes nc = 1 if bboxes is not None else len(text) if text is not None else len(self.model.names) - geometric_prompt = self._get_dummy_prompt(nc) + geometric_prompt = None if bboxes is not None: + geometric_prompt = self._get_dummy_prompt(nc) for i in range(len(bboxes)): geometric_prompt.append_boxes(bboxes[[i]], labels[[i]]) if text is None: diff --git a/ultralytics/models/sam/sam3/decoder.py b/ultralytics/models/sam/sam3/decoder.py index 32cb43d708..c1fbd838fd 100644 --- a/ultralytics/models/sam/sam3/decoder.py +++ b/ultralytics/models/sam/sam3/decoder.py @@ -330,7 +330,7 @@ class TransformerDecoder(nn.Module): # cache miss, will create compilation issue # In case we're not compiling, we'll still rely on the dict-based cache if feat_size not in self.coord_cache: - self.coord_cache[feat_size] = self._get_coords(H, W, reference_boxes.device) + self.coord_cache[feat_size] = self._get_coords(H, W, reference_boxes.device, reference_boxes.dtype) coords_h, coords_w = self.coord_cache[feat_size] assert coords_h.shape == (H,) @@ -522,7 +522,7 @@ class TransformerDecoder(nn.Module): # clamp to mitigate numerical issues if self.clamp_presence_logits: - intermediate_layer_presence_logits.clamp( + intermediate_layer_presence_logits.clamp_( min=-self.clamp_presence_logit_max_val, max=self.clamp_presence_logit_max_val, ) diff --git a/ultralytics/models/sam/sam3/sam3_image.py b/ultralytics/models/sam/sam3/sam3_image.py index 105aecaf86..92efe160c5 100644 --- a/ultralytics/models/sam/sam3/sam3_image.py +++ b/ultralytics/models/sam/sam3/sam3_image.py @@ -290,15 +290,18 @@ class 
SAM3SemanticModel(torch.nn.Module): self, backbone_out, batch=len(text_ids) ) backbone_out.update({k: v for k, v in self.text_embeddings.items()}) - with torch.profiler.record_function("SAM3Image._encode_prompt"): - prompt, prompt_mask = self._encode_prompt(img_feats, img_pos_embeds, vis_feat_sizes, geometric_prompt) # index text features (note that regardless of early or late fusion, the batch size of # `txt_feats` is always the number of *prompts* in the encoder) txt_feats = backbone_out["language_features"][:, text_ids] txt_masks = backbone_out["language_mask"][text_ids] - # encode text - prompt = torch.cat([txt_feats, prompt], dim=0) - prompt_mask = torch.cat([txt_masks, prompt_mask], dim=1) + if geometric_prompt is not None: + with torch.profiler.record_function("SAM3Image._encode_prompt"): + geo_prompt, geo_mask = self._encode_prompt(img_feats, img_pos_embeds, vis_feat_sizes, geometric_prompt) + prompt = torch.cat([txt_feats, geo_prompt], dim=0) + prompt_mask = torch.cat([txt_masks, geo_mask], dim=1) + else: + prompt = txt_feats + prompt_mask = txt_masks # Run the encoder with torch.profiler.record_function("SAM3Image._run_encoder"): diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py index 89d70bd764..eea031d7ed 100644 --- a/ultralytics/models/yolo/detect/train.py +++ b/ultralytics/models/yolo/detect/train.py @@ -147,7 +147,25 @@ class DetectionTrainer(BaseTrainer): self.model.args = self.args # attach hyperparameters to model if getattr(self.model, "end2end"): self.model.set_head_attr(max_det=self.args.max_det) - # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc + + def set_class_weights(self): + """Compute and set class weights for handling class imbalance. + + Class weights are computed based on inverse class frequency in the training dataset, + raised to the power of cls_pw (0 < cls_pw <= 1 dampens, cls_pw > 1 amplifies). 
+ Final weights are normalized so their mean equals 1.0. + """ + assert 0 <= self.args.cls_pw <= 1.0, "cls_pw must be in the range [0, 1]" + if self.args.cls_pw == 0.0: + return + classes = np.concatenate([lb["cls"].flatten() for lb in self.train_loader.dataset.labels], 0) + class_counts = np.bincount(classes.astype(int), minlength=self.data["nc"]).astype(np.float32) + class_counts = np.where(class_counts == 0, 1.0, class_counts) + + weights = (1.0 / class_counts) ** self.args.cls_pw # apply power directly + weights = weights / weights.mean() # normalize so mean equals 1.0 + self.model.class_weights = torch.from_numpy(weights).to(self.device) + LOGGER.info(f"Class weights: {self.model.class_weights.cpu().numpy().round(3)}") def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True): """Return a YOLO detection model. diff --git a/ultralytics/models/yolo/pose/train.py b/ultralytics/models/yolo/pose/train.py index 44ac7f4c17..2e9834ee31 100644 --- a/ultralytics/models/yolo/pose/train.py +++ b/ultralytics/models/yolo/pose/train.py @@ -8,7 +8,7 @@ from typing import Any from ultralytics.models import yolo from ultralytics.nn.tasks import PoseModel -from ultralytics.utils import DEFAULT_CFG +from ultralytics.utils import DEFAULT_CFG, RANK from ultralytics.utils.torch_utils import unwrap_model @@ -72,7 +72,11 @@ class PoseTrainer(yolo.detect.DetectionTrainer): (PoseModel): Initialized pose estimation model. 
""" model = PoseModel( - cfg, nc=self.data["nc"], ch=self.data["channels"], data_kpt_shape=self.data["kpt_shape"], verbose=verbose + cfg, + nc=self.data["nc"], + ch=self.data["channels"], + data_kpt_shape=self.data["kpt_shape"], + verbose=verbose and RANK == -1, ) if weights: model.load(weights) diff --git a/ultralytics/nn/backends/axelera.py b/ultralytics/nn/backends/axelera.py index 2720614f38..03222f0a25 100644 --- a/ultralytics/nn/backends/axelera.py +++ b/ultralytics/nn/backends/axelera.py @@ -28,7 +28,7 @@ class AxeleraBackend(BaseBackend): except ImportError: check_requirements( "axelera-rt==1.6.0rc3", - cmds="--extra-index-url https://software.axelera.ai/artifactory/api/pypi/axelera-pypi/simple", + cmds="--extra-index-url https://software.axelera.ai/artifactory/api/pypi/axelera-pypi/simple --pre", ) from axelera.runtime import op diff --git a/ultralytics/nn/backends/coreml.py b/ultralytics/nn/backends/coreml.py index 9f96c66ec7..d36dfc2e56 100644 --- a/ultralytics/nn/backends/coreml.py +++ b/ultralytics/nn/backends/coreml.py @@ -32,7 +32,9 @@ class CoreMLBackend(BaseBackend): LOGGER.info(f"Loading {weight} for CoreML inference...") self.model = ct.models.MLModel(weight) - self.dynamic = self.model.get_spec().description.input[0].type.HasField("multiArrayType") + spec = self.model.get_spec() + self.input_name = spec.description.input[0].name + self.dynamic = spec.description.input[0].type.HasField("multiArrayType") # Load metadata self.apply_metadata(dict(self.model.user_defined_metadata)) @@ -50,7 +52,7 @@ class CoreMLBackend(BaseBackend): h, w = im.shape[1:3] im = im.transpose(0, 3, 1, 2) if self.dynamic else Image.fromarray((im[0] * 255).astype("uint8")) - y = self.model.predict({"image": im}) + y = self.model.predict({self.input_name: im}) if "confidence" in y: # NMS included from ultralytics.utils.ops import xywh2xyxy diff --git a/ultralytics/nn/backends/openvino.py b/ultralytics/nn/backends/openvino.py index cd2e734973..842b630a7b 100644 --- 
a/ultralytics/nn/backends/openvino.py +++ b/ultralytics/nn/backends/openvino.py @@ -7,7 +7,7 @@ from pathlib import Path import numpy as np import torch -from ultralytics.utils import LOGGER +from ultralytics.utils import ARM64, LINUX, LOGGER from ultralytics.utils.checks import check_requirements from .base import BaseBackend @@ -31,14 +31,15 @@ class OpenVINOBackend(BaseBackend): import openvino as ov core = ov.Core() - device_name = "AUTO" + fallback_device = "CPU" if core.available_devices == ["CPU"] else "AUTO" + device_name = fallback_device if isinstance(self.device, str) and self.device.startswith("intel"): device_name = self.device.split(":")[1].upper() self.device = torch.device("cpu") if device_name not in core.available_devices: - LOGGER.warning(f"OpenVINO device '{device_name}' not available. Using 'AUTO' instead.") - device_name = "AUTO" + LOGGER.warning(f"OpenVINO device '{device_name}' not available. Using '{fallback_device}' instead.") + device_name = fallback_device w = Path(weight) if not w.is_file(): @@ -57,11 +58,15 @@ class OpenVINOBackend(BaseBackend): # Set inference mode self.inference_mode = "CUMULATIVE_THROUGHPUT" if self.dynamic and self.batch > 1 else "LATENCY" + config = {"PERFORMANCE_HINT": self.inference_mode} + if LINUX and ARM64 and device_name == "CPU": + config["EXECUTION_MODE_HINT"] = ov.properties.hint.ExecutionMode.ACCURACY + config["INFERENCE_PRECISION_HINT"] = ov.Type.f32 self.ov_compiled_model = core.compile_model( ov_model, device_name=device_name, - config={"PERFORMANCE_HINT": self.inference_mode}, + config=config, ) LOGGER.info( f"Using OpenVINO {self.inference_mode} mode for batch={self.batch} inference on " diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py index 20d1734e60..d4cc1c4fe8 100644 --- a/ultralytics/nn/modules/block.py +++ b/ultralytics/nn/modules/block.py @@ -1676,11 +1676,17 @@ class AAttn(nn.Module): self.num_heads = num_heads self.head_dim = head_dim = dim // num_heads - 
all_head_dim = head_dim * self.num_heads + self.all_head_dim = all_head_dim = head_dim * self.num_heads self.qkv = Conv(dim, all_head_dim * 3, 1, act=False) self.proj = Conv(all_head_dim, dim, 1, act=False) - self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False) + self.pe = Conv(all_head_dim, all_head_dim, 7, 1, 3, g=all_head_dim, act=False) + + def __setstate__(self, state): + """Add missing all_head_dim attribute to old checkpoints.""" + super().__setstate__(state) + if not hasattr(self, "all_head_dim"): + self.all_head_dim = self.head_dim * self.num_heads def forward(self, x: torch.Tensor) -> torch.Tensor: """Process the input tensor through the area-attention. @@ -1691,12 +1697,12 @@ class AAttn(nn.Module): Returns: (torch.Tensor): Output tensor after area-attention. """ - B, C, H, W = x.shape + B, _, H, W = x.shape N = H * W qkv = self.qkv(x).flatten(2).transpose(1, 2) if self.area > 1: - qkv = qkv.reshape(B * self.area, N // self.area, C * 3) + qkv = qkv.reshape(B * self.area, N // self.area, self.all_head_dim * 3) B, N, _ = qkv.shape q, k, v = ( qkv.view(B, N, self.num_heads, self.head_dim * 3) @@ -1710,12 +1716,12 @@ class AAttn(nn.Module): v = v.permute(0, 3, 1, 2) if self.area > 1: - x = x.reshape(B // self.area, N * self.area, C) - v = v.reshape(B // self.area, N * self.area, C) + x = x.reshape(B // self.area, N * self.area, self.all_head_dim) + v = v.reshape(B // self.area, N * self.area, self.all_head_dim) B, N, _ = x.shape - x = x.reshape(B, H, W, C).permute(0, 3, 1, 2).contiguous() - v = v.reshape(B, H, W, C).permute(0, 3, 1, 2).contiguous() + x = x.reshape(B, H, W, self.all_head_dim).permute(0, 3, 1, 2).contiguous() + v = v.reshape(B, H, W, self.all_head_dim).permute(0, 3, 1, 2).contiguous() x = x + self.pe(v) return self.proj(x) diff --git a/ultralytics/solutions/config.py b/ultralytics/solutions/config.py index 1eb23e881a..456d4ea798 100644 --- a/ultralytics/solutions/config.py +++ b/ultralytics/solutions/config.py @@ -23,7 +23,7 @@ class 
SolutionConfig: show_conf (bool): Whether to show confidence scores on the visual output. show_labels (bool): Whether to display class labels on visual output. region (list[tuple[int, int]], optional): Polygonal region or line for object counting. - colormap (int, optional): OpenCV colormap constant for visual overlays (e.g., cv2.COLORMAP_JET). + colormap (int, optional): OpenCV colormap constant for visual overlays (e.g., cv2.COLORMAP_DEEPGREEN). show_in (bool): Whether to display count number for objects entering the region. show_out (bool): Whether to display count number for objects leaving the region. up_angle (float): Upper angle threshold used in pose-based workouts monitoring. diff --git a/ultralytics/solutions/solutions.py b/ultralytics/solutions/solutions.py index c44d5173a3..5d1398b3d7 100644 --- a/ultralytics/solutions/solutions.py +++ b/ultralytics/solutions/solutions.py @@ -466,7 +466,8 @@ class SolutionAnnotator(Annotator): Args: keypoints (list[list[float]]): Keypoints data to be plotted, each in format [x, y, confidence]. - indices (list[int], optional): Keypoint indices to be plotted. + indices (list[int], optional): Keypoint indices to be plotted. The drawing order follows the order of this + list. radius (int): Keypoint radius. conf_thresh (float): Confidence threshold for keypoints. @@ -478,7 +479,12 @@ class SolutionAnnotator(Annotator): Modifies self.im in-place. 
""" indices = indices or [2, 5, 7] - points = [(int(k[0]), int(k[1])) for i, k in enumerate(keypoints) if i in indices and k[2] >= conf_thresh] + n = len(keypoints) + points = [ + (int(keypoints[j][0]), int(keypoints[j][1])) + for j in indices + if 0 <= j < n and (float(keypoints[j][2]) if len(keypoints[j]) > 2 else 1.0) >= conf_thresh + ] # Draw lines between consecutive points for start, end in zip(points[:-1], points[1:]): diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py index bf8336dc30..59b9fca4a0 100644 --- a/ultralytics/trackers/byte_tracker.py +++ b/ultralytics/trackers/byte_tracker.py @@ -254,7 +254,7 @@ class BYTETracker: removed_stracks (list[STrack]): List of removed tracks. frame_id (int): The current frame ID. args (Namespace): Command-line arguments. - max_time_lost (int): The maximum frames for a track to be considered as 'lost'. + max_frames_lost (int): The maximum frames for a track to be considered as 'lost'. kalman_filter (KalmanFilterXYAH): Kalman Filter object. 
Methods: @@ -289,7 +289,7 @@ class BYTETracker: self.frame_id = 0 self.args = args - self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer) + self.max_frames_lost = args.track_buffer self.kalman_filter = self.get_kalmanfilter() self.reset_id() @@ -391,7 +391,7 @@ class BYTETracker: activated_stracks.append(track) # Step 5: Update state for track in self.lost_stracks: - if self.frame_id - track.end_frame > self.max_time_lost: + if self.frame_id - track.end_frame > self.max_frames_lost: track.mark_removed() removed_stracks.append(track) diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py index faa2e3e5a9..f3ec0da3f5 100644 --- a/ultralytics/utils/benchmarks.py +++ b/ultralytics/utils/benchmarks.py @@ -323,7 +323,7 @@ class RF100Benchmark: yaml_data = YAML.load(path) yaml_data["train"] = "train/images" yaml_data["val"] = "valid/images" - YAML.dump(yaml_data, path) + YAML.save(path, yaml_data) def evaluate(self, yaml_path: str, val_log_file: str, eval_log_file: str, list_ind: int): """Evaluate model performance on validation results. 
diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py index 0c6cb2ec45..5b35c326bf 100644 --- a/ultralytics/utils/callbacks/base.py +++ b/ultralytics/utils/callbacks/base.py @@ -8,67 +8,69 @@ from copy import deepcopy def on_pretrain_routine_start(trainer): - """Called before the pretraining routine starts.""" + """Called at the beginning of the pre-training routine, before data loading and model setup.""" pass def on_pretrain_routine_end(trainer): - """Called after the pretraining routine ends.""" + """Called at the end of the pre-training routine, after data loading and model setup are complete.""" pass def on_train_start(trainer): - """Called when the training starts.""" + """Called when the training starts, before the first epoch begins.""" pass def on_train_epoch_start(trainer): - """Called at the start of each training epoch.""" + """Called at the start of each training epoch, before batch iteration begins.""" pass def on_train_batch_start(trainer): - """Called at the start of each training batch.""" + """Called at the start of each training batch, before the forward pass.""" pass def optimizer_step(trainer): - """Called when the optimizer takes a step.""" + """Called during the optimizer step. Reserved for custom integrations; not called by default.""" pass def on_before_zero_grad(trainer): - """Called before the gradients are set to zero.""" + """Called before the gradients are set to zero. Reserved for custom integrations; not called by default.""" pass def on_train_batch_end(trainer): - """Called at the end of each training batch.""" + """Called at the end of each training batch, after the backward pass. Optimizer step may be deferred by + accumulation. 
+ """ pass def on_train_epoch_end(trainer): - """Called at the end of each training epoch.""" + """Called at the end of each training epoch, after all batches but before validation.""" pass def on_fit_epoch_end(trainer): - """Called at the end of each fit epoch (train + val).""" + """Called at the end of each fit epoch (train + val), after validation and any checkpoint save.""" pass def on_model_save(trainer): - """Called when the model is saved.""" + """Called when the model checkpoint is saved, after validation.""" pass def on_train_end(trainer): - """Called when the training ends.""" + """Called when the training ends, after final evaluation of the best model.""" pass def on_params_update(trainer): - """Called when the model parameters are updated.""" + """Called when the model parameters are updated. Reserved for custom integrations; not called by default.""" pass diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py index 9d8d1a9c4a..b7e8dc3634 100644 --- a/ultralytics/utils/callbacks/wb.py +++ b/ultralytics/utils/callbacks/wb.py @@ -129,13 +129,19 @@ def on_pretrain_routine_start(trainer): """Initialize and start wandb project if module is present.""" if not wb.run: from datetime import datetime + from pathlib import Path name = str(trainer.args.name).replace("/", "-").replace(" ", "_") + latest_run = Path(trainer.save_dir) / "wandb" / "latest-run" + resuming = trainer.args.resume and latest_run.exists() wb.init( project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics", name=name, config=vars(trainer.args), - id=f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}", # add unique id + id=latest_run.resolve().name.split("-", 2)[2] + if resuming + else f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}", + resume="allow" if resuming else None, dir=str(trainer.save_dir), ) diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 4471793c4d..e6a217363b 100644 --- 
a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -478,7 +478,10 @@ def check_requirements(requirements=ROOT.parent / "requirements.txt", exclude=() text=True, ) return subprocess.check_output( - f"pip install --no-cache-dir {packages} {commands}", shell=True, stderr=subprocess.STDOUT, text=True + f'"{sys.executable}" -m pip install --no-cache-dir {packages} {commands}', + shell=True, + stderr=subprocess.STDOUT, + text=True, ) s = " ".join(f'"{x}"' for x in pkgs) # console string diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py index f36d1ffe0e..a17e18d90b 100644 --- a/ultralytics/utils/downloads.py +++ b/ultralytics/utils/downloads.py @@ -314,67 +314,73 @@ def safe_download( >>> link = "https://ultralytics.com/assets/bus.jpg" >>> path = safe_download(link) """ - gdrive = url.startswith("https://drive.google.com/") # check if the URL is a Google Drive link - if gdrive: - url, file = get_google_drive_file_info(url) - url = url.replace(" ", "%20") # encode spaces for curl/urllib compatibility + url = str(url) + if "://" not in url and Path(url).is_file(): # local file path ('://' check required in Windows Python<3.10) + f = Path(url) + else: + gdrive = url.startswith("https://drive.google.com/") # check if the URL is a Google Drive link + if gdrive: + url, file = get_google_drive_file_info(url) + url = url.replace(" ", "%20") # encode spaces for curl/urllib compatibility - f = Path(dir or ".") / (file or url2file(url)) # URL converted to filename - if "://" not in str(url) and Path(url).is_file(): # URL exists ('://' check required in Windows Python<3.10) - f = Path(url) # filename - elif not f.is_file(): # URL and file do not exist - uri = (url if gdrive else clean_url(url)).replace(ASSETS_URL, "https://ultralytics.com/assets") # clean - desc = f"Downloading {uri} to '{f}'" - f.parent.mkdir(parents=True, exist_ok=True) # make directory if missing - curl_installed = shutil.which("curl") - for i in range(retry + 1): - try: 
- if (curl or i > 0) and curl_installed: # curl download with retry, continue - s = "sS" * (not progress) # silent - r = subprocess.run(["curl", "-#", f"-{s}L", url, "-o", f, "--retry", "3", "-C", "-"]).returncode - assert r == 0, f"Curl return value {r}" - expected_size = None # Can't get size with curl - else: # urllib download - with request.urlopen(url) as response: - expected_size = int(response.getheader("Content-Length", 0)) - if i == 0 and expected_size > 1048576: - check_disk_space(expected_size, path=f.parent) - buffer_size = max(8192, min(1048576, expected_size // 1000)) if expected_size else 8192 - with TQDM( - total=expected_size, - desc=desc, - disable=not progress, - unit="B", - unit_scale=True, - unit_divisor=1024, - ) as pbar: - with open(f, "wb") as f_opened: - while True: - data = response.read(buffer_size) - if not data: - break - f_opened.write(data) - pbar.update(len(data)) + f = Path(dir or ".") / (file or url2file(url)) # URL converted to filename + if not f.is_file(): # URL and file do not exist + uri = (url if gdrive else clean_url(url)).replace(ASSETS_URL, "https://ultralytics.com/assets") # clean + desc = f"Downloading {uri} to '{f}'" + f.parent.mkdir(parents=True, exist_ok=True) # make directory if missing + curl_installed = shutil.which("curl") + for i in range(retry + 1): + try: + if (curl or i > 0) and curl_installed: # curl download with retry, continue + s = "sS" * (not progress) # silent + r = subprocess.run(["curl", "-#", f"-{s}L", url, "-o", f, "--retry", "3", "-C", "-"]).returncode + assert r == 0, f"Curl return value {r}" + expected_size = None # Can't get size with curl + else: # urllib download + with request.urlopen(url) as response: + expected_size = int(response.getheader("Content-Length", 0)) + if i == 0 and expected_size > 1048576: + check_disk_space(expected_size, path=f.parent) + buffer_size = max(8192, min(1048576, expected_size // 1000)) if expected_size else 8192 + with TQDM( + total=expected_size, + desc=desc, + 
disable=not progress, + unit="B", + unit_scale=True, + unit_divisor=1024, + ) as pbar: + with open(f, "wb") as f_opened: + while True: + data = response.read(buffer_size) + if not data: + break + f_opened.write(data) + pbar.update(len(data)) - if f.exists(): - file_size = f.stat().st_size - if file_size > min_bytes: - # Check if download is complete (only if we have expected_size) - if expected_size and file_size != expected_size: - LOGGER.warning( - f"Partial download: {file_size}/{expected_size} bytes ({file_size / expected_size * 100:.1f}%)" - ) - else: - break # success - f.unlink() # remove partial downloads - except MemoryError: - raise # Re-raise immediately - no point retrying if insufficient disk space - except Exception as e: - if i == 0 and not is_online(): - raise ConnectionError(emojis(f"โŒ Download failure for {uri}. Environment may be offline.")) from e - elif i >= retry: - raise ConnectionError(emojis(f"โŒ Download failure for {uri}. Retry limit reached. {e}")) from e - LOGGER.warning(f"Download failure, retrying {i + 1}/{retry} {uri}... {e}") + if f.exists(): + file_size = f.stat().st_size + if file_size > min_bytes: + # Check if download is complete (only if we have expected_size) + if expected_size and file_size != expected_size: + LOGGER.warning( + f"Partial download: {file_size}/{expected_size} bytes ({file_size / expected_size * 100:.1f}%)" + ) + else: + break # success + f.unlink() # remove partial downloads + except MemoryError: + raise # Re-raise immediately - no point retrying if insufficient disk space + except Exception as e: + if i == 0 and not is_online(): + raise ConnectionError( + emojis(f"โŒ Download failure for {uri}. Environment may be offline.") + ) from e + elif i >= retry: + raise ConnectionError( + emojis(f"โŒ Download failure for {uri}. Retry limit reached. {e}") + ) from e + LOGGER.warning(f"Download failure, retrying {i + 1}/{retry} {uri}... 
{e}") if unzip and f.exists() and f.suffix in {"", ".zip", ".tar", ".gz"}: from zipfile import is_zipfile diff --git a/ultralytics/utils/export/axelera.py b/ultralytics/utils/export/axelera.py index 27ef066f52..35cd24f516 100644 --- a/ultralytics/utils/export/axelera.py +++ b/ultralytics/utils/export/axelera.py @@ -16,24 +16,26 @@ from ultralytics.utils.checks import check_requirements def torch2axelera( model: torch.nn.Module, - file: str | Path, + output_dir: Path | str, calibration_dataset: torch.utils.data.DataLoader, transform_fn: Callable[[Any], np.ndarray], + model_name: str = "model", metadata: dict | None = None, prefix: str = "", -) -> Path: +) -> str: """Convert a YOLO model to Axelera format. Args: model (torch.nn.Module): Source YOLO model for quantization. - file (str | Path): Source model file path used to derive output names. + output_dir (Path | str): Directory to save the exported Axelera model. calibration_dataset (torch.utils.data.DataLoader): Calibration dataloader for quantization. transform_fn (Callable[[Any], np.ndarray]): Calibration preprocessing transform function. + model_name (str, optional): Name for the compiled model. Defaults to "model". metadata (dict | None, optional): Optional metadata to save as YAML. Defaults to None. prefix (str, optional): Prefix for log messages. Defaults to "". Returns: - (Path): Path to exported Axelera model directory. + (str): Path to exported Axelera model directory. 
""" prev_protobuf = os.environ.get("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" @@ -42,7 +44,7 @@ def torch2axelera( except ImportError: check_requirements( "axelera-devkit==1.6.0rc3", - cmds="--extra-index-url https://software.axelera.ai/artifactory/api/pypi/axelera-pypi/simple", + cmds="--extra-index-url https://software.axelera.ai/artifactory/api/pypi/axelera-pypi/simple --pre", ) from axelera import compiler @@ -51,10 +53,8 @@ def torch2axelera( LOGGER.info(f"\n{prefix} starting export with Axelera compiler...") - file = Path(file) - model_name = file.stem - export_path = Path(f"{model_name}_axelera_model") - export_path.mkdir(exist_ok=True) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) axelera_model_metadata = extract_ultralytics_metadata(model) config = CompilerConfig( @@ -71,22 +71,22 @@ def torch2axelera( config=config, transform_fn=transform_fn, ) - compiler.compile(model=qmodel, config=config, output_dir=export_path) + compiler.compile(model=qmodel, config=config, output_dir=output_dir) for artifact in [f"{model_name}.axm", "compiler_config_final.toml"]: artifact_path = Path(artifact) if artifact_path.exists(): - artifact_path.replace(export_path / artifact_path.name) + artifact_path.replace(output_dir / artifact_path.name) # Remove intermediate compiler artifacts, keeping only the compiled model and config. 
keep_suffixes = {".axm"} keep_names = {"compiler_config_final.toml", "metadata.yaml"} - for f in export_path.iterdir(): + for f in output_dir.iterdir(): if f.is_file() and f.suffix not in keep_suffixes and f.name not in keep_names: f.unlink() if metadata is not None: - YAML.save(export_path / "metadata.yaml", metadata) + YAML.save(output_dir / "metadata.yaml", metadata) # Restore original PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION value if prev_protobuf is None: @@ -94,4 +94,4 @@ def torch2axelera( else: os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = prev_protobuf - return export_path + return str(output_dir) diff --git a/ultralytics/utils/export/coreml.py b/ultralytics/utils/export/coreml.py index 12c19dd129..6c6c72f15f 100644 --- a/ultralytics/utils/export/coreml.py +++ b/ultralytics/utils/export/coreml.py @@ -46,7 +46,7 @@ class IOSDetectModel(nn.Module): def pipeline_coreml( model: Any, - output_shape: tuple, + output_shape: tuple[int, ...], metadata: dict, mlmodel: bool = False, iou: float = 0.45, @@ -59,7 +59,7 @@ def pipeline_coreml( Args: model: CoreML model. - output_shape (tuple): Output shape tuple from the exporter. + output_shape (tuple[int, ...]): Output shape tuple from the exporter. metadata (dict): Model metadata. mlmodel (bool): Whether the model is an MLModel (vs MLProgram). iou (float): IoU threshold for NMS. @@ -168,13 +168,13 @@ def torch2coreml( inputs: list, im: torch.Tensor, classifier_names: list[str] | None, - coreml_file: Path | str | None = None, + output_file: Path | str | None = None, mlmodel: bool = False, half: bool = False, int8: bool = False, metadata: dict | None = None, prefix: str = "", -): +) -> Any: """Export a PyTorch model to CoreML ``.mlpackage`` or ``.mlmodel`` format. Args: @@ -182,7 +182,7 @@ def torch2coreml( inputs (list): CoreML input descriptions for the model. im (torch.Tensor): Example input tensor for tracing. classifier_names (list[str] | None): Class names for classifier config, or None if not a classifier. 
- coreml_file (Path | str | None): Output file path, or None to skip saving. + output_file (Path | str | None): Output file path, or None to skip saving. mlmodel (bool): Whether to export as ``.mlmodel`` (neural network) instead of ``.mlpackage`` (ML program). half (bool): Whether to quantize to FP16. int8 (bool): Whether to quantize to INT8. @@ -229,14 +229,14 @@ def torch2coreml( ct_model.version = m.pop("version", "") ct_model.user_defined_metadata.update({k: str(v) for k, v in m.items()}) - if coreml_file is not None: + if output_file is not None: try: - ct_model.save(str(coreml_file)) # save *.mlpackage + ct_model.save(str(output_file)) # save *.mlpackage except Exception as e: LOGGER.warning( f"{prefix} CoreML export to *.mlpackage failed ({e}), reverting to *.mlmodel export. " f"Known coremltools Python 3.11 and Windows bugs https://github.com/apple/coremltools/issues/1928." ) - coreml_file = Path(coreml_file).with_suffix(".mlmodel") - ct_model.save(str(coreml_file)) + output_file = Path(output_file).with_suffix(".mlmodel") + ct_model.save(str(output_file)) return ct_model diff --git a/ultralytics/utils/export/engine.py b/ultralytics/utils/export/engine.py index a2ce93e458..ab619761c4 100644 --- a/ultralytics/utils/export/engine.py +++ b/ultralytics/utils/export/engine.py @@ -44,46 +44,54 @@ def best_onnx_opset(onnx: types.ModuleType, cuda: bool = False) -> int: @ThreadingLocked() def torch2onnx( - torch_model: torch.nn.Module, - im: torch.Tensor, - onnx_file: str, + model: torch.nn.Module, + im: torch.Tensor | tuple[torch.Tensor, ...], + output_file: Path | str, opset: int = 14, - input_names: list[str] = ["images"], - output_names: list[str] = ["output0"], - dynamic: bool | dict = False, -) -> None: + input_names: list[str] | None = None, + output_names: list[str] | None = None, + dynamic: dict | None = None, +) -> str: """Export a PyTorch model to ONNX format. Args: - torch_model (torch.nn.Module): The PyTorch model to export. 
- im (torch.Tensor): Example input tensor for the model. - onnx_file (str): Path to save the exported ONNX file. + model (torch.nn.Module): The PyTorch model to export. + im (torch.Tensor | tuple[torch.Tensor, ...]): Example input tensor(s) for tracing. + output_file (Path | str): Path to save the exported ONNX file. opset (int): ONNX opset version to use for export. - input_names (list[str]): List of input tensor names. - output_names (list[str]): List of output tensor names. - dynamic (bool | dict, optional): Whether to enable dynamic axes. + input_names (list[str] | None): List of input tensor names. Defaults to ``["images"]``. + output_names (list[str] | None): List of output tensor names. Defaults to ``["output0"]``. + dynamic (dict | None): Dictionary specifying dynamic axes for inputs and outputs. + + Returns: + (str): Path to the exported ONNX file. Notes: Setting `do_constant_folding=True` may cause issues with DNN inference for torch>=1.12. """ + if input_names is None: + input_names = ["images"] + if output_names is None: + output_names = ["output0"] kwargs = {"dynamo": False} if TORCH_2_4 else {} torch.onnx.export( - torch_model, + model, im, - onnx_file, + output_file, verbose=False, opset_version=opset, do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False input_names=input_names, output_names=output_names, - dynamic_axes=dynamic or None, + dynamic_axes=dynamic, **kwargs, ) + return str(output_file) def onnx2engine( onnx_file: str, - engine_file: str | None = None, + output_file: Path | str | None = None, workspace: int | None = None, half: bool = False, int8: bool = False, @@ -94,12 +102,12 @@ def onnx2engine( metadata: dict | None = None, verbose: bool = False, prefix: str = "", -) -> None: +) -> str: """Export a YOLO model to TensorRT engine format. Args: onnx_file (str): Path to the ONNX file to be converted. - engine_file (str | None): Path to save the generated TensorRT engine file. 
+ output_file (Path | str | None): Path to save the generated TensorRT engine file. workspace (int | None): Workspace size in GB for TensorRT. half (bool, optional): Enable FP16 precision. int8 (bool, optional): Enable INT8 precision. @@ -111,6 +119,9 @@ def onnx2engine( verbose (bool, optional): Enable verbose logging. prefix (str, optional): Prefix for log messages. + Returns: + (str): Path to the exported engine file. + Raises: ValueError: If DLA is enabled on non-Jetson devices or required precision is not set. RuntimeError: If the ONNX file cannot be parsed. @@ -122,7 +133,7 @@ def onnx2engine( """ import tensorrt as trt - engine_file = engine_file or Path(onnx_file).with_suffix(".engine") + output_file = output_file or Path(onnx_file).with_suffix(".engine") logger = trt.Logger(trt.Logger.INFO) if verbose: @@ -178,7 +189,7 @@ def onnx2engine( if int8 and not is_trt10: # deprecated in TensorRT 10, causes internal errors config.set_calibration_profile(profile) - LOGGER.info(f"{prefix} building {'INT8' if int8 else 'FP' + ('16' if half else '32')} engine as {engine_file}") + LOGGER.info(f"{prefix} building {'INT8' if int8 else 'FP' + ('16' if half else '32')} engine as {output_file}") if int8: config.set_flag(trt.BuilderFlag.INT8) config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED @@ -263,16 +274,17 @@ def onnx2engine( engine = builder.build_serialized_network(network, config) if engine is None: raise RuntimeError("TensorRT engine build failed, check logs for errors") - with open(engine_file, "wb") as t: + with open(output_file, "wb") as t: if metadata is not None: meta = json.dumps(metadata) t.write(len(meta).to_bytes(4, byteorder="little", signed=True)) t.write(meta.encode()) t.write(engine) else: - with builder.build_engine(network, config) as engine, open(engine_file, "wb") as t: + with builder.build_engine(network, config) as engine, open(output_file, "wb") as t: if metadata is not None: meta = json.dumps(metadata) t.write(len(meta).to_bytes(4, 
byteorder="little", signed=True)) t.write(meta.encode()) t.write(engine.serialize()) + return str(output_file) diff --git a/ultralytics/utils/export/executorch.py b/ultralytics/utils/export/executorch.py index 5606be8acc..15e00805b9 100644 --- a/ultralytics/utils/export/executorch.py +++ b/ultralytics/utils/export/executorch.py @@ -39,8 +39,8 @@ def _executorch_kpts_decode(self, kpts: torch.Tensor, is_pose26: bool = False) - def torch2executorch( model: torch.nn.Module, - file: Path | str, - sample_input: torch.Tensor, + im: torch.Tensor, + output_dir: Path | str, metadata: dict | None = None, prefix: str = "", ) -> str: @@ -48,8 +48,8 @@ def torch2executorch( Args: model (torch.nn.Module): The PyTorch model to export. - file (Path | str): Source model file path used to derive output names. - sample_input (torch.Tensor): Example input tensor for tracing/export. + im (torch.Tensor): Example input tensor for tracing/export. + output_dir (Path | str): Directory to save the exported ExecuTorch model. metadata (dict | None, optional): Optional metadata to save as YAML. prefix (str, optional): Prefix for log messages. 
@@ -62,13 +62,12 @@ def torch2executorch( LOGGER.info(f"\n{prefix} starting export with ExecuTorch {executorch_version.__version__}...") - file = Path(file) - output_dir = Path(str(file).replace(file.suffix, "_executorch_model")) + output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) - pte_file = output_dir / file.with_suffix(".pte").name + pte_file = output_dir / "model.pte" et_program = to_edge_transform_and_lower( - torch.export.export(model, (sample_input,)), + torch.export.export(model, (im,)), partitioner=[XnnpackPartitioner()], ).to_executorch() pte_file.write_bytes(et_program.buffer) diff --git a/ultralytics/utils/export/imx.py b/ultralytics/utils/export/imx.py index ba8e462d71..21689891fa 100644 --- a/ultralytics/utils/export/imx.py +++ b/ultralytics/utils/export/imx.py @@ -203,7 +203,7 @@ class NMSWrapper(torch.nn.Module): def torch2imx( model: torch.nn.Module, - file: Path | str, + output_dir: Path | str, conf: float, iou: float, max_det: int, @@ -211,7 +211,7 @@ def torch2imx( gptq: bool = False, dataset=None, prefix: str = "", -): +) -> str: """Export YOLO model to IMX format for deployment on Sony IMX500 devices. This function quantizes a YOLO model using Model Compression Toolkit (MCT) and exports it to IMX format compatible @@ -220,7 +220,7 @@ def torch2imx( Args: model (torch.nn.Module): The YOLO model to export. Must be YOLOv8n or YOLO11n. - file (Path | str): Output file path for the exported model. + output_dir (Path | str): Directory to save the exported IMX model. conf (float): Confidence threshold for NMS post-processing. iou (float): IoU threshold for NMS post-processing. max_det (int): Maximum number of detections to return. @@ -231,7 +231,7 @@ def torch2imx( prefix (str, optional): Logging prefix string. Defaults to "". Returns: - (Path): Path to the exported IMX model directory. + (str): Path to the exported IMX model directory. Raises: ValueError: If the model is not a supported YOLOv8n or YOLO11n variant. 
@@ -239,7 +239,7 @@ def torch2imx( Examples: >>> from ultralytics import YOLO >>> model = YOLO("yolo11n.pt") - >>> path = torch2imx(model, "model.imx", conf=0.25, iou=0.7, max_det=300) + >>> path = torch2imx(model, "output_dir/", conf=0.25, iou=0.7, max_det=300) Notes: - Requires model_compression_toolkit, onnx, edgemdt_tpc, and edge-mdt-cl packages @@ -309,9 +309,9 @@ def torch2imx( task=model.task, ) - f = Path(str(file).replace(file.suffix, "_imx_model")) - f.mkdir(exist_ok=True) - onnx_model = f / Path(str(file.name).replace(file.suffix, "_imx.onnx")) # js dir + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + onnx_model = output_dir / "model_imx.onnx" with onnx_export_patch(): mct.exporter.pytorch_export_model( @@ -319,7 +319,7 @@ def torch2imx( ) model_onnx = onnx.load(onnx_model) # load onnx model - for k, v in metadata.items(): + for k, v in (metadata or {}).items(): meta = model_onnx.metadata_props.add() meta.key, meta.value = k, str(v) @@ -334,12 +334,12 @@ def torch2imx( raise FileNotFoundError("imxconv-pt not found. Install with: pip install imx500-converter[pt]") subprocess.run( - [str(imxconv), "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"], + [str(imxconv), "-i", str(onnx_model), "-o", str(output_dir), "--no-input-persistency", "--overwrite-output"], check=True, ) # Needed for imx models. 
- with open(f / "labels.txt", "w", encoding="utf-8") as file: - file.writelines([f"{name}\n" for _, name in model.names.items()]) + with open(output_dir / "labels.txt", "w", encoding="utf-8") as labels_file: + labels_file.writelines([f"{name}\n" for _, name in model.names.items()]) - return f + return str(output_dir) diff --git a/ultralytics/utils/export/mnn.py b/ultralytics/utils/export/mnn.py index 3cebf890ec..0142f83385 100644 --- a/ultralytics/utils/export/mnn.py +++ b/ultralytics/utils/export/mnn.py @@ -9,8 +9,8 @@ from ultralytics.utils import LOGGER def onnx2mnn( - f_onnx: str, - file: Path | str, + onnx_file: str, + output_file: Path | str, half: bool = False, int8: bool = False, metadata: dict | None = None, @@ -19,8 +19,8 @@ def onnx2mnn( """Convert an ONNX model to MNN format. Args: - f_onnx (str): Path to the source ONNX file. - file (Path | str): Source model path used to derive the output ``.mnn`` path. + onnx_file (str): Path to the source ONNX file. + output_file (Path | str): Path to save the exported MNN model. half (bool): Whether to enable FP16 conversion. int8 (bool): Whether to enable INT8 weight quantization. metadata (dict | None): Optional metadata embedded via ``--bizCode``. 
@@ -33,23 +33,31 @@ def onnx2mnn( from ultralytics.utils.torch_utils import TORCH_1_10 assert TORCH_1_10, "MNN export requires torch>=1.10.0 to avoid segmentation faults" - assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}" + assert Path(onnx_file).exists(), f"failed to export ONNX file: {onnx_file}" check_requirements("MNN>=2.9.6") import MNN from MNN.tools import mnnconvert LOGGER.info(f"\n{prefix} starting export with MNN {MNN.version()}...") - file = Path(file) - f = str(file.with_suffix(".mnn")) # MNN model file - mnn_args = ["", "-f", "ONNX", "--modelFile", f_onnx, "--MNNModel", f, "--bizCode", json.dumps(metadata or {})] + mnn_args = [ + "", + "-f", + "ONNX", + "--modelFile", + onnx_file, + "--MNNModel", + str(output_file), + "--bizCode", + json.dumps(metadata or {}), + ] if int8: mnn_args.extend(("--weightQuantBits", "8")) if half: mnn_args.append("--fp16") mnnconvert.convert(mnn_args) # Remove scratch file created during model convert optimize - convert_scratch = file.parent / ".__convert_external_data.bin" + convert_scratch = Path(output_file).parent / ".__convert_external_data.bin" if convert_scratch.exists(): convert_scratch.unlink() - return f + return str(output_file) diff --git a/ultralytics/utils/export/ncnn.py b/ultralytics/utils/export/ncnn.py index 28d33517d1..177e267ee7 100644 --- a/ultralytics/utils/export/ncnn.py +++ b/ultralytics/utils/export/ncnn.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os from pathlib import Path import torch @@ -13,7 +12,7 @@ from ultralytics.utils import LOGGER, YAML def torch2ncnn( model: torch.nn.Module, im: torch.Tensor, - file: Path | str, + output_dir: Path | str, half: bool = False, metadata: dict | None = None, device: torch.device | None = None, @@ -24,7 +23,7 @@ def torch2ncnn( Args: model (torch.nn.Module): The PyTorch model to export. im (torch.Tensor): Example input tensor for tracing. - file (Path | str): Source model path used to derive the output directory. 
+ output_dir (Path | str): Directory to save the exported NCNN model. half (bool): Whether to enable FP16 export. metadata (dict | None): Optional metadata saved as ``metadata.yaml``. device (torch.device | None): Device the model lives on. @@ -41,23 +40,22 @@ def torch2ncnn( import pnnx LOGGER.info(f"\n{prefix} starting export with NCNN {ncnn.__version__} and PNNX {pnnx.__version__}...") - file = Path(file) - f = Path(str(file).replace(file.suffix, f"_ncnn_model{os.sep}")) + output_dir = Path(output_dir) ncnn_args = dict( - ncnnparam=(f / "model.ncnn.param").as_posix(), - ncnnbin=(f / "model.ncnn.bin").as_posix(), - ncnnpy=(f / "model_ncnn.py").as_posix(), + ncnnparam=(output_dir / "model.ncnn.param").as_posix(), + ncnnbin=(output_dir / "model.ncnn.bin").as_posix(), + ncnnpy=(output_dir / "model_ncnn.py").as_posix(), ) pnnx_args = dict( - ptpath=(f / "model.pt").as_posix(), - pnnxparam=(f / "model.pnnx.param").as_posix(), - pnnxbin=(f / "model.pnnx.bin").as_posix(), - pnnxpy=(f / "model_pnnx.py").as_posix(), - pnnxonnx=(f / "model.pnnx.onnx").as_posix(), + ptpath=(output_dir / "model.pt").as_posix(), + pnnxparam=(output_dir / "model.pnnx.param").as_posix(), + pnnxbin=(output_dir / "model.pnnx.bin").as_posix(), + pnnxpy=(output_dir / "model_pnnx.py").as_posix(), + pnnxonnx=(output_dir / "model.pnnx.onnx").as_posix(), ) - f.mkdir(exist_ok=True) # make ncnn_model directory + output_dir.mkdir(parents=True, exist_ok=True) # make ncnn_model directory device_type = device.type if device is not None else "cpu" pnnx.export(model, inputs=im, **ncnn_args, **pnnx_args, fp16=half, device=device_type) @@ -65,5 +63,5 @@ def torch2ncnn( Path(f_debug).unlink(missing_ok=True) if metadata: - YAML.save(f / "metadata.yaml", metadata) # add metadata.yaml - return str(f) + YAML.save(output_dir / "metadata.yaml", metadata) # add metadata.yaml + return str(output_dir) diff --git a/ultralytics/utils/export/openvino.py b/ultralytics/utils/export/openvino.py index c804a982d5..d15f07dbda 
100644 --- a/ultralytics/utils/export/openvino.py +++ b/ultralytics/utils/export/openvino.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os from pathlib import Path from typing import Any @@ -13,25 +12,25 @@ from ultralytics.utils import LOGGER def torch2openvino( model: torch.nn.Module, - im: torch.Tensor, - file: Path | str | None = None, + im: torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor, ...], + output_dir: Path | str | None = None, dynamic: bool = False, half: bool = False, int8: bool = False, calibration_dataset: Any | None = None, ignored_scope: dict | None = None, prefix: str = "", -) -> str: +) -> Any: """Export a PyTorch model to OpenVINO format with optional INT8 quantization. Args: model (torch.nn.Module): The model to export (may be NMS-wrapped). - im (torch.Tensor): Example input tensor. - file (Path | str | None): Source model path used to derive output directory. + im (torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor, ...]): Example input tensor(s) for tracing. + output_dir (Path | str | None): Directory to save the exported OpenVINO model. dynamic (bool): Whether to use dynamic input shapes. half (bool): Whether to compress to FP16. int8 (bool): Whether to apply INT8 quantization. - calibration_dataset (nn.Dataset): Dataset for nncf.Dataset (required when ``int8=True``). + calibration_dataset (nncf.Dataset | None): Dataset for INT8 calibration (required when ``int8=True``). ignored_scope (dict | None): Kwargs passed to ``nncf.IgnoredScope`` for head patterns. prefix (str): Prefix for log messages. 
@@ -42,7 +41,8 @@ def torch2openvino( LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...") - ov_model = ov.convert_model(model, input=None if dynamic else [im.shape], example_input=im) + input_shape = [i.shape for i in im] if isinstance(im, (list, tuple)) else im.shape + ov_model = ov.convert_model(model, input=None if dynamic else input_shape, example_input=im) if int8: import nncf @@ -53,10 +53,9 @@ def torch2openvino( ignored_scope=ignored_scope, ) - if file is not None: - file = Path(file) - suffix = f"_{'int8_' if int8 else ''}openvino_model{os.sep}" - f = str(file).replace(file.suffix, suffix) - f_ov = str(Path(f) / file.with_suffix(".xml").name) - ov.save_model(ov_model, f_ov, compress_to_fp16=half) + if output_dir is not None: + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + output_file = output_dir / "model.xml" + ov.save_model(ov_model, output_file, compress_to_fp16=half) return ov_model diff --git a/ultralytics/utils/export/paddle.py b/ultralytics/utils/export/paddle.py index 841a01a919..7d3f467019 100644 --- a/ultralytics/utils/export/paddle.py +++ b/ultralytics/utils/export/paddle.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os from pathlib import Path import torch @@ -13,7 +12,7 @@ from ultralytics.utils import ARM64, IS_JETSON, LOGGER, YAML def torch2paddle( model: torch.nn.Module, im: torch.Tensor, - file: Path | str, + output_dir: Path | str, metadata: dict | None = None, prefix: str = "", ) -> str: @@ -22,7 +21,7 @@ def torch2paddle( Args: model (torch.nn.Module): The PyTorch model to export. im (torch.Tensor): Example input tensor for tracing. - file (Path | str): Source model path used to derive the output directory. + output_dir (Path | str): Directory to save the exported PaddlePaddle model. metadata (dict | None): Optional metadata saved as ``metadata.yaml``. prefix (str): Prefix for log messages. 
@@ -47,10 +46,8 @@ def torch2paddle( from x2paddle.convert import pytorch2paddle LOGGER.info(f"\n{prefix} starting export with X2Paddle {x2paddle.__version__}...") - file = Path(file) - f = str(file).replace(file.suffix, f"_paddle_model{os.sep}") - pytorch2paddle(module=model, save_dir=f, jit_type="trace", input_examples=[im]) # export + pytorch2paddle(module=model, save_dir=output_dir, jit_type="trace", input_examples=[im]) # export if metadata: - YAML.save(Path(f) / "metadata.yaml", metadata) # add metadata.yaml - return f + YAML.save(Path(output_dir) / "metadata.yaml", metadata) # add metadata.yaml + return str(output_dir) diff --git a/ultralytics/utils/export/rknn.py b/ultralytics/utils/export/rknn.py index 2f51a66bce..1c8da6ba9d 100644 --- a/ultralytics/utils/export/rknn.py +++ b/ultralytics/utils/export/rknn.py @@ -8,26 +8,28 @@ from ultralytics.utils import IS_COLAB, LOGGER, YAML def onnx2rknn( - f_onnx: str, + onnx_file: str, + output_dir: Path | str, name: str = "rk3588", metadata: dict | None = None, prefix: str = "", -) -> Path: +) -> str: """Export an ONNX model to RKNN format for Rockchip NPUs. Args: - f_onnx (str): Path to the source ONNX file (already exported, opset <=19). + onnx_file (str): Path to the source ONNX file (already exported, opset <=19). + output_dir (Path | str): Directory to save the exported RKNN model. name (str): Target platform name (e.g. ``"rk3588"``). metadata (dict | None): Metadata saved as ``metadata.yaml``. prefix (str): Prefix for log messages. Returns: - (Path): Path to the exported ``_rknn_model`` directory. + (str): Path to the exported ``_rknn_model`` directory. 
""" from ultralytics.utils.checks import check_requirements LOGGER.info(f"\n{prefix} starting export with rknn-toolkit2...") - check_requirements("rknn-toolkit2") + check_requirements("rknn-toolkit2>=2.3.2") check_requirements("onnx<1.19.0") # fix AttributeError: module 'onnx' has no attribute 'mapping' if IS_COLAB: @@ -38,14 +40,14 @@ def onnx2rknn( from rknn.api import RKNN - export_path = Path(f"{Path(f_onnx).stem}_rknn_model") - export_path.mkdir(exist_ok=True) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) rknn = RKNN(verbose=False) rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform=name) - rknn.load_onnx(model=f_onnx) + rknn.load_onnx(model=onnx_file) rknn.build(do_quantization=False) # TODO: Add quantization support - rknn.export_rknn(str(export_path / f"{Path(f_onnx).stem}-{name}.rknn")) + rknn.export_rknn(str(output_dir / f"{Path(onnx_file).stem}-{name}.rknn")) if metadata: - YAML.save(export_path / "metadata.yaml", metadata) - return export_path + YAML.save(output_dir / "metadata.yaml", metadata) + return str(output_dir) diff --git a/ultralytics/utils/export/tensorflow.py b/ultralytics/utils/export/tensorflow.py index c70a709875..322edb76c9 100644 --- a/ultralytics/utils/export/tensorflow.py +++ b/ultralytics/utils/export/tensorflow.py @@ -59,19 +59,19 @@ def _tf_kpts_decode(self, kpts: torch.Tensor, is_pose26: bool = False) -> torch. def onnx2saved_model( onnx_file: str, - output_dir: Path, + output_dir: Path | str, int8: bool = False, - images: np.ndarray = None, + images: np.ndarray | None = None, disable_group_convolution: bool = False, - prefix="", + prefix: str = "", ): """Convert an ONNX model to TensorFlow SavedModel format using onnx2tf. Args: onnx_file (str): ONNX file path. - output_dir (Path): Output directory path for the SavedModel. + output_dir (Path | str): Output directory path for the SavedModel. int8 (bool, optional): Enable INT8 quantization. Defaults to False. 
- images (np.ndarray, optional): Calibration images for INT8 quantization in BHWC format. + images (np.ndarray | None, optional): Calibration images for INT8 quantization in BHWC format. disable_group_convolution (bool, optional): Disable group convolution optimization. Defaults to False. prefix (str, optional): Logging prefix. Defaults to "". @@ -82,6 +82,7 @@ def onnx2saved_model( - Requires onnx2tf package. Downloads calibration data if INT8 quantization is enabled. - Removes temporary files and renames quantized models after conversion. """ + output_dir = Path(output_dir) # Pre-download calibration file to fix https://github.com/PINTO0309/onnx2tf/issues/545 onnx2tf_file = Path("calibration_image_sample_data_20x128x128x3_float32.npy") if not onnx2tf_file.exists(): @@ -118,7 +119,7 @@ def onnx2saved_model( verbosity="error", # note INT8-FP16 activation bug https://github.com/ultralytics/ultralytics/issues/15873 output_integer_quantized_tflite=int8, custom_input_op_name_np_data_path=np_data, - enable_batchmatmul_unfold=True and not int8, # fix lower no. of detected objects on GPU delegate + enable_batchmatmul_unfold=not int8, # fix lower no. of detected objects on GPU delegate output_signaturedefs=True, # fix error with Attention block group convolution disable_group_convolution=disable_group_convolution, # fix error with group convolution ) @@ -133,14 +134,17 @@ def onnx2saved_model( return keras_model -def keras2pb(keras_model, file: Path, prefix=""): +def keras2pb(keras_model, output_file: Path | str, prefix: str = "") -> str: """Convert a Keras model to TensorFlow GraphDef (.pb) format. Args: keras_model (keras.Model): Keras model to convert to frozen graph format. - file (Path): Output file path (suffix will be changed to .pb). + output_file (Path | str): Output file path (suffix will be changed to .pb). prefix (str, optional): Logging prefix. Defaults to "". + Returns: + (str): Path to the exported ``.pb`` file. 
+ Notes: Creates a frozen graph by converting variables to constants for inference optimization. """ @@ -152,10 +156,14 @@ def keras2pb(keras_model, file: Path, prefix=""): m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)) frozen_func = convert_variables_to_constants_v2(m) frozen_func.graph.as_graph_def() - tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(file.parent), name=file.name, as_text=False) + output_file = Path(output_file) + tf.io.write_graph( + graph_or_graph_def=frozen_func.graph, logdir=str(output_file.parent), name=output_file.name, as_text=False + ) + return str(output_file) -def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str = ""): +def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str = "") -> str: """Convert a TensorFlow Lite model to Edge TPU format using the Edge TPU compiler. Args: @@ -163,6 +171,9 @@ def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str output_dir (str | Path): Output directory path for the compiled Edge TPU model. prefix (str, optional): Logging prefix. Defaults to "". + Returns: + (str): Path to the exported Edge TPU model file. + Notes: Requires the Edge TPU compiler to be installed. The function compiles the TFLite model for optimal performance on Google's Edge TPU hardware accelerator. @@ -180,9 +191,10 @@ def tflite2edgetpu(tflite_file: str | Path, output_dir: str | Path, prefix: str ) LOGGER.info(f"{prefix} running '{cmd}'") subprocess.run(cmd, shell=True) + return str(Path(output_dir) / f"{Path(tflite_file).stem}_edgetpu.tflite") -def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = False, prefix: str = ""): +def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = False, prefix: str = "") -> str: """Convert a TensorFlow GraphDef (.pb) model to TensorFlow.js format. 
Args: @@ -192,6 +204,9 @@ def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = Fals int8 (bool, optional): Enable INT8 quantization. Defaults to False. prefix (str, optional): Logging prefix. Defaults to "". + Returns: + (str): Path to the exported TensorFlow.js model directory. + Notes: Requires tensorflowjs package. Uses tensorflowjs_converter command-line tool for conversion. Handles spaces in file paths and warns if output directory contains spaces. @@ -204,8 +219,8 @@ def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = Fals LOGGER.info(f"\n{prefix} starting export with tensorflowjs {tfjs.__version__}...") gd = tf.Graph().as_graph_def() # TF GraphDef - with open(pb_file, "rb") as file: - gd.ParseFromString(file.read()) + with open(pb_file, "rb") as f: + gd.ParseFromString(f.read()) outputs = ",".join(gd_outputs(gd)) LOGGER.info(f"\n{prefix} output node names: {outputs}") @@ -220,6 +235,7 @@ def pb2tfjs(pb_file: str, output_dir: str, half: bool = False, int8: bool = Fals if " " in output_dir: LOGGER.warning(f"{prefix} your model may not work correctly with spaces in path '{output_dir}'.") + return str(output_dir) def gd_outputs(gd): diff --git a/ultralytics/utils/export/torchscript.py b/ultralytics/utils/export/torchscript.py index 6805eac217..077b69f075 100644 --- a/ultralytics/utils/export/torchscript.py +++ b/ultralytics/utils/export/torchscript.py @@ -13,35 +13,34 @@ from ultralytics.utils import LOGGER, TORCH_VERSION def torch2torchscript( model: torch.nn.Module, im: torch.Tensor, - file: Path | str, + output_file: Path | str, optimize: bool = False, metadata: dict | None = None, prefix: str = "", -) -> Path: +) -> str: """Export a PyTorch model to TorchScript format. Args: model (torch.nn.Module): The PyTorch model to export (may be NMS-wrapped). im (torch.Tensor): Example input tensor for tracing. - file (Path | str): Source model file path used to derive output path. 
+ output_file (Path | str): Path to save the exported TorchScript model. optimize (bool): Whether to optimize for mobile deployment. metadata (dict | None): Optional metadata to embed in the TorchScript archive. prefix (str): Prefix for log messages. Returns: - (Path): Path to the exported ``.torchscript`` file. + (str): Path to the exported ``.torchscript`` file. """ LOGGER.info(f"\n{prefix} starting export with torch {TORCH_VERSION}...") - file = Path(file) - f = file.with_suffix(".torchscript") + output_file = str(output_file) ts = torch.jit.trace(model, im, strict=False) extra_files = {"config.txt": json.dumps(metadata or {})} # torch._C.ExtraFilesMap() if optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html LOGGER.info(f"{prefix} optimizing for mobile...") from torch.utils.mobile_optimizer import optimize_for_mobile - optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files) + optimize_for_mobile(ts)._save_for_lite_interpreter(output_file, _extra_files=extra_files) else: - ts.save(str(f), _extra_files=extra_files) - return f + ts.save(output_file, _extra_files=extra_files) + return output_file diff --git a/ultralytics/utils/logger.py b/ultralytics/utils/logger.py index 9df889149d..7c28575af5 100644 --- a/ultralytics/utils/logger.py +++ b/ultralytics/utils/logger.py @@ -270,6 +270,10 @@ class ConsoleLogger: """Flush the wrapped stream to propagate buffered output promptly during console capture.""" self.original.flush() + def isatty(self): + """Delegate isatty check to the original stream.""" + return self.original.isatty() + class _LogHandler(logging.Handler): """Lightweight logging handler.""" diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index 68402ec3fb..0b14597e75 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -349,6 +349,11 @@ class v8DetectionLoss: self.use_dfl = m.reg_max > 1 + # Class weights for handling imbalanced datasets + self.class_weights = 
getattr(model, "class_weights", None) + if self.class_weights is not None: + self.class_weights = self.class_weights.to(device).view(1, 1, -1) + self.assigner = TaskAlignedAssigner( topk=tal_topk, num_classes=self.nc, @@ -422,8 +427,11 @@ class v8DetectionLoss: target_scores_sum = max(target_scores.sum(), 1) - # Cls loss - loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + # Cls loss with optional class weighting + bce_loss = self.bce(pred_scores, target_scores.to(dtype)) # (bs, num_anchors, nc) + if self.class_weights is not None: + bce_loss *= self.class_weights + loss[1] = bce_loss.sum() / target_scores_sum # BCE # Bbox loss if fg_mask.sum(): diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py index cbdd334702..ea08f3d1f1 100644 --- a/ultralytics/utils/metrics.py +++ b/ultralytics/utils/metrics.py @@ -728,8 +728,8 @@ def compute_ap(recall: list[float], precision: list[float]) -> tuple[float, np.n mrec (np.ndarray): Modified recall curve with sentinel values added at the beginning and end. """ # Append sentinel values to beginning and end - mrec = np.concatenate(([0.0], recall, [1.0])) - mpre = np.concatenate(([1.0], precision, [0.0])) + mrec = np.concatenate(([0.0], recall, [recall[-1] if len(recall) else 1.0], [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0], [0.0])) # Compute the precision envelope mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index bbd30399f8..aeb2806126 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -70,7 +70,7 @@ class Profile(contextlib.ContextDecorator): return time.perf_counter() -def segment2box(segment, width: int = 640, height: int = 640): +def segment2box(segment: np.ndarray, width: int = 640, height: int = 640) -> np.ndarray: """Convert segment coordinates to bounding box coordinates. 
Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates. Applies @@ -99,22 +99,29 @@ def segment2box(segment, width: int = 640, height: int = 640): ) # xyxy -def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = True, xywh: bool = False): +def scale_boxes( + img1_shape: tuple[int, int], + boxes: torch.Tensor | np.ndarray, + img0_shape: tuple[int, int], + ratio_pad: tuple | None = None, + padding: bool = True, + xywh: bool = False, +) -> torch.Tensor | np.ndarray: """Rescale bounding boxes from one image shape to another. Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes. Supports both xyxy and xywh box formats. Args: - img1_shape (tuple): Shape of the source image (height, width). - boxes (torch.Tensor): Bounding boxes to rescale in format (N, 4). - img0_shape (tuple): Shape of the target image (height, width). + img1_shape (tuple[int, int]): Shape of the source image (height, width). + boxes (torch.Tensor | np.ndarray): Bounding boxes to rescale in format (N, 4). + img0_shape (tuple[int, int]): Shape of the target image (height, width). ratio_pad (tuple, optional): Tuple of (ratio, pad) for scaling. If None, calculated from image shapes. padding (bool): Whether boxes are based on YOLO-style augmented images with padding. xywh (bool): Whether box format is xywh (True) or xyxy (False). Returns: - (torch.Tensor): Rescaled bounding boxes in the same format as input. + (torch.Tensor | np.ndarray): Rescaled bounding boxes in the same format as input. 
""" if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new @@ -464,7 +471,7 @@ def crop_mask(masks: torch.Tensor, boxes: torch.Tensor) -> torch.Tensor: boxes = boxes.to(masks.device) n, h, w = masks.shape if n < 50 and not masks.is_cuda: # faster for fewer masks (predict) - for i, (x1, y1, x2, y2) in enumerate(boxes.round().int()): + for i, (x1, y1, x2, y2) in enumerate(boxes.clamp(min=0).round().int()): masks[i, :y1] = 0 masks[i, y2:] = 0 masks[i, :, :x1] = 0 diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py index c0a07a7514..c071ff7440 100644 --- a/ultralytics/utils/plotting.py +++ b/ultralytics/utils/plotting.py @@ -940,20 +940,19 @@ def plt_color_scatter(v, f, bins: int = 20, cmap: str = "viridis", alpha: float @plt_settings() -def plot_tune_results(csv_file: str = "tune_results.csv", exclude_zero_fitness_points: bool = True): - """Plot the evolution results stored in a 'tune_results.csv' file. The function generates a scatter plot for each - key in the CSV, color-coded based on fitness scores. The best-performing configurations are highlighted on - the plots. +def plot_tune_results(results_file: str = "tune_results.ndjson", exclude_zero_fitness_points: bool = True): + """Plot the evolution results stored in a tuning NDJSON file. Args: - csv_file (str, optional): Path to the CSV file containing the tuning results. + results_file (str, optional): Path to the NDJSON file containing the tuning results. exclude_zero_fitness_points (bool, optional): Don't include points with zero fitness in tuning plots. 
Examples: - >>> plot_tune_results("path/to/tune_results.csv") + >>> plot_tune_results("path/to/tune_results.ndjson") """ + import json + import matplotlib.pyplot as plt # scope for faster 'import ultralytics' - import polars as pl from scipy.ndimage import gaussian_filter1d def _save_one_file(file): @@ -962,19 +961,27 @@ def plot_tune_results(csv_file: str = "tune_results.csv", exclude_zero_fitness_p plt.close() LOGGER.info(f"Saved {file}") - # Scatter plots for each hyperparameter - csv_file = Path(csv_file) - data = pl.read_csv(csv_file, infer_schema_length=None) - num_metrics_columns = 1 - keys = [x.strip() for x in data.columns][num_metrics_columns:] - x = data.to_numpy() - fitness = x[:, 0] # fitness + results_file = Path(results_file) + with open(results_file, encoding="utf-8") as f: + records = [json.loads(line) for line in f if line.strip()] + if not records: + return + + keys = list(records[0].get("hyperparameters", {})) + x = np.array( + [[r.get("fitness", 0.0)] + [r.get("hyperparameters", {}).get(k, np.nan) for k in keys] for r in records], + dtype=float, + ) + len(x) + all_fitness = x[:, 0] # fitness + zero_mask = slice(None) if exclude_zero_fitness_points: - mask = fitness > 0 # exclude zero-fitness points - x, fitness = x[mask], fitness[mask] - if len(fitness) == 0: + zero_mask = all_fitness > 0 # exclude zero-fitness points + x, all_fitness = x[zero_mask], all_fitness[zero_mask] + if len(all_fitness) == 0: LOGGER.warning("No valid fitness values to plot (all iterations may have failed)") return + fitness = all_fitness.copy() # Iterative sigma rejection on lower bound only for _ in range(3): # max 3 iterations mean, std = fitness.mean(), fitness.std() @@ -987,7 +994,7 @@ def plot_tune_results(csv_file: str = "tune_results.csv", exclude_zero_fitness_p n = math.ceil(len(keys) ** 0.5) # columns and rows in plot plt.figure(figsize=(10, 10), tight_layout=True) for i, k in enumerate(keys): - v = x[:, i + num_metrics_columns] + v = x[:, i + 1] mu = v[j] # 
best single result plt.subplot(n, n, i + 1) plt_color_scatter(v, fitness, cmap="viridis", alpha=0.8, edgecolors="none") @@ -996,19 +1003,23 @@ def plot_tune_results(csv_file: str = "tune_results.csv", exclude_zero_fitness_p plt.tick_params(axis="both", labelsize=8) # Set axis label size to 8 if i % n != 0: plt.yticks([]) - _save_one_file(csv_file.with_name("tune_scatter_plots.png")) + _save_one_file(results_file.with_name("tune_scatter_plots.png")) # Fitness vs iteration - x = range(1, len(fitness) + 1) + x = range(1, len(all_fitness) + 1) plt.figure(figsize=(10, 6), tight_layout=True) - plt.plot(x, fitness, marker="o", linestyle="none", label="fitness") - plt.plot(x, gaussian_filter1d(fitness, sigma=3), ":", label="smoothed", linewidth=2) # smoothing line + for dataset in sorted({k for r in records for k in r.get("datasets", {})}): + y = np.array([r.get("datasets", {}).get(dataset, {}).get("fitness", np.nan) for r in records], dtype=float) + if exclude_zero_fitness_points and not isinstance(zero_mask, slice): + y = y[zero_mask] + plt.plot(x, y, "o", markersize=5, alpha=0.8, label=dataset) + plt.plot(x, gaussian_filter1d(all_fitness, sigma=3), ":", color="0.35", label="smoothed mean", linewidth=2) plt.title("Fitness vs Iteration") plt.xlabel("Iteration") plt.ylabel("Fitness") plt.grid(True) plt.legend() - _save_one_file(csv_file.with_name("tune_fitness.png")) + _save_one_file(results_file.with_name("tune_fitness.png")) @plt_settings() diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py index fc4cdd35a4..1daa1a68b1 100644 --- a/ultralytics/utils/tuner.py +++ b/ultralytics/utils/tuner.py @@ -392,6 +392,7 @@ def run_ray_tune( "warmup_momentum": tune.uniform(0.0, 0.95), # warmup initial momentum "box": tune.uniform(1.0, 20.0), # box loss gain "cls": tune.uniform(0.1, 4.0), # cls loss gain (scale with pixels) + "cls_pw": tune.uniform(0.0, 1.0), # cls power weight (scale with pixels) "dfl": tune.uniform(0.4, 12.0), # dfl loss gain "hsv_h": 
tune.uniform(0.0, 0.1), # image HSV-Hue augmentation (fraction) "hsv_s": tune.uniform(0.0, 0.9), # image HSV-Saturation augmentation (fraction)