diff --git a/docs/en/guides/custom-trainer.md b/docs/en/guides/custom-trainer.md
index d4d5b5be9f..2d41034f4f 100644
--- a/docs/en/guides/custom-trainer.md
+++ b/docs/en/guides/custom-trainer.md
@@ -93,6 +93,7 @@ This logs the mean F1 score across all classes and a per-class breakdown after e
     | Attribute | Description |
     |---|---|
     | `f1` | F1 score per class |
+    | `image_metrics` | Per-image metrics dictionary with precision, recall, F1, TP, FP, and FN |
     | `p` | Precision per class |
     | `r` | Recall per class |
     | `ap50` | AP at IoU 0.5 per class |
diff --git a/docs/en/guides/model-evaluation-insights.md b/docs/en/guides/model-evaluation-insights.md
index c4bb46fcfd..2a54d435a5 100644
--- a/docs/en/guides/model-evaluation-insights.md
+++ b/docs/en/guides/model-evaluation-insights.md
@@ -104,6 +104,7 @@ If you want to get a deeper understanding of your YOLO26 model's performance, yo
         print("Mean results for different metrics:", results.box.mean_results)
         print("Mean precision:", results.box.mp)
         print("Mean recall:", results.box.mr)
+        print("Per-image metrics:", results.box.image_metrics)
         print("Precision:", results.box.p)
         print("Precision curve:", results.box.p_curve)
         print("Precision values:", results.box.prec_values)
@@ -112,7 +113,10 @@ If you want to get a deeper understanding of your YOLO26 model's performance, yo
         print("Recall curve:", results.box.r_curve)
         ```
 
-The results object also includes speed metrics like preprocess time, inference time, loss, and postprocess time. By analyzing these metrics, you can fine-tune and optimize your YOLO26 model for better performance, making it more effective for your specific use case.
+The results object also includes `image_metrics`, a per-image dictionary keyed by image filename with `precision`,
+`recall`, `f1`, `tp`, `fp`, and `fn`, as well as speed metrics like preprocess time, inference time, loss, and
+postprocess time. By analyzing these metrics, you can fine-tune and optimize your YOLO26 model for better performance,
+making it more effective for your specific use case.
 
 ## How Does Fine-Tuning Work?
 
diff --git a/docs/en/modes/val.md b/docs/en/modes/val.md
index 000e4cf640..fefff908f7 100644
--- a/docs/en/modes/val.md
+++ b/docs/en/modes/val.md
@@ -70,6 +70,7 @@ Validate a trained YOLO26n model [accuracy](https://www.ultralytics.com/glossary
         metrics.box.map50  # map50
         metrics.box.map75  # map75
         metrics.box.maps  # a list containing mAP50-95 for each category
+        metrics.box.image_metrics  # per-image metrics dictionary with precision, recall, F1, TP, FP, and FN
         ```
 
     === "CLI"
@@ -137,6 +138,42 @@ The below examples showcase YOLO model validation with custom arguments in Pytho
     print(results.confusion_matrix.to_df())
     ```
 
+!!! tip "Per-Image Precision, Recall, and F1"
+
+    Validation stores per-image precision, recall, F1, TP, FP, and FN metrics (at IoU threshold 0.5) for all tasks
+    except classification. Access them through `results.box.image_metrics` for detection and OBB, `results.seg.image_metrics`
+    for segmentation, and `results.pose.image_metrics` for pose after validation completes.
+
+    ```python
+    from ultralytics import YOLO
+
+    # Load a model
+    model = YOLO("yolo26n.pt")
+
+    # Validate and access per-image metrics
+    results = model.val(data="coco8.yaml")
+
+    # image_metrics is a dictionary with image filenames as keys
+    print(results.box.image_metrics)
+    # Output: {'image1.jpg': {'precision': 0.85, 'recall': 0.92, 'f1': 0.88, 'tp': 17, 'fp': 3, 'fn': 1}, ...}
+
+    # Access metrics for a specific image
+    results.box.image_metrics["image1.jpg"]  # {'precision': 0.85, 'recall': 0.92, 'f1': 0.88, 'tp': 17, 'fp': 3, 'fn': 1}
+    ```
+
+    Each entry in `image_metrics` contains the following keys:
+
+    | Key         | Description                                       |
+    |-------------|---------------------------------------------------|
+    | `precision` | Precision score for the image (`tp / (tp + fp)`). |
+    | `recall`    | Recall score for the image (`tp / (tp + fn)`).    |
+    | `f1`        | Harmonic mean of precision and recall.            |
+    | `tp`        | Number of true positives for the image.           |
+    | `fp`        | Number of false positives for the image.          |
+    | `fn`        | Number of false negatives for the image.          |
+
+    This feature is available for detection, segmentation, pose, and OBB tasks.
+
 | Method      | Return Type            | Description                                                                |
 | ----------- | ---------------------- | -------------------------------------------------------------------------- |
 | `summary()` | `List[Dict[str, Any]]` | Converts validation results to a summarized dictionary.                    |
@@ -187,6 +224,7 @@ print(metrics.box.map)  # mAP50-95
 print(metrics.box.map50)  # mAP50
 print(metrics.box.map75)  # mAP75
 print(metrics.box.maps)  # list of mAP50-95 for each category
+print(metrics.box.image_metrics)  # per-image metrics dictionary with precision, recall, F1, TP, FP, and FN
 ```
 
 For a complete performance evaluation, it's crucial to review all these metrics. For more details, refer to the [Key Features of Val Mode](#key-features-of-val-mode).
diff --git a/docs/en/tasks/detect.md b/docs/en/tasks/detect.md
index 17ebd09925..9e80fcac7e 100644
--- a/docs/en/tasks/detect.md
+++ b/docs/en/tasks/detect.md
@@ -97,6 +97,7 @@ Validate trained YOLO26n model [accuracy](https://www.ultralytics.com/glossary/a
         metrics.box.map50  # map50
         metrics.box.map75  # map75
         metrics.box.maps  # a list containing mAP50-95 for each category
+        metrics.box.image_metrics  # per-image metrics dictionary with precision, recall, F1, TP, FP, and FN
         ```
 
     === "CLI"
diff --git a/docs/en/tasks/obb.md b/docs/en/tasks/obb.md
index aa71d9136f..26c6b4c83c 100644
--- a/docs/en/tasks/obb.md
+++ b/docs/en/tasks/obb.md
@@ -127,6 +127,7 @@ Validate trained YOLO26n-obb model [accuracy](https://www.ultralytics.com/glossa
         metrics.box.map50  # map50(B)
         metrics.box.map75  # map75(B)
         metrics.box.maps  # a list containing mAP50-95(B) for each category
+        metrics.box.image_metrics  # per-image metrics dictionary with precision, recall, F1, TP, FP, and FN
         ```
 
     === "CLI"
diff --git a/docs/en/tasks/pose.md b/docs/en/tasks/pose.md
index e9746711be..fed3c44c4a 100644
--- a/docs/en/tasks/pose.md
+++ b/docs/en/tasks/pose.md
@@ -120,10 +120,12 @@ Validate trained YOLO26n-pose model [accuracy](https://www.ultralytics.com/gloss
         metrics.box.map50  # map50
         metrics.box.map75  # map75
         metrics.box.maps  # a list containing mAP50-95 for each category
+        metrics.box.image_metrics  # per-image metrics dictionary for box with precision, recall, F1, TP, FP, and FN
         metrics.pose.map  # map50-95(P)
         metrics.pose.map50  # map50(P)
         metrics.pose.map75  # map75(P)
         metrics.pose.maps  # a list containing mAP50-95(P) for each category
+        metrics.pose.image_metrics  # per-image metrics dictionary for pose with precision, recall, F1, TP, FP, and FN
         ```
 
     === "CLI"
diff --git a/docs/en/tasks/segment.md b/docs/en/tasks/segment.md
index 795ba4440f..e63970e8f7 100644
--- a/docs/en/tasks/segment.md
+++ b/docs/en/tasks/segment.md
@@ -98,10 +98,12 @@ Validate trained YOLO26n-seg model [accuracy](https://www.ultralytics.com/glossa
         metrics.box.map50  # map50(B)
         metrics.box.map75  # map75(B)
         metrics.box.maps  # a list containing mAP50-95(B) for each category
+        metrics.box.image_metrics  # per-image metrics dictionary for det with precision, recall, F1, TP, FP, and FN
         metrics.seg.map  # map50-95(M)
         metrics.seg.map50  # map50(M)
         metrics.seg.map75  # map75(M)
         metrics.seg.maps  # a list containing mAP50-95(M) for each category
+        metrics.seg.image_metrics  # per-image metrics dictionary for seg with precision, recall, F1, TP, FP, and FN
         ```
 
     === "CLI"
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index dd953fc09d..aeea1940bb 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-__version__ = "8.4.39"
+__version__ = "8.4.40"
 
 import importlib
 import os
diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py
index 826916e2cf..9ce790287e 100644
--- a/ultralytics/models/yolo/detect/val.py
+++ b/ultralytics/models/yolo/detect/val.py
@@ -96,6 +96,8 @@ class DetectionValidator(BaseValidator):
         self.seen = 0
         self.jdict = []
         self.metrics.names = model.names
+        self.metrics.clear_stats()
+        self.metrics.clear_image_metrics()
         self.confusion_matrix = ConfusionMatrix(names=model.names, save_matches=self.args.plots and self.args.visualize)
 
     def get_desc(self) -> str:
@@ -186,6 +188,7 @@ class DetectionValidator(BaseValidator):
                     "target_img": np.unique(cls),
                     "conf": np.zeros(0) if no_pred else predn["conf"].cpu().numpy(),
                     "pred_cls": np.zeros(0) if no_pred else predn["cls"].cpu().numpy(),
+                    "im_name": Path(pbatch["im_file"]).name,
                 }
             )
             # Evaluate
@@ -219,6 +222,19 @@ class DetectionValidator(BaseValidator):
         self.metrics.confusion_matrix = self.confusion_matrix
         self.metrics.save_dir = self.save_dir
 
+    def _gather_image_metrics(self, metric) -> None:
+        """Gather per-image metrics from all GPUs for a single metric object."""
+        if RANK == 0:
+            gathered_image_metrics = [None] * dist.get_world_size()
+            dist.gather_object(metric.image_metrics, gathered_image_metrics, dst=0)
+            metric.clear_image_metrics()
+            for image_metrics in gathered_image_metrics:
+                if image_metrics:
+                    metric.image_metrics.update(image_metrics)
+        elif RANK > 0:
+            dist.gather_object(metric.image_metrics, None, dst=0)
+            metric.clear_image_metrics()
+
     def gather_stats(self) -> None:
         """Gather stats from all GPUs."""
         if RANK == 0:
@@ -234,10 +250,12 @@ class DetectionValidator(BaseValidator):
             for jdict in gathered_jdict:
                 self.jdict.extend(jdict)
             self.metrics.stats = merged_stats
+            self._gather_image_metrics(self.metrics.box)
             self.seen = len(self.dataloader.dataset)  # total image count from dataset
         elif RANK > 0:
             dist.gather_object(self.metrics.stats, None, dst=0)
             dist.gather_object(self.jdict, None, dst=0)
+            self._gather_image_metrics(self.metrics.box)
             self.jdict = []
             self.metrics.clear_stats()
 
diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py
index 04a355fbb9..02b8d3a76c 100644
--- a/ultralytics/models/yolo/pose/val.py
+++ b/ultralytics/models/yolo/pose/val.py
@@ -185,6 +185,11 @@ class PoseValidator(DetectionValidator):
         tp.update({"tp_p": tp_p})  # update tp with kpts IoU
         return tp
 
+    def gather_stats(self) -> None:
+        """Gather stats from all GPUs."""
+        super().gather_stats()  # gather stats from DetectionValidator
+        self._gather_image_metrics(self.metrics.pose)
+
     def save_one_txt(self, predn: dict[str, torch.Tensor], save_conf: bool, shape: tuple[int, int], file: Path) -> None:
         """Save YOLO pose detections to a text file in normalized coordinates.
 
diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py
index a66c7ad09d..386b7cbfc3 100644
--- a/ultralytics/models/yolo/segment/val.py
+++ b/ultralytics/models/yolo/segment/val.py
@@ -141,6 +141,11 @@ class SegmentationValidator(DetectionValidator):
         prepared_batch["masks"] = masks
         return prepared_batch
 
+    def gather_stats(self) -> None:
+        """Gather stats from all GPUs."""
+        super().gather_stats()  # gather stats from DetectionValidator
+        self._gather_image_metrics(self.metrics.seg)
+
     def _process_batch(self, preds: dict[str, torch.Tensor], batch: dict[str, Any]) -> dict[str, np.ndarray]:
         """Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
 
diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py
index 2c790f77b0..894e43fdf8 100644
--- a/ultralytics/utils/metrics.py
+++ b/ultralytics/utils/metrics.py
@@ -879,6 +879,7 @@ class Metric(SimpleClass):
         self.all_ap = []  # (nc, 10)
         self.ap_class_index = []  # (nc, )
         self.nc = 0
+        self.image_metrics = {}
 
     @property
     def ap50(self) -> np.ndarray | list:
@@ -993,6 +994,10 @@ class Metric(SimpleClass):
             self.prec_values,
         ) = results
 
+    def clear_image_metrics(self) -> None:
+        """Clear stored per-image metrics from the current validation run."""
+        self.image_metrics.clear()
+
     @property
     def curves(self) -> list:
         """Return a list of curves for accessing specific metrics curves."""
@@ -1008,6 +1013,33 @@ class Metric(SimpleClass):
             [self.px, self.r_curve, "Confidence", "Recall"],
         ]
 
+    def update_image_metrics(self, tp: np.ndarray, target_cls: np.ndarray, pred_cls: np.ndarray, im_name: str) -> None:
+        """Update per-image precision, recall, F1, TP, FP, and FN at IoU threshold 0.5.
+
+        Args:
+            tp (np.ndarray): True positive array of shape (num_preds, num_iou_thresholds), where the first column (IoU
+                >= 0.5) is used.
+            target_cls (np.ndarray): Ground truth class labels for the image.
+            pred_cls (np.ndarray): Predicted class labels for the image.
+            im_name (str): The image filename used as the per-image key.
+        """
+        # Use the default IoU=0.5 column to match the validator's image-level matching policy.
+        tp = int(tp[:, 0].sum())
+        num_preds = pred_cls.shape[0]
+        num_targets = target_cls.shape[0]
+        fp = num_preds - tp
+        fn = num_targets - tp
+        precision = tp / num_preds if num_preds else 0
+        recall = tp / num_targets if num_targets else 0
+        self.image_metrics[im_name] = {
+            "precision": float(precision),
+            "recall": float(recall),
+            "f1": float(2 * (precision * recall) / (precision + recall)) if (precision + recall) else 0.0,
+            "tp": int(tp),
+            "fp": int(fp),
+            "fn": int(fn),
+        }
+
 
 class DetMetrics(SimpleClass, DataExportMixin):
     """Utility class for computing detection metrics such as precision, recall, and mean average precision (mAP).
@@ -1059,6 +1091,7 @@ class DetMetrics(SimpleClass, DataExportMixin):
         """
         for k in self.stats.keys():
             self.stats[k].append(stat[k])
+        self.box.update_image_metrics(stat["tp"], stat["target_cls"], stat["pred_cls"], stat["im_name"])
 
     def process(self, save_dir: Path = Path("."), plot: bool = False, on_plot=None) -> dict[str, np.ndarray]:
         """Process predicted results for object detection and update metrics.
@@ -1096,6 +1129,10 @@ class DetMetrics(SimpleClass, DataExportMixin):
         for v in self.stats.values():
             v.clear()
 
+    def clear_image_metrics(self) -> None:
+        """Clear stored per-image metrics."""
+        self.box.clear_image_metrics()
+
     @property
     def keys(self) -> list[str]:
         """Return a list of keys for accessing specific metrics."""
@@ -1211,6 +1248,21 @@ class SegmentMetrics(DetMetrics):
         self.seg = Metric()
         self.stats["tp_m"] = []  # add additional stats for masks
 
+    def update_stats(self, stat: dict[str, Any]) -> None:
+        """Update statistics by appending new values to existing stat collections.
+
+        Args:
+            stat (dict[str, Any]): Dictionary containing new statistical values to append. Keys should match existing
+                keys in self.stats.
+        """
+        super().update_stats(stat)  # update box stats
+        self.seg.update_image_metrics(stat["tp_m"], stat["target_cls"], stat["pred_cls"], stat["im_name"])
+
+    def clear_image_metrics(self) -> None:
+        """Clear stored per-image metrics."""
+        super().clear_image_metrics()
+        self.seg.clear_image_metrics()
+
     def process(self, save_dir: Path = Path("."), plot: bool = False, on_plot=None) -> dict[str, np.ndarray]:
         """Process the detection and segmentation metrics over the given set of predictions.
 
@@ -1347,6 +1399,21 @@ class PoseMetrics(DetMetrics):
         self.pose = Metric()
         self.stats["tp_p"] = []  # add additional stats for pose
 
+    def update_stats(self, stat: dict[str, Any]) -> None:
+        """Update statistics by appending new values to existing stat collections.
+
+        Args:
+            stat (dict[str, Any]): Dictionary containing new statistical values to append. Keys should match existing
+                keys in self.stats.
+        """
+        super().update_stats(stat)  # update box stats
+        self.pose.update_image_metrics(stat["tp_p"], stat["target_cls"], stat["pred_cls"], stat["im_name"])
+
+    def clear_image_metrics(self) -> None:
+        """Clear stored per-image metrics."""
+        super().clear_image_metrics()
+        self.pose.clear_image_metrics()
+
     def process(self, save_dir: Path = Path("."), plot: bool = False, on_plot=None) -> dict[str, np.ndarray]:
         """Process the detection and pose metrics over the given set of predictions.