refactor: 🚀 switch core data ops from pandas to polars (training, benchmarks, plotting, W&B) (#21619)

Signed-off-by: Onuralp SEZER <onuralp@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Onuralp SEZER <onuralp@ultralytics.com>
This commit is contained in:
Jing Qiu 2025-08-26 13:22:44 +08:00 committed by GitHub
parent 676a8bbb82
commit bcb1961a9b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 71 additions and 159 deletions

View file

@@ -516,12 +516,9 @@ All Ultralytics `predict()` calls will return a list of `Results` objects:
| `save_txt()` | `str` | Saves detection results to a text file and returns the path to the saved file. |
| `save_crop()` | `None` | Saves cropped detection images to specified directory. |
| `summary()` | `List[Dict[str, Any]]` | Converts inference results to a summarized dictionary with optional normalization. |
| `to_df()` | `DataFrame` | Converts detection results to a Pandas DataFrame. |
| `to_df()` | `DataFrame` | Converts detection results to a Polars DataFrame. |
| `to_csv()` | `str` | Converts detection results to CSV format. |
| `to_xml()` | `str` | Converts detection results to XML format. |
| `to_html()` | `str` | Converts detection results to HTML format. |
| `to_json()` | `str` | Converts detection results to JSON format. |
| `to_sql()` | `None` | Converts detection results to SQL-compatible format and saves to database. |
For more details see the [`Results` class documentation](../reference/engine/results.md).

View file

@@ -93,7 +93,7 @@ Each of these settings plays a vital role in the validation process, allowing fo
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> How to Export Model Validation Results in CSV, JSON, SQL, Pandas DataFrame & More
<strong>Watch:</strong> How to Export Model Validation Results in CSV, JSON, SQL, Polars DataFrame & More
</p>
<a href="https://github.com/ultralytics/notebooks/blob/main/notebooks/how-to-export-the-validation-results-into-dataframe-csv-sql-and-other-formats.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Explore model validation and different export methods in Google Colab"></a>
@@ -133,15 +133,12 @@ The below examples showcase YOLO model validation with custom arguments in Pytho
print(results.confusion_matrix.to_df())
```
| Method | Return Type | Description |
| ----------- | ---------------------- | -------------------------------------------------------------------------------- |
| `summary()` | `List[Dict[str, Any]]` | Converts validation results to a summarized dictionary. |
| `to_df()` | `DataFrame` | Returns the validation results as a structured Pandas DataFrame. |
| `to_csv()` | `str` | Exports the validation results in CSV format and returns the CSV string. |
| `to_xml()` | `str` | Exports the validation results in XML format and returns the XML string. |
| `to_html()` | `str` | Exports the validation results in HTML table format and returns the HTML string. |
| `to_json()` | `str` | Exports the validation results in JSON format and returns the JSON string. |
| `to_sql()` | `str` | Exports the validation results to an SQL database. |
| Method | Return Type | Description |
| ----------- | ---------------------- | -------------------------------------------------------------------------- |
| `summary()` | `List[Dict[str, Any]]` | Converts validation results to a summarized dictionary. |
| `to_df()` | `DataFrame` | Returns the validation results as a structured Polars DataFrame. |
| `to_csv()` | `str` | Exports the validation results in CSV format and returns the CSV string. |
| `to_json()` | `str` | Exports the validation results in JSON format and returns the JSON string. |
For more details see the [`DataExportMixin` class documentation](../reference/utils/__init__.md/#ultralytics.utils.DataExportMixin).

View file

@@ -167,7 +167,7 @@ While the standard installation methods cover most use cases, you might need a m
2. **Manually install dependencies:** You need to install all required packages listed in the `pyproject.toml` file, substituting or modifying versions as needed. For the headless OpenCV example:
```bash
# Install other core dependencies
pip install torch torchvision numpy matplotlib pandas pyyaml pillow psutil requests scipy seaborn ultralytics-thop
pip install torch torchvision numpy matplotlib polars pyyaml pillow psutil requests scipy seaborn ultralytics-thop
# Install headless OpenCV instead of the default
pip install opencv-python-headless
@@ -237,7 +237,7 @@ While the standard installation methods cover most use cases, you might need a m
# Core dependencies
numpy
matplotlib
pandas
polars
pyyaml
Pillow
psutil

View file

@@ -73,7 +73,7 @@ dependencies = [
"torchvision>=0.9.0",
"psutil", # system utilization
"py-cpuinfo", # display CPU info
"pandas>=1.1.4",
"polars",
"ultralytics-thop>=2.0.0", # FLOPs computation https://github.com/ultralytics/thop
]
@@ -118,7 +118,6 @@ extra = [
"faster-coco-eval>=1.6.7", # COCO mAP
]
typing = [
"pandas-stubs",
"scipy-stubs",
"types-pillow",
"types-psutil",

View file

@@ -209,16 +209,11 @@ def test_val(task: str, weight: str, data: str) -> None:
metrics = model.val(data=data, imgsz=32, plots=plots)
metrics.to_df()
metrics.to_csv()
metrics.to_xml()
metrics.to_html()
metrics.to_json()
metrics.to_sql()
metrics.confusion_matrix.to_df() # Tests for confusion matrix export
# Tests for confusion matrix export
metrics.confusion_matrix.to_df()
metrics.confusion_matrix.to_csv()
metrics.confusion_matrix.to_xml()
metrics.confusion_matrix.to_html()
metrics.confusion_matrix.to_json()
metrics.confusion_matrix.to_sql()
def test_train_scratch():
@@ -304,10 +299,7 @@ def test_results(model: str):
r.save_crop(save_dir=TMP / "runs/tests/crops/")
r.to_df(decimals=3) # Align to_ methods: https://docs.ultralytics.com/modes/predict/#working-with-results
r.to_csv()
r.to_xml()
r.to_html()
r.to_json(normalize=True)
r.to_sql()
r.plot(pil=True, save=True, filename=TMP / "results_plot_save.jpg")
r.plot(conf=True, boxes=True)
print(r, len(r), r.path) # print after methods

View file

@@ -24,7 +24,7 @@ download: |
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl
from ultralytics.utils import TQDM
from ultralytics.utils.downloads import download
@@ -45,7 +45,7 @@ download: |
# Convert labels
names = "image", "x1", "y1", "x2", "y2", "class", "image_width", "image_height" # column names
for d in "annotations_train.csv", "annotations_val.csv", "annotations_test.csv":
x = pd.read_csv(dir / "annotations" / d, names=names).values # annotations
x = pl.read_csv(dir / "annotations" / d, names=names).to_numpy() # annotations
images, unique_images = x[:, 0], np.unique(x[:, 0])
with open((dir / d).with_suffix(".txt").__str__().replace("annotations_", ""), "w", encoding="utf-8") as f:
f.writelines(f"./images/{s}\n" for s in unique_images)

View file

@@ -222,12 +222,9 @@ class Results(SimpleClass, DataExportMixin):
save_txt: Save detection results to a text file.
save_crop: Save cropped detection images to specified directory.
summary: Convert inference results to a summarized dictionary.
to_df: Convert detection results to a Pandas DataFrame.
to_df: Convert detection results to a Polars DataFrame.
to_json: Convert detection results to JSON format.
to_csv: Convert detection results to a CSV format.
to_xml: Convert detection results to XML format.
to_html: Convert detection results to HTML format.
to_sql: Convert detection results to an SQL-compatible format.
Examples:
>>> results = model("path/to/image.jpg")

View file

@@ -540,10 +540,10 @@ class BaseTrainer:
torch.cuda.empty_cache()
def read_results_csv(self):
"""Read results.csv into a dictionary using pandas."""
import pandas as pd # scope for faster 'import ultralytics'
"""Read results.csv into a dictionary using polars."""
import polars as pl # scope for faster 'import ultralytics'
return pd.read_csv(self.csv).to_dict(orient="list")
return pl.read_csv(self.csv).to_dict(as_series=False)
def _model_train(self):
"""Set model in training mode."""

View file

@@ -134,17 +134,14 @@ class DataExportMixin:
Mixin class for exporting validation metrics or prediction results in various formats.
This class provides utilities to export performance metrics (e.g., mAP, precision, recall) or prediction results
from classification, object detection, segmentation, or pose estimation tasks into various formats: Pandas
DataFrame, CSV, XML, HTML, JSON and SQLite (SQL).
from classification, object detection, segmentation, or pose estimation tasks into various formats: Polars
DataFrame, CSV and JSON.
Methods:
to_df: Convert summary to a Pandas DataFrame.
to_df: Convert summary to a Polars DataFrame.
to_csv: Export results as a CSV string.
to_xml: Export results as an XML string (requires `lxml`).
to_html: Export results as an HTML table.
to_json: Export results as a JSON string.
tojson: Deprecated alias for `to_json()`.
to_sql: Export results to an SQLite database.
Examples:
>>> model = YOLO("yolo11n.pt")
@@ -152,12 +149,11 @@ class DataExportMixin:
>>> df = results.to_df()
>>> print(df)
>>> csv_data = results.to_csv()
>>> results.to_sql(table_name="yolo_results")
"""
def to_df(self, normalize=False, decimals=5):
"""
Create a pandas DataFrame from the prediction results summary or validation metrics.
Create a polars DataFrame from the prediction results summary or validation metrics.
Args:
normalize (bool, optional): Normalize numerical values for easier comparison.
@@ -166,13 +162,13 @@ class DataExportMixin:
Returns:
(DataFrame): DataFrame containing the summary data.
"""
import pandas as pd # scope for faster 'import ultralytics'
import polars as pl # scope for faster 'import ultralytics'
return pd.DataFrame(self.summary(normalize=normalize, decimals=decimals))
return pl.DataFrame(self.summary(normalize=normalize, decimals=decimals))
def to_csv(self, normalize=False, decimals=5):
"""
Export results to CSV string format.
Export results or metrics to CSV string format.
Args:
normalize (bool, optional): Normalize numeric values.
@@ -181,44 +177,25 @@ class DataExportMixin:
Returns:
(str): CSV content as string.
"""
return self.to_df(normalize=normalize, decimals=decimals).to_csv()
import polars as pl
def to_xml(self, normalize=False, decimals=5):
"""
Export results to XML format.
Args:
normalize (bool, optional): Normalize numeric values.
decimals (int, optional): Decimal precision.
Returns:
(str): XML string.
Notes:
Requires `lxml` package to be installed.
"""
df = self.to_df(normalize=normalize, decimals=decimals)
return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml(parser="etree")
def to_html(self, normalize=False, decimals=5, index=False):
"""
Export results to HTML table format.
try:
return df.write_csv()
except Exception:
# Minimal string conversion for any remaining complex types
def _to_str_simple(v):
if v is None:
return ""
if isinstance(v, (dict, list, tuple, set)):
return repr(v)
return str(v)
Args:
normalize (bool, optional): Normalize numeric values.
decimals (int, optional): Decimal precision.
index (bool, optional): Whether to include index column in the HTML table.
Returns:
(str): HTML representation of the results.
"""
df = self.to_df(normalize=normalize, decimals=decimals)
return "<table></table>" if df.empty else df.to_html(index=index)
def tojson(self, normalize=False, decimals=5):
"""Deprecated version of to_json()."""
LOGGER.warning("'result.tojson()' is deprecated, replace with 'result.to_json()'.")
return self.to_json(normalize, decimals)
df_str = df.select(
[pl.col(c).map_elements(_to_str_simple, return_dtype=pl.String).alias(c) for c in df.columns]
)
return df_str.write_csv()
def to_json(self, normalize=False, decimals=5):
"""
@@ -231,52 +208,7 @@ class DataExportMixin:
Returns:
(str): JSON-formatted string of the results.
"""
return self.to_df(normalize=normalize, decimals=decimals).to_json(orient="records", indent=2)
def to_sql(self, normalize=False, decimals=5, table_name="results", db_path="results.db"):
"""
Save results to an SQLite database.
Args:
normalize (bool, optional): Normalize numeric values.
decimals (int, optional): Decimal precision.
table_name (str, optional): Name of the SQL table.
db_path (str, optional): SQLite database file path.
"""
df = self.to_df(normalize, decimals)
if df.empty or df.columns.empty: # Exit if df is None or has no columns (i.e., no schema)
return
import sqlite3
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Dynamically create table schema based on summary to support prediction and validation results export
columns = []
for col in df.columns:
sample_val = df[col].dropna().iloc[0] if not df[col].dropna().empty else ""
if isinstance(sample_val, dict):
col_type = "TEXT"
elif isinstance(sample_val, (float, int)):
col_type = "REAL"
else:
col_type = "TEXT"
columns.append(f'"{col}" {col_type}') # Quote column names to handle special characters like hyphens
# Create table (Drop table from db if it's already exist)
cursor.execute(f'DROP TABLE IF EXISTS "{table_name}"')
cursor.execute(f'CREATE TABLE "{table_name}" (id INTEGER PRIMARY KEY AUTOINCREMENT, {", ".join(columns)})')
for _, row in df.iterrows():
values = [json.dumps(v) if isinstance(v, dict) else v for v in row]
column_names = ", ".join(f'"{col}"' for col in df.columns)
placeholders = ", ".join("?" for _ in df.columns)
cursor.execute(f'INSERT INTO "{table_name}" ({column_names}) VALUES ({placeholders})', values)
conn.commit()
conn.close()
LOGGER.info(f"Results saved to SQL table '{table_name}' in '{db_path}'.")
return self.to_df(normalize=normalize, decimals=decimals).write_json()
class SimpleClass:

View file

@@ -77,7 +77,7 @@ def benchmark(
**kwargs (Any): Additional keyword arguments for exporter.
Returns:
(pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size, metric,
(polars.DataFrame): A polars DataFrame with benchmark results for each format, including file size, metric,
and inference time.
Examples:
@@ -88,10 +88,11 @@ def benchmark(
imgsz = check_imgsz(imgsz)
assert imgsz[0] == imgsz[1] if isinstance(imgsz, list) else True, "benchmark() only supports square imgsz."
import pandas as pd # scope for faster 'import ultralytics'
import polars as pl # scope for faster 'import ultralytics'
pd.options.display.max_columns = 10
pd.options.display.width = 120
pl.Config.set_tbl_cols(10)
pl.Config.set_tbl_width_chars(120)
pl.Config.set_tbl_hide_dataframe_shape(True)
device = select_device(device, verbose=False)
if isinstance(model, (str, Path)):
model = YOLO(model)
@@ -193,20 +194,20 @@ def benchmark(
# Print results
check_yolo(device=device) # print system info
df = pd.DataFrame(y, columns=["Format", "Status❔", "Size (MB)", key, "Inference time (ms/im)", "FPS"])
df = pl.DataFrame(y, schema=["Format", "Status❔", "Size (MB)", key, "Inference time (ms/im)", "FPS"])
name = model.model_name
dt = time.time() - t0
legend = "Benchmarks legend: - ✅ Success - ❎ Export passed but validation failed - ❌️ Export failed"
s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz} ({dt:.2f}s)\n{legend}\n{df.fillna('-')}\n"
s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz} ({dt:.2f}s)\n{legend}\n{df.fill_null('-')}\n"
LOGGER.info(s)
with open("benchmarks.log", "a", errors="ignore", encoding="utf-8") as f:
f.write(s)
if verbose and isinstance(verbose, float):
metrics = df[key].array # values to compare to floor
metrics = df[key].to_numpy() # values to compare to floor
floor = verbose # minimum metric floor to pass, i.e. = 0.29 mAP for YOLOv5n
assert all(x > floor for x in metrics if pd.notna(x)), f"Benchmark failure: metric(s) < floor {floor}"
assert all(x > floor for x in metrics if not np.isnan(x)), f"Benchmark failure: metric(s) < floor {floor}"
return df

View file

@@ -34,13 +34,19 @@ def _custom_table(x, y, classes, title="Precision Recall Curve", x_title="Recall
Returns:
(wandb.Object): A wandb object suitable for logging, showcasing the crafted metric visualization.
"""
import pandas # scope for faster 'import ultralytics'
import polars as pl # scope for faster 'import ultralytics'
import polars.selectors as cs
df = pl.DataFrame({"class": classes, "y": y, "x": x}).with_columns(cs.numeric().round(3))
data = df.select(["class", "y", "x"]).rows()
df = pandas.DataFrame({"class": classes, "y": y, "x": x}).round(3)
fields = {"x": "x", "y": "y", "class": "class"}
string_fields = {"title": title, "x-axis-title": x_title, "y-axis-title": y_title}
return wb.plot_table(
"wandb/area-under-curve/v0", wb.Table(dataframe=df), fields=fields, string_fields=string_fields
"wandb/area-under-curve/v0",
wb.Table(data=data, columns=["class", "y", "x"]),
fields=fields,
string_fields=string_fields,
)

View file

@@ -557,7 +557,7 @@ class Annotator:
return width, height, width * height
@TryExcept() # known issue https://github.com/ultralytics/yolov5/issues/5395
@TryExcept()
@plt_settings()
def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
"""
@@ -571,7 +571,7 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
on_plot (Callable, optional): Function to call after plot is saved.
"""
import matplotlib.pyplot as plt # scope for faster 'import ultralytics'
import pandas
import polars
from matplotlib.colors import LinearSegmentedColormap
# Filter matplotlib>=3.7.2 warning
@@ -582,16 +582,7 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
nc = int(cls.max() + 1) # number of classes
boxes = boxes[:1000000] # limit to 1M boxes
x = pandas.DataFrame(boxes, columns=["x", "y", "width", "height"])
try: # Seaborn correlogram
import seaborn
seaborn.pairplot(x, corner=True, diag_kind="auto", kind="hist", diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200)
plt.close()
except ImportError:
pass # Skip if seaborn is not installed
x = polars.DataFrame(boxes, schema=["x", "y", "width", "height"])
# Matplotlib labels
subplot_3_4_color = LinearSegmentedColormap.from_list("white_blue", ["white", "blue"])
@@ -608,7 +599,7 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
boxes = np.column_stack([0.5 - boxes[:, 2:4] / 2, 0.5 + boxes[:, 2:4] / 2]) * 1000
img = Image.fromarray(np.ones((1000, 1000, 3), dtype=np.uint8) * 255)
for cls, box in zip(cls[:500], boxes[:500]):
ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot
ImageDraw.Draw(img).rectangle(box.tolist(), width=1, outline=colors(cls)) # plot
ax[1].imshow(img)
ax[1].axis("off")
@@ -878,7 +869,7 @@ def plot_results(
>>> plot_results("path/to/results.csv", segment=True)
"""
import matplotlib.pyplot as plt # scope for faster 'import ultralytics'
import pandas as pd
import polars as pl
from scipy.ndimage import gaussian_filter1d
save_dir = Path(file).parent if file else Path(dir)
@@ -899,11 +890,11 @@ def plot_results(
assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."
for f in files:
try:
data = pd.read_csv(f)
data = pl.read_csv(f)
s = [x.strip() for x in data.columns]
x = data.values[:, 0]
x = data.select(data.columns[0]).to_numpy().flatten()
for i, j in enumerate(index):
y = data.values[:, j].astype("float")
y = data.select(data.columns[j]).to_numpy().flatten().astype("float")
# y[y == 0] = np.nan # don't show zero values
ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=8) # actual results
ax[i].plot(x, gaussian_filter1d(y, sigma=3), ":", label="smooth", linewidth=2) # smoothing line
@@ -965,7 +956,7 @@ def plot_tune_results(csv_file: str = "tune_results.csv"):
>>> plot_tune_results("path/to/tune_results.csv")
"""
import matplotlib.pyplot as plt # scope for faster 'import ultralytics'
import pandas as pd
import polars as pl
from scipy.ndimage import gaussian_filter1d
def _save_one_file(file):
@@ -976,10 +967,10 @@ def plot_tune_results(csv_file: str = "tune_results.csv"):
# Scatter plots for each hyperparameter
csv_file = Path(csv_file)
data = pd.read_csv(csv_file)
data = pl.read_csv(csv_file)
num_metrics_columns = 1
keys = [x.strip() for x in data.columns][num_metrics_columns:]
x = data.values
x = data.to_numpy()
fitness = x[:, 0] # fitness
j = np.argmax(fitness) # max fitness index
n = math.ceil(len(keys) ** 0.5) # columns and rows in plot