[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
2026-04-21 13:37:39 +00:00 · 2026-01-08 11:35:21 +00:00 · 2026-01-08 11:35:21 +00:00 · 3620564025
commit 3620564025
parent 16a2d901fa
2 changed files with 11 additions and 3 deletions
--- a/tests/test_raw_text.py
+++ b/tests/test_raw_text.py
@@ -125,8 +125,12 @@ def test_raw_text_loader():
        ), "input_ids and attention_mask should have same length"

        # Verify labels field exists (for causal LM training)
-        assert "labels" in tokenized_dataset.column_names, "Dataset should have 'labels' column"
-        assert first_sample["labels"] == first_sample["input_ids"], "labels should match input_ids"
+        assert (
+            "labels" in tokenized_dataset.column_names
+        ), "Dataset should have 'labels' column"
+        assert (
+            first_sample["labels"] == first_sample["input_ids"]
+        ), "labels should match input_ids"

        # Test constructor validation
        try:
--- a/unsloth/dataprep/raw_text.py
+++ b/unsloth/dataprep/raw_text.py
@@ -97,7 +97,11 @@ class RawTextDataLoader:
            # Labels are same as input_ids for causal LM training
            labels = [list(ids) for ids in input_ids]
            return Dataset.from_dict(
-                {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
+                {
+                    "input_ids": input_ids,
+                    "attention_mask": attention_mask,
+                    "labels": labels,
+                }
            )
        else:
            # If chunks are text strings (backward compatibility)