[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
This commit is contained in:
pre-commit-ci[bot] 2026-01-08 11:35:21 +00:00
parent 16a2d901fa
commit 3620564025
2 changed files with 11 additions and 3 deletions

View file

@@ -125,8 +125,12 @@ def test_raw_text_loader():
), "input_ids and attention_mask should have same length"
# Verify labels field exists (for causal LM training)
assert "labels" in tokenized_dataset.column_names, "Dataset should have 'labels' column"
assert first_sample["labels"] == first_sample["input_ids"], "labels should match input_ids"
assert (
"labels" in tokenized_dataset.column_names
), "Dataset should have 'labels' column"
assert (
first_sample["labels"] == first_sample["input_ids"]
), "labels should match input_ids"
# Test constructor validation
try:

View file

@@ -97,7 +97,11 @@ class RawTextDataLoader:
# Labels are same as input_ids for causal LM training
labels = [list(ids) for ids in input_ids]
return Dataset.from_dict(
{"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
{
"input_ids": input_ids,
"attention_mask": attention_mask,
"labels": labels,
}
)
else:
# If chunks are text strings (backward compatibility)