mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
parent
16a2d901fa
commit
3620564025
2 changed files with 11 additions and 3 deletions
|
|
@@ -125,8 +125,12 @@ def test_raw_text_loader():
|
|||
), "input_ids and attention_mask should have same length"
|
||||
|
||||
# Verify labels field exists (for causal LM training)
|
||||
assert "labels" in tokenized_dataset.column_names, "Dataset should have 'labels' column"
|
||||
assert first_sample["labels"] == first_sample["input_ids"], "labels should match input_ids"
|
||||
assert (
|
||||
"labels" in tokenized_dataset.column_names
|
||||
), "Dataset should have 'labels' column"
|
||||
assert (
|
||||
first_sample["labels"] == first_sample["input_ids"]
|
||||
), "labels should match input_ids"
|
||||
|
||||
# Test constructor validation
|
||||
try:
|
||||
|
|
|
|||
|
|
@@ -97,7 +97,11 @@ class RawTextDataLoader:
|
|||
# Labels are same as input_ids for causal LM training
|
||||
labels = [list(ids) for ids in input_ids]
|
||||
return Dataset.from_dict(
|
||||
{"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
|
||||
{
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": attention_mask,
|
||||
"labels": labels,
|
||||
}
|
||||
)
|
||||
else:
|
||||
# If chunks are text strings (backward compatibility)
|
||||
|
|
|
|||
Loading…
Reference in a new issue