▶ written content (6,872 chars)
from data_designer.config import (
CategorySamplerParams,
CodeLang,
CodeValidatorParams,
DataDesignerConfigBuilder,
LLMCodeColumnConfig,
LLMJudgeColumnConfig,
LLMTextColumnConfig,
SamplerColumnConfig,
Score,
ValidationColumnConfig,
ValidatorType,
)
def load_config_builder() -> DataDesignerConfigBuilder:
    """Assemble the config builder for a Python data-science code dataset.

    Columns are registered in this order:

    1. ``difficulty`` - category sampler over three levels with explicit weights.
    2. ``subtopic`` - category sampler over seven areas (no weights given).
    3. ``instruction`` - LLM text column prompted with difficulty and subtopic.
    4. ``solution`` - LLM Python code column prompted with difficulty and
       instruction.
    5. ``code_validation`` - code validator targeting the ``solution`` column.
    6. ``quality_score`` - LLM judge with three rubrics, each keyed 1 to 5.

    Returns:
        The populated ``DataDesignerConfigBuilder``.
    """
    builder = DataDesignerConfigBuilder()

    # --- Sampled seed columns ---
    difficulty_column = SamplerColumnConfig(
        name="difficulty",
        sampler_type="category",
        params=CategorySamplerParams(
            values=["beginner", "intermediate", "advanced"],
            weights=[0.35, 0.40, 0.25],
        ),
    )

    subtopic_values = [
        "data cleaning",
        "exploratory analysis",
        "aggregation and groupby operations",
        "visualization with matplotlib/seaborn",
        "statistical testing",
        "feature engineering",
        "working with messy or missing data",
    ]
    subtopic_column = SamplerColumnConfig(
        name="subtopic",
        sampler_type="category",
        params=CategorySamplerParams(values=subtopic_values),
    )

    # --- Natural language instruction ---
    instruction_prompt = (
        "Write a clear, specific natural-language instruction for a {{ difficulty }}-level "
        "Python data science task in the area of {{ subtopic }}.\n\n"
        "The instruction should describe a concrete problem that a data scientist might encounter "
        "and should be solvable in a single self-contained Python script. "
        "Do NOT include any code in your response — only the task description.\n\n"
        "Vary the complexity to match the difficulty level:\n"
        "- beginner: simple operations using pandas, basic plots, straightforward cleaning\n"
        "- intermediate: multi-step workflows, combining libraries, moderate data manipulation\n"
        "- advanced: complex pipelines, statistical modeling, performance optimization, "
        "advanced visualization techniques\n\n"
        "Return only the instruction text, nothing else."
    )
    instruction_column = LLMTextColumnConfig(
        name="instruction",
        model_alias="nvidia-text",
        prompt=instruction_prompt,
    )

    # --- Python solution ---
    solution_prompt = (
        "Write a complete, self-contained Python solution for the following "
        "{{ difficulty }}-level data science task:\n\n"
        "{{ instruction }}\n\n"
        "Requirements:\n"
        "- The code must be syntactically valid and runnable as-is.\n"
        "- Include all necessary imports.\n"
        "- Generate or define any sample data needed (do not load from external files).\n"
        "- Use appropriate libraries (pandas, numpy, matplotlib, seaborn, scipy, "
        "scikit-learn, etc.) as needed.\n"
        "- Add brief inline comments explaining key steps.\n"
        "- Follow Python best practices and PEP 8 style.\n"
    )
    solution_column = LLMCodeColumnConfig(
        name="solution",
        model_alias="nvidia-reasoning",
        code_lang=CodeLang.PYTHON,
        prompt=solution_prompt,
    )

    # --- Code validation ---
    validation_column = ValidationColumnConfig(
        name="code_validation",
        target_columns=["solution"],
        validator_type=ValidatorType.CODE,
        validator_params=CodeValidatorParams(code_lang=CodeLang.PYTHON),
    )

    # --- LLM-as-judge evaluation ---
    correctness_score = Score(
        name="correctness",
        description=(
            "Does the code correctly implement what the instruction asks for? "
            "Consider logical correctness, proper use of libraries, and whether "
            "the output would match expectations."
        ),
        options={
            1: "Completely wrong or does not address the instruction",
            2: "Major errors that would produce incorrect results",
            3: "Partially correct but has notable issues",
            4: "Mostly correct with minor issues",
            5: "Fully correct and complete implementation",
        },
    )
    code_quality_score = Score(
        name="code_quality",
        description=(
            "Assess the code style, readability, and Pythonic practices. "
            "Consider variable naming, structure, comments, and adherence "
            "to best practices."
        ),
        options={
            1: "Very poor quality, unreadable or badly structured",
            2: "Below average, multiple style issues",
            3: "Acceptable but could be improved",
            4: "Good quality, clean and well-structured",
            5: "Excellent quality, exemplary Python code",
        },
    )
    instruction_clarity_score = Score(
        name="instruction_clarity",
        description=(
            "How clear and well-defined is the original instruction? "
            "Consider whether the task is unambiguous, specific enough to implement, "
            "and appropriately scoped for the stated difficulty level."
        ),
        options={
            1: "Vague or incomprehensible instruction",
            2: "Unclear, missing important details",
            3: "Adequate but could be more specific",
            4: "Clear and well-defined",
            5: "Excellent clarity, perfectly scoped for difficulty level",
        },
    )

    judge_prompt = (
        "Evaluate the following Python data science solution against its instruction.\n\n"
        "**Difficulty:** {{ difficulty }}\n"
        "**Subtopic:** {{ subtopic }}\n\n"
        "**Instruction:**\n{{ instruction }}\n\n"
        "**Solution:**\n```python\n{{ solution }}\n```\n\n"
        "Score the solution on each dimension below. Be rigorous but fair."
    )
    judge_column = LLMJudgeColumnConfig(
        name="quality_score",
        model_alias="nvidia-reasoning",
        prompt=judge_prompt,
        scores=[correctness_score, code_quality_score, instruction_clarity_score],
    )

    # Register in dependency order: each prompt only references columns
    # that appear earlier in this sequence.
    ordered_columns = (
        difficulty_column,
        subtopic_column,
        instruction_column,
        solution_column,
        validation_column,
        judge_column,
    )
    for column in ordered_columns:
        builder.add_column(column)

    return builder