nanochat/manifests/cpt_recipe_v1.json
2026-03-24 20:52:36 -04:00

32 lines
1.0 KiB
JSON

{
"name": "cpt_recipe_v1",
"goal": "Continue pretraining nanochat-d24 on a safe, high-value mixture for a 48-hour run.",
"backbone_dataset": {
"repo_id": "karpathy/climbmix-400b-shuffle",
"role": "primary",
"notes": "Use ClimbMix as the main pretraining backbone because it is already the best-performing dataset in this fork."
},
"optional_augmentations": [
{
"repo_id": "nvidia/Nemotron-Pretraining-Code-v2",
"role": "code",
"notes": "Use only as a selective code augmentation on top of the backbone dataset."
},
{
"repo_id": "nvidia/Nemotron-CC-Math-v1",
"role": "math_science",
"notes": "Use only as a selective math/science augmentation on top of the backbone dataset."
},
{
"repo_id": "nvidia/Nemotron-Pretraining-Specialized-v1.1",
"role": "specialized_web",
"notes": "Use sparingly for targeted specialization."
}
],
"do_not_do": [
"Do not replace ClimbMix wholesale.",
"Do not attempt architecture changes during this 48-hour continuation run.",
"Do not rely on continued pretraining to keep the model up to date on current events."
]
}