mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-24 08:38:05 +00:00
32 lines
1.0 KiB
JSON
32 lines
1.0 KiB
JSON
{
  "name": "cpt_recipe_v1",
  "goal": "Continue pretraining nanochat-d24 on a safe, high-value mixture for a 48-hour run.",
  "backbone_dataset": {
    "repo_id": "karpathy/climbmix-400b-shuffle",
    "role": "primary",
    "notes": "Use ClimbMix as the main pretraining backbone because it is already the best-performing dataset in this fork."
  },
  "optional_augmentations": [
    {
      "repo_id": "nvidia/Nemotron-Pretraining-Code-v2",
      "role": "code",
      "notes": "Selective code augmentation only."
    },
    {
      "repo_id": "nvidia/Nemotron-CC-Math-v1",
      "role": "math_science",
      "notes": "Selective math/science augmentation only."
    },
    {
      "repo_id": "nvidia/Nemotron-Pretraining-Specialized-v1.1",
      "role": "specialized_web",
      "notes": "Use sparingly for targeted specialization."
    }
  ],
  "do_not_do": [
    "Do not replace ClimbMix wholesale.",
    "Do not attempt architecture changes during this 48-hour continuation run.",
    "Do not rely on continued pretraining for current-events freshness."
  ]
}