mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-21 15:18:04 +00:00
27 lines
775 B
JSON
27 lines
775 B
JSON
{
  "name": "rl_recipe_v1",
  "goal": "Lightweight tool-tuning stage after SFT.",
  "seed_eval_data": "seed_data/tool_eval_seed.jsonl",
  "reward_components": [
    "valid tool call",
    "correct tool choice",
    "answer contains expected answer fragment",
    "citation required when using web_search",
    "penalty for unnecessary tool calls"
  ],
  "external_templates": [
    {
      "repo_id": "nvidia/Nemotron-RL-Agentic-Conversational-Tool-Use-Pivot-v1",
      "role": "tool_use_structure"
    },
    {
      "repo_id": "nvidia/Nemotron-RL-Agentic-Function-Calling-Pivot-v1",
      "role": "function_call_structure"
    }
  ],
  "notes": [
    "Keep RL narrow; do not turn this into broad RLHF.",
    "Use the local tool eval dataset for dry runs before GPU."
  ]
}