nanochat/seed_data/tool_eval_seed.jsonl
2026-03-24 20:52:36 -04:00

4 lines
879 B
JSON

{"conversation": {"messages": [{"role": "user", "content": "What is 12% of 250?"}, {"role": "assistant", "content": "45"}]}, "checks": {"must_call": "calculator", "must_not_call": ["web_search"], "answer_contains": ["45"]}}
{"conversation": {"messages": [{"role": "user", "content": "Find the official nanochat repository and cite the link."}, {"role": "assistant", "content": "https://github.com/karpathy/nanochat"}]}, "checks": {"must_call": "web_search", "citation_required": true, "answer_contains": ["github.com/karpathy/nanochat"]}}
{"conversation": {"messages": [{"role": "user", "content": "Explain what overfitting means in one sentence."}, {"role": "assistant", "content": "Overfitting means a model memorizes training patterns too closely and generalizes poorly to new data."}]}, "checks": {"must_not_call": ["web_search"], "answer_contains": ["generalizes poorly"]}}