"""
|
|
JCommonsenseQA from JGLUE benchmark.
|
|
https://huggingface.co/datasets/sbintuitions/JCommonsenseQA
|
|
|
|
A Japanese commonsense question answering dataset with 5 choices.
|
|
Used for evaluating Japanese language understanding.
|
|
"""
|
|
|
|
from datasets import load_dataset
|
|
from tasks.common import Task, render_mc
|
|
|
|
|
|
class JCommonsenseQA(Task):
    """
    JCommonsenseQA: Japanese Commonsense Question Answering.
    A 5-choice multiple choice task from the JGLUE benchmark.
    """

    def __init__(self, split="validation", **kwargs):
        super().__init__(**kwargs)
        assert split in ["train", "validation"], "JCommonsenseQA split must be train|validation"
        self.ds = load_dataset("sbintuitions/JCommonsenseQA", split=split).shuffle(seed=42)
        self.letters = ["A", "B", "C", "D", "E"]

    @property
    def eval_type(self):
        return 'categorical'

    def num_examples(self):
        return len(self.ds)

    def get_example(self, index):
        row = self.ds[index]
        question = row["question"]
        # Collect choices from choice0 to choice4
        choices = [row[f"choice{i}"] for i in range(5)]
        label = row["label"]  # integer 0-4 indexing the correct choice
        answer_letter = self.letters[label]

        # Create the user message with multiple choice format
        user_message = render_mc(question, self.letters, choices)
        messages = [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": answer_letter},
        ]

        conversation = {
            "messages": messages,
            "letters": self.letters,  # useful during evaluation
        }
        return conversation

    def evaluate(self, conversation, assistant_response):
        # Check if the assistant's response matches the expected answer
        assert assistant_response in conversation['letters'], \
            f"JCommonsenseQA answer {assistant_response} must be one of {conversation['letters']}"
        expected_answer = conversation['messages'][-1]['content']  # e.g., "A"
        return assistant_response == expected_answer
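
# Minimal usage sketch (illustrative; not part of the original module).
# It instantiates the task, renders one example, and scores a response.
# Assumes Task.__init__ needs no extra required arguments here and that
# load_dataset can reach the Hugging Face Hub to download the dataset.
if __name__ == "__main__":
    task = JCommonsenseQA(split="validation")
    print(f"{task.num_examples()} examples, eval_type={task.eval_type}")
    conversation = task.get_example(0)
    print(conversation["messages"][0]["content"])  # rendered 5-choice prompt
    gold = conversation["messages"][-1]["content"]  # e.g., "A"
    print(task.evaluate(conversation, gold))  # True: gold letter matches itself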