nanochat/tasks/jcommonsenseqa.py

"""
JCommonsenseQA from JGLUE benchmark.
https://huggingface.co/datasets/sbintuitions/JCommonsenseQA
A Japanese commonsense question answering dataset with 5 choices.
Used for evaluating Japanese language understanding.
"""
from datasets import load_dataset

from tasks.common import Task, render_mc
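# Task is the shared base class for eval tasks; render_mc (both imported from
# tasks.common) is used below to format a question and its lettered choices
# into a single user prompt.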


class JCommonsenseQA(Task):
"""
JCommonsenseQA: Japanese Commonsense Question Answering.
A 5-choice multiple choice task from JGLUE benchmark.
"""
def __init__(self, split="validation", **kwargs):
super().__init__(**kwargs)
assert split in ["train", "validation"], "JCommonsenseQA split must be train|validation"
self.ds = load_dataset("sbintuitions/JCommonsenseQA", split=split).shuffle(seed=42)
self.letters = ["A", "B", "C", "D", "E"]

    @property
    def eval_type(self):
        return 'categorical'

    def num_examples(self):
        return len(self.ds)

    def get_example(self, index):
        row = self.ds[index]
        question = row["question"]
        # Collect the choices from choice0 to choice4
        choices = [row[f"choice{i}"] for i in range(5)]
        label = row["label"]  # 0-4
        answer_letter = self.letters[label]
        # Create the user message in multiple-choice format
        user_message = render_mc(question, self.letters, choices)
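        # For intuition, the rendered user prompt is expected to look roughly
        # like the sketch below; the exact layout is defined by
        # tasks.common.render_mc, not here:
        #   <question>
        #   A. <choice0>
        #   ...
        #   E. <choice4>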
        messages = [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": answer_letter},
        ]
        conversation = {
            "messages": messages,
            "letters": self.letters,  # useful during evaluation
        }
        return conversation

    def evaluate(self, conversation, assistant_response):
        # Check if the assistant's response matches the expected answer
        assert assistant_response in conversation['letters'], \
            f"JCommonsenseQA answer {assistant_response} must be one of {conversation['letters']}"
        expected_answer = conversation['messages'][-1]['content']  # e.g., "A"
        return assistant_response == expected_answer
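

if __name__ == "__main__":
    # Minimal smoke test, as a sketch: assumes network access so load_dataset
    # can fetch sbintuitions/JCommonsenseQA from the Hugging Face Hub, and that
    # the Task base class is constructible with default arguments.
    task = JCommonsenseQA(split="validation")
    print(f"{task.num_examples()} validation examples")
    conversation = task.get_example(0)
    for message in conversation["messages"]:
        print(f"[{message['role']}] {message['content']}")
    # The gold letter is the final assistant message, so evaluating it against
    # itself should score as correct.
    gold_letter = conversation["messages"][-1]["content"]
    assert task.evaluate(conversation, gold_letter)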