fix(eval): use UTF-8 when reading CORE JSONL and writing CSV

This commit is contained in:
Andrej 2025-11-03 06:38:33 -08:00 committed by GitHub
commit a83646e098
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -88,7 +88,7 @@ def evaluate_model(model, tokenizer, device, max_per_task=-1):
 # Load data for this task
 data_path = os.path.join(data_base_path, task_meta['dataset_uri'])
-with open(data_path, 'r') as f:
+with open(data_path, 'r', encoding='utf-8') as f:
     data = [json.loads(line.strip()) for line in f]
 # shuffle the data because in many cases it appears ordered but we want
@@ -184,7 +184,7 @@ def main():
 results = out["results"]
 centered_results = out["centered_results"]
 core_metric = out["core_metric"]
-with open(output_csv_path, 'w') as f:
+with open(output_csv_path, 'w', encoding='utf-8', newline='') as f:
     f.write(f"{'Task':<35}, {'Accuracy':<10}, {'Centered':<10}\n")
     for label in results:
         f.write(f"{label:<35}, {results[label]:<10.6f}, {centered_results[label]:<10.6f}\n")