fix(eval): use UTF-8 when reading CORE JSONL and writing CSV

2025-12-06 04:12:13 +00:00 · 2025-11-03 06:38:33 -08:00 · 2025-11-03 06:38:33 -08:00 · a83646e098
commit a83646e098
parent 8681922328 226953b841
1 changed file with 2 additions and 2 deletions
--- a/scripts/base_eval.py
+++ b/scripts/base_eval.py
@@ -88,7 +88,7 @@ def evaluate_model(model, tokenizer, device, max_per_task=-1):
        # Load data for this task
        data_path = os.path.join(data_base_path, task_meta['dataset_uri'])
-        with open(data_path, 'r') as f:
+        with open(data_path, 'r', encoding='utf-8') as f:
            data = [json.loads(line.strip()) for line in f]
        # shuffle the data because in many cases it appears ordered but we want
@@ -184,7 +184,7 @@ def main():
        results = out["results"]
        centered_results = out["centered_results"]
        core_metric = out["core_metric"]
-        with open(output_csv_path, 'w') as f:
+        with open(output_csv_path, 'w', encoding='utf-8', newline='') as f:
            f.write(f"{'Task':<35}, {'Accuracy':<10}, {'Centered':<10}\n")
            for label in results:
                f.write(f"{label:<35}, {results[label]:<10.6f}, {centered_results[label]:<10.6f}\n")