From 226953b841f322bf88cb0f2af460a897f00393a2 Mon Sep 17 00:00:00 2001
From: Dipesh Babu
Date: Mon, 3 Nov 2025 01:20:56 -0500
Subject: [PATCH] fix: open JSONL and results CSV with UTF-8 encoding for portability

---
 scripts/base_eval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/base_eval.py b/scripts/base_eval.py
index 21f7bac..a987049 100644
--- a/scripts/base_eval.py
+++ b/scripts/base_eval.py
@@ -88,7 +88,7 @@ def evaluate_model(model, tokenizer, device, max_per_task=-1):
 
         # Load data for this task
         data_path = os.path.join(data_base_path, task_meta['dataset_uri'])
-        with open(data_path, 'r') as f:
+        with open(data_path, 'r', encoding='utf-8') as f:
             data = [json.loads(line.strip()) for line in f]
 
         # shuffle the data because in many cases it appears ordered but we want
@@ -184,7 +184,7 @@ def main():
         results = out["results"]
         centered_results = out["centered_results"]
         core_metric = out["core_metric"]
-        with open(output_csv_path, 'w') as f:
+        with open(output_csv_path, 'w', encoding='utf-8', newline='') as f:
             f.write(f"{'Task':<35}, {'Accuracy':<10}, {'Centered':<10}\n")
             for label in results:
                 f.write(f"{label:<35}, {results[label]:<10.6f}, {centered_results[label]:<10.6f}\n")