From 226953b841f322bf88cb0f2af460a897f00393a2 Mon Sep 17 00:00:00 2001
From: Dipesh Babu <dipeshmahato@outlook.com>
Date: Mon, 3 Nov 2025 01:20:56 -0500
Subject: [PATCH] fix: open JSONL and results CSV with UTF-8 encoding for
 portability

---
 scripts/base_eval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/base_eval.py b/scripts/base_eval.py
index 21f7bac..a987049 100644
--- a/scripts/base_eval.py
+++ b/scripts/base_eval.py
@@ -88,7 +88,7 @@ def evaluate_model(model, tokenizer, device, max_per_task=-1):
 
         # Load data for this task
         data_path = os.path.join(data_base_path, task_meta['dataset_uri'])
-        with open(data_path, 'r') as f:
+        with open(data_path, 'r', encoding='utf-8') as f:
             data = [json.loads(line.strip()) for line in f]
 
         # shuffle the data because in many cases it appears ordered but we want
@@ -184,7 +184,7 @@ def main():
         results = out["results"]
         centered_results = out["centered_results"]
         core_metric = out["core_metric"]
-        with open(output_csv_path, 'w') as f:
+        with open(output_csv_path, 'w', encoding='utf-8', newline='') as f:
             f.write(f"{'Task':<35}, {'Accuracy':<10}, {'Centered':<10}\n")
             for label in results:
                 f.write(f"{label:<35}, {results[label]:<10.6f}, {centered_results[label]:<10.6f}\n")