From e22fc6f2fac0c3d5f3ecd3ba6b09f7d694014b64 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 3 Nov 2025 21:46:39 +0100 Subject: [PATCH] few more explicit UTF-8 encodings --- dev/gen_synthetic_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/gen_synthetic_data.py b/dev/gen_synthetic_data.py index 13e5f55..73f4ac9 100644 --- a/dev/gen_synthetic_data.py +++ b/dev/gen_synthetic_data.py @@ -37,7 +37,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from nanochat.common import get_base_dir -api_key = open("openroutertoken.txt").read().strip() +api_key = open("openroutertoken.txt", 'r', encoding='utf-8').read().strip() url = "https://openrouter.ai/api/v1/chat/completions" headers = { @@ -45,7 +45,7 @@ headers = { "Content-Type": "application/json" } -readme = open("README.md").read().strip() +readme = open("README.md", 'r', encoding='utf-8').read().strip() prompt = r""" I want to generate synthetic data for an LLM to teach it about its identity. Here is the identity I want: