From 6d6651e2dfd04c3d5971a550d48ce0e6b10e525b Mon Sep 17 00:00:00 2001 From: Tsvika Shapira Date: Thu, 25 Dec 2025 20:09:05 +0200 Subject: [PATCH] refactor: refactor path operations --- nanochat/dataset.py | 9 ++------- scripts/base_eval.py | 2 +- tasks/customjson.py | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/nanochat/dataset.py b/nanochat/dataset.py index 575d30f..4c73aa5 100644 --- a/nanochat/dataset.py +++ b/nanochat/dataset.py @@ -29,14 +29,9 @@ DATA_DIR.mkdir(parents=True, exist_ok=True) # ----------------------------------------------------------------------------- # These functions are useful utilities to other modules, can/should be imported -def list_parquet_files(data_dir = None): +def list_parquet_files(data_dir = DATA_DIR): """ Looks into a data dir and returns full paths to all parquet files. """ - data_dir = DATA_DIR if data_dir is None else data_dir - parquet_files = sorted([ - f.name for f in data_dir.iterdir() - if f.name.endswith('.parquet') and not f.name.endswith('.tmp') - ]) - parquet_paths = [data_dir / f for f in parquet_files] + parquet_paths = sorted(data_dir.glob('*.parquet')) return parquet_paths def parquets_iter_batched(split, start=0, step=1): diff --git a/scripts/base_eval.py b/scripts/base_eval.py index c7da07c..2790808 100644 --- a/scripts/base_eval.py +++ b/scripts/base_eval.py @@ -42,7 +42,7 @@ def place_eval_bundle(file_path): with zipfile.ZipFile(file_path, 'r') as zip_ref: zip_ref.extractall(tmpdir) extracted_bundle_dir = Path(tmpdir) / "eval_bundle" - shutil.move(str(extracted_bundle_dir), str(eval_bundle_dir)) + shutil.move(extracted_bundle_dir, eval_bundle_dir) print0(f"Placed eval_bundle directory at {eval_bundle_dir}") def evaluate_model(model, tokenizer, device, max_per_task=-1): diff --git a/tasks/customjson.py b/tasks/customjson.py index 94266f0..9cfe445 100644 --- a/tasks/customjson.py +++ b/tasks/customjson.py @@ -14,9 +14,9 @@ class CustomJSON(Task): Example line: [{"role":"user","content":"Hi"},{"role":"assistant","content":"Hello"}] """ - def __init__(self, filepath, **kwargs): + def __init__(self, filepath: Path, **kwargs): super().__init__(**kwargs) - self.filepath = Path(filepath) + self.filepath = filepath self.conversations = [] # Load all conversations from the JSONL file