diff --git a/nanochat/common.py b/nanochat/common.py index 813fa9b..61bef5a 100644 --- a/nanochat/common.py +++ b/nanochat/common.py @@ -66,7 +66,7 @@ def download_file_with_lock(url, filename, postprocess_fn=None): """ base_dir = get_base_dir() file_path = base_dir / filename - lock_path = Path(str(file_path) + ".lock") + lock_path = file_path.with_name(f"{file_path.name}.lock") if file_path.exists(): return file_path diff --git a/nanochat/dataset.py b/nanochat/dataset.py index 8bc5ce5..575d30f 100644 --- a/nanochat/dataset.py +++ b/nanochat/dataset.py @@ -9,7 +9,6 @@ For details of how the dataset was prepared, see `repackage_data_reference.py`. import argparse import time -from pathlib import Path import requests import pyarrow.parquet as pq from multiprocessing import Pool @@ -78,7 +77,7 @@ def download_single_file(index): response = requests.get(url, stream=True, timeout=30) response.raise_for_status() # Write to temporary file first - temp_path = Path(str(filepath) + ".tmp") + temp_path = filepath.with_name(f"{filepath.name}.tmp") with temp_path.open('wb') as f: for chunk in response.iter_content(chunk_size=1024 * 1024): # 1MB chunks if chunk: @@ -91,7 +90,7 @@ def download_single_file(index): except (requests.RequestException, IOError) as e: print(f"Attempt {attempt}/{max_attempts} failed for {filename}: {e}") # Clean up any partial files - for path in [Path(str(filepath) + ".tmp"), filepath]: + for path in [filepath.with_name(f"{filepath.name}.tmp"), filepath]: try: path.unlink(missing_ok=True) except: