From fedb4c1285ca6e4fe04b3d50cc2a5c864e7e8ae5 Mon Sep 17 00:00:00 2001 From: Tsvika Shapira Date: Thu, 25 Dec 2025 19:12:57 +0200 Subject: [PATCH] refactor: simplify file deletion using Path.unlink(missing_ok=True) --- dev/gen_synthetic_data.py | 3 +-- nanochat/dataset.py | 9 ++++----- nanochat/report.py | 6 ++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/dev/gen_synthetic_data.py b/dev/gen_synthetic_data.py index 2e9117c..5dece29 100644 --- a/dev/gen_synthetic_data.py +++ b/dev/gen_synthetic_data.py @@ -348,8 +348,7 @@ num_workers = 4 output_file = get_base_dir() / "identity_conversations.jsonl" # Wipe the file clean first to reset it -if output_file.exists(): - output_file.unlink() +output_file.unlink(missing_ok=True) print(f"Saving to {output_file}") # Use ThreadPoolExecutor to generate conversations in parallel diff --git a/nanochat/dataset.py b/nanochat/dataset.py index ef08d96..8bc5ce5 100644 --- a/nanochat/dataset.py +++ b/nanochat/dataset.py @@ -92,11 +92,10 @@ def download_single_file(index): print(f"Attempt {attempt}/{max_attempts} failed for {filename}: {e}") # Clean up any partial files for path in [Path(str(filepath) + ".tmp"), filepath]: - if path.exists(): - try: - path.unlink() - except: - pass + try: + path.unlink(missing_ok=True) + except: + pass # Try a few times with exponential backoff: 2^attempt seconds if attempt < max_attempts: wait_time = 2 ** attempt diff --git a/nanochat/report.py b/nanochat/report.py index 32dc028..ef19704 100644 --- a/nanochat/report.py +++ b/nanochat/report.py @@ -363,12 +363,10 @@ class Report: # Remove section files for file_name in EXPECTED_FILES: file_path = self.report_dir / file_name - if file_path.exists(): - file_path.unlink() + file_path.unlink(missing_ok=True) # Remove report.md if it exists report_file = self.report_dir / "report.md" - if report_file.exists(): - report_file.unlink() + report_file.unlink(missing_ok=True) # Generate and write the header section with start timestamp header_file = self.report_dir / "header.md" header = generate_header()