refactor: replace open-write-close patterns with pathlib methods

This commit is contained in:
Tsvika Shapira 2025-12-25 20:09:05 +02:00
parent 8f004e4b95
commit 70eb760b1f
3 changed files with 3 additions and 6 deletions

View File

@@ -85,8 +85,7 @@ def download_file_with_lock(url, filename, postprocess_fn=None):
content = response.read() # bytes
# Write to local file
-with file_path.open('wb') as f:
-f.write(content)
+file_path.write_bytes(content)
print(f"Downloaded to {file_path}")
# Run the postprocess function if provided

View File

@@ -392,6 +392,5 @@ def get_token_bytes(device="cpu"):
tokenizer_dir = base_dir / "tokenizer"
token_bytes_path = tokenizer_dir / "token_bytes.pt"
assert token_bytes_path.exists(), f"Token bytes not found at {token_bytes_path}? It gets written by tok_train.py"
-with token_bytes_path.open("rb") as f:
-token_bytes = torch.load(f, map_location=device)
+token_bytes = torch.load(token_bytes_path, map_location=device)
return token_bytes

View File

@@ -85,8 +85,7 @@ for token_id in range(vocab_size):
token_bytes.append(id_bytes)
token_bytes = torch.tensor(token_bytes, dtype=torch.int32, device='cpu')
token_bytes_path = tokenizer_dir / "token_bytes.pt"
-with token_bytes_path.open("wb") as f:
-torch.save(token_bytes, f)
+torch.save(token_bytes, token_bytes_path)
print(f"Saved token_bytes to {token_bytes_path}")
# Log to report