mirror of
https://github.com/karpathy/nanochat.git
synced 2026-04-30 04:50:22 +00:00
refactor: replace open-write-close patterns with pathlib methods
This commit is contained in:
parent
8f004e4b95
commit
70eb760b1f
|
|
@ -85,8 +85,7 @@ def download_file_with_lock(url, filename, postprocess_fn=None):
|
|||
content = response.read() # bytes
|
||||
|
||||
# Write to local file
|
||||
with file_path.open('wb') as f:
|
||||
f.write(content)
|
||||
file_path.write_bytes(content)
|
||||
print(f"Downloaded to {file_path}")
|
||||
|
||||
# Run the postprocess function if provided
|
||||
|
|
|
|||
|
|
@ -392,6 +392,5 @@ def get_token_bytes(device="cpu"):
|
|||
tokenizer_dir = base_dir / "tokenizer"
|
||||
token_bytes_path = tokenizer_dir / "token_bytes.pt"
|
||||
assert token_bytes_path.exists(), f"Token bytes not found at {token_bytes_path}? It gets written by tok_train.py"
|
||||
with token_bytes_path.open("rb") as f:
|
||||
token_bytes = torch.load(f, map_location=device)
|
||||
token_bytes = torch.load(token_bytes_path, map_location=device)
|
||||
return token_bytes
|
||||
|
|
|
|||
|
|
@ -85,8 +85,7 @@ for token_id in range(vocab_size):
|
|||
token_bytes.append(id_bytes)
|
||||
token_bytes = torch.tensor(token_bytes, dtype=torch.int32, device='cpu')
|
||||
token_bytes_path = tokenizer_dir / "token_bytes.pt"
|
||||
with token_bytes_path.open("wb") as f:
|
||||
torch.save(token_bytes, f)
|
||||
torch.save(token_bytes, token_bytes_path)
|
||||
print(f"Saved token_bytes to {token_bytes_path}")
|
||||
|
||||
# Log to report
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user