mirror of
https://github.com/karpathy/nanochat.git
synced 2026-02-17 09:00:22 +00:00
refactor: refactor path operations
This commit is contained in:
parent
70eb760b1f
commit
6d6651e2df
|
|
@@ -29,14 +29,9 @@ DATA_DIR.mkdir(parents=True, exist_ok=True)
|
|||
# -----------------------------------------------------------------------------
|
||||
# These functions are useful utilities to other modules, can/should be imported
|
||||
|
||||
def list_parquet_files(data_dir = None):
|
||||
def list_parquet_files(data_dir = DATA_DIR):
|
||||
""" Looks into a data dir and returns full paths to all parquet files. """
|
||||
data_dir = DATA_DIR if data_dir is None else data_dir
|
||||
parquet_files = sorted([
|
||||
f.name for f in data_dir.iterdir()
|
||||
if f.name.endswith('.parquet') and not f.name.endswith('.tmp')
|
||||
])
|
||||
parquet_paths = [data_dir / f for f in parquet_files]
|
||||
parquet_paths = sorted(data_dir.glob('*.parquet'))
|
||||
return parquet_paths
|
||||
|
||||
def parquets_iter_batched(split, start=0, step=1):
|
||||
|
|
|
|||
|
|
@@ -42,7 +42,7 @@ def place_eval_bundle(file_path):
|
|||
with zipfile.ZipFile(file_path, 'r') as zip_ref:
|
||||
zip_ref.extractall(tmpdir)
|
||||
extracted_bundle_dir = Path(tmpdir) / "eval_bundle"
|
||||
shutil.move(str(extracted_bundle_dir), str(eval_bundle_dir))
|
||||
shutil.move(extracted_bundle_dir, eval_bundle_dir)
|
||||
print0(f"Placed eval_bundle directory at {eval_bundle_dir}")
|
||||
|
||||
def evaluate_model(model, tokenizer, device, max_per_task=-1):
|
||||
|
|
|
|||
|
|
@@ -14,9 +14,9 @@ class CustomJSON(Task):
|
|||
Example line: [{"role":"user","content":"Hi"},{"role":"assistant","content":"Hello"}]
|
||||
"""
|
||||
|
||||
def __init__(self, filepath, **kwargs):
|
||||
def __init__(self, filepath: Path, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.filepath = Path(filepath)
|
||||
self.filepath = filepath
|
||||
self.conversations = []
|
||||
|
||||
# Load all conversations from the JSONL file
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user