From d6829284c4912d5186469c4a624127c24b231d34 Mon Sep 17 00:00:00 2001 From: askerlee Date: Tue, 13 Jan 2026 22:20:22 +0800 Subject: [PATCH] Allow local install and model loading --- .gitignore | 1 + pyproject.toml | 3 +++ scripts/base_eval.py | 7 +++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index d82809a1..740d38b6 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ eval_bundle/ .claude CLAUDE.md wandb/ +*.egg-info/ diff --git a/pyproject.toml b/pyproject.toml index 87a967f7..3f0c1e82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,3 +71,6 @@ conflicts = [ { extra = "gpu" }, ], ] + +[tool.setuptools] +packages = ["nanochat"] diff --git a/scripts/base_eval.py b/scripts/base_eval.py index bd83ff36..672faec0 100644 --- a/scripts/base_eval.py +++ b/scripts/base_eval.py @@ -134,13 +134,16 @@ def load_hf_model(hf_path: str, device): print0(f"Loading model from: {hf_path}") # Load the model from transformers import AutoModelForCausalLM - model = AutoModelForCausalLM.from_pretrained(hf_path) + model = AutoModelForCausalLM.from_pretrained(hf_path, trust_remote_code=True) model.to(device) model.eval() max_seq_len = 1024 if "openai-community/gpt2" in hf_path else None model = ModelWrapper(model, max_seq_len=max_seq_len) # Load the tokenizer - tokenizer = HuggingFaceTokenizer.from_pretrained(hf_path) + if os.path.exists(hf_path): + tokenizer = HuggingFaceTokenizer.from_directory(hf_path) + else: + tokenizer = HuggingFaceTokenizer.from_pretrained(hf_path) return model, tokenizer # -----------------------------------------------------------------------------