From ee79f29fbd16a102b404d19384b21b3bbe074159 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Sun, 4 Jan 2026 01:38:15 +0000 Subject: [PATCH] replace files-to-prompt with git ls-files for bloat metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit files-to-prompt was including untracked files (knowledge/, dev scripts, etc.) which inflated the bloat metrics. now we use git ls-files to only count tracked source files, which is more accurate and removes an external dependency. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- nanochat/report.py | 26 ++++++++++++++++++++------ pyproject.toml | 1 - uv.lock | 14 -------------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/nanochat/report.py b/nanochat/report.py index 0b0ebd7..c0f8b2b 100644 --- a/nanochat/report.py +++ b/nanochat/report.py @@ -160,12 +160,26 @@ Generated: {timestamp} """ - # bloat metrics: package all of the source code and assess its weight - packaged = run_command('files-to-prompt . 
-e py -e md -e rs -e html -e toml -e sh --ignore "*target*" --cxml') - num_chars = len(packaged) - num_lines = len(packaged.split('\n')) - num_files = len([x for x in packaged.split('\n') if x.startswith('<source>')]) - num_tokens = num_chars // 4 # assume approximately 4 chars per token + # bloat metrics: count lines/chars in git-tracked source files only + extensions = ['py', 'md', 'rs', 'html', 'toml', 'sh'] + git_patterns = ' '.join(f"'*.{ext}'" for ext in extensions) + files_output = run_command(f"git ls-files -- {git_patterns}") + file_list = [f for f in (files_output or '').split('\n') if f] + num_files = len(file_list) + num_lines = 0 + num_chars = 0 + if num_files > 0: + wc_output = run_command(f"git ls-files -- {git_patterns} | xargs wc -lc 2>/dev/null") + if wc_output: + total_line = wc_output.strip().split('\n')[-1] + parts = total_line.split() + if 'total' in parts: + num_lines = int(parts[0]) + num_chars = int(parts[1]) + elif len(parts) >= 2: + num_lines = int(parts[0]) + num_chars = int(parts[1]) + num_tokens = num_chars // 4 # assume approximately 4 chars per token # count dependencies via uv.lock uv_lock_lines = 0 diff --git a/pyproject.toml b/pyproject.toml index d88516f..1762fa4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,6 @@ requires-python = ">=3.10" dependencies = [ "datasets>=4.0.0", "fastapi>=0.117.1", - "files-to-prompt>=0.6", "psutil>=7.1.0", "regex>=2025.9.1", "rustbpe>=0.1.0", diff --git a/uv.lock b/uv.lock index 275f8d2..da41d65 100644 --- a/uv.lock +++ b/uv.lock @@ -341,18 +341,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload-time = "2025-08-14T16:56:01.633Z" }, ] -[[package]] -name = "files-to-prompt" -version = "0.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, 
-] -sdist = { url = "https://files.pythonhosted.org/packages/b9/4f/81fc86a88dc9e0cf6ea1ac2c561c0ac48b46d314cbbc2db5c8844b4b448b/files_to_prompt-0.6.tar.gz", hash = "sha256:9af57eecbdb29d3cce034c186493ffc6c1205ea4f5abde6fb32ccb1d96eae40c", size = 12236, upload-time = "2025-02-19T05:58:28.2Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/99/0efff50ce810119d99eaa2fc0c7bbf66e4197e2defb89242f6e848004902/files_to_prompt-0.6-py3-none-any.whl", hash = "sha256:83d9a8b33246a10233218716a5c78034da4f5614748eda2f0ab94f1117801337", size = 10873, upload-time = "2025-02-19T05:58:26.728Z" }, -] - [[package]] name = "frozenlist" version = "1.7.0" @@ -752,7 +740,6 @@ source = { virtual = "." } dependencies = [ { name = "datasets" }, { name = "fastapi" }, - { name = "files-to-prompt" }, { name = "psutil" }, { name = "regex" }, { name = "rustbpe" }, @@ -785,7 +772,6 @@ dev = [ requires-dist = [ { name = "datasets", specifier = ">=4.0.0" }, { name = "fastapi", specifier = ">=0.117.1" }, - { name = "files-to-prompt", specifier = ">=0.6" }, { name = "psutil", specifier = ">=7.1.0" }, { name = "regex", specifier = ">=2025.9.1" }, { name = "rustbpe", specifier = ">=0.1.0" },