mirror of
https://github.com/karpathy/nanochat.git
synced 2026-01-20 18:34:14 +00:00
replace files-to-prompt with git ls-files for bloat metrics
files-to-prompt was including untracked files (knowledge/, dev scripts, etc.), which inflated the bloat metrics. Now we use git ls-files to count only tracked source files, which is more accurate and removes an external dependency. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
da8b7ea4cb
commit
ee79f29fbd
|
|
@ -160,12 +160,26 @@ Generated: {timestamp}
|
|||
|
||||
"""
|
||||
|
||||
# bloat metrics: package all of the source code and assess its weight
|
||||
packaged = run_command('files-to-prompt . -e py -e md -e rs -e html -e toml -e sh --ignore "*target*" --cxml')
|
||||
num_chars = len(packaged)
|
||||
num_lines = len(packaged.split('\n'))
|
||||
num_files = len([x for x in packaged.split('\n') if x.startswith('<source>')])
|
||||
num_tokens = num_chars // 4 # assume approximately 4 chars per token
|
||||
# bloat metrics: count lines/chars in git-tracked source files only
|
||||
extensions = ['py', 'md', 'rs', 'html', 'toml', 'sh']
|
||||
git_patterns = ' '.join(f"'*.{ext}'" for ext in extensions)
|
||||
files_output = run_command(f"git ls-files -- {git_patterns}")
|
||||
file_list = [f for f in (files_output or '').split('\n') if f]
|
||||
num_files = len(file_list)
|
||||
num_lines = 0
|
||||
num_chars = 0
|
||||
if num_files > 0:
|
||||
wc_output = run_command(f"git ls-files -- {git_patterns} | xargs wc -lc 2>/dev/null")
|
||||
if wc_output:
|
||||
total_line = wc_output.strip().split('\n')[-1]
|
||||
parts = total_line.split()
|
||||
if 'total' in parts:
|
||||
num_lines = int(parts[0])
|
||||
num_chars = int(parts[1])
|
||||
elif len(parts) >= 2:
|
||||
num_lines = int(parts[0])
|
||||
num_chars = int(parts[1])
|
||||
num_tokens = num_chars // 4 # assume approximately 4 chars per token
|
||||
|
||||
# count dependencies via uv.lock
|
||||
uv_lock_lines = 0
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ requires-python = ">=3.10"
|
|||
dependencies = [
|
||||
"datasets>=4.0.0",
|
||||
"fastapi>=0.117.1",
|
||||
"files-to-prompt>=0.6",
|
||||
"psutil>=7.1.0",
|
||||
"regex>=2025.9.1",
|
||||
"rustbpe>=0.1.0",
|
||||
|
|
|
|||
14
uv.lock
14
uv.lock
|
|
@ -341,18 +341,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload-time = "2025-08-14T16:56:01.633Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "files-to-prompt"
|
||||
version = "0.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "click" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b9/4f/81fc86a88dc9e0cf6ea1ac2c561c0ac48b46d314cbbc2db5c8844b4b448b/files_to_prompt-0.6.tar.gz", hash = "sha256:9af57eecbdb29d3cce034c186493ffc6c1205ea4f5abde6fb32ccb1d96eae40c", size = 12236, upload-time = "2025-02-19T05:58:28.2Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a0/99/0efff50ce810119d99eaa2fc0c7bbf66e4197e2defb89242f6e848004902/files_to_prompt-0.6-py3-none-any.whl", hash = "sha256:83d9a8b33246a10233218716a5c78034da4f5614748eda2f0ab94f1117801337", size = 10873, upload-time = "2025-02-19T05:58:26.728Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "frozenlist"
|
||||
version = "1.7.0"
|
||||
|
|
@ -752,7 +740,6 @@ source = { virtual = "." }
|
|||
dependencies = [
|
||||
{ name = "datasets" },
|
||||
{ name = "fastapi" },
|
||||
{ name = "files-to-prompt" },
|
||||
{ name = "psutil" },
|
||||
{ name = "regex" },
|
||||
{ name = "rustbpe" },
|
||||
|
|
@ -785,7 +772,6 @@ dev = [
|
|||
requires-dist = [
|
||||
{ name = "datasets", specifier = ">=4.0.0" },
|
||||
{ name = "fastapi", specifier = ">=0.117.1" },
|
||||
{ name = "files-to-prompt", specifier = ">=0.6" },
|
||||
{ name = "psutil", specifier = ">=7.1.0" },
|
||||
{ name = "regex", specifier = ">=2025.9.1" },
|
||||
{ name = "rustbpe", specifier = ">=0.1.0" },
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user