Add pre-commit hooks for code formatting (not yet executed)

Adds pre-commit configuration to automate code formatting and linting.

This includes:
- ruff-check for linting
- ruff-format for code formatting
- pre-commit-hooks for various checks (whitespace, large files, etc.)
- codespell for fixing common misspellings in text

Special configurations added to `pyproject.toml`:
- Line length: 120 (more flexible than black's 88)
- Quote style: preserve (keeps existing quotes)
Rename ruff hook (avoid using legacy alias)
This commit is contained in:
Eyal Frishman 2025-12-05 18:26:55 +02:00
parent 4a87a0d19f
commit 6587063479
3 changed files with 148 additions and 26 deletions

27
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,27 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: trailing-whitespace
- id: check-added-large-files
args: [--maxkb=128]
- id: fix-byte-order-marker
- id: check-case-conflict
- id: check-merge-conflict
- id: check-yaml
- id: end-of-file-fixer
- id: mixed-line-ending
args: [--fix=lf]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.8
hooks:
- id: ruff-check
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1 # Use the latest stable version
hooks:
- id: codespell
additional_dependencies:
- tomli

View File

@ -32,6 +32,7 @@ manifest-path = "rustbpe/Cargo.toml"
dev = [
"maturin>=1.9.4",
"pytest>=8.0.0",
"pre-commit>=3.8.0",
]
[tool.pytest.ini_options]
@ -75,3 +76,28 @@ conflicts = [
{ extra = "gpu" },
],
]
[tool.ruff]
target-version = "py310"
line-length = 120
fix = true
unsafe-fixes = true
[tool.ruff.lint]
select = [
"F", # Pyflakes (unused imports) - replaces autoflake
"I", # isort - replaces isort
"UP", # pyupgrade - replaces pyupgrade
]
[tool.ruff.lint.isort]
known-first-party = ["nanochat"]
[tool.ruff.format]
quote-style = "preserve"
[tool.codespell]
write-changes = true
interactive = 1
skip = "tests/*,dev/*,scripts/tok_eval.py,tasks/spellingbee.py"
ignore-words-list = "re-use,astroid"

69
uv.lock
View File

@ -188,6 +188,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
]
[[package]]
name = "cfgv"
version = "3.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.3"
@ -306,6 +315,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252, upload-time = "2024-01-27T23:42:14.239Z" },
]
[[package]]
name = "distlib"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
]
[[package]]
name = "exceptiongroup"
version = "1.3.0"
@ -528,6 +546,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/39/7b/bb06b061991107cd8783f300adff3e7b7f284e330fd82f507f2a1417b11d/huggingface_hub-0.34.4-py3-none-any.whl", hash = "sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a", size = 561452, upload-time = "2025-08-08T09:14:50.159Z" },
]
[[package]]
name = "identify"
version = "2.6.15"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ff/e7/685de97986c916a6d93b3876139e00eef26ad5bbbd61925d670ae8013449/identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf", size = 99311, upload-time = "2025-10-02T17:43:40.631Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0f/1c/e5fd8f973d4f375adb21565739498e2e9a1e54c858a97b9a8ccfdc81da9b/identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", size = 99183, upload-time = "2025-10-02T17:43:39.137Z" },
]
[[package]]
name = "idna"
version = "3.10"
@ -802,6 +829,7 @@ gpu = [
[package.dev-dependencies]
dev = [
{ name = "maturin" },
{ name = "pre-commit" },
{ name = "pytest" },
]
@ -826,6 +854,7 @@ provides-extras = ["cpu", "gpu"]
[package.metadata.requires-dev]
dev = [
{ name = "maturin", specifier = ">=1.9.4" },
{ name = "pre-commit", specifier = ">=3.8.0" },
{ name = "pytest", specifier = ">=8.0.0" },
]
@ -872,6 +901,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" },
]
[[package]]
name = "nodeenv"
version = "1.9.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" },
]
[[package]]
name = "numpy"
version = "1.26.4"
@ -1131,6 +1169,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "pre-commit"
version = "4.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cfgv" },
{ name = "identify" },
{ name = "nodeenv" },
{ name = "pyyaml" },
{ name = "virtualenv" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f4/9b/6a4ffb4ed980519da959e1cf3122fc6cb41211daa58dbae1c73c0e519a37/pre_commit-4.5.0.tar.gz", hash = "sha256:dc5a065e932b19fc1d4c653c6939068fe54325af8e741e74e88db4d28a4dd66b", size = 198428, upload-time = "2025-11-22T21:02:42.304Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5d/c4/b2d28e9d2edf4f1713eb3c29307f1a63f3d67cf09bdda29715a36a68921a/pre_commit-4.5.0-py2.py3-none-any.whl", hash = "sha256:25e2ce09595174d9c97860a95609f9f852c0614ba602de3561e267547f2335e1", size = 226429, upload-time = "2025-11-22T21:02:40.836Z" },
]
[[package]]
name = "propcache"
version = "0.3.2"
@ -2016,6 +2070,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/96/06/5cc0542b47c0338c1cb676b348e24a1c29acabc81000bced518231dded6f/uvicorn-0.36.0-py3-none-any.whl", hash = "sha256:6bb4ba67f16024883af8adf13aba3a9919e415358604ce46780d3f9bdc36d731", size = 67675, upload-time = "2025-09-20T01:07:12.984Z" },
]
[[package]]
name = "virtualenv"
version = "20.35.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "distlib" },
{ name = "filelock" },
{ name = "platformdirs" },
{ name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/20/28/e6f1a6f655d620846bd9df527390ecc26b3805a0c5989048c210e22c5ca9/virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c", size = 6028799, upload-time = "2025-10-29T06:57:40.511Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/79/0c/c05523fa3181fdf0c9c52a6ba91a23fbf3246cc095f26f6516f9c60e6771/virtualenv-20.35.4-py3-none-any.whl", hash = "sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b", size = 6005095, upload-time = "2025-10-29T06:57:37.598Z" },
]
[[package]]
name = "wandb"
version = "0.21.3"