Compare commits

...

3 Commits

Author SHA1 Message Date
Sermet Pekin
cbeed243b1
Merge 33ddc13ed4 into bc1fca39f3 2025-11-16 01:48:27 +01:00
Andrej Karpathy
bc1fca39f3 mqa -> gqa to reduce confusion 2025-11-15 15:43:37 +00:00
Sermet Pekin
33ddc13ed4
Improve configurator: add testable parse_args() and ConfigManager class
Refactored configurator for better architecture and control over global variable injection.
2025-10-23 09:56:52 +03:00
2 changed files with 66 additions and 40 deletions

View File

@@ -1,56 +1,82 @@
"""
Poor Man's Configurator. Probably a terrible idea. Example usage:
Poor Man's Configurator v3. Clean refactored version. Example usage:
$ python train.py config/override_file.py --batch_size=32
this will first run config/override_file.py, then override batch_size to 32
The code in this file will be run as follows from e.g. train.py:
>>> exec(open('configurator.py').read())
So it's not a Python module, it's just shuttling this code away from train.py
The code in this script then overrides the globals()
I know people are not going to love this, I just really dislike configuration
complexity and having to prepend config. to every single variable. If someone
comes up with a better simple Python solution I am all ears.
Improved version with better separation of concerns and cleaner architecture
while maintaining the same simple usage pattern.
"""
import os
import sys
from ast import literal_eval
def print0(s="", **kwargs):
ddp_rank = int(os.environ.get('RANK', 0))
if ddp_rank == 0:
"""Print only from rank 0 in distributed settings"""
if int(os.environ.get("RANK", 0)) == 0:
print(s, **kwargs)
for arg in sys.argv[1:]:
if '=' not in arg:
# assume it's the name of a config file
assert not arg.startswith('--')
class ConfigManager:
"""Clean configurator with explicit global injection control"""
def __init__(self):
self.config = {}
def load(self, config_file=None, **overrides):
"""Load configuration from file and apply overrides"""
if config_file:
with open(config_file) as f:
exec(f.read(), {}, self.config)
self.config.update(overrides)
return self
def inject_globals(self):
"""Inject config vars into global namespace"""
for k, v in self.config.items():
if not k.startswith("_"):
globals()[k] = v
def parse_args(args=None):
"""Parse command line arguments like original version"""
args = args or sys.argv
config_file, overrides = None, {}
for arg in args[1:]:
if "=" not in arg:
# Config file
assert not arg.startswith("--"), f"Invalid config file: {arg}"
config_file = arg
print0(f"Overriding config with {config_file}:")
with open(config_file) as f:
print0(f.read())
exec(open(config_file).read())
else:
# assume it's a --key=value argument
assert arg.startswith('--')
key, val = arg.split('=')
# Key=value override
assert arg.startswith("--"), f"Override must start with --: {arg}"
key, val = arg.split("=", 1)
key = key[2:]
if key in globals():
if key not in globals():
raise ValueError(f"Unknown config key: {key}")
# Try to parse the value
try:
# attempt to eval it (e.g. if bool, number, etc.)
attempt = literal_eval(val)
except (SyntaxError, ValueError):
# if that goes wrong, just use the string
attempt = val
# ensure the types match ok
# Type check if global has a non-None value
if globals()[key] is not None:
attempt_type = type(attempt)
default_type = type(globals()[key])
default_type, attempt_type = type(globals()[key]), type(attempt)
assert attempt_type == default_type, f"Type mismatch: {attempt_type} != {default_type}"
# cross fingers
print0(f"Overriding: {key} = {attempt}")
globals()[key] = attempt
else:
raise ValueError(f"Unknown config key: {key}")
overrides[key] = attempt
return config_file, overrides
# Execute configuration
config_file, overrides = parse_args()
ConfigManager().load(config_file, **overrides).inject_globals()

View File

@@ -8,7 +8,7 @@ Notable features:
- norm after token embedding
- no learnable params in rmsnorm
- no bias in linear layers
- Multi-Query Attention (MQA) support for more efficient inference
- Group-Query Attention (GQA) support for more efficient inference
"""
import math
@@ -29,7 +29,7 @@ class GPTConfig:
vocab_size: int = 50304
n_layer: int = 12
n_head: int = 6 # number of query heads
n_kv_head: int = 6 # number of key/value heads (MQA)
n_kv_head: int = 6 # number of key/value heads (GQA)
n_embd: int = 768