Merge 7950813a41 into bc1fca39f3

mqa -> gqa to reduce confusion
make changes more minimal
2025-12-06 04:12:13 +00:00 · 2025-11-15 22:31:03 +04:00 · 2025-11-15 15:43:37 +00:00 · 2025-11-14 09:49:34 +01:00 · 2025-10-17 02:56:41 +00:00
2 changed files with 13 additions and 10 deletions
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@ -8,7 +8,7 @@ Notable features:
 - norm after token embedding
 - no learnable params in rmsnorm
 - no bias in linear layers
- Multi-Query Attention (MQA) support for more efficient inference
+- Group-Query Attention (GQA) support for more efficient inference
 """

 import math
@ -29,7 +29,7 @@ class GPTConfig:
    vocab_size: int = 50304
    n_layer: int = 12
    n_head: int = 6 # number of query heads
-    n_kv_head: int = 6 # number of key/value heads (MQA)
+    n_kv_head: int = 6 # number of key/value heads (GQA)
    n_embd: int = 768


--- a/scripts/chat_web.py
+++ b/scripts/chat_web.py
@ -38,7 +38,7 @@ import asyncio
 import logging
 import random
 from contextlib import asynccontextmanager
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse
 from pydantic import BaseModel
@ -72,6 +72,7 @@ parser.add_argument('-p', '--port', type=int, default=8000, help='Port to run th
 parser.add_argument('-d', '--dtype', type=str, default='bfloat16', choices=['float32', 'bfloat16'])
 parser.add_argument('--device-type', type=str, default='', choices=['cuda', 'cpu', 'mps'], help='Device type for evaluation: cuda|cpu|mps. empty => autodetect')
 parser.add_argument('--host', type=str, default='0.0.0.0', help='Host to bind the server to')
+parser.add_argument('--root-path', type=str, default='', help='ASGI root path for proxy/gateway configurations')
 args = parser.parse_args()

 # Configure logging for conversation traffic
@ -240,19 +241,21 @@ app.add_middleware(
 )

@app.get("/")
-async def root():
-    """Serve the chat UI."""
+async def root(request: Request):
+    """Serve the chat UI, dynamically injecting the proxy path."""
    ui_html_path = os.path.join("nanochat", "ui.html")
    with open(ui_html_path, "r", encoding="utf-8") as f:
        html_content = f.read()
-    # Replace the API_URL to use the same origin
+
+    # Get the prefix provided by the proxy/ASGI server.
+    proxy_prefix = request.scope.get('root_path', '').rstrip('/')
+
    html_content = html_content.replace(
-        "const API_URL = `http://${window.location.hostname}:8000`;",
-        "const API_URL = '';"
+        "const API_URL = '';",
+        f"const API_URL = '{proxy_prefix}';"
    )
    return HTMLResponse(content=html_content)

-
@app.get("/logo.svg")
 async def logo():
    """Serve the NanoChat logo for favicon and header."""
@ -412,4 +415,4 @@ if __name__ == "__main__":
    import uvicorn
    print(f"Starting NanoChat Web Server")
    print(f"Temperature: {args.temperature}, Top-k: {args.top_k}, Max tokens: {args.max_tokens}")
-    uvicorn.run(app, host=args.host, port=args.port)
+    uvicorn.run(app, host=args.host, port=args.port, root_path=args.root_path)
Author	SHA1	Message	Date
Pengyu Wang	0de282ac4a	Merge `7950813a41` into `bc1fca39f3`	2025-11-15 22:31:03 +04:00
Andrej Karpathy	bc1fca39f3	mqa -> gqa to reduce confusion	2025-11-15 15:43:37 +00:00
svlandeg	7950813a41	make changes more minimal	2025-11-14 09:49:34 +01:00
wang pengyu	7afd2fd206	add root_path for chat web	2025-10-17 02:56:41 +00:00