From 03fa673b7d739238135fcbe39a4c625ed033962e Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Wed, 15 Oct 2025 19:51:06 +0000
Subject: [PATCH] add basic logging to chat_web, which i think might be fun

---
 scripts/chat_web.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/scripts/chat_web.py b/scripts/chat_web.py
index f8e807c..cae577d 100644
--- a/scripts/chat_web.py
+++ b/scripts/chat_web.py
@@ -35,6 +35,7 @@ import json
 import os
 import torch
 import asyncio
+import logging
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
@@ -70,6 +71,14 @@ parser.add_argument('-p', '--port', type=int, default=8000, help='Port to run th
 parser.add_argument('--host', type=str, default='0.0.0.0', help='Host to bind the server to')
 args = parser.parse_args()
 
+# Configure logging for conversation traffic
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+logger = logging.getLogger(__name__)
+
 ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init()
 
 @dataclass
@@ -273,6 +282,12 @@ async def chat_completions(request: ChatRequest):
     # Basic validation to prevent abuse
     validate_chat_request(request)
 
+    # Log incoming conversation to console
+    logger.info("="*20)
+    for i, message in enumerate(request.messages):
+        logger.info(f"[{message.role.upper()}]: {message.content}")
+    logger.info("-"*20)
+
     # Acquire a worker from the pool (will wait if all are busy)
     worker_pool = app.state.worker_pool
     worker = await worker_pool.acquire_worker()
@@ -299,6 +314,7 @@
     conversation_tokens.append(assistant_start)
 
     # Streaming response with worker release after completion
+    response_tokens = []
     async def stream_and_release():
         try:
             async for chunk in generate_stream(
@@ -308,8 +324,16 @@
                 max_new_tokens=request.max_tokens,
                 top_k=request.top_k
             ):
+                # Accumulate response for logging
+                chunk_data = json.loads(chunk.replace("data: ", "").strip())
+                if "token" in chunk_data:
+                    response_tokens.append(chunk_data["token"])
                 yield chunk
         finally:
            # Log the assistant response to console
            full_response = "".join(response_tokens)
            logger.info(f"[ASSISTANT] (GPU {worker.gpu_id}): {full_response}")
            logger.info("="*20)
             # Release worker back to pool after streaming is done
             await worker_pool.release_worker(worker)
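
For reference, a sketch of the console output this patch produces for a single-turn request, given the '%(asctime)s - %(message)s' format configured above. The timestamp, GPU id, and message text are illustrative, not captured output:

    2025-10-15 19:51:06 - ====================
    2025-10-15 19:51:06 - [USER]: Why is the sky blue?
    2025-10-15 19:51:06 - --------------------
    2025-10-15 19:51:09 - [ASSISTANT] (GPU 0): Shorter (blue) wavelengths scatter more strongly in the atmosphere.
    2025-10-15 19:51:09 - ====================

Because the client resends the whole conversation with each request, the loop over request.messages re-logs the full history (all prior [USER] and [ASSISTANT] turns) between the "=" and "-" separators on every call; only the streamed reply after the "-" line is new.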