mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-07 16:30:11 +00:00
Replaces the helm/observability scaffold with a real monitoring stack
wired into the samosaChaat platform.
Helm chart (helm/observability/)
- Chart.yaml declares kube-prometheus-stack (~62.0) and loki-stack
(~2.10) as subchart dependencies.
- values.yaml configures Prometheus (15d retention, 50Gi PVC,
ServiceMonitor + rule selector on app.kubernetes.io/part-of:
samosachaat), Alertmanager (10Gi PVC), Grafana (OAuth-only via
GitHub + Google, local login disabled, Prometheus + Loki datasources,
dashboards auto-provisioned from a ConfigMap, email + Slack contact
points with a critical route to Slack), Loki (50Gi, 30d retention,
tsdb schema), and Promtail (JSON pipeline that lifts level / service
/ trace_id / user_id into labels, scrape config with pod labels).
- Alert rules: HighCPU, HighMemory, DiskSpaceLow, High5xxRate,
InferenceServiceDown, HighP99Latency.
- templates/grafana-dashboards-configmap.yaml renders every file under
dashboards/ into a single grafana_dashboard=1 ConfigMap.
- dashboards/node-health.json, app-performance.json, inference.json -
fully-formed Grafana dashboards with Prometheus datasource variable,
templated app selector, thresholded gauges, and LogQL-ready labels.
Scraping (helm/samosachaat/templates/servicemonitor.yaml)
- ServiceMonitor CRs for auth / chat-api / inference that Prometheus
picks up via the part-of=samosachaat selector; each one scrapes /metrics
every 15s and replaces the app label so dashboards line up.
Application instrumentation
- services/{auth,chat-api,inference} each depend on
prometheus-fastapi-instrumentator and expose /metrics (request count,
latency histograms, in-progress gauges).
- services/auth/src/logging_setup.py and
services/inference/src/logging_setup.py mirror the canonical
chat-api implementation - structlog JSON with service, trace_id,
user_id context injection.
- configure_logging() is called at create_app() in auth and inference;
inference's main.py now uses structlog via get_logger() instead of
logging.getLogger.
- log_level setting added to auth + inference config (LOG_LEVEL env).
Docs
- contracts/logging-standard.md defines the required JSON fields,
Python (structlog) + Node.js (pino) implementations, LogQL examples
for cross-service queries, and the x-trace-id propagation contract.
Closes #9
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
40 lines
861 B
TOML
40 lines
861 B
TOML
# Distribution metadata and runtime dependencies for the chat API service.
[project]
name = "samosachaat-chat-api"
version = "0.1.0"
description = "samosaChaat chat API orchestration service"
readme = "README.md"
requires-python = ">=3.12"
# PEP 508 requirement strings, one per line; each sets a lower bound only.
dependencies = [
    "fastapi>=0.117.1",
    "uvicorn[standard]>=0.36.0",
    "pydantic>=2.8.0",
    "pydantic-settings>=2.4.0",
    "sqlalchemy[asyncio]>=2.0.36",
    "asyncpg>=0.29.0",
    "httpx>=0.27.0",
    "sse-starlette>=2.1.3",
    "structlog>=24.4.0",
    "cachetools>=5.5.0",
    "prometheus-fastapi-instrumentator>=7.0.0",
]

# Development-only dependency group (PEP 735); these are not part of the
# published package's runtime requirements.
[dependency-groups]
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    "aiosqlite>=0.20.0",
    "respx>=0.21.1",
]

# pytest settings: collect files matching test_*.py under src/tests.
[tool.pytest.ini_options]
# "auto" lets pytest-asyncio run async tests and fixtures without a
# per-test asyncio marker.
asyncio_mode = "auto"
# Stated explicitly because pytest-asyncio >=0.24 warns when this is unset;
# "function" matches the current default, so behaviour is unchanged.
asyncio_default_fixture_loop_scope = "function"
testpaths = ["src/tests"]
python_files = ["test_*.py"]

# PEP 517 build configuration: the project is built with the hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Wheel contents: the src/ directory is shipped as the package.
# NOTE(review): pytest's testpaths points at src/tests, so the test suite
# appears to be included in the wheel as well — confirm that is intended.
[tool.hatch.build.targets.wheel]
packages = ["src"]