# Mirror of https://github.com/karpathy/nanochat.git
# Synced 2026-05-07 08:19:52 +00:00
# 175 lines, 4.2 KiB, YAML
# OpenAPI 3.1 contract for the inference service.
openapi: 3.1.0
info:
  title: samosaChaat Inference API
  version: 0.1.0
  # Folded scalar: the two lines below are joined into a single line.
  description: >
    Contract skeleton for the standalone inference microservice that streams
    tokens and manages model weight lifecycle.
servers:
  - url: http://inference:8003
# Operations exposed by the service. Every operation except /health lists
# the internalApiKey security requirement.
paths:
  /health:
    # No security requirement is declared on this operation.
    get:
      summary: Liveness and readiness probe.
      responses:
        "200":
          description: Inference service health.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                  ready:
                    type: boolean
                  # Optional and nullable: a string or null.
                  current_model:
                    type:
                      - string
                      - "null"
                required:
                  - status
                  - ready
  /generate:
    post:
      summary: Stream generated tokens as server-sent events.
      security:
        - internalApiKey: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/GenerateRequest"
      responses:
        "200":
          description: SSE response of token chunks and done marker.
          content:
            # The event stream body is declared only as a plain string.
            text/event-stream:
              schema:
                type: string
  /models:
    get:
      summary: List available and loaded model weights.
      security:
        - internalApiKey: []
      responses:
        "200":
          description: Model registry view.
          content:
            application/json:
              schema:
                type: object
                properties:
                  # Required but nullable: a string or null.
                  current_model:
                    type:
                      - string
                      - "null"
                  models:
                    type: array
                    items:
                      $ref: "#/components/schemas/ModelInfo"
                required:
                  - current_model
                  - models
  /models/swap:
    post:
      summary: Drain workers and swap the loaded model weights.
      security:
        - internalApiKey: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model_tag:
                  type: string
              required:
                - model_tag
      responses:
        "202":
          description: Swap request accepted.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                  # NOTE(review): non-nullable here, unlike current_model in
                  # /health and /models; confirm this is intended.
                  current_model:
                    type: string
                required:
                  - status
                  - current_model
  /stats:
    get:
      summary: Worker pool and throughput statistics.
      security:
        - internalApiKey: []
      responses:
        "200":
          description: Runtime worker statistics.
          content:
            application/json:
              # Object with no properties declared.
              schema:
                type: object
# Reusable definitions referenced from the operations via $ref and security.
components:
  securitySchemes:
    # API key carried in the X-Internal-API-Key request header.
    internalApiKey:
      type: apiKey
      in: header
      name: X-Internal-API-Key
  schemas:
    # A single message; both fields are required and no others are allowed.
    ChatMessage:
      type: object
      additionalProperties: false
      properties:
        role:
          type: string
        content:
          type: string
      required:
        - role
        - content
    # Request body for /generate. Only messages is required; the other
    # fields are optional and each also accepts null.
    GenerateRequest:
      type: object
      additionalProperties: false
      properties:
        messages:
          type: array
          items:
            $ref: "#/components/schemas/ChatMessage"
        temperature:
          type:
            - number
            - "null"
        max_tokens:
          type:
            - integer
            - "null"
        top_k:
          type:
            - integer
            - "null"
      required:
        - messages
    # One entry of the models array returned by /models; all four fields
    # are required and no others are allowed.
    ModelInfo:
      type: object
      additionalProperties: false
      properties:
        model_tag:
          type: string
        source:
          type: string
        path:
          type: string
        loaded:
          type: boolean
      required:
        - model_tag
        - source
        - path
        - loaded