# nanochat/contracts/openapi/inference-api.yaml
# 2026-04-16 11:06:29 -07:00

# 175 lines
# 4.2 KiB
# YAML

# Document preamble: spec version, API metadata, and server list.
# Version strings are quoted so they stay strings even if a future bump
# produces a number-looking value (e.g. "1.0").
openapi: "3.1.0"
info:
  title: samosaChaat Inference API
  version: "0.1.0"
  description: >
    Contract skeleton for the standalone inference microservice that streams
    tokens and manages model weight lifecycle.
servers:
  # NOTE(review): plain-HTTP URL on host "inference", port 8003 —
  # presumably an internal service address; confirm.
  - url: http://inference:8003
# Operations exposed by the inference service. Every operation except
# GET /health declares the `internalApiKey` security requirement.
paths:
  /health:
    # NOTE(review): no `security` requirement is declared on this
    # operation — confirm the probe is meant to be callable without the key.
    get:
      summary: Liveness and readiness probe.
      responses:
        "200":
          description: Inference service health.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                  ready:
                    type: boolean
                  # Nullable, and not listed under `required` below.
                  current_model:
                    type:
                      - string
                      - "null"
                required:
                  - status
                  - ready

  /generate:
    post:
      summary: Stream generated tokens as server-sent events.
      security:
        - internalApiKey: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/GenerateRequest"
      responses:
        "200":
          description: SSE response of token chunks and done marker.
          content:
            text/event-stream:
              # The stream body is typed only as a string; the per-event
              # payload format is not described in this contract.
              schema:
                type: string

  /models:
    get:
      summary: List available and loaded model weights.
      security:
        - internalApiKey: []
      responses:
        "200":
          description: Model registry view.
          content:
            application/json:
              schema:
                type: object
                properties:
                  # Required but nullable.
                  current_model:
                    type:
                      - string
                      - "null"
                  models:
                    type: array
                    items:
                      $ref: "#/components/schemas/ModelInfo"
                required:
                  - current_model
                  - models

  /models/swap:
    post:
      summary: Drain workers and swap the loaded model weights.
      security:
        - internalApiKey: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model_tag:
                  type: string
              required:
                - model_tag
      responses:
        "202":
          description: Swap request accepted.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                  # NOTE(review): a non-nullable string here, whereas
                  # /health and /models allow null — confirm this is intended.
                  current_model:
                    type: string
                required:
                  - status
                  - current_model

  /stats:
    get:
      summary: Worker pool and throughput statistics.
      security:
        - internalApiKey: []
      responses:
        "200":
          description: Runtime worker statistics.
          content:
            application/json:
              # Untyped object: no properties are specified for the stats
              # payload in this contract.
              schema:
                type: object
# Reusable definitions: the security scheme named by the operations'
# `security` entries and the schemas targeted by `$ref`.
components:
  securitySchemes:
    # API key carried in the `X-Internal-API-Key` request header.
    internalApiKey:
      type: apiKey
      in: header
      name: X-Internal-API-Key

  schemas:
    # One chat turn; both fields are required and no other properties
    # are allowed.
    ChatMessage:
      type: object
      additionalProperties: false
      properties:
        role:
          type: string
        content:
          type: string
      required:
        - role
        - content

    # Request body for POST /generate. Only `messages` is required; the
    # three sampling fields are optional and may be null.
    GenerateRequest:
      type: object
      additionalProperties: false
      properties:
        messages:
          type: array
          items:
            $ref: "#/components/schemas/ChatMessage"
        temperature:
          type:
            - number
            - "null"
        max_tokens:
          type:
            - integer
            - "null"
        top_k:
          type:
            - integer
            - "null"
      required:
        - messages

    # One entry of the `models` array returned by GET /models; all four
    # fields are required.
    ModelInfo:
      type: object
      additionalProperties: false
      properties:
        model_tag:
          type: string
        source:
          type: string
        path:
          type: string
        loaded:
          type: boolean
      required:
        - model_tag
        - source
        - path
        - loaded