# nanochat/contracts/openapi/inference-api.yaml
---
# Document header: spec version, API metadata, and the base URL that the
# path entries below are resolved against.
openapi: 3.1.0
info:
  title: samosaChaat Inference API
  version: 0.1.0
  # Folded scalar with strip chomping so the description carries no
  # trailing newline.
  description: >-
    Contract skeleton for the standalone inference microservice that streams
    tokens and manages model weight lifecycle.
servers:
  # NOTE(review): looks like an internal service hostname - confirm it is
  # resolvable wherever this contract is consumed.
  - url: http://inference:8003
paths:
  /health:
    get:
      # Stable, unique identifier so generated clients and docs get a
      # predictable name instead of one derived from path + method.
      operationId: getHealth
      summary: Liveness and readiness probe.
      # No operation-level `security` requirement is declared for this probe.
      responses:
        "200":
          description: Inference service health.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                  ready:
                    type: boolean
                  # Optional (not listed under `required`) and nullable.
                  current_model:
                    type:
                      - string
                      - "null"
                required:
                  - status
                  - ready
  /generate:
    post:
      # Stable, unique identifier so generated clients and docs get a
      # predictable name instead of one derived from path + method.
      operationId: generate
      summary: Stream generated tokens as server-sent events.
      # Callers must present the `internalApiKey` scheme.
      # NOTE(review): no response is documented for a missing or invalid
      # key - confirm the status the service returns and add it.
      security:
        - internalApiKey: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/GenerateRequest"
      responses:
        "200":
          description: SSE response of token chunks and done marker.
          content:
            # The stream is typed only as a string; the shape of individual
            # events is not described by this contract.
            text/event-stream:
              schema:
                type: string
  /models:
    get:
      # Stable, unique identifier so generated clients and docs get a
      # predictable name instead of one derived from path + method.
      operationId: listModels
      summary: List available and loaded model weights.
      # Callers must present the `internalApiKey` scheme.
      # NOTE(review): no response is documented for a missing or invalid
      # key - confirm the status the service returns and add it.
      security:
        - internalApiKey: []
      responses:
        "200":
          description: Model registry view.
          content:
            application/json:
              schema:
                type: object
                properties:
                  # Required but nullable: the key is always present and may
                  # hold null.
                  current_model:
                    type:
                      - string
                      - "null"
                  models:
                    type: array
                    items:
                      $ref: "#/components/schemas/ModelInfo"
                required:
                  - current_model
                  - models
  /models/swap:
    post:
      # Stable, unique identifier so generated clients and docs get a
      # predictable name instead of one derived from path + method.
      operationId: swapModel
      summary: Drain workers and swap the loaded model weights.
      # Callers must present the `internalApiKey` scheme.
      # NOTE(review): no response is documented for a missing or invalid
      # key, nor for a rejected `model_tag` - confirm and add them.
      security:
        - internalApiKey: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model_tag:
                  type: string
              required:
                - model_tag
      responses:
        "202":
          description: Swap request accepted.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                  # NOTE(review): non-nullable here, while /health and
                  # /models declare `current_model` as string-or-null -
                  # confirm which the service actually guarantees.
                  current_model:
                    type: string
                required:
                  - status
                  - current_model
  /stats:
    get:
      # Stable, unique identifier so generated clients and docs get a
      # predictable name instead of one derived from path + method.
      operationId: getStats
      summary: Worker pool and throughput statistics.
      # Callers must present the `internalApiKey` scheme.
      # NOTE(review): no response is documented for a missing or invalid
      # key - confirm the status the service returns and add it.
      security:
        - internalApiKey: []
      responses:
        "200":
          description: Runtime worker statistics.
          content:
            application/json:
              # Unconstrained object: no properties are declared, so any
              # JSON object satisfies this schema.
              schema:
                type: object
components:
  securitySchemes:
    # API key carried in a request header; operations opt in by listing
    # `internalApiKey` under their own `security` entry.
    internalApiKey:
      name: X-Internal-API-Key
      in: header
      type: apiKey
  schemas:
    ChatMessage:
      # Closed object with exactly two mandatory string fields; any other
      # key is rejected by `additionalProperties: false`.
      type: object
      required: [role, content]
      properties:
        role:
          type: string
        content:
          type: string
      additionalProperties: false
    GenerateRequest:
      # Closed object: only `messages` is mandatory. The three sampling
      # fields may each be omitted or sent as null.
      type: object
      required: [messages]
      properties:
        messages:
          type: array
          items:
            $ref: '#/components/schemas/ChatMessage'
        temperature:
          type: [number, 'null']
        max_tokens:
          type: [integer, 'null']
        top_k:
          type: [integer, 'null']
      additionalProperties: false
    ModelInfo:
      # Closed object describing one model entry; every declared field is
      # mandatory and no extra keys are allowed.
      type: object
      required: [model_tag, source, path, loaded]
      properties:
        model_tag:
          type: string
        source:
          type: string
        path:
          type: string
        loaded:
          type: boolean
      additionalProperties: false