> ## Documentation Index
> Fetch the complete documentation index at: https://docs.deepshi.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Create a chat completion

> Generates a model response for the given conversation. Set `stream: true` to receive the response incrementally as Server-Sent Events.


## OpenAPI

````yaml /api-reference/openapi.json post /v1/chat/completions
# OpenAPI document version and top-level API metadata.
openapi: '3.1.0'
info:
  title: Deepshi API
  description: >-
    OpenAI-compatible REST API for Deepshi's own models and leading
    third-party models. Authenticate with your Deepshi API key as a bearer
    token, and reference a model by its id (e.g. `deepshi-3.0`, `gpt-4o`) in
    the `model` field.
  version: '1.0.0'
# Server against which the paths below are resolved.
servers:
  - url: 'https://api.deepshi.ai'
    description: Deepshi API
# Default security requirement, applied to operations that do not override it.
security:
  - BearerAuth: []
paths:
  /v1/chat/completions:
    # Chat completion operation; request and response bodies are defined
    # under `components`.
    post:
      tags: [Chat]
      summary: Create a chat completion
      description: >-
        Generates a model response for the given conversation. Set
        `stream: true` to receive the response incrementally as Server-Sent
        Events.
      operationId: createChatCompletion
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        # Success: one JSON body, or an SSE stream of chunks when streaming.
        '200':
          description: >-
            A chat completion. When `stream: true`, the response is instead
            an SSE stream of `ChatCompletionChunk` events terminated by
            `data: [DONE]`.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ChatCompletionChunk'
        # Error statuses, each resolved from `components.responses`.
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/InsufficientQuota'
        '403':
          $ref: '#/components/responses/Forbidden'
        '429':
          $ref: '#/components/responses/RateLimited'
        '500':
          $ref: '#/components/responses/InternalError'
components:
  schemas:
    # Request body of `POST /v1/chat/completions`. Only `model` and
    # `messages` are required; every other property is optional.
    ChatCompletionRequest:
      type: object
      required:
        - model
        - messages
      properties:
        model:
          type: string
          description: >-
            The model id to use, e.g. `deepshi-3.0` or `gpt-4o`. Use the bare id
            returned by `GET /v1/models`, not a provider-prefixed id.
          example: deepshi-3.0
        messages:
          type: array
          description: The conversation so far, oldest first.
          items:
            $ref: '#/components/schemas/ChatMessage'
        temperature:
          type: number
          minimum: 0
          maximum: 2
          description: Sampling temperature. Lower is more deterministic.
        top_p:
          type: number
          minimum: 0
          maximum: 1
          description: Nucleus sampling cutoff. Use instead of `temperature`, not both.
        max_tokens:
          type: integer
          description: Maximum tokens to generate in the response.
        max_completion_tokens:
          type: integer
          description: Alias of `max_tokens` accepted for OpenAI compatibility.
        # NOTE(review): the description lists `none`/`low`/`medium`/`high` but
        # no `enum` is declared; confirm whether other values are accepted
        # before constraining this field.
        reasoning_effort:
          type: string
          example: high
          description: >-
            Controls how much a reasoning model thinks: `none` turns reasoning
            off; `low`/`medium`/`high` set the effort. Omit to use the model's
            default.
        stop:
          description: Up to 4 sequences at which generation stops.
          oneOf:
            - type: string
            - type: array
              # Encodes the documented "up to 4 sequences" limit in the schema.
              maxItems: 4
              items:
                type: string
        stream:
          type: boolean
          description: Stream tokens as Server-Sent Events.
        stream_options:
          $ref: '#/components/schemas/StreamOptions'
        seed:
          type: integer
          description: Best-effort deterministic sampling seed.
        # Quoted so YAML 1.1 parsers do not read the key `n` as a boolean.
        'n':
          type: integer
          description: Number of choices to generate.
        frequency_penalty:
          type: number
          minimum: -2
          maximum: 2
        presence_penalty:
          type: number
          minimum: -2
          maximum: 2
        logit_bias:
          type: object
          additionalProperties:
            type: number
        logprobs:
          type: boolean
        top_logprobs:
          type: integer
          minimum: 0
          maximum: 20
        response_format:
          type: object
          description: >-
            Set to `{ "type": "json_object" }` to force valid JSON output, or `{
            "type": "json_schema", "json_schema": { ... } }` for a schema
            (model-dependent).
        tools:
          type: array
          description: Function/tool definitions the model may call.
          items:
            $ref: '#/components/schemas/Tool'
        tool_choice:
          description: >-
            Controls tool use: `"auto"`, `"none"`, `"required"`, or a specific
            tool.
          oneOf:
            - type: string
              enum:
                - none
                - auto
                - required
            - type: object
        parallel_tool_calls:
          type: boolean
        user:
          type: string
          description: An opaque identifier for your end user.
      description: Unsupported or model-specific fields are ignored rather than rejected.
    # Body of the `application/json` 200 response of the chat completion
    # operation.
    ChatCompletionResponse:
      type: object
      properties:
        id: {type: string}
        object:
          type: string
          example: 'chat.completion'
        created:
          type: integer
          format: int64
        model:
          type: string
          description: 'The model id you requested (the bare public id).'
          example: 'gpt-4o'
        choices:
          type: array
          items:
            $ref: '#/components/schemas/Choice'
        usage:
          $ref: '#/components/schemas/Usage'
        system_fingerprint: {type: string}
    # Schema of the `text/event-stream` 200 response: one streamed event.
    ChatCompletionChunk:
      type: object
      description: >-
        One Server-Sent Event in a streamed response. The stream ends with
        `data: [DONE]`.
      properties:
        id:
          type: string
        object:
          type: string
          example: chat.completion.chunk
        created:
          type: integer
          format: int64
        model:
          type: string
        choices:
          type: array
          items:
            type: object
            properties:
              index:
                type: integer
              # OpenAPI 3.1 has no `nullable` keyword; a value that may be
              # null is declared by listing 'null' in `type`.
              finish_reason:
                type:
                  - string
                  - 'null'
              delta:
                $ref: '#/components/schemas/Delta'
        usage:
          $ref: '#/components/schemas/Usage'
          description: >-
            Present only on the final chunk when `stream_options.include_usage`
            is true.
    # One entry of the request's `messages` array. Only `role` is required.
    ChatMessage:
      type: object
      required: [role]
      properties:
        role:
          type: string
          enum: [system, user, assistant, tool]
        content:
          description: >-
            Message content: a plain string, or an array of content parts
            (text and image_url) for multimodal input on vision-capable
            models.
          oneOf:
            - type: string
            - type: array
              items:
                $ref: '#/components/schemas/ContentPart'
        name: {type: string}
        tool_call_id:
          type: string
          description: >-
            Set on `tool` messages to reference the tool call being answered.
        tool_calls:
          type: array
          items:
            $ref: '#/components/schemas/ToolCall'
    # Value of the request's `stream_options` property.
    StreamOptions:
      type: object
      properties:
        include_usage:
          type: boolean
          description: >-
            When true, the final SSE chunk carries a `usage` object
            (including `cost`).
    # One entry of the request's `tools` array; `type` and `function` are
    # both required, and `function` must carry a `name`.
    Tool:
      type: object
      required: [type, function]
      properties:
        type:
          type: string
          enum: [function]
        function:
          type: object
          required: [name]
          properties:
            name: {type: string}
            description: {type: string}
            parameters:
              type: object
              description: 'JSON Schema for the function arguments.'
              additionalProperties: true
            strict: {type: boolean}
    # One entry of `ChatCompletionResponse.choices`.
    Choice:
      type: object
      properties:
        index:
          type: integer
        finish_reason:
          type: string
          description: e.g. `stop`, `length`, `tool_calls`.
        message:
          $ref: '#/components/schemas/ResponseMessage'
        # OpenAPI 3.1 has no `nullable` keyword; a value that may be null is
        # declared by listing 'null' in `type`.
        logprobs:
          type:
            - object
            - 'null'
    # Referenced by `ChatCompletionResponse.usage` and
    # `ChatCompletionChunk.usage`.
    Usage:
      type: object
      description: 'Token usage and the billed cost for the request.'
      properties:
        prompt_tokens:
          type: integer
          description: 'Total input tokens (includes any cached tokens).'
        prompt_tokens_details:
          type: object
          properties:
            cached_tokens: {type: integer}
            cached_read_tokens:
              type: integer
              description: >-
                Tokens served from the prompt cache, billed at the
                cache-read rate.
            image_tokens: {type: integer}
        completion_tokens:
          type: integer
          description: >-
            Output tokens generated (includes reasoning tokens for
            reasoning models).
        completion_tokens_details:
          type: object
          properties:
            reasoning_tokens: {type: integer}
            image_tokens: {type: integer}
        total_tokens: {type: integer}
        cost:
          $ref: '#/components/schemas/Cost'
    # Value of `delta` in each `ChatCompletionChunk` choice.
    Delta:
      type: object
      properties:
        role: {type: string}
        content: {type: string}
        reasoning: {type: string}
        tool_calls:
          type: array
          items:
            $ref: '#/components/schemas/ToolCall'
    # Body schema referenced by the responses under `components.responses`.
    # `code` and `param` may be null: OpenAPI 3.1 has no `nullable` keyword,
    # so each lists 'null' in `type` instead.
    Error:
      type: object
      description: Error envelope, matching the OpenAI shape.
      required:
        - error
      properties:
        error:
          type: object
          properties:
            message:
              type: string
              description: Human-readable description of the error.
            type:
              type: string
              description: >-
                Error category, e.g. `invalid_request_error` or
                `insufficient_quota`.
            code:
              type:
                - string
                - 'null'
              description: >-
                Machine-readable error code, e.g. `invalid_api_key`,
                `model_not_found`, `insufficient_quota` (may be null).
            param:
              type:
                - string
                - 'null'
              description: The request parameter the error relates to, when applicable.
    # One element of an array-valued `ChatMessage.content`. Only `type` is
    # required.
    ContentPart:
      type: object
      required: [type]
      properties:
        type:
          type: string
          enum: [text, image_url]
        text: {type: string}
        image_url:
          type: object
          properties:
            url:
              type: string
              description: 'An image URL or a base64 `data:` URI.'
            detail:
              type: string
              enum: [low, high, auto]
    # Element of the `tool_calls` arrays on `ChatMessage`, `Delta` and
    # `ResponseMessage`.
    ToolCall:
      type: object
      properties:
        id: {type: string}
        type:
          type: string
          enum: [function]
        index:
          type: integer
          description: 'Present on streaming deltas.'
        function:
          type: object
          properties:
            name: {type: string}
            arguments:
              type: string
              description: 'JSON-encoded arguments string.'
    # Value of `message` in each `Choice`.
    ResponseMessage:
      type: object
      properties:
        role:
          type: string
          example: assistant
        # `content`, `refusal` and `reasoning` may be null. OpenAPI 3.1 has
        # no `nullable` keyword, so each lists 'null' in `type` instead.
        content:
          type:
            - string
            - 'null'
        refusal:
          type:
            - string
            - 'null'
        reasoning:
          type:
            - string
            - 'null'
          description: Reasoning trace, on models that expose one.
        reasoning_details:
          type: array
          description: Structured reasoning segments, on reasoning models.
          items:
            type: object
            additionalProperties: true
        tool_calls:
          type: array
          items:
            $ref: '#/components/schemas/ToolCall'
    # Value of `Usage.cost`.
    Cost:
      type: object
      description: >-
        Deepshi extension: the exact USD amount this request deducted from
        your balance. Standard OpenAI SDKs ignore it.
      properties:
        total_cost:
          type: number
          description: 'USD cost deducted from your balance for this request.'
          example: 0.000135
  responses:
    # Referenced as the 400 response of the chat completion operation.
    BadRequest:
      description: >-
        Malformed request, or a model id that doesn't exist in the catalog.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
          example:
            error:
              message: 'The model does not exist or you do not have access to it.'
              type: 'invalid_request_error'
              code: 'model_not_found'
              param: null
    # Referenced as the 401 response of the chat completion operation.
    Unauthorized:
      description: 'Missing, malformed, or unknown API key.'
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
          example:
            error:
              message: 'Incorrect API key provided.'
              type: 'invalid_request_error'
              code: 'invalid_api_key'
              param: null
    # Referenced as the 402 response of the chat completion operation.
    InsufficientQuota:
      description: 'The account is out of credits.'
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
          example:
            error:
              message: 'You have insufficient credits to complete this request.'
              type: 'insufficient_quota'
              code: 'insufficient_quota'
              param: null
    # Referenced as the 403 response of the chat completion operation.
    Forbidden:
      description: >-
        The key is revoked, or not permitted to use the requested model.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
          example:
            error:
              message: 'The model does not exist or you do not have access to it.'
              type: 'invalid_request_error'
              code: 'model_not_found'
              param: null
    # Referenced as the 429 response of the chat completion operation.
    RateLimited:
      description: 'Too many requests. Back off and retry.'
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    # Referenced as the 500 response of the chat completion operation.
    InternalError:
      description: 'Temporary server error. Retry with backoff.'
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
  securitySchemes:
    # Scheme named by the document-level `security` requirement.
    BearerAuth:
      type: http
      scheme: bearer
      description: "Your Deepshi API key, sent as `Authorization: Bearer <key>`."

````