> ## Documentation Index
> Fetch the complete documentation index at: https://documentation.datalab.to/llms.txt
> Use this file to discover all available pages before exploring further.

# Segment Document

> Segment a document into sections using a schema. Returns page ranges for each identified segment. Provide a file (uploaded, or referenced via `file_url`) for end-to-end processing, or a `checkpoint_id` from a previous `/convert` call made with `save_checkpoint=true`.



## OpenAPI

````yaml https://www.datalab.to/openapi.json post /api/v1/segment
# OpenAPI 3.1 document for the Datalab API; this excerpt describes the
# POST /api/v1/segment operation and the schemas it references.
openapi: 3.1.0
info:
  title: Datalab API
  version: 0.0.1
servers:
  - url: https://www.datalab.to
    description: Datalab API
# Empty document-level security requirement; the operation under `paths`
# declares its own `security` entry.
security: []
# Single operation: submit a document (or a checkpoint) for segmentation.
paths:
  /api/v1/segment:
    post:
      summary: Segment Document
      description: >-
        Segment a document into sections using a schema. Returns page ranges for
        each identified segment. Provide a file for end-to-end processing, or a
        checkpoint_id from a previous /convert call.
      operationId: segment_api_v1_segment_post
      # Both cookie parameters are optional strings.
      parameters:
        - name: wos-session
          in: cookie
          required: false
          schema:
            type: string
            title: Wos-Session
        - name: datalab_active_team
          in: cookie
          required: false
          schema:
            type: string
            title: Datalab Active Team
      # The body is mandatory and is sent as multipart form data.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/Body_segment_api_v1_segment_post'
      responses:
        # 200 returns an InitialResponse, which carries the request ID and the
        # URL used to check status and fetch results.
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/InitialResponse'
        # 422 reports request validation problems as an HTTPValidationError.
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      # Operation-level requirement: the APIKeyHeader scheme defined under
      # components.securitySchemes.
      security:
        - APIKeyHeader: []
components:
  schemas:
    # Multipart form fields accepted by POST /api/v1/segment.
    # Only `segmentation_schema` is required. Per the field descriptions, the
    # document is supplied either as `file` / `file_url` or as a
    # `checkpoint_id` from an earlier /convert request.
    # `dashboard` and `choices` are non-standard keywords carried over from the
    # source spec; they are kept unchanged for any consumer that reads them.
    Body_segment_api_v1_segment_post:
      properties:
        file_url:
          anyOf:
            - type: string
            - type: 'null'
          title: File Url
          description: Optional file URL. Provide either file/file_url or checkpoint_id.
        checkpoint_id:
          anyOf:
            - type: string
            - type: 'null'
          title: Checkpoint Id
          description: >-
            Checkpoint ID from a previous /convert request (with
            save_checkpoint=true). Skips re-parsing when provided.
        segmentation_schema:
          # Sent as a string form field; the description and `dashboard.type`
          # indicate the content is a JSON schema.
          type: string
          title: Segmentation Schema
          description: >-
            The JSON schema for document segmentation. Should contain segment
            names and descriptions for identifying page ranges of different
            document sections.
          dashboard:
            description: >-
              JSON schema with segment names and descriptions for identifying
              page ranges.
            type: json
        mode:
          type: string
          title: Mode
          description: >-
            Output mode for parsing (only used when providing a file, not a
            checkpoint).
          default: fast
          # Standard JSON Schema `enum` mirroring the `choices` list below.
          # `choices` is not a JSON Schema keyword, so without `enum` generic
          # tooling would treat this field as a free-form string.
          enum:
            - fast
            - balanced
            - accurate
          choices:
            - fast
            - balanced
            - accurate
          dashboard:
            description: Processing mode balancing speed and accuracy.
        max_pages:
          anyOf:
            - type: integer
            - type: 'null'
          title: Max Pages
          description: The maximum number of pages to process.
        page_range:
          anyOf:
            - type: string
            - type: 'null'
          title: Page Range
          description: The page range to process, comma separated like 0,5-10,20.
          dashboard:
            description: >-
              Comma-separated page ranges to process, e.g. '0-2,4'. Leave empty
              for all pages.
        save_checkpoint:
          type: boolean
          title: Save Checkpoint
          description: >-
            Save a checkpoint after processing for future
            extraction/segmentation calls.
          default: false
          dashboard:
            description: Save a checkpoint for later /extract or /segment calls.
        skip_cache:
          type: boolean
          title: Skip Cache
          description: Skip the cache and re-run.
          default: false
          dashboard:
            description: Skip cache and re-run processing.
        webhook_url:
          anyOf:
            - type: string
            - type: 'null'
          title: Webhook Url
          description: Optional webhook URL to call when the request is complete.
        processing_location:
          anyOf:
            - type: string
            - type: 'null'
          title: Processing Location
          description: >-
            Optional residency region override (e.g. us, eu). When provided, use
            file_url or direct-upload; multipart uploads are rejected. When
            omitted, the request uses the team's configured residency and
            profile.
        workflowstepdata_id:
          anyOf:
            - type: integer
            - type: 'null'
          title: Workflowstepdata Id
          description: Optional workflow step data ID to associate with this request.
        model_override_settings:
          # NOTE(review): the source spec gives this field no description; the
          # expected string format is not documented here. Confirm upstream.
          anyOf:
            - type: string
            - type: 'null'
          title: Model Override Settings
        file:
          anyOf:
            - type: string
              format: binary
            - type: 'null'
          title: File
          description: >-
            Input PDF, word document, powerpoint, or image file, uploaded as
            multipart form data.  Images must be png, jpg, or webp format.
      type: object
      # The schema itself does not enforce the "file or checkpoint" rule; that
      # rule is stated only in the field descriptions.
      required:
        - segmentation_schema
      title: Body_segment_api_v1_segment_post
    # Body of the 200 response. `request_id` and `request_check_url` are
    # always present; per their descriptions they are used to check the
    # request's status and retrieve its results.
    InitialResponse:
      properties:
        success:
          type: boolean
          title: Success
          description: Whether the request was successful.
          default: true
        error:
          anyOf:
            - type: string
            - type: 'null'
          title: Error
          description: >-
            If the request was not successful, this will contain an error
            message.
        request_id:
          type: string
          title: Request Id
          description: >-
            The ID of the request. This ID can be used to check the status of
            the request.
        request_check_url:
          type: string
          title: Request Check Url
          description: The URL to check the status of the request and get results.
        versions:
          # May be an object with arbitrary keys, a plain string, or null.
          anyOf:
            - additionalProperties: true
              type: object
            - type: string
            - type: 'null'
          title: Versions
          description: A dictionary of the versions of the libraries used in the request.
      type: object
      required:
        - request_id
        - request_check_url
      title: InitialResponse
    # Body of the 422 response: an optional `detail` array with one
    # ValidationError entry per reported problem.
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          type: array
          title: Detail
      type: object
      title: HTTPValidationError
    # One validation problem. All three fields are required.
    ValidationError:
      properties:
        # Location of the problem, as a list whose elements are strings or
        # integers.
        loc:
          items:
            anyOf:
              - type: string
              - type: integer
          type: array
          title: Location
        # Error message text.
        msg:
          type: string
          title: Message
        # Error type identifier, as a string.
        type:
          type: string
          title: Error Type
      type: object
      required:
        - loc
        - msg
        - type
      title: ValidationError
  # API-key authentication: the key is sent in the `X-API-Key` request header.
  # Referenced by the operation-level `security` entry of POST /api/v1/segment.
  securitySchemes:
    APIKeyHeader:
      type: apiKey
      in: header
      name: X-API-Key

````