> ## Documentation Index
> Fetch the complete documentation index at: https://documentation.datalab.to/llms.txt
> Use this file to discover all available pages before exploring further.

# Convert Document

> Convert a PDF, image, or document to markdown, HTML, JSON, or chunks. Use save_checkpoint=true to save parsed state for later /extract or /segment calls.



## OpenAPI

````yaml https://www.datalab.to/openapi.json post /api/v1/convert
# Document header: spec version, API identity and base server.
openapi: '3.1.0'
info:
  version: '0.0.1'
  title: Datalab API
servers:
  - description: Datalab API
    url: 'https://www.datalab.to'
# Empty list: no security requirement is declared at document level.
security: []
paths:
  /api/v1/convert:
    # POST operation: accepts a multipart form and answers with an
    # InitialResponse (200) or an HTTPValidationError (422).
    post:
      operationId: convert_api_v1_convert_post
      summary: Convert Document
      description: >-
        Convert a PDF, image, or document to markdown, HTML, JSON, or
        chunks. Use save_checkpoint=true to save parsed state for later
        /extract or /segment calls.
      # This operation declares the APIKeyHeader security requirement.
      security:
        - APIKeyHeader: []
      # Two optional string-valued cookies are declared as parameters.
      parameters:
        - in: cookie
          name: wos-session
          required: false
          schema:
            title: Wos-Session
            type: string
        - in: cookie
          name: datalab_active_team
          required: false
          schema:
            title: Datalab Active Team
            type: string
      requestBody:
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/Body_convert_api_v1_convert_post'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/InitialResponse'
          description: Successful Response
        '422':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
          description: Validation Error
components:
  schemas:
    # Multipart form fields accepted by POST /api/v1/convert. The schema
    # declares no `required` list, so every field is optional as far as
    # schema validation is concerned.
    Body_convert_api_v1_convert_post:
      properties:
        file_url:
          anyOf:
            - type: string
            - type: 'null'
          title: File Url
          description: >-
            Optional file URL (http/https). If provided, the server will
            download and process it.
        mode:
          type: string
          title: Mode
          description: >-
            Which output mode to use. Valid values: 'fast' (lowest latency),
            'balanced' (balanced accuracy and latency), 'accurate' (highest
            accuracy).
          default: fast
          # `enum` is the standard JSON Schema keyword, so validators and
          # generated clients can enforce the three documented values.
          enum:
            - fast
            - balanced
            - accurate
          # `choices` is not a JSON Schema keyword; it is left unchanged in
          # case other tooling reads it.
          choices:
            - fast
            - balanced
            - accurate
          # `dashboard` is a non-standard annotation used throughout this
          # schema. NOTE(review): presumably UI metadata — confirm.
          dashboard:
            description: Processing mode balancing speed and accuracy.
        max_pages:
          anyOf:
            - type: integer
            - type: 'null'
          title: Max Pages
          description: The maximum number of pages in the document to convert.
        page_range:
          anyOf:
            - type: string
            - type: 'null'
          title: Page Range
          description: >-
            The page range to convert, comma separated like 0,5-10,20. Overrides
            max_pages if provided.
          dashboard:
            description: >-
              Comma-separated page ranges to process, e.g. '0-2,4'. Leave empty
              for all pages.
        paginate:
          type: boolean
          title: Paginate
          description: >-
            Whether to paginate the output. Each page will be separated by a
            horizontal rule with the page number.
          default: false
          dashboard:
            description: Separate output by page with horizontal rules.
        add_block_ids:
          type: boolean
          title: Add Block Ids
          description: >-
            Add data-block-id attributes to HTML elements for citation tracking.
            Only applies when output_format includes 'html'.
          default: false
        include_markdown_in_chunks:
          type: boolean
          title: Include Markdown In Chunks
          description: Include markdown field in chunks and JSON output.
          default: false
        disable_image_extraction:
          type: boolean
          title: Disable Image Extraction
          description: Disable image extraction from the document.
          default: false
          dashboard: {}
        disable_image_captions:
          type: boolean
          title: Disable Image Captions
          description: Disable synthetic image captions/descriptions in output.
          default: false
          dashboard: {}
        word_bboxes:
          type: boolean
          title: Word Bboxes
          description: >-
            When enabled, predict per-word bounding boxes for each page and
            include them under page_info[id].metadata.words. Only supported by
            the Chandra parse pipeline.
          default: false
        fence_synthetic_captions:
          type: boolean
          title: Fence Synthetic Captions
          description: >-
            Wrap synthetic image captions with HTML comment markers for easy
            identification/removal.
          default: false
        # The description names 'markdown' as the default, but no schema-level
        # `default` is declared and null is an accepted value.
        # NOTE(review): confirm the server applies that default itself.
        output_format:
          anyOf:
            - type: string
            - type: 'null'
          title: Output Format
          description: >-
            The output format. Can be 'json', 'html', 'markdown', or 'chunks'.
            Defaults to 'markdown'. Comma separate multiple formats.
          dashboard:
            choices:
              - markdown
              - html
              - json
              - chunks
            description: Output format for the converted document.
            type: select
        token_efficient_markdown:
          type: boolean
          title: Token Efficient Markdown
          description: >-
            Optimize markdown for LLM token usage (compact tables, single-space
            indents).
          default: false
        skip_cache:
          type: boolean
          title: Skip Cache
          description: Skip the cache and re-run the conversion.
          default: false
          dashboard:
            description: Skip cache and re-run processing.
        save_checkpoint:
          type: boolean
          title: Save Checkpoint
          description: >-
            Save a checkpoint after conversion. The checkpoint_id in the
            response can be used with /extract or /segment to skip re-parsing.
          default: false
          dashboard:
            description: Save a checkpoint for later /extract or /segment calls.
        # Typed as a string: the JSON configuration is passed as text.
        additional_config:
          anyOf:
            - type: string
            - type: 'null'
          title: Additional Config
          description: >-
            Additional configuration as a JSON string. Supported keys:
            'keep_pageheader_in_output', 'keep_pagefooter_in_output',
            'keep_spreadsheet_formatting'.
        workflowstepdata_id:
          anyOf:
            - type: integer
            - type: 'null'
          title: Workflowstepdata Id
          description: Optional workflow step data ID to associate with this request.
        # NOTE(review): the main description lists 'track_changes' but the
        # dashboard description does not — confirm whether that is intended.
        extras:
          anyOf:
            - type: string
            - type: 'null'
          title: Extras
          description: >-
            Comma-separated list of extra features: 'track_changes',
            'chart_understanding', 'table_row_bboxes', 'extract_links',
            'infographic', 'new_block_types'.
          dashboard:
            description: >-
              Comma-separated feature flags: chart_understanding, infographic,
              extract_links, table_row_bboxes, new_block_types.
        webhook_url:
          anyOf:
            - type: string
            - type: 'null'
          title: Webhook Url
          description: Optional webhook URL to call when the request is complete.
        processing_location:
          anyOf:
            - type: string
            - type: 'null'
          title: Processing Location
          description: >-
            Optional residency region override (e.g. us, eu). When provided, use
            file_url or direct-upload; multipart uploads are rejected. When
            omitted, the request uses the team's configured residency and
            profile.
        eval_rubric_id:
          anyOf:
            - type: integer
            - type: 'null'
          title: Eval Rubric Id
          description: Optional eval rubric ID to run evaluation after conversion.
        # Labelled internal by its own description.
        force_new:
          type: boolean
          title: Force New
          description: 'Internal: force Modal backend.'
          default: false
        # No description is declared for this field.
        # NOTE(review): document its expected format or mark it internal.
        model_override_settings:
          anyOf:
            - type: string
            - type: 'null'
          title: Model Override Settings
        # Binary upload part of the multipart form; null is also accepted.
        file:
          anyOf:
            - type: string
              format: binary
            - type: 'null'
          title: File
          description: >-
            Input PDF, word document, powerpoint, or image file, uploaded as
            multipart form data.  Images must be png, jpg, or webp format.
      type: object
      title: Body_convert_api_v1_convert_post
    # JSON body of the 200 response from the convert operation.
    InitialResponse:
      title: InitialResponse
      type: object
      # Only the two request-tracking fields are mandatory.
      required: [request_id, request_check_url]
      properties:
        success:
          title: Success
          type: boolean
          default: true
          description: Whether the request was successful.
        error:
          title: Error
          description: >-
            If the request was not successful, this will contain an
            error message.
          anyOf:
            - type: string
            - type: 'null'
        request_id:
          title: Request Id
          type: string
          description: >-
            The ID of the request. This ID can be used to check the
            status of the request.
        request_check_url:
          title: Request Check Url
          type: string
          description: >-
            The URL to check the status of the request and get results.
        versions:
          title: Versions
          description: >-
            A dictionary of the versions of the libraries used in the
            request.
          # Accepts a free-form object, a plain string, or null.
          anyOf:
            - type: object
              additionalProperties: true
            - type: string
            - type: 'null'
    # JSON body of the 422 response: a list of ValidationError entries.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # One validation failure: a location path, a message and an error type.
    # All three fields are required.
    ValidationError:
      title: ValidationError
      type: object
      required: [loc, msg, type]
      properties:
        loc:
          title: Location
          type: array
          # Each path element is either a string or an integer.
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
  securitySchemes:
    # API key carried in the X-API-Key request header.
    APIKeyHeader:
      name: X-API-Key
      in: header
      type: apiKey

````