# Crawl multiple URLs based on options

## OpenAPI Specification

```yaml
# OpenAPI document for the crawl endpoint (spec version 3.0.1).
openapi: 3.0.1
info:
  # Title and description are empty strings in this document.
  version: 1.0.0
  title: ""
  description: ""
paths:
  # Single POST operation: crawl multiple URLs based on the request-body options.
  /api/v1/crawler/crawl:
    post:
      operationId: crawlUrls
      summary: Crawl multiple URLs based on options
      description: ""
      deprecated: false
      # No path, query, or header parameters are declared on this operation.
      parameters: []
      tags:
        - Crawler/Crawl
        - Crawling
      # JSON request body carrying the crawl options. Only `url` is listed as
      # required; every other property is optional.
      # NOTE(review): the body itself is not marked `required: true` even
      # though the schema requires `url` — confirm whether that is intended.
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                # Starting point of the crawl.
                url:
                  type: string
                  format: uri
                  description: The base URL to start crawling from
                limit:
                  type: integer
                  description: Maximum number of pages to crawl. Default limit is 10000.
                  default: 10000
                # excludePaths / includePaths are regex filters on the URL
                # pathname; their examples use the same base URL and sample page.
                excludePaths:
                  type: array
                  items:
                    type: string
                  description: >-
                    URL pathname regex patterns that exclude matching URLs from
                    the crawl. For example, if you set "excludePaths":
                    ["blog/.*"] for the base URL scrapeless.com, any results
                    matching that pattern will be excluded, such as
                    https://www.scrapeless.com/blog/firecrawl-launch-week-1-recap.
                includePaths:
                  type: array
                  items:
                    type: string
                  description: >-
                    URL pathname regex patterns that include matching URLs in
                    the crawl. Only the paths that match the specified patterns
                    will be included in the response. For example, if you set
                    "includePaths": ["blog/.*"] for the base URL scrapeless.com,
                    only results matching that pattern will be included, such as
                    https://www.scrapeless.com/blog/firecrawl-launch-week-1-recap.
                # Two independent depth limits: maxDepth is path-based,
                # maxDiscoveryDepth is link-discovery-based.
                maxDepth:
                  type: integer
                  description: >-
                    Maximum depth to crawl relative to the base URL. Basically,
                    the max number of slashes the pathname of a scraped URL may
                    contain.
                  default: 10
                maxDiscoveryDepth:
                  type: integer
                  description: >-
                    Maximum depth to crawl based on discovery order. The root
                    site and sitemapped pages have a discovery depth of 0. For
                    example, if you set it to 1, and you set ignoreSitemap, you
                    will only crawl the entered URL and all URLs that are linked
                    on that page.
                ignoreSitemap:
                  type: boolean
                  description: Ignore the website sitemap when crawling
                  default: false
                ignoreQueryParameters:
                  type: boolean
                  description: >-
                    Do not re-scrape the same path with different (or no)
                    query parameters
                  default: false
                deduplicateSimilarURLs:
                  type: boolean
                  description: Controls whether similar URLs should be deduplicated.
                regexOnFullURL:
                  type: boolean
                  description: >-
                    Controls whether the regular expression should be applied to
                    the full URL.
                allowBackwardLinks:
                  type: boolean
                  description: >-
                    Enables the crawler to navigate from a specific URL to
                    previously linked pages.
                  default: false
                allowExternalLinks:
                  type: boolean
                  description: Allows the crawler to follow links to external websites.
                  default: false
                delay:
                  type: number
                  description: >-
                    Delay in seconds between scrapes. This helps respect website
                    rate limits.
                # Nested option groups defined under components/schemas.
                scrapeOptions:
                  $ref: '#/components/schemas/ScrapeOptions'
                browserOptions:
                  $ref: '#/components/schemas/BrowserOptions'
              required:
                - url
              # Apidog display order of the properties above.
              x-apidog-orders:
                - url
                - limit
                - excludePaths
                - includePaths
                - maxDepth
                - maxDiscoveryDepth
                - ignoreSitemap
                - ignoreQueryParameters
                - deduplicateSimilarURLs
                - regexOnFullURL
                - allowBackwardLinks
                - allowExternalLinks
                - delay
                - scrapeOptions
                - browserOptions
              x-apidog-ignore-properties: []
            examples: {}
      # Responses: 200 returns a CrawlResponse; 402, 429 and 500 each return an
      # object with a single string `error` field.
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CrawlResponse'
          headers: {}
          x-apidog-name: OK
        '402':
          description: Payment required
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    # OpenAPI 3.0.x Schema Objects define a single `example`
                    # value; the array-valued `examples` keyword is not part
                    # of the 3.0 schema vocabulary.
                    example: Payment required to access this resource.
                x-apidog-orders:
                  - error
                x-apidog-ignore-properties: []
          headers: {}
          x-apidog-name: '402'
        '429':
          description: Too many requests
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: >-
                      Request rate limit exceeded. Please wait and try again
                      later.
                x-apidog-orders:
                  - error
                x-apidog-ignore-properties: []
          headers: {}
          x-apidog-name: '429'
        '500':
          description: Server error
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: An unexpected error occurred on the server.
                x-apidog-orders:
                  - error
                x-apidog-ignore-properties: []
          headers: {}
          x-apidog-name: Server Error
      # This operation requires the apikey-header-x-api-token security scheme.
      security:
        - apikey-header-x-api-token: []
      # Apidog tooling extensions for this operation.
      x-apidog-status: released
      x-apidog-folder: Crawler/Crawl
      x-run-in-apidog: 'https://app.apidog.com/web/project/745098/apis/api-17509010-run'
components:
  schemas:
    # Browser session settings referenced by the crawl request's
    # `browserOptions` property. Every property here is declared as a string
    # and none is marked required.
    BrowserOptions:
      type: object
      properties:
        # Label used to find the session in the historical session list.
        session_name:
          type: string
          description: >-
            Set a name for your session to facilitate searching and viewing in
            the historical session list.
        # NOTE(review): typed as string although the description gives the
        # value in seconds (default 180) — confirm whether a numeric type and
        # a `default` were intended.
        session_ttl:
          type: string
          description: >-
            Controls the session duration and automatically closes the browser
            instance after timeout. Measured in seconds (s), defaults to 180
            seconds (3 minutes), customizable between 60 seconds (1 minute) and
            900 seconds (recommended maximum 15 minutes, but longer times can be
            set). Once the specified TTL is reached, the session will expire and
            Scraping Browser will close the browser instance to free resources.
        # NOTE(review): typed as string although the description says it
        # defaults to false — confirm whether a boolean type was intended.
        session_recording:
          type: string
          description: >-
            Whether to enable session recording. When enabled, the entire
            browser session execution process will be automatically recorded,
            and after the session is completed, it can be replayed and viewed in
            the historical session list details. Defaults to false.
        # Country code selecting the proxy exit region (e.g. US, GB, ANY).
        proxy_country:
          type: string
          description: >-
            Sets the target country/region for the proxy, sending requests via
            an IP address from that region. You can specify a country code
            (e.g., US for the United States, GB for the United Kingdom, ANY for
            any country). See country codes for all supported options.
        # Custom proxy URL; per the description it overrides the other
        # proxy_* parameters. The blank lines inside the folded scalar below
        # are significant: they become line breaks in the resulting string.
        proxy_url:
          type: string
          description: >-
            Used to set the browser’s proxy URL, for example:
            http://user:pass@ip:port. If this parameter is set, all other
            proxy_* parameters will be ignored.


            - 💡Custom proxy functionality is currently only available to
            Enterprise and Enterprise Enhanced subscription users Upgrade Now

            - 💡Enterprise-level custom users can contact us to use custom
            proxies.
        # Optional browser fingerprint configuration; the description states a
        # random fingerprint is generated per session by default.
        fingerprint:
          type: string
          description: >-
            A browser fingerprint is a nearly unique “digital fingerprint”
            created using your browser and device configuration information,
            which can be used to track your online activity even without
            cookies. Fortunately, configuring fingerprints in Scraping Browser
            is optional. We offer deep customization of browser fingerprints,
            such as core parameters like browser user agent, time zone,
            language, and screen resolution, and support extending functionality
            through custom launch parameters. Suitable for multi-account
            management, data collection, and privacy protection scenarios, using
            scrapeless’s own Chromium browser completely avoids detection. By
            default, our Scraping Browser service generates a random fingerprint
            for each session. Reference
      # Apidog display order of the properties above.
      x-apidog-orders:
        - session_name
        - session_ttl
        - session_recording
        - proxy_country
        - proxy_url
        - fingerprint
      x-apidog-ignore-properties: []
      x-apidog-folder: ''
    # Per-page scrape settings referenced by the crawl request's
    # `scrapeOptions` property. No property is marked required.
    ScrapeOptions:
      type: object
      x-apidog-folder: ""
      x-apidog-ignore-properties: []
      # Apidog display order of the properties below.
      x-apidog-orders:
        - formats
        - onlyMainContent
        - includeTags
        - excludeTags
        - headers
        - waitFor
        - timeout
      properties:
        # Output formats; an array drawn from a fixed enum, defaulting to
        # markdown only.
        formats:
          description: Formats to include in the output.
          type: array
          default: [markdown]
          items:
            type: string
            enum:
              - markdown
              - html
              - rawHtml
              - links
              - screenshot
              - screenshot@fullPage
              - json
            # Apidog per-value metadata; names and descriptions are empty.
            x-apidog-enum:
              - value: markdown
                name: ""
                description: ""
              - value: html
                name: ""
                description: ""
              - value: rawHtml
                name: ""
                description: ""
              - value: links
                name: ""
                description: ""
              - value: screenshot
                name: ""
                description: ""
              - value: screenshot@fullPage
                name: ""
                description: ""
              - value: json
                name: ""
                description: ""
        onlyMainContent:
          description: >-
            Only return the main content of the page excluding headers,
            navs, footers, etc.
          type: boolean
          default: true
        # Tag filters applied to the output.
        includeTags:
          description: Tags to include in the output.
          type: array
          items:
            type: string
        excludeTags:
          description: Tags to exclude from the output.
          type: array
          items:
            type: string
        # Free-form object: no named properties are declared.
        headers:
          description: >-
            Headers to send with the request. Can be used to send
            cookies, user-agent, etc.
          type: object
          properties: {}
          x-apidog-orders: []
          x-apidog-ignore-properties: []
        # Both timing values are expressed in milliseconds.
        waitFor:
          description: >-
            Specify a delay in milliseconds before fetching the
            content, allowing the page sufficient time to load.
          type: integer
          default: 0
        timeout:
          description: Timeout in milliseconds for the request
          type: integer
          default: 30000
    # Body of the 200 response: a boolean `success` flag and a string `id`.
    CrawlResponse:
      type: object
      x-apidog-folder: ""
      x-apidog-ignore-properties: []
      # Apidog display order of the properties below.
      x-apidog-orders:
        - success
        - id
      properties:
        success:
          type: boolean
        id:
          type: string
  # API key scheme: the key is sent in the `x-api-token` request header.
  securitySchemes:
    apikey-header-x-api-token:
      name: x-api-token
      in: header
      type: apiKey
# Server list; a single entry labelled "Prod Env".
servers:
  - description: Prod Env
    url: 'https://api.scrapeless.com'
# Document-level security requirement using the API key scheme.
security:
  - apikey-header-x-api-token: []

```