# Crawl multiple URLs based on options

## OpenAPI Specification

```yaml
# OpenAPI document header: spec version plus the `info` metadata block.
# `title` and `description` are empty strings in this export.
openapi: 3.0.1
info:
  title: ''
  description: ''
  version: 1.0.0
paths:
  /api/v2/crawler/crawl:
    # POST operation metadata: identifier first, then human-readable text,
    # then classification. No path/query/header parameters are declared.
    post:
      operationId: crawlUrls
      summary: Crawl multiple URLs based on options
      description: ''
      deprecated: false
      tags:
        - Crawler/Crawl
        - Crawling
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                # Starting point of the crawl.
                url:
                  description: The base URL to start crawling from
                  type: string
                  format: uri
                # Upper bound on the number of pages crawled.
                limit:
                  description: >-
                    Maximum number of pages to crawl. Default limit is 10000.
                  type: integer
                  default: 10000
                # Deny-list of regex patterns matched against the URL pathname.
                # The base URL named in the description now matches the host
                # used in its example URL.
                excludePaths:
                  type: array
                  items:
                    type: string
                  description: >-
                    URL pathname regex patterns that exclude matching URLs from
                    the crawl. For example, if you set "excludePaths":
                    ["blog/.*"] for the base URL scrapeless.com, any results
                    matching that pattern will be excluded, such as
                    https://www.scrapeless.com/blog/firecrawl-launch-week-1-recap.
                # Allow-list of regex patterns matched against the URL pathname.
                # The base URL named in the description now matches the host
                # used in its example URL.
                includePaths:
                  type: array
                  items:
                    type: string
                  description: >-
                    URL pathname regex patterns that include matching URLs in
                    the crawl. Only the paths that match the specified patterns
                    will be included in the response. For example, if you set
                    "includePaths": ["blog/.*"] for the base URL scrapeless.com,
                    only results matching that pattern will be included, such as
                    https://www.scrapeless.com/blog/firecrawl-launch-week-1-recap.
                # Path-depth limit, counted in slashes of the URL pathname.
                maxDepth:
                  description: >-
                    Maximum depth to crawl relative to the base URL.
                    Basically, the max number of slashes the pathname of a
                    scraped URL may contain.
                  type: integer
                  default: 10
                # Depth limit counted in link hops from the root / sitemapped
                # pages (depth 0), as opposed to `maxDepth`, which counts
                # slashes in the pathname.
                maxDiscoveryDepth:
                  type: integer
                  description: >-
                    Maximum depth to crawl based on discovery order. The root
                    site and sitemapped pages have a discovery depth of 0. For
                    example, if you set it to 1, and you set ignoreSitemap, you
                    will only crawl the entered URL and all URLs that are linked
                    on that page.
                # Sitemap handling.
                ignoreSitemap:
                  description: Ignore the website sitemap when crawling
                  type: boolean
                  default: false
                # URL de-duplication and pattern-matching switches.
                ignoreQueryParameters:
                  description: >-
                    Do not re-scrape the same path with different (or none) query
                    parameters
                  type: boolean
                  default: false
                deduplicateSimilarURLs:
                  description: Controls whether similar URLs should be deduplicated.
                  type: boolean
                regexOnFullURL:
                  description: >-
                    Controls whether the regular expression should be applied
                    to the full URL.
                  type: boolean
                # Link-following scope.
                allowBackwardLinks:
                  description: >-
                    By default, the crawl skips sublinks that aren’t part of
                    the URL hierarchy you specify. For example, crawling
                    https://example.com/products/ wouldn’t capture pages
                    under https://example.com/promotions/deal-567. To include
                    such links, enable the `allowBackwardLinks` parameter.
                  type: boolean
                  default: false
                allowExternalLinks:
                  description: Allows the crawler to follow links to external websites.
                  type: boolean
                  default: false
                # Pacing between page scrapes.
                delay:
                  description: >-
                    Delay in seconds between scrapes. This helps respect
                    website rate limits.
                  type: number
                # Per-page scrape settings, defined under components/schemas.
                scrapeOptions:
                  $ref: '#/components/schemas/ScrapeOptions'
                # Browser session settings for the crawl: session naming, TTL
                # and recording, proxy selection, and fingerprint configuration.
                # Every field below is declared as `type: string`.
                browserOptions:
                  type: object
                  properties:
                    sessionName:
                      type: string
                      description: >-
                        Set a name for your session to facilitate searching and
                        viewing in the historical session list.
                    # NOTE(review): declared as string although the description
                    # gives a duration in seconds (60-900, default 180) —
                    # confirm whether a numeric value is intended/accepted.
                    sessionTTL:
                      type: string
                      description: >-
                        Controls the session duration and automatically closes
                        the browser instance after timeout. Measured in seconds
                        (s), defaults to 180 seconds (3 minutes), customizable
                        between 60 seconds (1 minute) and 900 seconds
                        (recommended maximum 15 minutes, but longer times can be
                        set). Once the specified TTL is reached, the session
                        will expire and Scraping Browser will close the browser
                        instance to free resources.
                    # NOTE(review): declared as string although the description
                    # reads like an on/off flag that defaults to false — confirm
                    # the expected value format.
                    sessionRecording:
                      type: string
                      description: >-
                        Whether to enable session recording. When enabled, the
                        entire browser session execution process will be
                        automatically recorded, and after the session is
                        completed, it can be replayed and viewed in the
                        historical session list details. Defaults to false.
                    proxyCountry:
                      type: string
                      description: >-
                        Sets the target country/region for the proxy, sending
                        requests via an IP address from that region. You can
                        specify a country code (e.g., US for the United States,
                        GB for the United Kingdom, ANY for any country). See
                        country codes for all supported options.
                    # NOTE(review): the description refers to `proxy_*`
                    # parameters, while the sibling field in this schema is
                    # camelCase (`proxyCountry`) — confirm the naming.
                    proxyURL:
                      type: string
                      description: >-
                        Used to set the browser’s proxy URL, for example:
                        http://user:pass@ip:port. If this parameter is set, all
                        other proxy_* parameters will be ignored.


                        - 💡Custom proxy functionality is currently only
                        available to Enterprise and Enterprise Enhanced
                        subscription users Upgrade Now

                        - 💡Enterprise-level custom users can contact us to use
                        custom proxies.
                    # NOTE(review): declared as string, but the description
                    # lists several sub-settings (user agent, time zone,
                    # language, screen resolution) — confirm the expected shape.
                    fingerprint:
                      type: string
                      description: >-
                        A browser fingerprint is a nearly unique “digital
                        fingerprint” created using your browser and device
                        configuration information, which can be used to track
                        your online activity even without cookies. Fortunately,
                        configuring fingerprints in Scraping Browser is
                        optional. We offer deep customization of browser
                        fingerprints, such as core parameters like browser user
                        agent, time zone, language, and screen resolution, and
                        support extending functionality through custom launch
                        parameters. Suitable for multi-account management, data
                        collection, and privacy protection scenarios, using
                        scrapeless’s own Chromium browser completely avoids
                        detection. By default, our Scraping Browser service
                        generates a random fingerprint for each session.
                        Reference
                  x-apidog-refs: {}
                  # Apidog vendor extension listing the properties above in
                  # the same order as they are declared.
                  x-apidog-orders:
                    - sessionName
                    - sessionTTL
                    - sessionRecording
                    - proxyCountry
                    - proxyURL
                    - fingerprint
                  x-apidog-ignore-properties: []
              # `url` is the only mandatory property of the request body.
              required:
                - url
              # Apidog vendor extension listing the request properties in
              # declaration order (presumably the display order in the Apidog
              # UI — confirm before reordering).
              x-apidog-orders:
                - url
                - limit
                - excludePaths
                - includePaths
                - maxDepth
                - maxDiscoveryDepth
                - ignoreSitemap
                - ignoreQueryParameters
                - deduplicateSimilarURLs
                - regexOnFullURL
                - allowBackwardLinks
                - allowExternalLinks
                - delay
                - scrapeOptions
                - browserOptions
              x-apidog-ignore-properties: []
            # No named request examples are provided.
            examples: {}
      # Responses for the crawl operation. The error responses (402, 429, 500)
      # share the same body shape: an object with a single string `error`.
      # This document is OpenAPI 3.0.1, whose Schema Object supports a single
      # `example` value; the `examples` array is an OpenAPI 3.1 / JSON Schema
      # keyword, so the sample messages are declared with `example`.
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CrawlResponse'
          headers: {}
          x-apidog-name: OK
        '402':
          description: Payment required
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: Payment required to access this resource.
                x-apidog-orders:
                  - error
                x-apidog-ignore-properties: []
          headers: {}
          x-apidog-name: '402'
        '429':
          description: Too many requests
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: >-
                      Request rate limit exceeded. Please wait and try again
                      later.
                x-apidog-orders:
                  - error
                x-apidog-ignore-properties: []
          headers: {}
          x-apidog-name: '429'
        '500':
          description: Server error
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: An unexpected error occurred on the server.
                x-apidog-orders:
                  - error
                x-apidog-ignore-properties: []
          headers: {}
          x-apidog-name: Server Error
      # Operation-level security requirement: the API-key header scheme
      # declared under components.securitySchemes.
      security:
        - apikey-header-x-api-token: []
      # Apidog vendor metadata for this operation.
      x-apidog-folder: Crawler/Crawl
      x-apidog-status: released
      x-run-in-apidog: https://app.apidog.com/web/project/745098/apis/api-17509010-run
components:
  schemas:
    # Per-page scrape settings: output formats, content filtering by tag,
    # request headers, and timing.
    ScrapeOptions:
      type: object
      properties:
        formats:
          type: array
          items:
            type: string
            enum:
              - markdown
              - html
              - rawHtml
              - links
              - screenshot
              - screenshot@fullPage
              - json
            # Apidog vendor extension mirroring the enum values above; every
            # name and description is an empty string.
            x-apidog-enum:
              - value: markdown
                name: ''
                description: ''
              - value: html
                name: ''
                description: ''
              - value: rawHtml
                name: ''
                description: ''
              - value: links
                name: ''
                description: ''
              - value: screenshot
                name: ''
                description: ''
              - value: screenshot@fullPage
                name: ''
                description: ''
              - value: json
                name: ''
                description: ''
          description: Formats to include in the output.
          default:
            - markdown
        onlyMainContent:
          type: boolean
          description: >-
            Only return the main content of the page excluding headers, navs,
            footers, etc.
          default: true
        includeTags:
          type: array
          items:
            type: string
          description: Tags to include in the output.
        excludeTags:
          type: array
          items:
            type: string
          description: Tags to exclude from the output.
        # Free-form object: no fixed property names are declared.
        headers:
          type: object
          description: >-
            Headers to send with the request. Can be used to send cookies,
            user-agent, etc.
          x-apidog-orders: []
          properties: {}
          x-apidog-ignore-properties: []
        # Both durations below are expressed in milliseconds.
        waitFor:
          type: integer
          description: >-
            Specify a delay in milliseconds before fetching the content,
            allowing the page sufficient time to load.
          default: 0
        timeout:
          type: integer
          description: Timeout in milliseconds for the request
          default: 30000
      # Apidog vendor extension listing the properties in declaration order.
      x-apidog-orders:
        - formats
        - onlyMainContent
        - includeTags
        - excludeTags
        - headers
        - waitFor
        - timeout
      x-apidog-ignore-properties: []
      x-apidog-folder: ''
    # Body of the 200 response: a boolean `success` flag and a string `id`
    # (presumably the identifier of the created crawl job — confirm).
    CrawlResponse:
      type: object
      properties:
        success:
          type: boolean
        id:
          type: string
      x-apidog-orders:
        - success
        - id
      x-apidog-ignore-properties: []
      x-apidog-folder: ''
  securitySchemes:
    # HTTP bearer authentication. OpenAPI only accepts `apiKey`, `http`,
    # `oauth2` or `openIdConnect` as a security scheme `type`; bearer tokens
    # are declared as `type: http` together with `scheme: bearer`.
    bearer:
      type: http
      scheme: bearer
      description: Bearer token authentication using your Scrapeless API key
    # API key sent in the `x-api-token` request header.
    apikey-header-x-api-token:
      type: apiKey
      in: header
      name: x-api-token
# Base URL against which the paths in this document are resolved.
servers:
  - url: https://api.scrapeless.com
    description: Prod Env
# Document-level security requirement: the `x-api-token` API-key header scheme.
security:
  - apikey-header-x-api-token: []

```
