> ## Documentation Index
> Fetch the complete documentation index at: https://docs.hasdata.com/llms.txt
> Use this file to discover all available pages before exploring further.

# LLM Extraction

Use `aiExtractRules` to define custom rules for extracting structured data from any web page using large language models (LLMs). This is ideal when you don’t want to write manual CSS selectors and need clean, field-level data in JSON format.

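Each key you define represents a field you want to extract. For each field you provide a `type` and (optionally) a `description` to help the model understand what data to look for. The extracted values are returned under the `aiResponse` key of the response, using the same field names.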

## Supported Types

* `string` – plain text value
* `number` – numeric value
* `boolean` – true or false
* `list` – an array of values (the element type or object structure is defined under `output`)
* `item` – a nested object (with its own structure under `output`)

You can also add `enum` to a `string` field to restrict it to a fixed set of values, for example `"enum": ["paid", "free"]`.

## Example Request

<CodeGroup>
  ```bash cURL theme={null}
  curl --request POST \
    --url 'https://api.hasdata.com/scrape/web' \
    --header 'Content-Type: application/json' \
    --header 'x-api-key: <your-api-key>' \
    --data '{"url":"https://hasdata.com","aiExtractRules":{"company":{"description":"company name","type":"string"},"reviews":{"type":"list","output":{"review":{"description":"review text","type":"string"},"author":{"type":"string"}}},"clients":{"type":"list","output":"string"},"trial":{"type":"item","output":{"available":{"type":"boolean"},"type":{"type":"string","enum":["paid","free"]}}},"yearFounded":{"type":"number"}}}'
  ```

  ```bash HasData CLI theme={null}
  hasdata web-scraping \
    --url 'https://hasdata.com' \
    --ai-extract-rules-json '{"company":{"description":"company name","type":"string"},"reviews":{"type":"list","output":{"review":{"description":"review text","type":"string"},"author":{"type":"string"}}},"clients":{"type":"list","output":"string"},"trial":{"type":"item","output":{"available":{"type":"boolean"},"type":{"type":"string","enum":["paid","free"]}}},"yearFounded":{"type":"number"}}'
  ```

  ```javascript Node.js theme={null}
  const axios = require('axios').default;

  const options = {
    method: 'POST',
    url: 'https://api.hasdata.com/scrape/web',
    headers: {'Content-Type': 'application/json', 'x-api-key': '<your-api-key>'},
    data: {
      url: 'https://hasdata.com',
      aiExtractRules: {
        company: {description: 'company name', type: 'string'},
        reviews: {
          type: 'list',
          output: {review: {description: 'review text', type: 'string'}, author: {type: 'string'}}
        },
        clients: {type: 'list', output: 'string'},
        trial: {
          type: 'item',
          output: {available: {type: 'boolean'}, type: {type: 'string', enum: ['paid', 'free']}}
        },
        yearFounded: {type: 'number'}
      }
    }
  };

  try {
    const { data } = await axios.request(options);
    console.log(data);
  } catch (error) {
    console.error(error);
  }
  ```

  ```python Python theme={null}
  import requests

  url = "https://api.hasdata.com/scrape/web"

  payload = {
      "url": "https://hasdata.com",
      "aiExtractRules": {
          "company": {
              "description": "company name",
              "type": "string"
          },
          "reviews": {
              "type": "list",
              "output": {
                  "review": {
                      "description": "review text",
                      "type": "string"
                  },
                  "author": { "type": "string" }
              }
          },
          "clients": {
              "type": "list",
              "output": "string"
          },
          "trial": {
              "type": "item",
              "output": {
                  "available": { "type": "boolean" },
                  "type": {
                      "type": "string",
                      "enum": ["paid", "free"]
                  }
              }
          },
          "yearFounded": { "type": "number" }
      }
  }
  headers = {
      "Content-Type": "application/json",
      "x-api-key": "<your-api-key>"
  }

  response = requests.post(url, json=payload, headers=headers)

  print(response.json())
  ```

  ```php PHP theme={null}
  <?php

  $payload = [
      "url" => "https://hasdata.com",
      "aiExtractRules" => ["company" => ["description" => "company name", "type" => "string"], "reviews" => ["type" => "list", "output" => ["review" => ["description" => "review text", "type" => "string"], "author" => ["type" => "string"]]], "clients" => ["type" => "list", "output" => "string"], "trial" => ["type" => "item", "output" => ["available" => ["type" => "boolean"], "type" => ["type" => "string", "enum" => ["paid", "free"]]]], "yearFounded" => ["type" => "number"]],
  ];

  $curl = curl_init();

  curl_setopt_array($curl, [
    CURLOPT_URL => "https://api.hasdata.com/scrape/web",
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_CUSTOMREQUEST => "POST",
    CURLOPT_POSTFIELDS => json_encode($payload),
    CURLOPT_HTTPHEADER => [
      "Content-Type: application/json",
      "x-api-key: <your-api-key>",
    ],
  ]);

  $response = curl_exec($curl);
  curl_close($curl);

  echo $response;
  ```

  ```java Java theme={null}
  OkHttpClient client = new OkHttpClient();

  MediaType mediaType = MediaType.parse("application/json");
  String json = """
    {
      "url": "https://hasdata.com",
      "aiExtractRules": {
        "company": {
          "description": "company name",
          "type": "string"
        },
        "reviews": {
          "type": "list",
          "output": {
            "review": {
              "description": "review text",
              "type": "string"
            },
            "author": {
              "type": "string"
            }
          }
        },
        "clients": {
          "type": "list",
          "output": "string"
        },
        "trial": {
          "type": "item",
          "output": {
            "available": {
              "type": "boolean"
            },
            "type": {
              "type": "string",
              "enum": [
                "paid",
                "free"
              ]
            }
          }
        },
        "yearFounded": {
          "type": "number"
        }
      }
    }
  """;
  RequestBody requestBody = RequestBody.create(json, mediaType);

  Request request = new Request.Builder()
    .url("https://api.hasdata.com/scrape/web")
    .post(requestBody)
    .addHeader("Content-Type", "application/json")
    .addHeader("x-api-key", "<your-api-key>")
    .build();

  Response response = client.newCall(request).execute();
  ```

  ```csharp C# theme={null}
  using System.Net.Http;
  using System.Text;

  var client = new HttpClient();

  var json = """
  {
    "url": "https://hasdata.com",
    "aiExtractRules": {
      "company": {
        "description": "company name",
        "type": "string"
      },
      "reviews": {
        "type": "list",
        "output": {
          "review": {
            "description": "review text",
            "type": "string"
          },
          "author": {
            "type": "string"
          }
        }
      },
      "clients": {
        "type": "list",
        "output": "string"
      },
      "trial": {
        "type": "item",
        "output": {
          "available": {
            "type": "boolean"
          },
          "type": {
            "type": "string",
            "enum": [
              "paid",
              "free"
            ]
          }
        }
      },
      "yearFounded": {
        "type": "number"
      }
    }
  }
  """;
  var content = new StringContent(json, Encoding.UTF8, "application/json");

  var request = new HttpRequestMessage(new HttpMethod("POST"), "https://api.hasdata.com/scrape/web")
  {
      Content = content,
  };
  request.Headers.Add("x-api-key", "<your-api-key>");

  using var response = await client.SendAsync(request);
  response.EnsureSuccessStatusCode();
  var body = await response.Content.ReadAsStringAsync();
  Console.WriteLine(body);
  ```

  ```ruby Ruby theme={null}
  require 'net/http'
  require 'uri'
  require 'json'

  uri = URI("https://api.hasdata.com/scrape/web")
  payload = {
    "url" => "https://hasdata.com",
    "aiExtractRules" => {"company" => {"description" => "company name", "type" => "string"}, "reviews" => {"type" => "list", "output" => {"review" => {"description" => "review text", "type" => "string"}, "author" => {"type" => "string"}}}, "clients" => {"type" => "list", "output" => "string"}, "trial" => {"type" => "item", "output" => {"available" => {"type" => "boolean"}, "type" => {"type" => "string", "enum" => ["paid", "free"]}}}, "yearFounded" => {"type" => "number"}},
  }

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true

  request = Net::HTTP::Post.new(uri)
  request["Content-Type"] = 'application/json'
  request["x-api-key"] = '<your-api-key>'
  request.body = payload.to_json

  response = http.request(request)
  puts response.read_body
  ```

  ```rust Rust theme={null}
  use reqwest::blocking::Client;
  use serde_json::json;

  fn main() -> Result<(), Box<dyn std::error::Error>> {
      let client = Client::new();
      let payload = json!({
          "url": "https://hasdata.com",
          "aiExtractRules": {
              "company": {
                  "description": "company name",
                  "type": "string"
              },
              "reviews": {
                  "type": "list",
                  "output": {
                      "review": {
                          "description": "review text",
                          "type": "string"
                      },
                      "author": {
                          "type": "string"
                      }
                  }
              },
              "clients": {
                  "type": "list",
                  "output": "string"
              },
              "trial": {
                  "type": "item",
                  "output": {
                      "available": {
                          "type": "boolean"
                      },
                      "type": {
                          "type": "string",
                          "enum": [
                              "paid",
                              "free"
                          ]
                      }
                  }
              },
              "yearFounded": {
                  "type": "number"
              }
          }
      });
      let res = client
          .post("https://api.hasdata.com/scrape/web")
          .header("Content-Type", "application/json")
          .header("x-api-key", "<your-api-key>")
          .json(&payload)
          .send()?
          .text()?;
      println!("{}", res);
      Ok(())
  }
  ```

  ```go Go theme={null}
  package main

  import (
  	"bytes"
  	"fmt"
  	"io"
  	"net/http"
  )

  func main() {
  	payload := []byte(`{
    "url": "https://hasdata.com",
    "aiExtractRules": {
      "company": {
        "description": "company name",
        "type": "string"
      },
      "reviews": {
        "type": "list",
        "output": {
          "review": {
            "description": "review text",
            "type": "string"
          },
          "author": {
            "type": "string"
          }
        }
      },
      "clients": {
        "type": "list",
        "output": "string"
      },
      "trial": {
        "type": "item",
        "output": {
          "available": {
            "type": "boolean"
          },
          "type": {
            "type": "string",
            "enum": [
              "paid",
              "free"
            ]
          }
        }
      },
      "yearFounded": {
        "type": "number"
      }
    }
  }`)

  	req, _ := http.NewRequest("POST", "https://api.hasdata.com/scrape/web", bytes.NewBuffer(payload))
  	req.Header.Add("Content-Type", "application/json")
  	req.Header.Add("x-api-key", "<your-api-key>")

  	res, _ := http.DefaultClient.Do(req)
  	defer res.Body.Close()

  	responseBody, _ := io.ReadAll(res.Body)
  	fmt.Println(string(responseBody))
  }
  ```
</CodeGroup>

## Example Response

```json theme={null}
{
  "requestMetadata": {
    "id": "784b9b3a-8426-431c-a516-beec621183a0",
    "status": "ok"
  },
  "content": "<!DOCTYPE html><html lang=\"en\"><head>...</body></html>",
  "aiResponse": {
    "company": "HasData",
    "reviews": [
      {
        "review": "Roman from HasData went above and beyond to help us with our scraping needs...",
        "author": "Michael Bonacina"
      },
      {
        "review": "I found HasData, which is one of the best scraping services I have ever used...",
        "author": "Hussein Ali"
      }
    ],
    "clients": [
      "Stanford",
      "Salesforce",
      "Samsung",
      "Nvidia",
      "Mailchimp",
      "Harvard",
      "Copyleaks",
      "LosAngelesTimes",
      "SurveySparrow"
    ],
    "trial": {
      "available": true,
      "type": "free"
    },
    "yearFounded": null
  }
}
```

## Notes

* Descriptions are optional but highly recommended for accuracy.
* `list` fields can output flat values (`"output": "string"`) or objects (`"output": { ... }`).
* Fields with no match on the page are returned as `null` (see `yearFounded` in the example response).
