apify-client

import type { STORAGE_GENERAL_ACCESS } from '@apify/consts'; import type { ApiClientSubResourceOptions } from '../base/api_client'; import { ResourceClient } from '../base/resource_client'; import type { PaginatedIterator, PaginationOptions } from '../utils'; /** * Client for managing a specific Dataset. * * Datasets store structured data results from Actor runs. This client provides methods to push items, * list and retrieve items, download items in various formats (JSON, CSV, Excel, etc.), and manage * the dataset. * * @template Data - Type of items stored in the dataset * * @example * ```javascript * const client = new ApifyClient({ token: 'my-token' }); * const datasetClient = client.dataset('my-dataset-id'); * * // Push items to the dataset * await datasetClient.pushItems([ * { url: 'https://example.com', title: 'Example' }, * { url: 'https://test.com', title: 'Test' } * ]); * * // List all items * const { items } = await datasetClient.listItems(); * * // Download items as CSV * const buffer = await datasetClient.downloadItems('csv'); * ``` * * @see https://docs.apify.com/platform/storage/dataset */ export declare class DatasetClient<Data extends Record<string | number, any> = Record<string | number, unknown>> extends ResourceClient { /** * @hidden */ constructor(options: ApiClientSubResourceOptions); /** * Gets the dataset object from the Apify API. * * @returns The Dataset object, or `undefined` if it does not exist * @see https://docs.apify.com/api/v2/dataset-get */ get(): Promise<Dataset | undefined>; /** * Updates the dataset with specified fields. * * @param newFields - Fields to update in the dataset * @returns The updated Dataset object * @see https://docs.apify.com/api/v2/dataset-put */ update(newFields: DatasetClientUpdateOptions): Promise<Dataset>; /** * Deletes the dataset. * * @see https://docs.apify.com/api/v2/dataset-delete */ delete(): Promise<void>; /** * Lists items in the dataset. * * Returns a paginated list of dataset items. You can use pagination parameters to retrieve * specific subsets of items, and various filtering and formatting options to customize * the output. * * @param options - Options for listing items * @param options.limit - Maximum number of items to return. Default is all items. * @param options.chunkSize - Maximum number of items returned in one API response. Relevant in the context of asyncIterator. * @param options.offset - Number of items to skip from the beginning. Default is 0. * @param options.desc - If `true`, items are returned in descending order (newest first). Default is `false`. * @param options.fields - Array of field names to include in the results. Omits all other fields. * @param options.omit - Array of field names to exclude from the results. * @param options.clean - If `true`, returns only non-empty items and skips hidden fields. Default is `false`. * @param options.skipEmpty - If `true`, skips empty items. Default is `false`. * @param options.skipHidden - If `true`, skips hidden fields (fields starting with `#`). Default is `false`. * @param options.flatten - Array of field names to flatten. Nested objects are converted to dot notation (e.g., `obj.field`). * @param options.unwind - Field name or array of field names to unwind. Each array value creates a separate item. * @param options.view - Name of a predefined view to use for field selection. * @returns A paginated list with `items`, `total` count, `offset`, `count`, and `limit` * @see https://docs.apify.com/api/v2/dataset-items-get * * @example * ```javascript * // Get first 100 items * const { items, total } = await client.dataset('my-dataset').listItems({ limit: 100 }); * console.log(`Retrieved ${items.length} of ${total} total items`); * * // Get items with specific fields only * const { items } = await client.dataset('my-dataset').listItems({ * fields: ['url', 'title'], * skipEmpty: true, * limit: 50 * }); * * // Get items in descending order with pagination * const { items } = await client.dataset('my-dataset').listItems({ * desc: true, * offset: 100, * limit: 50 * }); * ``` */ listItems(options?: DatasetClientListItemOptions): PaginatedIterator<Data>; /** * Downloads dataset items in a specific format. * * Unlike {@link listItems} which returns a {@link PaginatedList} with an array of individual * dataset items, this method returns the items serialized to the provided format * (JSON, CSV, Excel, etc.) as a Buffer. Useful for exporting data for further processing. * * @param format - Output format: `'json'`, `'jsonl'`, `'csv'`, `'xlsx'`, `'xml'`, `'rss'`, or `'html'` * @param options - Download and formatting options (extends all options from {@link listItems}) * @param options.attachment - If `true`, the response will have `Content-Disposition: attachment` header. * @param options.bom - If `true`, adds UTF-8 BOM to the beginning of the file (useful for Excel compatibility). * @param options.delimiter - CSV delimiter character. Default is `,` (comma). * @param options.skipHeaderRow - If `true`, CSV export will not include the header row with field names. * @param options.xmlRoot - Name of the root XML element. Default is `'items'`. * @param options.xmlRow - Name of the XML element for each item. Default is `'item'`. * @param options.fields - Array of field names to include in the export. * @param options.omit - Array of field names to exclude from the export. * @returns Buffer containing the serialized data in the specified format * @see https://docs.apify.com/api/v2/dataset-items-get * * @example * ```javascript * // Download as CSV with BOM for Excel compatibility * const csvBuffer = await client.dataset('my-dataset').downloadItems('csv', { bom: true }); * require('fs').writeFileSync('output.csv', csvBuffer); * * // Download as Excel with custom options * const xlsxBuffer = await client.dataset('my-dataset').downloadItems('xlsx', { * fields: ['url', 'title', 'price'], * skipEmpty: true, * limit: 1000 * }); * * // Download as XML with custom element names * const xmlBuffer = await client.dataset('my-dataset').downloadItems('xml', { * xmlRoot: 'products', * xmlRow: 'product' * }); * ``` */ downloadItems(format: DownloadItemsFormat, options?: DatasetClientDownloadItemsOptions): Promise<Buffer>; /** * Stores one or more items into the dataset. * * Items can be objects, strings, or arrays thereof. Each item will be stored as a separate * record in the dataset. Objects are automatically serialized to JSON. If you provide an array, * all items will be stored in order. This method is idempotent - calling it multiple times * with the same data will not create duplicates, but will append items each time. * * @param items - A single item (object or string) or an array of items to store. * Objects are automatically stringified to JSON. Strings are stored as-is. * @see https://docs.apify.com/api/v2/dataset-items-post * * @example * ```javascript * // Store a single object * await client.dataset('my-dataset').pushItems({ * url: 'https://example.com', * title: 'Example Page', * extractedAt: new Date() * }); * * // Store multiple items at once * await client.dataset('my-dataset').pushItems([ * { url: 'https://example.com', title: 'Example' }, * { url: 'https://test.com', title: 'Test' }, * { url: 'https://demo.com', title: 'Demo' } * ]); * * // Store string items * await client.dataset('my-dataset').pushItems(['item1', 'item2', 'item3']); * ``` */ pushItems(items: Data | Data[] | string | string[]): Promise<void>; /** * Gets statistical information about the dataset. * * Returns statistics for each field in the dataset, including information about * data types, null counts, and value ranges. * * @returns Dataset statistics, or `undefined` if not available * @see https://docs.apify.com/api/v2/dataset-statistics-get */ getStatistics(): Promise<DatasetStatistics | undefined>; /** * Generates a public URL for accessing dataset items. * * If the client has permission to access the dataset's URL signing key, * the URL will include a cryptographic signature allowing access without authentication. * This is useful for sharing dataset results with external services or users. * * @param options - URL generation options (extends all options from {@link listItems}) * @param options.expiresInSecs - Number of seconds until the signed URL expires. If omitted, the URL never expires. * @param options.fields - Array of field names to include in the response. * @param options.limit - Maximum number of items to return. * @param options.offset - Number of items to skip. * @returns A public URL string for accessing the dataset items * * @example * ```javascript * // Create a URL that expires in 1 hour with specific fields * const url = await client.dataset('my-dataset').createItemsPublicUrl({ * expiresInSecs: 3600, * fields: ['url', 'title'], * limit: 100 * }); * console.log(`Share this URL: ${url}`); * * // Create a permanent public URL for clean items only * const url = await client.dataset('my-dataset').createItemsPublicUrl({ * clean: true, * skipEmpty: true * }); * ``` */ createItemsPublicUrl(options?: DatasetClientCreateItemsUrlOptions): Promise<string>; private _createPaginationList; } /** * Represents a dataset storage on the Apify platform. * * Datasets store structured data as a sequence of items (records). Each item is a JSON object. * Datasets are useful for storing results from web scraping, crawling, or data processing tasks. */ export interface Dataset { id: string; name?: string; title?: string; userId: string; username?: string; createdAt: Date; modifiedAt: Date; accessedAt: Date; itemCount: number; cleanItemCount: number; actId?: string; actRunId?: string; stats: DatasetStats; fields: string[]; generalAccess?: STORAGE_GENERAL_ACCESS | null; urlSigningSecretKey?: string | null; itemsPublicUrl: string; } /** * Statistics about dataset usage and storage. */ export interface DatasetStats { readCount?: number; writeCount?: number; deleteCount?: number; storageBytes?: number; } /** * Options for updating a dataset. */ export interface DatasetClientUpdateOptions { name?: string | null; title?: string; generalAccess?: STORAGE_GENERAL_ACCESS | null; } /** * Options for listing items from a dataset. * * Provides various filtering, pagination, and transformation options to customize * the output format and content of retrieved items. */ export interface DatasetClientListItemOptions extends PaginationOptions { clean?: boolean; desc?: boolean; flatten?: string[]; fields?: string[]; omit?: string[]; skipEmpty?: boolean; skipHidden?: boolean; unwind?: string | string[]; view?: string; signature?: string; } /** * Options for creating a public URL to access dataset items. * * Extends {@link DatasetClientListItemOptions} with URL expiration control. */ export interface DatasetClientCreateItemsUrlOptions extends DatasetClientListItemOptions { expiresInSecs?: number; } /** * Supported formats for downloading dataset items. */ export declare enum DownloadItemsFormat { JSON = "json", JSONL = "jsonl", XML = "xml", HTML = "html", CSV = "csv", XLSX = "xlsx", RSS = "rss" } /** * Options for downloading dataset items in a specific format. * * Extends {@link DatasetClientListItemOptions} with format-specific options. */ export interface DatasetClientDownloadItemsOptions extends DatasetClientListItemOptions { attachment?: boolean; bom?: boolean; delimiter?: string; skipHeaderRow?: boolean; xmlRoot?: string; xmlRow?: string; } /** * Statistical information about dataset fields. * * Provides insights into the data structure and content of the dataset. */ export interface DatasetStatistics { fieldStatistics: Record<string, FieldStatistics>; } /** * Statistics for a single field in a dataset. */ export interface FieldStatistics { min?: number; max?: number; nullCount?: number; emptyCount?: number; } //# sourceMappingURL=dataset.d.ts.map