@langchain/community
Version:
Third-party integrations for LangChain.js
125 lines (124 loc) • 5.59 kB
JavaScript
;
/* eslint-disable @typescript-eslint/no-explicit-any */
Object.defineProperty(exports, "__esModule", { value: true });
exports.ApifyDatasetLoader = void 0;
const apify_client_1 = require("apify-client");
const async_caller_1 = require("@langchain/core/utils/async_caller");
const env_1 = require("@langchain/core/utils/env");
const base_1 = require("@langchain/core/document_loaders/base");
/**
 * Document loader that reads the items of an Apify dataset and converts
 * them into documents using a caller-supplied mapping function. Extends
 * BaseDocumentLoader.
 * @example
 * ```typescript
 * const loader = new ApifyDatasetLoader("your-dataset-id", {
 *   datasetMappingFunction: (item) =>
 *     new Document({
 *       pageContent: item.text || "",
 *       metadata: { source: item.url },
 *     }),
 *   clientOptions: {
 *     token: "your-apify-token",
 *   },
 * });
 *
 * const docs = await loader.load();
 * ```
 */
class ApifyDatasetLoader extends base_1.BaseDocumentLoader {
    /**
     * @param datasetId ID of the Apify dataset to load items from.
     * @param config Loader configuration.
     * @param config.datasetMappingFunction Function applied to every dataset item; its results become the loaded documents.
     * @param config.clientOptions Options passed to the `ApifyClient` constructor. When `token` is missing, the `APIFY_API_TOKEN` environment variable is used instead.
     * Every other key of `config` is passed to the `AsyncCaller` that wraps the mapping function calls.
     */
    constructor(datasetId, config) {
        super();
        // Declare the instance fields up front (same descriptors and order as
        // the compiled class-field output) before assigning their real values.
        for (const field of ["apifyClient", "datasetId", "datasetMappingFunction", "caller"]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        const { clientOptions, datasetMappingFunction, ...asyncCallerParams } = config;
        this.apifyClient = new apify_client_1.ApifyClient(ApifyDatasetLoader._resolveClientOptions(clientOptions));
        this.datasetId = datasetId;
        this.datasetMappingFunction = datasetMappingFunction;
        this.caller = new async_caller_1.AsyncCaller(asyncCallerParams);
    }
    /**
     * Picks the Apify API token: the explicit `token` of the given client
     * options wins, otherwise the `APIFY_API_TOKEN` environment variable.
     * @param config Optional Apify client options.
     * @returns The token, or `undefined` when neither source provides one.
     */
    static _getApifyApiToken(config) {
        return config?.token ?? (0, env_1.getEnvironmentVariable)("APIFY_API_TOKEN");
    }
    /**
     * Builds the options object for an `ApifyClient`: a shallow copy of the
     * given options with `token` replaced by the resolved API token.
     * @param clientOptions Optional Apify client options supplied by the caller.
     * @returns A new options object; the input is not mutated.
     */
    static _resolveClientOptions(clientOptions) {
        const token = ApifyDatasetLoader._getApifyApiToken(clientOptions);
        return { ...clientOptions, token };
    }
    /**
     * Fetches the dataset items from the Apify platform and runs the
     * datasetMappingFunction on each of them through the async caller.
     * @returns A flat array of the mapping results (a mapping function may
     * return a single document or an array of documents per item).
     */
    async load() {
        const { items } = await this.apifyClient
            .dataset(this.datasetId)
            .listItems({ clean: true });
        const mapped = await Promise.all(items.map((item) => this.caller.call(async () => this.datasetMappingFunction(item))));
        // One level of flattening merges per-item arrays into a single list.
        return mapped.flat();
    }
    /**
     * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
     * @param actorId The ID or name of the Actor on the Apify platform.
     * @param input The input object of the Actor that you're trying to run.
     * @param config Settings for the Actor run and for the resulting loader.
     * @param config.callOptions Options passed to the Actor call; defaults to `{}`.
     * @param config.clientOptions Options for the Apify client. They are used both for starting the run and for the returned loader.
     * @param config.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
     * @returns An instance of `ApifyDatasetLoader` bound to the default dataset of the Actor run.
     */
    static async fromActorCall(actorId, input, config) {
        // Use the caller's full client options (not just the token) so the run
        // is started against the same endpoint/settings the loader reads from.
        const clientOptions = ApifyDatasetLoader._resolveClientOptions(config.clientOptions);
        const apifyClient = new apify_client_1.ApifyClient(clientOptions);
        const actorRun = await apifyClient
            .actor(actorId)
            .call(input, config.callOptions ?? {});
        return new ApifyDatasetLoader(actorRun.defaultDatasetId, {
            datasetMappingFunction: config.datasetMappingFunction,
            clientOptions,
        });
    }
    /**
     * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
     * @param taskId The ID or name of the task on the Apify platform.
     * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
     * @param config Settings for the task run and for the resulting loader.
     * @param config.callOptions Options passed to the task call; defaults to `{}`.
     * @param config.clientOptions Options for the Apify client. They are used both for starting the run and for the returned loader.
     * @param config.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
     * @returns An instance of `ApifyDatasetLoader` bound to the default dataset of the task's run.
     */
    static async fromActorTaskCall(taskId, input, config) {
        // Same reasoning as fromActorCall: honor every client option for the run.
        const clientOptions = ApifyDatasetLoader._resolveClientOptions(config.clientOptions);
        const apifyClient = new apify_client_1.ApifyClient(clientOptions);
        const taskRun = await apifyClient
            .task(taskId)
            .call(input, config.callOptions ?? {});
        return new ApifyDatasetLoader(taskRun.defaultDatasetId, {
            datasetMappingFunction: config.datasetMappingFunction,
            clientOptions,
        });
    }
}
exports.ApifyDatasetLoader = ApifyDatasetLoader;