@langchain/community
Version:
Third-party integrations for LangChain.js
135 lines (134 loc) • 5.81 kB
JavaScript
// Tag the CommonJS `exports` object so that Object.prototype.toString
// reports it as "[object Module]", mirroring an ES module namespace.
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
// Bundler-generated runtime helpers; `__exportAll` is used below.
const require_runtime = require("../../_virtual/_rolldown/runtime.cjs");
let _langchain_core_utils_env = require("@langchain/core/utils/env");
let _langchain_core_utils_async_caller = require("@langchain/core/utils/async_caller");
let _langchain_core_document_loaders_base = require("@langchain/core/document_loaders/base");
let apify_client = require("apify-client");
//#region src/document_loaders/web/apify_dataset.ts
// Namespace-style object for this module. The export is given as a getter
// function (`() => ApifyDatasetLoader`) rather than a value, so the class is
// only looked up when the getter runs; at this point in the file the class
// has not been assigned yet.
// NOTE(review): `__exportAll` lives in the rolldown runtime and is not
// visible here; presumably it installs each getter as an accessor property
// on a fresh object. Confirm against the runtime file.
var apify_dataset_exports = /* @__PURE__ */ require_runtime.__exportAll({ ApifyDatasetLoader: () => ApifyDatasetLoader });
/**
 * A document loader that reads the items of an Apify dataset and turns
 * them into documents using a caller-supplied mapping function. Extends
 * BaseDocumentLoader.
 * @example
 * ```typescript
 * const loader = new ApifyDatasetLoader("your-dataset-id", {
 *   datasetMappingFunction: (item) =>
 *     new Document({
 *       pageContent: item.text || "",
 *       metadata: { source: item.url },
 *     }),
 *   clientOptions: {
 *     token: "your-apify-token",
 *   },
 * });
 *
 * const docs = await loader.load();
 *
 * console.log(docs.length);
 * console.log(docs.map((d) => d.metadata.source));
 * ```
 */
var ApifyDatasetLoader = class ApifyDatasetLoader extends _langchain_core_document_loaders_base.BaseDocumentLoader {
	/** ApifyClient instance used to read the dataset. */
	apifyClient;
	/** ID of the Apify dataset whose items are loaded. */
	datasetId;
	/** Function applied to every dataset item to produce document(s). */
	datasetMappingFunction;
	/** AsyncCaller through which every mapping call is routed. */
	caller;
	/**
	 * @param datasetId The ID of the dataset on the Apify platform.
	 * @param config Loader configuration (required).
	 * @param config.datasetMappingFunction Converts a single dataset item to a document (or an array of documents).
	 * @param config.clientOptions Options forwarded to the ApifyClient constructor.
	 *   All remaining properties of `config` are forwarded to the AsyncCaller constructor.
	 */
	constructor(datasetId, config) {
		super();
		const { clientOptions, datasetMappingFunction, ...asyncCallerParams } = config;
		this.apifyClient = ApifyDatasetLoader._getApifyClient(clientOptions);
		this.datasetId = datasetId;
		this.datasetMappingFunction = datasetMappingFunction;
		this.caller = new _langchain_core_utils_async_caller.AsyncCaller(asyncCallerParams);
	}
	/**
	 * Creates an instance of the ApifyClient class with the provided clientOptions.
	 * The token is resolved via `_getApifyApiToken`, and `_addUserAgent` is appended
	 * after any caller-supplied request interceptors for langchainjs attribution.
	 * @param clientOptions Optional ApifyClient options; may be undefined.
	 * @returns A new ApifyClient.
	 * @private
	 */
	static _getApifyClient(clientOptions) {
		const token = ApifyDatasetLoader._getApifyApiToken(clientOptions);
		const callerInterceptors = clientOptions?.requestInterceptors ?? [];
		return new apify_client.ApifyClient({
			...clientOptions,
			// Listed after the spread so the resolved token (explicit option or
			// environment fallback) always wins over the raw option.
			token,
			requestInterceptors: [...callerInterceptors, ApifyDatasetLoader._addUserAgent]
		});
	}
	/**
	 * Resolves the Apify API token: the explicit `token` option when it is not
	 * null/undefined, otherwise the `APIFY_API_TOKEN` environment variable.
	 * @param config Optional client options; may be undefined.
	 * @returns The token, or whatever the environment lookup yields when none is configured.
	 * @private
	 */
	static _getApifyApiToken(config) {
		return config?.token ?? (0, _langchain_core_utils_env.getEnvironmentVariable)("APIFY_API_TOKEN");
	}
	/**
	 * Adds langchainjs attribution to the User-Agent header of a request config.
	 * An existing User-Agent is kept and the attribution is appended after "; ".
	 * When there is no (or an empty) User-Agent, the attribution alone is used so
	 * the header does not start with a dangling separator.
	 * @param config The request config passed to the interceptor.
	 * @returns A shallow copy of `config`. An existing `headers` object is shared
	 *   with the input, not cloned, so the header is set on that same object.
	 * @private
	 */
	static _addUserAgent(config) {
		const attribution = "Origin/langchainjs";
		const updatedConfig = { ...config };
		updatedConfig.headers ??= {};
		const currentUserAgent = updatedConfig.headers["User-Agent"];
		updatedConfig.headers["User-Agent"] = currentUserAgent ? `${currentUserAgent}; ${attribution}` : attribution;
		return updatedConfig;
	}
	/**
	 * Retrieves the dataset items from the Apify platform and applies the
	 * datasetMappingFunction to each item, routing every call through the
	 * AsyncCaller. Results are flattened by one level, so the mapping
	 * function may return either a single document or an array of documents.
	 * @returns An array of Document instances.
	 */
	async load() {
		// NOTE(review): `clean: true` is an Apify API flag; presumably it omits
		// empty items and hidden fields. Confirm against the Apify docs.
		const dataset = await this.apifyClient.dataset(this.datasetId).listItems({ clean: true });
		const pendingDocuments = dataset.items.map((item) => this.caller.call(async () => this.datasetMappingFunction(item)));
		const mapped = await Promise.all(pendingDocuments);
		return mapped.flat();
	}
	/**
	 * Builds a loader for the default dataset of a finished run, reusing the
	 * caller's client options with the already-resolved API token.
	 * @param run The run object returned by an Actor or task call; its `defaultDatasetId` is read.
	 * @param config The config passed to `fromActorCall` / `fromActorTaskCall`.
	 * @param apifyApiToken The token resolved before the run was started.
	 * @returns A new ApifyDatasetLoader.
	 * @private
	 */
	static _fromFinishedRun(run, config, apifyApiToken) {
		return new ApifyDatasetLoader(run.defaultDatasetId, {
			datasetMappingFunction: config.datasetMappingFunction,
			clientOptions: {
				...config.clientOptions,
				token: apifyApiToken
			}
		});
	}
	/**
	 * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
	 * @param actorId The ID or name of the Actor on the Apify platform.
	 * @param input The input object of the Actor that you're trying to run.
	 * @param config Options specifying settings for the Actor run.
	 * @param config.callOptions Options passed to the Actor call (defaults to `{}`).
	 * @param config.clientOptions Options specifying settings for the Apify client.
	 * @param config.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
	 * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.
	 */
	static async fromActorCall(actorId, input, config) {
		const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
		const client = ApifyDatasetLoader._getApifyClient(config.clientOptions);
		const run = await client.actor(actorId).call(input, config.callOptions ?? {});
		return ApifyDatasetLoader._fromFinishedRun(run, config, apifyApiToken);
	}
	/**
	 * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
	 * @param taskId The ID or name of the task on the Apify platform.
	 * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
	 * @param config Options specifying settings for the task run.
	 * @param config.callOptions Options passed to the task call (defaults to `{}`).
	 * @param config.clientOptions Options specifying settings for the Apify client.
	 * @param config.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
	 * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.
	 */
	static async fromActorTaskCall(taskId, input, config) {
		const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
		const client = ApifyDatasetLoader._getApifyClient(config.clientOptions);
		const run = await client.task(taskId).call(input, config.callOptions ?? {});
		return ApifyDatasetLoader._fromFinishedRun(run, config, apifyApiToken);
	}
};
//#endregion
// Public CommonJS export of the loader class.
exports.ApifyDatasetLoader = ApifyDatasetLoader;
// Also expose the module's namespace object as an enumerable accessor
// property; the getter returns `apify_dataset_exports` each time it is read.
Object.defineProperty(exports, "apify_dataset_exports", {
enumerable: true,
get: function() {
return apify_dataset_exports;
}
});
//# sourceMappingURL=apify_dataset.cjs.map