@nutrient-sdk/dws-client-typescript
Version:
Node.js TypeScript client library for Nutrient Document Web Services (DWS) API
1,553 lines (1,547 loc) • 113 kB
JavaScript
import axios from 'axios';
import FormData from 'form-data';
import fs from 'fs';
import path from 'path';
import { Readable } from 'stream';
// src/errors.ts
var NutrientError = class _NutrientError extends Error {
  /**
   * Base error type for failures raised by this client.
   * @param message - Human-readable description of the failure
   * @param code - Machine-readable error code (defaults to "NUTRIENT_ERROR")
   * @param details - Optional structured context attached to the error
   * @param statusCode - Optional HTTP status code associated with the failure
   */
  constructor(message, code = "NUTRIENT_ERROR", details, statusCode) {
    super(message);
    Object.assign(this, { name: "NutrientError", code, details, statusCode });
    // captureStackTrace may be absent on some runtimes, hence the optional call.
    Error.captureStackTrace?.(this, _NutrientError);
  }

  /**
   * Serializes the error into a plain object (including the stack).
   */
  toJSON() {
    const { name, message, code, details, statusCode, stack } = this;
    return { name, message, code, details, statusCode, stack };
  }

  /**
   * Formats the error as "<name>: <message>", followed by the code when it is
   * not the default one and by the HTTP status when one is set.
   */
  toString() {
    const suffixes = [];
    if (this.code !== "NUTRIENT_ERROR") {
      suffixes.push(` (${this.code})`);
    }
    if (this.statusCode) {
      suffixes.push(` [HTTP ${this.statusCode}]`);
    }
    return `${this.name}: ${this.message}${suffixes.join("")}`;
  }

  /**
   * Converts an arbitrary thrown value into a NutrientError.
   * @param error - The value to wrap; NutrientError instances are returned untouched
   * @param message - Optional text prepended to the wrapped error's message
   * @returns A NutrientError instance
   */
  static wrap(error, message) {
    if (error instanceof _NutrientError) {
      return error;
    }
    if (!(error instanceof Error)) {
      // Non-Error values carry no message of their own; keep their string form in details.
      return new _NutrientError(message ?? "An unknown error occurred", "UNKNOWN_ERROR", {
        originalError: String(error)
      });
    }
    const combined = message ? `${message}: ${error.message}` : error.message;
    const context = {
      originalError: error.name,
      originalMessage: error.message,
      stack: error.stack
    };
    return new _NutrientError(combined, "WRAPPED_ERROR", context);
  }
};
var ValidationError = class _ValidationError extends NutrientError {
  /**
   * Error carrying the fixed code "VALIDATION_ERROR".
   * @param message - Human-readable description of the failure
   * @param details - Optional structured context
   * @param statusCode - Optional HTTP status code
   */
  constructor(message, details, statusCode) {
    super(message, "VALIDATION_ERROR", details, statusCode);
    this.name = "ValidationError";
    // Optional call: captureStackTrace may be absent on some runtimes.
    Error.captureStackTrace?.(this, _ValidationError);
  }
};
var APIError = class _APIError extends NutrientError {
  /**
   * Error carrying the fixed code "API_ERROR".
   * Note the argument order: the status code comes before the details here.
   * @param message - Human-readable description of the failure
   * @param statusCode - HTTP status code of the response
   * @param details - Optional structured context
   */
  constructor(message, statusCode, details) {
    super(message, "API_ERROR", details, statusCode);
    this.name = "APIError";
    // Optional call: captureStackTrace may be absent on some runtimes.
    Error.captureStackTrace?.(this, _APIError);
  }
};
var AuthenticationError = class _AuthenticationError extends NutrientError {
  /**
   * Error carrying the fixed code "AUTHENTICATION_ERROR".
   * @param message - Human-readable description of the failure
   * @param details - Optional structured context
   * @param statusCode - HTTP status code (defaults to 401)
   */
  constructor(message, details, statusCode = 401) {
    super(message, "AUTHENTICATION_ERROR", details, statusCode);
    this.name = "AuthenticationError";
    // Optional call: captureStackTrace may be absent on some runtimes.
    Error.captureStackTrace?.(this, _AuthenticationError);
  }
};
var NetworkError = class _NetworkError extends NutrientError {
  /**
   * Error carrying the fixed code "NETWORK_ERROR".
   * @param message - Human-readable description of the failure
   * @param details - Optional structured context
   * @param statusCode - Optional HTTP status code
   */
  constructor(message, details, statusCode) {
    super(message, "NETWORK_ERROR", details, statusCode);
    this.name = "NetworkError";
    // Optional call: captureStackTrace may be absent on some runtimes.
    Error.captureStackTrace?.(this, _NetworkError);
  }
};
/**
 * Sends one request to the API and returns the normalized response.
 *
 * The API key is resolved first, the URL is built from the base URL (default
 * "https://api.nutrient.io", a single trailing slash removed) plus the
 * endpoint, and the body is attached by prepareRequestBody. Anything thrown
 * along the way is passed through convertError before being rethrown.
 *
 * @param config - Request description (method, endpoint, headers, data)
 * @param clientOptions - Client options (apiKey, baseUrl, timeout)
 * @param responseType - Response type handed to axios
 * @returns The object produced by handleResponse
 */
async function sendRequest(config, clientOptions, responseType) {
  try {
    const token = await resolveApiKey(clientOptions.apiKey);
    const root = clientOptions.baseUrl ?? "https://api.nutrient.io";
    const url = `${root.replace(/\/$/, "")}${config.endpoint.toString()}`;
    const requestConfig = {
      method: config.method,
      url,
      headers: { Authorization: `Bearer ${token}`, ...config.headers },
      // 0 means no default timeout.
      timeout: clientOptions.timeout ?? 0,
      // Never let axios reject on status; handleResponse inspects the status itself.
      validateStatus: () => true,
      responseType
    };
    prepareRequestBody(requestConfig, config);
    return handleResponse(await axios(requestConfig));
  } catch (failure) {
    throw convertError(failure, config);
  }
}
/**
 * Resolves the API key to a string.
 *
 * A string is returned as-is. Anything else is invoked as a (possibly async)
 * function whose result must be a non-empty string.
 *
 * @param apiKey - The key itself, or a function producing it
 * @returns The resolved API key
 * @throws AuthenticationError when the function fails or returns an invalid value
 */
async function resolveApiKey(apiKey) {
  if (typeof apiKey === "string") {
    return apiKey;
  }
  let candidate;
  try {
    candidate = await apiKey();
  } catch (failure) {
    if (failure instanceof AuthenticationError) {
      throw failure;
    }
    throw new AuthenticationError("Failed to resolve API key from function", {
      error: failure instanceof Error ? failure.message : String(failure)
    });
  }
  const isUsable = typeof candidate === "string" && candidate.length !== 0;
  if (!isUsable) {
    throw new AuthenticationError("API key function must return a non-empty string", {
      resolvedType: typeof candidate
    });
  }
  return candidate;
}
/**
 * Populates `axiosConfig.data` and the matching content headers from `config`.
 *
 * - POST "/build" and "/analyze_build": multipart form data when files are
 *   attached, otherwise the instructions as JSON.
 * - POST "/sign": always multipart; when no signing options are supplied a
 *   CAdES "b-lt" signature is requested.
 * - POST "/ai/redact": multipart when both a file and its key are present,
 *   otherwise the request data as JSON.
 * - Anything else: `config.data`, when truthy, is sent as JSON.
 *
 * Every multipart branch merges the form's own headers (content type with
 * boundary) into the request, so none of them depends on the HTTP client
 * detecting form-data bodies by itself.
 *
 * @param axiosConfig - Axios request configuration; mutated in place
 * @param config - Request description (method, endpoint, data)
 * @returns The same `axiosConfig` object
 */
function prepareRequestBody(axiosConfig, config) {
  // Attaches a multipart body together with its boundary-carrying headers.
  const useMultipart = (formData) => {
    axiosConfig.data = formData;
    axiosConfig.headers = {
      ...axiosConfig.headers,
      ...formData.getHeaders()
    };
  };
  // Attaches a JSON body and labels it as such.
  const useJson = (payload) => {
    axiosConfig.data = payload;
    axiosConfig.headers = {
      ...axiosConfig.headers,
      "Content-Type": "application/json"
    };
  };

  if (config.method === "POST") {
    if (["/build", "/analyze_build"].includes(config.endpoint)) {
      const { files, instructions } = config.data;
      if (files && files.size > 0) {
        const formData = new FormData();
        for (const [key, value] of files) {
          appendFileToFormData(formData, key, value);
        }
        formData.append("instructions", JSON.stringify(instructions));
        useMultipart(formData);
      } else {
        useJson(instructions);
      }
      return axiosConfig;
    }

    if (config.endpoint === "/sign") {
      const signRequest = config.data;
      const formData = new FormData();
      appendFileToFormData(formData, "file", signRequest.file);
      if (signRequest.image) {
        appendFileToFormData(formData, "image", signRequest.image);
      }
      if (signRequest.graphicImage) {
        appendFileToFormData(formData, "graphicImage", signRequest.graphicImage);
      }
      const signatureOptions = signRequest.data
        ? signRequest.data
        : { signatureType: "cades", cadesLevel: "b-lt" };
      formData.append("data", JSON.stringify(signatureOptions));
      useMultipart(formData);
      return axiosConfig;
    }

    if (config.endpoint === "/ai/redact") {
      const redactRequest = config.data;
      if (redactRequest.file && redactRequest.fileKey) {
        const formData = new FormData();
        appendFileToFormData(formData, redactRequest.fileKey, redactRequest.file);
        formData.append("data", JSON.stringify(redactRequest.data));
        useMultipart(formData);
      } else {
        useJson(redactRequest.data);
      }
      return axiosConfig;
    }
  }

  if (config.data) {
    useJson(config.data);
  }
  return axiosConfig;
}
/**
 * Appends one file entry to a form.
 *
 * Buffers and stream-like objects (anything exposing a `pipe` property) are
 * appended unchanged; a plain Uint8Array is converted to a Buffer first.
 *
 * @param formData - Form receiving the entry
 * @param key - Field name for the entry
 * @param file - Object with `data`, `filename` and `contentType`
 * @throws ValidationError when `file.data` is none of the supported kinds
 */
function appendFileToFormData(formData, key, file) {
  const { data } = file;
  let payload;
  if (Buffer.isBuffer(data)) {
    payload = data;
  } else if (data instanceof Uint8Array) {
    payload = Buffer.from(data);
  } else if (data && typeof data === "object" && "pipe" in data) {
    payload = data;
  } else {
    throw new ValidationError("Expected Buffer, Uint8Array, or ReadableStream for file data", {
      dataType: typeof data
    });
  }
  formData.append(key, payload, {
    filename: file.filename,
    contentType: file.contentType
  });
}
/**
 * Normalizes an HTTP response, or throws when the status is 400 or above.
 * @param response - Response with `status`, `statusText`, `headers` and `data`
 * @returns An object holding `data`, `status`, `statusText` and `headers`
 * @throws The error built by createHttpError for statuses >= 400
 */
function handleResponse(response) {
  if (response.status >= 400) {
    throw createHttpError(response.status, response.statusText, response.data);
  }
  return {
    data: response.data,
    status: response.status,
    statusText: response.statusText,
    headers: response.headers
  };
}
/**
 * Builds the error matching an HTTP failure status.
 *
 * 401 and 403 become AuthenticationError, every other status from 400 to 499
 * becomes ValidationError, and everything else becomes APIError. The message
 * comes from the response payload when one can be extracted, otherwise it is
 * "HTTP <status>: <statusText>".
 *
 * @param status - HTTP status code
 * @param statusText - HTTP status text
 * @param data - Response payload
 * @returns The constructed error (not thrown here)
 */
function createHttpError(status, statusText, data) {
  const message = extractErrorMessage(data) ?? `HTTP ${status}: ${statusText}`;
  // Non-object payloads are wrapped so that details is always an object.
  const isObjectPayload = data !== null && typeof data === "object";
  const details = isObjectPayload ? data : { response: data };

  const isAuthFailure = status === 401 || status === 403;
  if (isAuthFailure) {
    return new AuthenticationError(message, details, status);
  }
  const isClientError = status >= 400 && status < 500;
  return isClientError
    ? new ValidationError(message, details, status)
    : new APIError(message, status, details);
}
/**
 * Looks for a human-readable message inside an error payload.
 *
 * Lookup order: the top-level string fields "error_description",
 * "error_message", "message", "error", "detail", "details"; then "message" or
 * "description" on an object-valued "error"; then the first entry of an
 * "errors" array (the string itself, or its "message").
 *
 * @param data - Response payload of any type
 * @returns The first message found, or null
 */
function extractErrorMessage(data) {
  if (typeof data !== "object" || data === null) {
    return null;
  }

  const topLevelKeys = ["error_description", "error_message", "message", "error", "detail", "details"];
  for (const key of topLevelKeys) {
    if (typeof data[key] === "string") {
      return data[key];
    }
  }

  const nested = data["error"];
  if (typeof nested === "object" && nested !== null) {
    for (const key of ["message", "description"]) {
      if (typeof nested[key] === "string") {
        return nested[key];
      }
    }
  }

  const list = data["errors"];
  if (Array.isArray(list) && list.length > 0) {
    const first = list[0];
    if (typeof first === "string") {
      return first;
    }
    const hasMessage = typeof first === "object" && first !== null && typeof first["message"] === "string";
    if (hasMessage) {
      return first["message"];
    }
  }
  return null;
}
/**
 * Converts anything thrown while sending a request into a NutrientError.
 *
 * - NutrientError instances are returned untouched.
 * - Axios errors with a response become the matching HTTP error.
 * - Axios errors with a request but no response become a NetworkError whose
 *   details include the request headers minus any Authorization header.
 * - Other axios errors become a ValidationError ("Request configuration error").
 * - Everything else becomes a NutrientError with code "UNKNOWN_ERROR".
 *
 * @param error - The thrown value
 * @param config - The request description the failure belongs to
 * @returns The converted error (not thrown here)
 */
function convertError(error, config) {
  if (error instanceof NutrientError) {
    return error;
  }
  if (axios.isAxiosError(error)) {
    const { response, request, message } = error;
    if (response) {
      return createHttpError(response.status, response.statusText, response.data);
    }
    if (request) {
      // Build a filtered copy: deleting from config.headers would strip the
      // credentials from the caller's own object. Header names are
      // case-insensitive, so the match ignores case as well.
      const sanitizedHeaders = config.headers
        ? Object.fromEntries(
            Object.entries(config.headers).filter(([name]) => name.toLowerCase() !== "authorization")
          )
        : config.headers;
      return new NetworkError("Network request failed", {
        message,
        endpoint: config.endpoint,
        method: config.method,
        headers: sanitizedHeaders
      });
    }
    return new ValidationError("Request configuration error", {
      message,
      endpoint: config.endpoint,
      method: config.method,
      data: config.data
    });
  }
  return new NutrientError("Unexpected error occurred", "UNKNOWN_ERROR", {
    error: error instanceof Error ? error.message : String(error),
    endpoint: config.endpoint,
    method: config.method,
    data: config.data
  });
}
// src/builders/base.ts
var BaseBuilder = class {
  /**
   * @param clientOptions - Client options forwarded with every request
   */
  constructor(clientOptions) {
    this.clientOptions = clientOptions;
  }

  /**
   * Issues a POST request and returns only the response payload.
   * @param path2 - Endpoint to call
   * @param options - Object with `instructions` and, optionally, `files`
   * @param responseType - Response type handed to the request layer
   * @returns The `data` field of the response
   */
  async sendRequest(path2, options, responseType) {
    // `files` is only forwarded when the caller actually supplied the property.
    const files = "files" in options ? options.files : undefined;
    const request = {
      endpoint: path2,
      method: "POST",
      data: { instructions: options.instructions, files }
    };
    const { data } = await sendRequest(request, this.clientOptions, responseType);
    return data;
  }
};
// src/build.ts
// Dimension used for watermark width/height when the caller gives none.
var DEFAULT_DIMENSION = { value: 100, unit: "%" };
var BuildActions = (() => {
  // Applies the rotation/width/height fallbacks shared by both watermark kinds.
  const withWatermarkDefaults = (options) => ({
    ...options,
    rotation: options.rotation ?? 0,
    width: options.width ?? DEFAULT_DIMENSION,
    height: options.height ?? DEFAULT_DIMENSION
  });

  // Describes an action whose file must be registered before it can be built.
  const deferUntilRegistered = (fileInput, createAction) => ({
    __needsFileRegistration: true,
    fileInput,
    createAction
  });

  // Shared shape of the three "createRedactions" variants.
  const redaction = (strategy, criterion, options, strategyOptions) => ({
    type: "createRedactions",
    strategy,
    strategyOptions: { ...criterion, ...strategyOptions },
    ...options
  });

  return {
    /**
     * Builds an OCR action.
     * @param language - Language(s) to recognize
     */
    ocr(language) {
      return { type: "ocr", language };
    },

    /**
     * Builds a rotation action.
     * @param rotateBy - Rotation angle (90, 180, or 270)
     */
    rotate(rotateBy) {
      return { type: "rotate", rotateBy };
    },

    /**
     * Builds a text watermark action.
     * @param text - Text of the watermark
     * @param options - Watermark options
     * @param options.width - Watermark width as value and unit, e.g. {value: 100, unit: '%'} (default: 100%)
     * @param options.height - Watermark height as value and unit, e.g. {value: 100, unit: '%'} (default: 100%)
     * @param options.top - Top position (value and unit)
     * @param options.right - Right position (value and unit)
     * @param options.bottom - Bottom position (value and unit)
     * @param options.left - Left position (value and unit)
     * @param options.rotation - Counterclockwise rotation in degrees (default: 0)
     * @param options.opacity - Opacity, from 0 (fully transparent) to 1 (fully opaque)
     * @param options.fontFamily - Font family of the text (e.g. 'Helvetica')
     * @param options.fontSize - Text size in points
     * @param options.fontColor - Foreground color of the text (e.g. '#ffffff')
     * @param options.fontStyle - Text style array ('bold', 'italic', or both)
     */
    watermarkText(text, options = {
      width: DEFAULT_DIMENSION,
      height: DEFAULT_DIMENSION,
      rotation: 0
    }) {
      return { type: "watermark", text, ...withWatermarkDefaults(options) };
    },

    /**
     * Builds an image watermark action; the image is registered as a file first.
     * @param image - Image used as the watermark
     * @param options - Watermark options
     * @param options.width - Watermark width as value and unit, e.g. {value: 100, unit: '%'} (default: 100%)
     * @param options.height - Watermark height as value and unit, e.g. {value: 100, unit: '%'} (default: 100%)
     * @param options.top - Top position (value and unit)
     * @param options.right - Right position (value and unit)
     * @param options.bottom - Bottom position (value and unit)
     * @param options.left - Left position (value and unit)
     * @param options.rotation - Counterclockwise rotation in degrees (default: 0)
     * @param options.opacity - Opacity, from 0 (fully transparent) to 1 (fully opaque)
     */
    watermarkImage(image, options = {
      width: DEFAULT_DIMENSION,
      height: DEFAULT_DIMENSION,
      rotation: 0
    }) {
      return deferUntilRegistered(image, (fileHandle) => ({
        type: "watermark",
        image: fileHandle,
        ...withWatermarkDefaults(options)
      }));
    },

    /**
     * Builds a flatten action.
     * @param annotationIds - Optional IDs of the annotations to flatten (all when omitted)
     */
    flatten(annotationIds) {
      return annotationIds ? { type: "flatten", annotationIds } : { type: "flatten" };
    },

    /**
     * Builds an action applying Instant JSON; the file is registered first.
     * @param file - Instant JSON file input
     */
    applyInstantJson(file) {
      return deferUntilRegistered(file, (fileHandle) => ({
        type: "applyInstantJson",
        file: fileHandle
      }));
    },

    /**
     * Builds an action applying XFDF; the file is registered first.
     * @param file - XFDF file input
     * @param options - Apply XFDF options
     * @param options.ignorePageRotation - If true, page rotation is ignored when applying the XFDF data (default: false)
     * @param options.richTextEnabled - If true, plain text annotations are converted to rich text annotations; if false, all text annotations are plain text (default: true)
     */
    applyXfdf(file, options) {
      return deferUntilRegistered(file, (fileHandle) => ({
        type: "applyXfdf",
        file: fileHandle,
        ...options
      }));
    },

    /**
     * Builds a redaction-creation action based on a text search.
     * @param text - Text to search for and redact
     * @param options - Redaction options
     * @param options.content - Visual aspects of the redaction annotation (background color, overlay text, etc.)
     * @param strategyOptions - Redaction strategy options
     * @param strategyOptions.includeAnnotations - If true, redactions are also created on top of annotations whose content matches the text (default: true)
     * @param strategyOptions.caseSensitive - If true, the search is case sensitive (default: false)
     * @param strategyOptions.start - Index of the page where the search starts (default: 0)
     * @param strategyOptions.limit - Number of pages to search from start (default: to the end of the document)
     */
    createRedactionsText(text, options, strategyOptions) {
      return redaction("text", { text }, options, strategyOptions);
    },

    /**
     * Builds a redaction-creation action based on a regex pattern.
     * @param regex - Regex pattern to search for and redact
     * @param options - Redaction options
     * @param options.content - Visual aspects of the redaction annotation (background color, overlay text, etc.)
     * @param strategyOptions - Redaction strategy options
     * @param strategyOptions.includeAnnotations - If true, redactions are also created on top of annotations whose content matches the regex (default: true)
     * @param strategyOptions.caseSensitive - If true, the search is case sensitive (default: true)
     * @param strategyOptions.start - Index of the page where the search starts (default: 0)
     * @param strategyOptions.limit - Number of pages to search from start (default: to the end of the document)
     */
    createRedactionsRegex(regex, options, strategyOptions) {
      return redaction("regex", { regex }, options, strategyOptions);
    },

    /**
     * Builds a redaction-creation action based on a preset pattern.
     * @param preset - Preset pattern to search for and redact (e.g. 'email-address', 'credit-card-number', 'social-security-number', etc.)
     * @param options - Redaction options
     * @param options.content - Visual aspects of the redaction annotation (background color, overlay text, etc.)
     * @param strategyOptions - Redaction strategy options
     * @param strategyOptions.includeAnnotations - If true, redactions are also created on top of annotations whose content matches the preset (default: true)
     * @param strategyOptions.start - Index of the page where the search starts (default: 0)
     * @param strategyOptions.limit - Number of pages to search from start (default: to the end of the document)
     */
    createRedactionsPreset(preset, options, strategyOptions) {
      return redaction("preset", { preset }, options, strategyOptions);
    },

    /**
     * Builds the action that applies previously created redactions.
     */
    applyRedactions() {
      return { type: "applyRedactions" };
    }
  };
})();
var BuildOutputs = (() => {
  // Copies the options shared by the pdf, pdfa and pdfua outputs onto `target`.
  // Only truthy values are copied; password/permission keys become snake_case.
  const applyPdfOptions = (target, options) => {
    if (options?.metadata) {
      target.metadata = options.metadata;
    }
    if (options?.labels) {
      target.labels = options.labels;
    }
    if (options?.userPassword) {
      target.user_password = options.userPassword;
    }
    if (options?.ownerPassword) {
      target.owner_password = options.ownerPassword;
    }
    if (options?.userPermissions) {
      target.user_permissions = options.userPermissions;
    }
    if (options?.optimize) {
      target.optimize = options.optimize;
    }
    return target;
  };

  // MIME type and filename for every output type with a fixed result.
  const PDF_INFO = { mimeType: "application/pdf", filename: "output.pdf" };
  const FIXED_OUTPUT_INFO = new Map([
    ["pdf", PDF_INFO],
    ["pdfa", PDF_INFO],
    ["pdfua", PDF_INFO],
    ["docx", {
      mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      filename: "output.docx"
    }],
    ["xlsx", {
      mimeType: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      filename: "output.xlsx"
    }],
    ["pptx", {
      mimeType: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
      filename: "output.pptx"
    }],
    ["html", { mimeType: "text/html", filename: "output.html" }],
    ["markdown", { mimeType: "text/markdown", filename: "output.md" }]
  ]);

  return {
    /**
     * Builds a PDF output configuration.
     * @param options - PDF output options
     */
    pdf(options) {
      return applyPdfOptions({ type: "pdf" }, options);
    },

    /**
     * Builds a PDF/A output configuration.
     * @param options - PDF/A output options
     */
    pdfa(options) {
      const config = { type: "pdfa" };
      if (options?.conformance) {
        config.conformance = options.conformance;
      }
      // These two are copied whenever they are defined, even when falsy.
      if (options?.vectorization !== undefined) {
        config.vectorization = options.vectorization;
      }
      if (options?.rasterization !== undefined) {
        config.rasterization = options.rasterization;
      }
      return applyPdfOptions(config, options);
    },

    /**
     * Builds a PDF/UA output configuration.
     * @param options - PDF/UA output options
     */
    pdfua(options) {
      return applyPdfOptions({ type: "pdfua" }, options);
    },

    /**
     * Builds an image output configuration.
     * @param format - Image format type
     * @param options - Image output options
     */
    image(format, options) {
      const config = { type: "image", format };
      for (const key of ["pages", "width", "height", "dpi"]) {
        if (options?.[key]) {
          config[key] = options[key];
        }
      }
      return config;
    },

    /**
     * Builds a JSON content output configuration.
     * @param options - JSON content extraction options
     */
    jsonContent(options) {
      const config = { type: "json-content" };
      // Flags are copied whenever they are defined, so an explicit `false` is kept.
      for (const key of ["plainText", "structuredText", "keyValuePairs", "tables"]) {
        if (options?.[key] !== undefined) {
          config[key] = options[key];
        }
      }
      if (options?.language) {
        config.language = options.language;
      }
      return config;
    },

    /**
     * Builds an Office document output configuration.
     * @param type - Office document type
     */
    office(type) {
      return { type };
    },

    /**
     * Builds an HTML output configuration.
     * @param layout - Layout type to use for the conversion to HTML
     */
    html(layout) {
      return { type: "html", layout };
    },

    /**
     * Builds a Markdown output configuration.
     */
    markdown() {
      return { type: "markdown" };
    },

    /**
     * Returns the MIME type and filename for an output configuration.
     * Unrecognized types map to "application/octet-stream" / "output".
     * @param output - The output configuration
     * @returns Object with `mimeType` and `filename`
     */
    getMimeTypeForOutput(output) {
      if (output.type === "image") {
        const format = output.format ?? "png";
        const mimeType = format === "jpg" ? "image/jpeg" : `image/${format}`;
        return { mimeType, filename: `output.${format}` };
      }
      const fixed = FIXED_OUTPUT_INFO.get(output.type);
      // Hand out a copy so callers never share the table's entries.
      return fixed ? { ...fixed } : { mimeType: "application/octet-stream", filename: "output" };
    }
  };
})();
// src/types/inputs.ts
/**
 * Tells whether `input` is a Buffer.
 * Always false on runtimes that have no global `Buffer`.
 */
function isBuffer(input) {
  if (typeof Buffer === "undefined") {
    return false;
  }
  return Buffer.isBuffer(input);
}
/**
 * Tells whether `input` is a Uint8Array.
 * Subclass instances such as Buffers count as well.
 */
function isUint8Array(input) {
  const matches = input instanceof Uint8Array;
  return matches;
}
/**
 * Tells whether `input` parses as an absolute URL.
 *
 * Windows absolute paths such as "C:\docs\file.pdf" also parse as URLs, with
 * the drive letter taken as the scheme ("c:"). Those are local paths, so any
 * single-letter scheme is rejected.
 *
 * @param input - Candidate value, normally a string
 * @returns true for absolute URLs whose scheme is longer than one letter
 */
function isUrl(input) {
  let parsed;
  try {
    parsed = new URL(input);
  } catch {
    return false;
  }
  // `protocol` includes the trailing colon, so a drive letter yields length 2.
  return parsed.protocol.length > 2;
}
/**
 * Normalizes a local file input into an object with `data` and `filename`.
 *
 * Accepted inputs: a string (treated as a file path), a Buffer, a Uint8Array,
 * or an object tagged with `type` "file-path", "buffer" or "uint8array".
 *
 * @param input - The file input to normalize
 * @returns The normalized file data
 * @throws ValidationError for unsupported types and unrecognized inputs
 */
async function processFileInput(input) {
  if (typeof input === "string") {
    return await processFilePathInput(input);
  }
  if (isBuffer(input)) {
    return processBufferInput(input);
  }
  if (isUint8Array(input)) {
    return processUint8ArrayInput(input);
  }

  const isTagged = typeof input === "object" && input !== null && "type" in input;
  if (!isTagged) {
    throw new ValidationError("Invalid file input provided", { input });
  }

  const { type } = input;
  if (type === "file-path") {
    return await processFilePathInput(input.path);
  }
  if (type === "buffer") {
    return processBufferInput(input.buffer, input.filename);
  }
  if (type === "uint8array") {
    return processUint8ArrayInput(input.data, input.filename);
  }
  throw new ValidationError(`Unsupported input type: ${type}`, {
    input
  });
}
/**
 * Wraps a Buffer as normalized file data.
 * @param buffer - The file contents
 * @param filename - Optional filename (falls back to "buffer")
 * @returns Object with `data` and `filename`
 */
function processBufferInput(buffer, filename) {
  const resolvedName = filename ?? "buffer";
  return { data: buffer, filename: resolvedName };
}
/**
 * Wraps a Uint8Array as normalized file data.
 * @param data - The file contents
 * @param filename - Optional filename (falls back to "data.bin")
 * @returns Object with `data` and `filename`
 */
function processUint8ArrayInput(data, filename) {
  const resolvedName = filename ?? "data.bin";
  return { data, filename: resolvedName };
}
/**
 * Opens a local file as a read stream and returns it as normalized file data.
 *
 * @param filePath - Path of the file to read
 * @returns Object with `data` (the read stream) and `filename` (the path's basename)
 * @throws ValidationError when the file does not exist or the stream cannot be created
 */
async function processFilePathInput(filePath) {
  try {
    try {
      await fs.promises.access(filePath, fs.constants.F_OK);
    } catch {
      throw new ValidationError(`File not found: ${filePath}`, { filePath });
    }
    const readStream = fs.createReadStream(filePath);
    // Stream errors are emitted later, long after this function has returned.
    // Throwing from the listener cannot be caught by any caller and surfaces as
    // an uncaught exception, so the listener only tears the stream down.
    // NOTE(review): consumers that pipe or iterate the stream are expected to
    // observe the failure through their own error handling - confirm.
    readStream.on("error", () => {
      readStream.destroy();
    });
    return {
      data: readStream,
      filename: path.basename(filePath)
    };
  } catch (error) {
    if (error instanceof ValidationError) {
      throw error;
    }
    throw new ValidationError(`Failed to create read stream for file: ${filePath}`, {
      filePath,
      error: error instanceof Error ? error.message : String(error)
    });
  }
}
/**
 * Checks whether a value has the shape of a supported file input.
 *
 * Strings, Buffers and Uint8Arrays are always accepted; objects are accepted
 * when their `type` is "file-path", "buffer", "uint8array" or "url".
 *
 * @param input - The value to check
 * @returns true when the input is supported
 */
function validateFileInput(input) {
  if (typeof input === "string" || isBuffer(input) || isUint8Array(input)) {
    return true;
  }
  const isTagged = typeof input === "object" && input !== null && "type" in input;
  if (!isTagged) {
    return false;
  }
  const supportedTypes = ["file-path", "buffer", "uint8array", "url"];
  return supportedTypes.includes(input.type);
}
/**
 * Tells whether a file input refers to a remote resource.
 * Strings are remote when isUrl accepts them; objects when their `type` is "url".
 * @param input - The file input to inspect
 * @returns true for remote inputs
 */
function isRemoteFileInput(input) {
  if (typeof input === "string") {
    return isUrl(input);
  }
  if (typeof input !== "object" || input === null) {
    return false;
  }
  return "type" in input && input.type === "url";
}
/**
 * Downloads a remote file input and returns it as normalized file data.
 * @param input - A URL string, or an object with a `url` property
 * @returns Object with `data` (the downloaded bytes) and `filename` (always "buffer")
 */
async function processRemoteFileInput(input) {
  const url = typeof input === "string" ? input : input.url;
  const data = await fetchFromUrl(url);
  return { data, filename: "buffer" };
}
/**
 * Downloads the body of a URL into a Buffer.
 * @param url - The URL to download
 * @returns The response body as a Buffer
 * @throws ValidationError when the response is not ok or the download fails
 */
async function fetchFromUrl(url) {
  try {
    const response = await fetch(url);
    if (response.ok) {
      return Buffer.from(await response.arrayBuffer());
    }
    const { status, statusText } = response;
    throw new ValidationError(`Failed to fetch URL: ${status} ${statusText}`, {
      url,
      status,
      statusText
    });
  } catch (failure) {
    // The status error raised above passes through; anything else gets wrapped.
    if (!(failure instanceof ValidationError)) {
      throw new ValidationError(`Failed to fetch URL: ${url}`, {
        url,
        error: failure instanceof Error ? failure.message : String(failure)
      });
    }
    throw failure;
  }
}
/**
 * Reads the page count of a PDF by scanning its raw bytes.
 *
 * The bytes are split on "endobj"; the first object mentioning both "/Type"
 * and "/Catalog" is taken as the catalog, its "/Pages" reference is followed,
 * and the "/Count" of that object is returned. Stream inputs are read to the
 * end in the process.
 *
 * @param pdfData - Normalized file data (`data` is a Buffer, Uint8Array or readable stream)
 * @returns The value of /Count in the root /Pages object
 * @throws ValidationError when the data cannot be read or the structure is not found
 */
async function getPdfPageCount(pdfData) {
  const source = pdfData.data;
  let bytes;
  if (isBuffer(source)) {
    bytes = source;
  } else if (isUint8Array(source)) {
    bytes = Buffer.from(source);
  } else if (source instanceof fs.ReadStream || source instanceof Readable) {
    try {
      const pieces = [];
      for await (const piece of source) {
        pieces.push(piece);
      }
      bytes = Buffer.concat(pieces);
    } catch (failure) {
      throw new ValidationError(`Failed to read PDF stream: ${pdfData.filename}`, {
        filename: pdfData.filename,
        error: failure instanceof Error ? failure.message : String(failure)
      });
    }
  } else {
    throw new ValidationError("Invalid PDF data provided", { input: pdfData });
  }

  // Collect [objectNumber, generation, body] for every "<n> <g> obj ... endobj".
  // The segment after the last "endobj" is never an object and is dropped.
  const segments = bytes.toString("binary").split("endobj").slice(0, -1);
  const objects = [];
  for (const segment of segments) {
    const header = /(\d+)\s+(\d+)\s+obj/.exec(segment);
    if (!header?.[1] || !header[2]) {
      continue;
    }
    const body = segment.substring(header.index + header[0].length);
    objects.push([header[1], header[2], body]);
  }
  if (objects.length === 0) {
    throw new ValidationError("Could not find any objects in PDF", { input: pdfData });
  }

  const catalogEntry = objects.find(([, , body]) => body.includes("/Type") && body.includes("/Catalog"));
  const catalogBody = catalogEntry ? catalogEntry[2] : null;
  if (!catalogBody) {
    throw new ValidationError("Could not find /Catalog object in PDF", { input: pdfData });
  }

  const pagesRef = /\/Pages\s+(\d+)\s+(\d+)\s+R/.exec(catalogBody);
  if (!pagesRef) {
    throw new ValidationError("Could not find /Pages reference in /Catalog", { input: pdfData });
  }
  const [, wantedNum, wantedGen] = pagesRef;

  const pagesEntry = objects.find(([num, gen]) => num === wantedNum && gen === wantedGen);
  const pagesBody = pagesEntry ? pagesEntry[2] : null;
  if (!pagesBody) {
    throw new ValidationError("Could not find root /Pages object", { input: pdfData });
  }

  const count = /\/Count\s+(\d+)/.exec(pagesBody);
  if (!count) {
    throw new ValidationError("Could not find /Count in root /Pages object", { input: pdfData });
  }
  return parseInt(count[1], 10);
}
/**
 * Checks whether file data starts with the "%PDF-" header.
 *
 * Stream inputs are read to the end in the process. Any failure while reading
 * or inspecting the data yields false instead of an error.
 *
 * @param fileData - Normalized file data (`data` is a Buffer, Uint8Array or readable stream)
 * @returns true when the first five bytes are "%PDF-"
 */
async function isValidPdf(fileData) {
  try {
    const source = fileData.data;
    let bytes;
    if (isBuffer(source)) {
      bytes = source;
    } else if (isUint8Array(source)) {
      bytes = Buffer.from(source);
    } else if (source instanceof fs.ReadStream || source instanceof Readable) {
      const pieces = [];
      for await (const piece of source) {
        pieces.push(piece);
      }
      bytes = Buffer.concat(pieces);
    } else {
      return false;
    }
    return bytes.toString("ascii", 0, 5) === "%PDF-";
  } catch {
    // Unreadable input simply is not a valid PDF.
    return false;
  }
}
// src/builders/workflow.ts
var WorkflowBuilder = class extends BaseBuilder {
constructor() {
super(...arguments);
this.buildInstructions = {
parts: []
};
this.assets = /* @__PURE__ */ new Map();
this.assetIndex = 0;
this.currentStep = 0;
this.isExecuted = false;
}
/**
* Registers an asset in the workflow and returns its key for use in actions
* @param asset - The asset to register
* @returns The asset key that can be used in BuildActions
*/
registerAssets(asset) {
if (!validateFileInput(asset)) {
throw new ValidationError("Invalid file input provided to workflow", { asset });
}
if (isRemoteFileInput(asset)) {
throw new ValidationError("Remote file input doesn't need to be registered", { asset });
}
const assetKey = `asset_${this.assetIndex++}`;
this.assets.set(assetKey, asset);
return assetKey;
}
/**
* Adds a file part to the workflow
*/
addFilePart(file, options, actions) {
this.ensureNotExecuted();
let fileField;
if (isRemoteFileInput(file)) {
fileField = { url: typeof file === "string" ? file : file.url };
} else {
fileField = this.registerAssets(file);
}
const processedActions = actions ? actions.map((action) => this.processAction(action)) : void 0;
const filePart = {
file: fileField,
...options,
...processedActions && processedActions.length > 0 ? { actions: processedActions } : {}
};
this.buildInstructions.parts.push(filePart);
return this;
}
/**
* Adds an HTML part to the workflow
*/
addHtmlPart(html, assets, options, actions) {
this.ensureNotExecuted();
let htmlField;
if (isRemoteFileInput(html)) {
htmlField = { url: typeof html === "string" ? html : html.url };
} else {
htmlField = this.registerAssets(html);
}
let assetsField;
if (assets) {
assetsField = [];
for (const asset of assets) {
if (isRemoteFileInput(asset)) {
throw new ValidationError("Assets file input cannot be an URL", { input: asset });
}
const asset_key = this.registerAssets(asset);
assetsField.push(asset_key);
}
}
const processedActions = actions ? actions.map((action) => this.processAction(action)) : void 0;
const htmlPart = {
html: htmlField,
assets: assetsField,
...options,
...processedActions && processedActions.length > 0 ? { actions: processedActions } : {}
};
this.buildInstructions.parts.push(htmlPart);
return this;
}
/**
* Adds a new page part to the workflow
*/
addNewPage(options, actions) {
this.ensureNotExecuted();
const processedActions = actions ? actions.map((action) => this.processAction(action)) : void 0;
const newPagePart = {
page: "new",
...options,
...processedActions && processedActions.length > 0 ? { actions: processedActions } : {}
};
this.buildInstructions.parts.push(newPagePart);
return this;
}
/**
* Adds a document part by document ID
*/
addDocumentPart(documentId, options, actions) {
this.ensureNotExecuted();
const { layer, ...documentOptions } = options ?? {};
const processedActions = actions ? actions.map((action) => this.processAction(action)) : void 0;
const documentPart = {
document: { id: documentId, ...layer && { layer } },
...documentOptions,
...processedActions && processedActions.length > 0 ? { actions: processedActions } : {}
};
this.buildInstructions.parts.push(documentPart);
return this;
}
/**
* Processes an action, registering files if needed
*/
processAction(action) {
if (this.isActionWithFileInput(action)) {
let fileHandle;
if (isRemoteFileInput(action.fileInput)) {
fileHandle = {
url: typeof action.fileInput === "string" ? action.fileInput : action.fileInput.url
};
} else {
fileHandle = this.registerAssets(action.fileInput);
}
return action.createAction(fileHandle);
}
return action;
}
/**
* Type guard to check if action needs file registration
*/
isActionWithFileInput(action) {
return typeof action === "object" && action !== null && "__needsFileRegistration" in action;
}
/**
* Applies actions to the entire document
*/
applyActions(actions) {
var _a;
this.ensureNotExecuted();
(_a = this.buildInstructions).actions ?? (_a.actions = []);
const processedActions = actions.map((action) => this.processAction(action));
this.buildInstructions.actions.push(...processedActions);
return this;
}
/**
* Applies a single action to the entire document
*/
applyAction(action) {
return this.applyActions([action]);
}
/**
* Sets the output configuration
*/
output(output) {
this.ensureNotExecuted();
this.buildInstructions.output = output;
return this;
}
/**
* Sets PDF output
*/
outputPdf(options) {
this.output(BuildOutputs.pdf(options));
return this;
}
/**
* Sets PDF/A output
*/
outputPdfA(options) {
this.output(BuildOutputs.pdfa(options));
return this;
}
/**
* Sets PDF/UA output
*/
outputPdfUa(options) {
this.output(BuildOutputs.pdfua(options));
return this;
}
/**
* Sets image output
*/
outputImage(format, options) {
if (!options?.dpi && !options?.height && !options?.width) {
throw new ValidationError(
"Image output requires at least one of the following options: dpi, height, width"
);
}
this.output(BuildOutputs.image(format, options));
return this;
}
/**
* Sets Office format output
*/
outputOffice(format) {
this.output(BuildOutputs.office(format));
return this;
}
/**
* Sets HTML output
*/
outputHtml(layout) {
this.output(BuildOutputs.html(layout));
return this;
}
/**
* Set Markdown output
*/
outputMarkdown() {
this.output(BuildOutputs.markdown());
return this;
}
/**
* Sets JSON content extraction output
*/
outputJson(options) {
this.output(BuildOutputs.jsonContent(options));
return this;
}
/**
* Validates the workflow before execution
*/
validate() {
var _a;
if (this.buildInstructions.parts.length === 0) {
throw new ValidationError("Workflow has no parts to execute");
}
(_a = this.buildInstructions).output ?? (_a.output = { type: "pdf" });
}
/**
* Ensures the workflow hasn't been executed
*/
ensureNotExecuted() {
if (this.isExecuted) {
throw new ValidationError(
"This workflow has already been executed. Create a new workflow builder for additional operations."
);
}
}
/**
* Prepares files for the request
*/
async prepareFiles() {
const requestFiles = /* @__PURE__ */ new Map();
const processedEntries = await Promise.all(
Array.from(this.assets.entries()).map(async ([key, value]) => {
const normalizedFileData = await processFileInput(value);
return [key, normalizedFileData];
})
);
for (const [key, data] of processedEntries) {
requestFiles.set(key, data);
}
return requestFiles;
}
/**
* Cleans up resources after execution
*/
cleanup() {
this.assets.clear();
this.assetIndex = 0;
this.currentStep = 0;
this.isExecuted = true;
}
/**
* Executes the workflow
*/
async execute(options) {
this.ensureNotExecuted();
this.currentStep = 0;
const result = {
success: false,
errors: []
};
try {
this.currentStep = 1;
options?.onProgress?.(this.currentStep, 3);
this.validate();
this.currentStep = 2;
options?.onProgress?.(this.currentStep, 3);
const outputConfig = this.buildInstructions.output;
if (!outputConfig || !outputConfig.type) {
throw new Error("Output configuration is required");
}
const files = await this.prepareFiles();
let responseType = "arraybuffer";
if (outputConfig.type === "json-content") {
responseType = "json";
} else if (["html", "markdown"].includes(outputConfig.type)) {
responseType = "text";
}
const response = await this.sendRequest(
"/build",
{
instructions: this.buildInstructions,
files
},
responseType
);
this.currentStep = 3;
options?.onProgress?.(this.currentStep, 3);
if (outputConfig.type === "json-content") {
result.success = true;
result.output = {
data: response
};
} else if (["html", "markdown"].includes(outputConfig.type)) {
const { mimeType, filename } = BuildOutputs.getMimeTypeForOutput(outputConfig);
result.success = true;
result.output = {
content: response,
mimeType,
filename
};
} else {
const { mimeType, filename } = BuildOutputs.getMimeTypeForOutput(outputConfig);
const buffer = new Uint8Array(response);
result.success = true;
result.output = {
buffer,
mimeType,
filename
};
}
} catch (error) {
result.errors?.push({
step: this.currentStep,
error: NutrientError.wrap(error, `Workflow failed at step ${this.currentStep}`)
});
} finally {
this.cleanup();
}
return result;
}
/**
* Performs a dry run to analyze the workflow
*/
async dryRun() {
this.ensureNotExecuted();
const result = {
success: false,
errors: []
};
try {
this.validate();
const response = await this.sendRequest(
"/analyze_build",
{
instructions: this.buildInstructions
},
"json"
);
result.success = true;
result.analysis = response;
} catch (error) {
result.errors?.push({
step: 0,
error: NutrientError.wrap(error, "Dry run failed")
});
}
return result;
}
};
// src/builders/staged-workflow.ts
var StagedWorkflowBuilder = class {
constructor(clientOptions) {
this.builder = new WorkflowBuilder(clientOptions);
}
/**
* Adds a file part to the workflow.
*
* @param file - The file to add to the workflow. Can be a local file path, Buffer, or URL.
* @param options - Additional options for the file part.
* @param actions - Actions to apply to the file part.
* @returns The workflow builder instance for method chaining.
*
* @example
* // Add a PDF file from a local path
* workflow.addFilePart('/path/to/document.pdf');
*
* @example
* // Add a file with options and actions
* workflow.addFilePart(
* '/path/to/document.pdf',
* { pages: { start: 1, end: 3 } },
* [BuildActions.watermarkText('CONFIDENTIAL')]
* );
*/
addFilePart(file, options, actions) {
this.builder.addFilePart(file, options, actions);
return this;
}
/**
* Adds an HTML part to the workflow.
*
* @param html - The HTML content to add. Can be a file path, Buffer, or URL.
* @param assets - Optional array of assets (CSS, images, etc.) to include with the HTML. Only local files or Buffers are supported (not URLs).
* @param options - Additional options for the HTML part.
* @param actions - Actions to apply to the HTML part.
* @returns The workflow builder instance for method chaining.
*
* @example
* // Add HTML content from a file
* workflow.addHtmlPart('/path/to/content.html');
*
* @example
* // Add HTML with assets and options
* workflow.addHtmlPart(
* '/path/to/content.html',
* ['/path/to/style.css', '/path/to/image.png'],
* { layout: { size: 'A4' } }
* );
*/
addHtmlPart(html, assets, options, actions) {
this.builder.addHtmlPart(html, assets, options, actions);
return this;
}
/**
* Adds a new blank page to the workflow.
*
* @param options - Additional options for the new page, such as page size, orientation, etc.
* @param actions - Actions to apply to the new page.
* @returns The workflow builder instance for method chaining.
*
* @example
* // Add a simple blank page
* workflow.addNewPage();
*
* @example
* // Add a new page with specific options
* workflow.addNewPage(
* { layout: { size: 'A4', orientation: 'portrait' } }
* );
*/
addNewPage(options, actions) {
this.builder.addNewPage(options, actions);
return this;
}
/**
* Adds a document part to the workflow by referencing an existing document by ID.
*
* @param documentId - The ID of the document to add to the workflow.
* @param options - Additional options for the document part.
* @param options.layer - Optional layer name to select a specific layer from the document.
* @param actions - Actions to apply to the document part.
* @returns The workflow builder instance for method chaining.
*
* @example
* // Add a document by ID
* workflow.addDocumentPart('doc_12345abcde');
*
* @example
* // Add a document with a specific layer and options
* workflow.addDocumentPart(
* 'doc_12345abcde',
* {
* layer: 'content',
* pages: { start: 0, end: 3 }
* }
* );
*/
addDocumentPart(documentId, options, actions) {
this.builder.addDocumentPart(documentId, options, actions);
return this;
}
// Action methods
/**
* Applies multiple actions to the workflow.
*
* @param actions - An array of actions to apply to the workflow.
* @returns The workflow builder instance for method chaining.
*
* @example
* // Apply multiple actions to the workflow
* workflow.applyActions([
* BuildActions.watermarkText('DRAFT', { opacity: 0.5 }),
* BuildActions.ocr('eng'),
* BuildActions.flatten()
* ]);
*/
applyActions(actions) {
this.builder.applyActions(actions);
return this;
}
/**
* Applies a single action to the workflow.
*
* @param action - The action to apply to the workflow.
* @returns The workflow builder instance for method chaining.
*
* @example
* // Apply a watermark action
* workflow.applyAction(
* BuildActions.watermarkText('CONFIDENTIAL', {
* opacity: 0.3,
* rotation: 45
* })
* );
*
* @example
* // Apply an OCR action
* workflow.applyAction(BuildActions.ocr('eng'));
*/
applyAction(action) {
this.builder.applyAction(action);
return this;
}
// Output methods
/**
* Sets the output format to PDF.
*
* @param options - Additional options for PDF output, such as compression, encryption, etc.
* @param options.metadata - Document metadata properties like title and author.
* @param options.labels - Custom labels to add to the document for organization and categorization.
* @param options.userPassword - Password required to open the document. When set, the PDF will be encrypted.
* @param options.ownerPassword - Password required to modify the document. Provides additional security beyond the user password.
* @param options.userPermissions - Array of permissions granted to users who open the document with the user password.
* Options include: "printing", "modification", "content-copying", "annotation", "form-filling", etc.
* @param options.optimize - PDF optimization settings to reduce file size and improve performance.
* @param options.optimize.mrcCompression - When true, applies Mixed Raster Content compression to reduce file size.
* @param options.optimize.imageOptimizationQuality - Controls the quality of image optimization (1-5, where 1 is highest quality).
* @returns The workflow builder instance for method chaining.
*
* @example
* // Set output format to PDF with default options
* workflow.outputPdf();
*
* @example
* // Set output format to PDF with specific options
* workflow.outputPdf({
* userPassword: 'secret',
* userPermissions: ["printing"],
* metadata: {
* title: 'Important Document',
* author: 'Document System'
* },
* optimize: {
* mrcCompression: true,
* imageOptimizationQuality: 3
* }
* });
*/
outputPdf(options) {
this.builder.outputPdf(options);
return this;
}
/**
* Sets the output format to PDF/A (archival PDF).
*
* @param options - Additional options for PDF/A output.
* @param options.conformance - The PDF/A conformance level to target. Options include 'pdfa-1b', 'pdfa-1a', 'pdfa-2b', 'pdfa-2a', 'pdfa-3b', 'pdfa-3a'.
* Different levels have different requirements for long-term archiving.
* @param options.vectorization - When true, attempts to convert raster content to vector graphics where possible, improving quality and reducing file size.
* @param options.rasterization - When true, converts vector graphics to raster images, which can help with compatibility in some cases.
* @param options.metadata - Document metadata properties like title and author. Metadata is important for archival documents.
* @param options.labels - Custom labels to add to the document for organization and categorization.
* @param options.userPassword - Password required to open the document. When set, the PDF will be encrypted.
* @param options.ownerPassword - Password required to modify the document. Provides additional security beyond the user password.
* @param options.userPermissions - Array of permissions granted to users who open the document with the user password.
* Options include: "printing", "modification", "content-copying", "annotation", "form-filling", etc.
* @param options.optimize - PDF optimization settings to reduce file size and improve performance.
* @param options.optimize.mrcCompression - When true, applies Mixed Raster Content compression to reduce file size.
* @param options.optimize.imageOptimizationQuality - Controls the quality of image optimization (1-5, where 1 is highest quality).
* @returns The workflow builder instance for method chaining.
*
* @example
* // Set output format to PDF/A with default options
* workflow.outputPdfA();
*
* @example
* // Set output format to PDF/A with specific options
* workflow.outputPdfA({
* conformance: 'pdfa-2b',
* vectorization: true,
* metadata: {
* title: 'Archive Document',
* author: 'Document System'
* },
* optimize: {
* mrcCompression: true
* }
* });
*/
outputPdfA(options) {
this.builder.outputPdfA(options);
return this;
}
/**
* Sets the output format to PDF/UA (Universal Accessibility).
*
* @param options - Additional options for PDF/UA output.
* @param options.metadata - Document