UNPKG

pdfvector

Version:

Official TypeScript/JavaScript SDK for PDF Vector API - Parse PDFs to markdown and search academic publications across multiple databases

483 lines 16.4 kB
// Re-export constants for convenience
export { AcademicSearchProviderValues, AcademicSearchPublicationFieldValues, } from "./types.js";

/**
 * Encode raw bytes as a base64 string (module-private helper).
 *
 * Uses `Buffer` when the runtime provides it and falls back to `btoa`
 * otherwise, so the SDK does not throw a ReferenceError where `Buffer`
 * is not defined.
 *
 * @param bytes Uint8Array holding exactly the bytes to encode
 * @returns base64 representation of `bytes`
 */
function bytesToBase64(bytes) {
    if (typeof Buffer !== "undefined") {
        // Wrap the same memory region instead of copying it.
        return Buffer.from(bytes.buffer, bytes.byteOffset, bytes.byteLength).toString("base64");
    }
    // `btoa` expects a "binary string"; build it in chunks so a large input
    // never exceeds the engine's argument-count limit for a single call.
    const chunkSize = 0x8000;
    let binary = "";
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
        binary += String.fromCharCode(...bytes.subarray(offset, offset + chunkSize));
    }
    return btoa(binary);
}

/**
 * Custom error class for PDF Vector API errors
 */
export class PDFVectorError extends Error {
    /** HTTP status code (undefined for errors raised before a response exists) */
    status;
    /** Custom error code taken from the API error payload, when present */
    code;
    /**
     * @param message Human-readable error message
     * @param status HTTP status code
     * @param code Custom error code
     */
    constructor(message, 
    /** HTTP status code */
    status, 
    /** Custom error code */
    code) {
        super(message);
        this.status = status;
        this.code = code;
        this.name = "PDFVectorError";
    }
}

/**
 * PDF Vector TypeScript/JavaScript SDK client
 *
 * @example
 * Parse from URL:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   url: "https://example.com/document.pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * Parse from data:
 * ```typescript
 * import { readFile } from "fs/promises";
 *
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   data: await readFile("document.pdf"),
 *   contentType: "application/pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * Ask questions about documents:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.ask({
 *   url: "https://example.com/research-paper.pdf",
 *   prompt: "What are the main findings and methodology?"
 * });
 * console.log(result.markdown);
 * console.log(`Cost: ${result.creditCount} credits`);
 * ```
 *
 * Extract structured data:
 * ```typescript
 * const result = await client.extract({
 *   url: "https://example.com/invoice.pdf",
 *   prompt: "Extract invoice details",
 *   schema: {
 *     type: "object",
 *     properties: {
 *       invoiceNumber: { type: "string" },
 *       date: { type: "string" },
 *       totalAmount: { type: "number" },
 *       items: {
 *         type: "array",
 *         items: {
 *           type: "object",
 *           properties: {
 *             description: { type: "string" },
 *             quantity: { type: "number" },
 *             price: { type: "number" }
 *           }
 *         }
 *       }
 *     },
 *     required: ["invoiceNumber", "date", "totalAmount", "items"],
 *     additionalProperties: false
 *   }
 * });
 * console.log(result.data);
 * ```
 *
 * Search academic publications:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const results = await client.academicSearch({
 *   query: "machine learning transformers",
 *   providers: ["semantic-scholar", "arxiv"],
 *   limit: 10,
 *   yearFrom: 2020
 * });
 * console.log(results.results);
 * ```
 */
export class PDFVector {
    /** API key for authentication (format: pdfvector_xxx) */
    apiKey;
    /** Base URL for the PDF Vector API. Defaults to https://www.pdfvector.com */
    baseUrl;
    /**
     * Creates a new PDF Vector client
     * @param config Configuration object containing API key and optional base URL
     */
    constructor(config) {
        this.apiKey = config.apiKey;
        this.baseUrl = config.baseUrl ?? "https://www.pdfvector.com";
    }
    /**
     * Handle API response errors (internal)
     *
     * Reads the response body and always throws: the message and code come
     * from a JSON error payload when the body parses as one, otherwise the
     * raw body text (or the HTTP status line when the body is empty) is used.
     *
     * @param response Non-OK fetch Response
     * @throws {PDFVectorError} Always
     * @private
     */
    async handleResponseError(response) {
        const errorText = await response.text();
        let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
        let errorCode;
        try {
            const errorData = JSON.parse(errorText);
            if (errorData.message)
                errorMessage = errorData.message;
            if (errorData.code)
                errorCode = errorData.code;
        }
        catch {
            // Body was not a usable JSON error object; surface the raw text instead.
            if (errorText)
                errorMessage = errorText;
        }
        throw new PDFVectorError(errorMessage, response.status, errorCode);
    }
    /**
     * Convert data to base64 string (internal)
     *
     * Accepted inputs: a string (returned unchanged, treated as already
     * base64), any ArrayBuffer view (Buffer, Uint8Array, other typed arrays,
     * DataView), an ArrayBuffer, a Blob, or a ReadableStream.
     *
     * @param data Document content in one of the supported forms
     * @returns Promise resolving to the base64 string
     * @throws {PDFVectorError} When the data type is not supported
     * @private
     */
    async dataToBase64(data) {
        // If already a base64 string, return as is
        if (typeof data === "string")
            return data;
        // Buffer, Uint8Array and every other ArrayBuffer view: encode exactly
        // the bytes the view covers (it may be a window into a larger buffer).
        if (ArrayBuffer.isView(data)) {
            return bytesToBase64(new Uint8Array(data.buffer, data.byteOffset, data.byteLength));
        }
        // Convert ArrayBuffer to base64
        if (data instanceof ArrayBuffer)
            return bytesToBase64(new Uint8Array(data));
        // Convert Blob to base64
        if (typeof Blob !== "undefined" && data instanceof Blob) {
            return bytesToBase64(new Uint8Array(await data.arrayBuffer()));
        }
        // Convert ReadableStream to base64 (Response drains the stream for us)
        if (typeof ReadableStream !== "undefined" && data instanceof ReadableStream) {
            const drained = await new Response(data).arrayBuffer();
            return bytesToBase64(new Uint8Array(drained));
        }
        throw new PDFVectorError("Unsupported data type for conversion to base64");
    }
    /**
     * POST a JSON body to an API path and return the decoded JSON response (internal)
     *
     * Shared by every public API method so that headers, authentication and
     * error handling stay identical across endpoints.
     *
     * @param path API path beginning with "/", appended to the base URL
     * @param body Value serialized with JSON.stringify (undefined properties are omitted)
     * @returns Promise resolving to the parsed JSON response body
     * @throws {PDFVectorError} When the API responds with a non-OK status
     * @private
     */
    async postJson(path, body) {
        // Tolerate a configured base URL ending in "/" so the request URL
        // does not contain a doubled slash.
        let base = this.baseUrl;
        while (base.endsWith("/"))
            base = base.slice(0, -1);
        // `fetch` below is the global Fetch API, not this class's `fetch` alias method.
        const response = await fetch(`${base}${path}`, {
            method: "POST",
            headers: {
                "content-type": "application/json",
                authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify(body),
        });
        if (!response.ok)
            await this.handleResponseError(response);
        return response.json();
    }
    /**
     * Parse a PDF/Word document from URL or data and convert to markdown
     * @param request Parse request parameters (URL or data)
     * @returns Promise resolving to parsed document details
     * @throws {PDFVectorError} When the API request fails
     */
    async parse(request) {
        // NOTE(review): `contentType` appears in the data-input examples but is
        // not forwarded in the request body here - confirm whether the API needs it.
        const body = "data" in request
            ? {
                // Handle data input
                file: await this.dataToBase64(request.data),
                useLLM: request.useLLM,
            }
            // Handle URL input
            : request;
        return this.postJson("/v1/api/parse", body);
    }
    /**
     * Ask questions about a PDF/Word document and get AI-powered answers in natural language
     * @param request Ask request parameters (URL or data with prompt)
     * @returns Promise resolving to AI-generated answer about the document in markdown format
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * Ask from URL:
     * ```typescript
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.ask({
     *   url: "https://example.com/research-paper.pdf",
     *   prompt: "What are the key findings in this research?"
     * });
     * console.log(result.markdown);
     * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
     * ```
     *
     * @example
     * Ask from data:
     * ```typescript
     * import { readFile } from "fs/promises";
     *
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.ask({
     *   data: await readFile("document.pdf"),
     *   contentType: "application/pdf",
     *   prompt: "Summarize the main points of this document"
     * });
     * console.log(result.markdown);
     * ```
     */
    async ask(request) {
        const body = "data" in request
            ? {
                // Handle data input
                file: await this.dataToBase64(request.data),
                prompt: request.prompt,
            }
            // Handle URL input
            : request;
        return this.postJson("/v1/api/ask", body);
    }
    /**
     * Extract structured data from a PDF/Word document based on a JSON Schema
     * @param request Extract request parameters (URL or data with prompt and schema)
     * @returns Promise resolving to structured data matching the provided schema
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * Extract from URL:
     * ```typescript
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.extract({
     *   url: "https://example.com/invoice.pdf",
     *   prompt: "Extract invoice details from this document",
     *   schema: {
     *     type: "object",
     *     properties: {
     *       invoiceNumber: { type: "string" },
     *       date: { type: "string" },
     *       totalAmount: { type: "number" },
     *       items: {
     *         type: "array",
     *         items: {
     *           type: "object",
     *           properties: {
     *             description: { type: "string" },
     *             quantity: { type: "number" },
     *             price: { type: "number" }
     *           }
     *         }
     *       }
     *     },
     *     required: ["invoiceNumber", "date", "totalAmount", "items"],
     *     additionalProperties: false
     *   }
     * });
     * console.log(result.data);
     * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
     * ```
     *
     * @example
     * Extract from data:
     * ```typescript
     * import { readFile } from "fs/promises";
     *
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.extract({
     *   data: await readFile("research-paper.pdf"),
     *   contentType: "application/pdf",
     *   prompt: "Extract the title, authors, abstract, and key findings",
     *   schema: {
     *     type: "object",
     *     properties: {
     *       title: { type: "string" },
     *       authors: { type: "array", items: { type: "string" } },
     *       abstract: { type: "string" },
     *       keyFindings: { type: "array", items: { type: "string" } },
     *       publicationDate: { type: "string" }
     *     },
     *     required: ["title", "authors", "abstract", "keyFindings"],
     *     additionalProperties: false
     *   }
     * });
     * console.log(result.data);
     * ```
     */
    async extract(request) {
        const body = "data" in request
            ? {
                // Handle data input
                file: await this.dataToBase64(request.data),
                prompt: request.prompt,
                schema: request.schema,
            }
            // Handle URL input
            : request;
        return this.postJson("/v1/api/extract", body);
    }
    /**
     * Search academic publications across multiple databases
     *
     * Defaults applied when the request omits them: providers
     * ["semantic-scholar"] (also used for an empty list), offset 0, limit 20.
     *
     * @param request Search request parameters
     * @returns Promise resolving to search results
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * ```typescript
     * const results = await client.academicSearch({
     *   query: "machine learning",
     *   providers: ["semantic-scholar", "arxiv"],
     *   limit: 20,
     *   fields: ["title", "authors", "abstract", "year"]
     * });
     * ```
     */
    async academicSearch(request) {
        // Prepare request body. Properties left undefined (yearFrom, yearTo,
        // fields, ...) are omitted by JSON.stringify, so no explicit cleanup is needed.
        const body = {
            query: request.query,
            providers: request.providers?.length
                ? request.providers
                : ["semantic-scholar"],
            offset: request.offset ?? 0,
            limit: request.limit ?? 20,
            yearFrom: request.yearFrom,
            yearTo: request.yearTo,
            fields: request.fields,
        };
        return this.postJson("/v1/api/academic-search", body);
    }
    /**
     * Search academic publications (alias for academicSearch)
     * @param request Search request parameters
     * @returns Promise resolving to search results
     * @throws {PDFVectorError} When the API request fails
     */
    async search(request) {
        return this.academicSearch(request);
    }
    /**
     * Fetch specific academic publications by their IDs with auto-detection
     * @param request Fetch request parameters containing IDs and optional fields
     * @returns Promise resolving to fetch results and errors
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * Auto-detection with mixed ID formats:
     * ```typescript
     * const results = await client.academicFetch({
     *   ids: [
     *     "10.1038/nature12373",           // DOI auto-detected
     *     "12345678",                      // PubMed ID auto-detected
     *     "2301.00001",                    // ArXiv ID auto-detected
     *     "arXiv:2507.16298v1",            // ArXiv with prefix
     *     "ED123456",                      // ERIC ID auto-detected
     *     "0f40b1f08821e22e859c6050916cec3667778613", // Semantic Scholar
     *     "pubmed:98765432",               // PDFVector format
     *     "some-unknown-id"                // Will try all providers
     *   ]
     * });
     * ```
     *
     * @example
     * Fetching specific fields only:
     * ```typescript
     * const results = await client.academicFetch({
     *   ids: ["10.1038/nature12373", "pubmed:12345678"],
     *   fields: ["title", "authors", "year", "abstract"]
     * });
     * ```
     *
     * @example
     * Handling results:
     * ```typescript
     * // Successfully fetched publications
     * results.results.forEach(pub => {
     *   console.log(`Fetched: ${pub.title}`);
     *   console.log(`Provider: ${pub.detectedProvider}`);
     *   console.log(`Requested as: ${pub.id}`);
     * });
     *
     * // Handle errors for IDs that couldn't be fetched
     * results.errors?.forEach(error => {
     *   console.log(`Failed to fetch ${error.id}: ${error.error}`);
     *   if (error.code === "NOT_FOUND") {
     *     console.log("Publication not found in any provider");
     *   }
     * });
     * ```
     */
    async academicFetch(request) {
        return this.postJson("/v1/api/academic-fetch", request);
    }
    /**
     * Fetch specific academic publications by their IDs (alias for academicFetch)
     * @param request Fetch request parameters containing IDs and optional fields
     * @returns Promise resolving to fetch results and errors
     * @throws {PDFVectorError} When the API request fails
     */
    async fetch(request) {
        return this.academicFetch(request);
    }
}
//# sourceMappingURL=index.js.map