UNPKG

pdfvector

Version:

Official TypeScript/JavaScript SDK for the PDF Vector API - Parse PDFs to markdown and search academic publications across multiple databases

416 lines 15.7 kB
import type {
  AcademicFetchResponse,
  AcademicSearchProvider,
  AcademicSearchPublicationField,
  AcademicSearchResponse,
  AskResponse,
  ExtractResponse,
  ParseResponse,
} from "./types.js";

export {
  AcademicSearchProviderValues,
  AcademicSearchPublicationFieldValues,
} from "./types.js";

export type {
  AcademicFetchError,
  AcademicFetchResponse,
  AcademicFetchResult,
  AcademicSearchArxivData,
  AcademicSearchAuthor,
  AcademicSearchBasePublication,
  AcademicSearchEricData,
  AcademicSearchGoogleScholarData,
  AcademicSearchProvider,
  AcademicSearchProviderData,
  AcademicSearchProviderError,
  AcademicSearchPublication,
  AcademicSearchPublicationField,
  AcademicSearchPubMedData,
  AcademicSearchResponse,
  AcademicSearchSemanticScholarData,
  AskResponse,
  ExtractResponse,
  ParseResponse,
} from "./types.js";

/**
 * Options shared by every parse request, regardless of how the document
 * is supplied.
 */
export interface ParseBaseRequest {
  /** Controls LLM-based parsing. When omitted, "auto" is used. */
  useLLM?: "auto" | "always" | "never";
}

/**
 * Parse request for a PDF/Word document that is referenced by URL.
 */
export interface ParseURLRequest extends ParseBaseRequest {
  /** Direct URL of the PDF/Word document to parse and convert to markdown. */
  url: string;
}

/**
 * Parse request for a PDF/Word document whose content is passed inline.
 */
export interface ParseDataRequest extends ParseBaseRequest {
  /**
   * Document content. Accepted forms: string, Buffer, Uint8Array,
   * ArrayBuffer, Blob, or ReadableStream.
   */
  data:
    | string
    | Buffer
    | Uint8Array
    | ArrayBuffer
    | Blob
    | ReadableStream;
  /** MIME type of the document, e.g. "application/pdf" or "application/msword". */
  contentType: string;
}

/**
 * Options shared by every ask request, regardless of how the document
 * is supplied.
 */
export interface AskBaseRequest {
  /** Question or prompt to ask about the document. */
  prompt: string;
}

/**
 * Ask request for a PDF/Word document that is referenced by URL.
 */
export interface AskURLRequest extends AskBaseRequest {
  /** Direct URL of the PDF/Word document to analyze. */
  url: string;
}
/**
 * Ask request for a PDF/Word document whose content is passed inline.
 */
export interface AskDataRequest extends AskBaseRequest {
  /**
   * Document content. Accepted forms: string, Buffer, Uint8Array,
   * ArrayBuffer, Blob, or ReadableStream.
   */
  data:
    | string
    | Buffer
    | Uint8Array
    | ArrayBuffer
    | Blob
    | ReadableStream;
  /** MIME type of the document, e.g. "application/pdf" or "application/msword". */
  contentType: string;
}

/**
 * Options shared by every extract request, regardless of how the document
 * is supplied.
 */
export interface ExtractBaseRequest {
  /** Instructions describing which structured data to pull out of the document. */
  prompt: string;
  /**
   * JSON Schema object describing the shape of the expected output. It
   * should follow the JSON Schema specification and include the
   * `additionalProperties` property.
   */
  schema: Record<string, unknown>;
}

/**
 * Extract request for a PDF/Word document that is referenced by URL.
 */
export interface ExtractURLRequest extends ExtractBaseRequest {
  /** Direct URL of the PDF/Word document to analyze. */
  url: string;
}

/**
 * Extract request for a PDF/Word document whose content is passed inline.
 */
export interface ExtractDataRequest extends ExtractBaseRequest {
  /**
   * Document content. Accepted forms: string, Buffer, Uint8Array,
   * ArrayBuffer, Blob, or ReadableStream.
   */
  data:
    | string
    | Buffer
    | Uint8Array
    | ArrayBuffer
    | Blob
    | ReadableStream;
  /** MIME type of the document, e.g. "application/pdf" or "application/msword". */
  contentType: string;
}

/**
 * Settings accepted by the PDF Vector client constructor.
 */
export interface PDFVectorConfig {
  /** API key used for authentication (format: pdfvector_xxx). */
  apiKey: string;
  /** Base URL of the PDF Vector API. When omitted, https://www.pdfvector.com is used. */
  baseUrl?: string;
}

/**
 * Parameters for an academic publication search.
 */
export interface SearchRequest {
  /** Search query string. */
  query: string;
  /** Providers to query. Defaults to ["semantic-scholar"]. */
  providers?: AcademicSearchProvider[];
  /** Number of results to skip, for pagination. Defaults to 0. */
  offset?: number;
  /** Maximum number of results to return (1-100). Defaults to 20. */
  limit?: number;
  /** Lower bound (from) of the publication-year filter. */
  yearFrom?: number;
  /** Upper bound (to) of the publication-year filter. */
  yearTo?: number;
  /**
   * Fields to include in the response. When omitted, all base publication
   * fields are returned. Pass 'providerData' to include provider-specific
   * metadata.
   */
  fields?: AcademicSearchPublicationField[];
}

/**
 * Parameters for fetching specific academic publications by ID.
 */
export interface FetchRequest {
  /** Publication IDs, given as strings; the ID format is auto-detected. */
  ids: string[];
  /**
   * Fields to include in the response. When omitted, all base publication
   * fields are returned. Pass 'providerData' to include provider-specific
   * metadata.
   */
  fields?: AcademicSearchPublicationField[];
}

/**
 * Error type used for PDF Vector API failures.
 */
export declare class PDFVectorError extends Error {
  /** HTTP status code */
  status?: number | undefined;
  /** Custom error code */
  code?: string | undefined;
  /**
   * @param message Error message
   * @param status HTTP status code
   * @param code Custom error code
   */
  constructor(
    message: string,
    status?: number | undefined,
    code?: string | undefined,
  );
}

/**
 * Client for the PDF Vector API (TypeScript/JavaScript SDK).
 *
 * @example
 * Parsing a document referenced by URL:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   url: "https://example.com/document.pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * Parsing a document from in-memory data:
 * ```typescript
 * import { readFile } from "fs/promises";
 *
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   data: await readFile("document.pdf"),
 *   contentType: "application/pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * Asking questions about a document:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.ask({
 *   url: "https://example.com/research-paper.pdf",
 *   prompt: "What are the main findings and methodology?"
 * });
 * console.log(result.markdown);
 * console.log(`Cost: ${result.creditCount} credits`);
 * ```
 *
 * Extracting structured data:
 * ```typescript
 * const result = await client.extract({
 *   url: "https://example.com/invoice.pdf",
 *   prompt: "Extract invoice details",
 *   schema: {
 *     type: "object",
 *     properties: {
 *       invoiceNumber: { type: "string" },
 *       date: { type: "string" },
 *       totalAmount: { type: "number" },
 *       items: {
 *         type: "array",
 *         items: {
 *           type: "object",
 *           properties: {
 *             description: { type: "string" },
 *             quantity: { type: "number" },
 *             price: { type: "number" }
 *           }
 *         }
 *       }
 *     },
 *     required: ["invoiceNumber", "date", "totalAmount", "items"],
 *     additionalProperties: false
 *   }
 * });
 * console.log(result.data);
 * ```
 *
 * Searching academic publications:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const results = await client.academicSearch({
 *   query: "machine learning transformers",
 *   providers: ["semantic-scholar", "arxiv"],
 *   limit: 10,
 *   yearFrom: 2020
 * });
 * console.log(results.results);
 * ```
 */
export declare class PDFVector {
  /** API key used for authentication (format: pdfvector_xxx). */
  readonly apiKey: string;
  /** Base URL of the PDF Vector API. Defaults to https://www.pdfvector.com */
  readonly baseUrl: string;

  /**
   * Builds a new PDF Vector client.
   * @param config Configuration object holding the API key and an optional base URL
   */
  constructor(config: PDFVectorConfig);

  /**
   * Internal helper that handles API response errors.
   * @private
   */
  private handleResponseError;

  /**
   * Internal helper that converts data to a base64 string.
   * @private
   */
  private dataToBase64;

  /**
   * Parses a PDF/Word document, supplied by URL or as data, and converts it
   * to markdown.
   * @param request Parse request parameters (URL or data)
   * @returns Promise resolving to the parsed document details
   * @throws {PDFVectorError} When the API request fails
   */
  parse(request: ParseURLRequest | ParseDataRequest): Promise<ParseResponse>;

  /**
   * Asks a question about a PDF/Word document and returns an AI-powered
   * answer in natural language.
   * @param request Ask request parameters (URL or data, together with a prompt)
   * @returns Promise resolving to the AI-generated answer about the document, in markdown format
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Asking about a document referenced by URL:
   * ```typescript
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.ask({
   *   url: "https://example.com/research-paper.pdf",
   *   prompt: "What are the key findings in this research?"
   * });
   * console.log(result.markdown);
   * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
   * ```
   *
   * @example
   * Asking about a document passed as data:
   * ```typescript
   * import { readFile } from "fs/promises";
   *
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.ask({
   *   data: await readFile("document.pdf"),
   *   contentType: "application/pdf",
   *   prompt: "Summarize the main points of this document"
   * });
   * console.log(result.markdown);
   * ```
   */
  ask(request: AskURLRequest | AskDataRequest): Promise<AskResponse>;

  /**
   * Extracts structured data from a PDF/Word document according to a
   * JSON Schema.
   * @param request Extract request parameters (URL or data, together with a prompt and schema)
   * @returns Promise resolving to structured data matching the provided schema
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Extracting from a document referenced by URL:
   * ```typescript
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.extract({
   *   url: "https://example.com/invoice.pdf",
   *   prompt: "Extract invoice details from this document",
   *   schema: {
   *     type: "object",
   *     properties: {
   *       invoiceNumber: { type: "string" },
   *       date: { type: "string" },
   *       totalAmount: { type: "number" },
   *       items: {
   *         type: "array",
   *         items: {
   *           type: "object",
   *           properties: {
   *             description: { type: "string" },
   *             quantity: { type: "number" },
   *             price: { type: "number" }
   *           }
   *         }
   *       }
   *     },
   *     required: ["invoiceNumber", "date", "totalAmount", "items"],
   *     additionalProperties: false
   *   }
   * });
   * console.log(result.data);
   * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
   * ```
   *
   * @example
   * Extracting from a document passed as data:
   * ```typescript
   * import { readFile } from "fs/promises";
   *
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.extract({
   *   data: await readFile("research-paper.pdf"),
   *   contentType: "application/pdf",
   *   prompt: "Extract the title, authors, abstract, and key findings",
   *   schema: {
   *     type: "object",
   *     properties: {
   *       title: { type: "string" },
   *       authors: { type: "array", items: { type: "string" } },
   *       abstract: { type: "string" },
   *       keyFindings: { type: "array", items: { type: "string" } },
   *       publicationDate: { type: "string" }
   *     },
   *     required: ["title", "authors", "abstract", "keyFindings"],
   *     additionalProperties: false
   *   }
   * });
   * console.log(result.data);
   * ```
   */
  extract(
    request: ExtractURLRequest | ExtractDataRequest,
  ): Promise<ExtractResponse>;

  /**
   * Searches academic publications across multiple databases.
   * @param request Search request parameters
   * @returns Promise resolving to the search results
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * ```typescript
   * const results = await client.academicSearch({
   *   query: "machine learning",
   *   providers: ["semantic-scholar", "arxiv"],
   *   limit: 20,
   *   fields: ["title", "authors", "abstract", "year"]
   * });
   * ```
   */
  academicSearch(request: SearchRequest): Promise<AcademicSearchResponse>;

  /**
   * Searches academic publications (alias for academicSearch).
   * @param request Search request parameters
   * @returns Promise resolving to the search results
   * @throws {PDFVectorError} When the API request fails
   */
  search(request: SearchRequest): Promise<AcademicSearchResponse>;

  /**
   * Fetches specific academic publications by ID, auto-detecting the ID format.
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Auto-detection with mixed ID formats:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: [
   *     "10.1038/nature12373", // DOI auto-detected
   *     "12345678", // PubMed ID auto-detected
   *     "2301.00001", // ArXiv ID auto-detected
   *     "arXiv:2507.16298v1", // ArXiv with prefix
   *     "ED123456", // ERIC ID auto-detected
   *     "0f40b1f08821e22e859c6050916cec3667778613", // Semantic Scholar
   *     "pubmed:98765432", // PDFVector format
   *     "some-unknown-id" // Will try all providers
   *   ]
   * });
   * ```
   *
   * @example
   * Fetching specific fields only:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: ["10.1038/nature12373", "pubmed:12345678"],
   *   fields: ["title", "authors", "year", "abstract"]
   * });
   * ```
   *
   * @example
   * Handling results:
   * ```typescript
   * // Successfully fetched publications
   * results.results.forEach(pub => {
   *   console.log(`Fetched: ${pub.title}`);
   *   console.log(`Provider: ${pub.detectedProvider}`);
   *   console.log(`Requested as: ${pub.id}`);
   * });
   *
   * // Handle errors for IDs that couldn't be fetched
   * results.errors?.forEach(error => {
   *   console.log(`Failed to fetch ${error.id}: ${error.error}`);
   *   if (error.code === "NOT_FOUND") {
   *     console.log("Publication not found in any provider");
   *   }
   * });
   * ```
   */
  academicFetch(request: FetchRequest): Promise<AcademicFetchResponse>;

  /**
   * Fetches specific academic publications by ID (alias for academicFetch).
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   */
  fetch(request: FetchRequest): Promise<AcademicFetchResponse>;
}
//# sourceMappingURL=index.d.ts.map