/*
 * pdfvector
 * Version: (not specified)
 * Official TypeScript/JavaScript SDK for the PDF Vector API - parse PDFs to markdown and search academic publications across multiple databases.
 * 416 lines • 15.7 kB
 * TypeScript
 */
import type { AcademicFetchResponse, AcademicSearchProvider, AcademicSearchPublicationField, AcademicSearchResponse, AskResponse, ExtractResponse, ParseResponse } from "./types.js";
export { AcademicSearchProviderValues, AcademicSearchPublicationFieldValues, } from "./types.js";
export type { AcademicFetchError, AcademicFetchResponse, AcademicFetchResult, AcademicSearchArxivData, AcademicSearchAuthor, AcademicSearchBasePublication, AcademicSearchEricData, AcademicSearchGoogleScholarData, AcademicSearchProvider, AcademicSearchProviderData, AcademicSearchProviderError, AcademicSearchPublication, AcademicSearchPublicationField, AcademicSearchPubMedData, AcademicSearchResponse, AcademicSearchSemanticScholarData, AskResponse, ExtractResponse, ParseResponse, } from "./types.js";
/**
 * Options shared by every parse request, regardless of how the document is supplied.
 */
export interface ParseBaseRequest {
    /**
     * Controls LLM-based parsing: "auto", "always", or "never".
     * When omitted, "auto" is used.
     */
    useLLM?: "auto" | "always" | "never";
}
/**
 * Parse request for a PDF/Word document that is reachable by URL.
 */
export interface ParseURLRequest extends ParseBaseRequest {
    /**
     * Direct URL of the PDF/Word document to parse and convert to markdown.
     */
    url: string;
}
/**
 * Parse request for a PDF/Word document whose contents are supplied inline.
 */
export interface ParseDataRequest extends ParseBaseRequest {
    /**
     * The document contents. Accepted forms: string, Buffer, Uint8Array,
     * ArrayBuffer, Blob, or ReadableStream.
     */
    data:
        | string
        | Buffer
        | Uint8Array
        | ArrayBuffer
        | Blob
        | ReadableStream;
    /**
     * Content type of the document, e.g. "application/pdf" or "application/msword".
     */
    contentType: string;
}
/**
 * Fields shared by every ask request, regardless of how the document is supplied.
 */
export interface AskBaseRequest {
    /**
     * The question or prompt to ask about the document.
     */
    prompt: string;
}
/**
 * Ask request for a PDF/Word document that is reachable by URL.
 */
export interface AskURLRequest extends AskBaseRequest {
    /**
     * Direct URL of the PDF/Word document to analyze.
     */
    url: string;
}
/**
 * Ask request for a PDF/Word document whose contents are supplied inline.
 */
export interface AskDataRequest extends AskBaseRequest {
    /**
     * The document contents. Accepted forms: string, Buffer, Uint8Array,
     * ArrayBuffer, Blob, or ReadableStream.
     */
    data:
        | string
        | Buffer
        | Uint8Array
        | ArrayBuffer
        | Blob
        | ReadableStream;
    /**
     * Content type of the document, e.g. "application/pdf" or "application/msword".
     */
    contentType: string;
}
/**
 * Fields shared by every extract request, regardless of how the document is supplied.
 */
export interface ExtractBaseRequest {
    /**
     * Instructions describing which structured data to extract from the document.
     */
    prompt: string;
    /**
     * JSON Schema object describing the shape of the expected output.
     * It should follow the JSON Schema specification and include the
     * `additionalProperties` property.
     */
    schema: Record<string, unknown>;
}
/**
 * Extract request for a PDF/Word document that is reachable by URL.
 */
export interface ExtractURLRequest extends ExtractBaseRequest {
    /**
     * Direct URL of the PDF/Word document to analyze.
     */
    url: string;
}
/**
 * Extract request for a PDF/Word document whose contents are supplied inline.
 */
export interface ExtractDataRequest extends ExtractBaseRequest {
    /**
     * The document contents. Accepted forms: string, Buffer, Uint8Array,
     * ArrayBuffer, Blob, or ReadableStream.
     */
    data:
        | string
        | Buffer
        | Uint8Array
        | ArrayBuffer
        | Blob
        | ReadableStream;
    /**
     * Content type of the document, e.g. "application/pdf" or "application/msword".
     */
    contentType: string;
}
/**
 * Settings accepted when constructing a PDF Vector client.
 */
export interface PDFVectorConfig {
    /**
     * API key used for authentication, in the form `pdfvector_xxx`.
     */
    apiKey: string;
    /**
     * Base URL of the PDF Vector API.
     * When omitted, https://www.pdfvector.com is used.
     */
    baseUrl?: string;
}
/**
 * Parameters for an academic publication search.
 */
export interface SearchRequest {
    /**
     * The search query string.
     */
    query: string;
    /**
     * Providers to search. Defaults to ["semantic-scholar"].
     */
    providers?: AcademicSearchProvider[];
    /**
     * Number of results to skip, for pagination. Defaults to 0.
     */
    offset?: number;
    /**
     * Maximum number of results to return (1-100). Defaults to 20.
     */
    limit?: number;
    /**
     * Lower bound of the publication-year filter.
     */
    yearFrom?: number;
    /**
     * Upper bound of the publication-year filter.
     */
    yearTo?: number;
    /**
     * Fields to include in the response. When omitted, all base publication
     * fields are returned. Include 'providerData' to receive
     * provider-specific metadata.
     */
    fields?: AcademicSearchPublicationField[];
}
/**
 * Parameters for fetching specific academic publications by ID.
 */
export interface FetchRequest {
    /**
     * IDs of the publications to fetch. Each ID is a string whose kind is
     * auto-detected, e.g. a DOI ("10.1038/nature12373"), an ArXiv ID
     * ("2301.00001") or a provider-prefixed ID ("pubmed:98765432").
     */
    ids: string[];
    /**
     * Fields to include in the response. When omitted, all base publication
     * fields are returned. Include 'providerData' to receive
     * provider-specific metadata.
     */
    fields?: AcademicSearchPublicationField[];
}
/**
 * Error type used for failures reported by the PDF Vector API.
 *
 * In addition to the standard `Error` fields it can carry the HTTP status
 * code and a custom error code.
 */
export declare class PDFVectorError extends Error {
    /**
     * HTTP status code, when one is available.
     */
    status?: number | undefined;
    /**
     * Custom error code, when one is available.
     */
    code?: string | undefined;
    /**
     * @param message Error message
     * @param status HTTP status code
     * @param code Custom error code
     */
    constructor(
        message: string,
        status?: number | undefined,
        code?: string | undefined
    );
}
/**
 * Client for the PDF Vector API (TypeScript/JavaScript SDK).
 *
 * Exposes document operations (`parse`, `ask`, `extract`) and academic
 * publication operations (`academicSearch` / `search`,
 * `academicFetch` / `fetch`).
 *
 * @example
 * Parse from URL:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   url: "https://example.com/document.pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * @example
 * Parse from data:
 * ```typescript
 * import { readFile } from "fs/promises";
 *
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   data: await readFile("document.pdf"),
 *   contentType: "application/pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * @example
 * Ask questions about documents:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.ask({
 *   url: "https://example.com/research-paper.pdf",
 *   prompt: "What are the main findings and methodology?"
 * });
 * console.log(result.markdown);
 * console.log(`Cost: ${result.creditCount} credits`);
 * ```
 *
 * @example
 * Extract structured data:
 * ```typescript
 * const result = await client.extract({
 *   url: "https://example.com/invoice.pdf",
 *   prompt: "Extract invoice details",
 *   schema: {
 *     type: "object",
 *     properties: {
 *       invoiceNumber: { type: "string" },
 *       date: { type: "string" },
 *       totalAmount: { type: "number" },
 *       items: {
 *         type: "array",
 *         items: {
 *           type: "object",
 *           properties: {
 *             description: { type: "string" },
 *             quantity: { type: "number" },
 *             price: { type: "number" }
 *           }
 *         }
 *       }
 *     },
 *     required: ["invoiceNumber", "date", "totalAmount", "items"],
 *     additionalProperties: false
 *   }
 * });
 * console.log(result.data);
 * ```
 *
 * @example
 * Search academic publications:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const results = await client.academicSearch({
 *   query: "machine learning transformers",
 *   providers: ["semantic-scholar", "arxiv"],
 *   limit: 10,
 *   yearFrom: 2020
 * });
 * console.log(results.results);
 * ```
 */
export declare class PDFVector {
    /**
     * API key used for authentication, in the form `pdfvector_xxx`.
     */
    readonly apiKey: string;
    /**
     * Base URL of the PDF Vector API. Defaults to https://www.pdfvector.com.
     */
    readonly baseUrl: string;
    /**
     * Creates a new PDF Vector client.
     * @param config Configuration object holding the API key and an optional base URL
     */
    constructor(config: PDFVectorConfig);
    /**
     * Internal helper that handles API response errors.
     * @private
     */
    private handleResponseError;
    /**
     * Internal helper that converts data to a base64 string.
     * @private
     */
    private dataToBase64;
    /**
     * Parses a PDF/Word document, given by URL or as data, and converts it to markdown.
     * @param request Parse request parameters (URL or data)
     * @returns Promise resolving to the parsed document details
     * @throws {PDFVectorError} When the API request fails
     */
    parse(request: ParseURLRequest | ParseDataRequest): Promise<ParseResponse>;
    /**
     * Asks a question about a PDF/Word document and returns an AI-generated
     * answer in natural language.
     * @param request Ask request parameters (URL or data, plus the prompt)
     * @returns Promise resolving to the AI-generated answer about the document, in markdown format
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * Ask from URL:
     * ```typescript
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.ask({
     *   url: "https://example.com/research-paper.pdf",
     *   prompt: "What are the key findings in this research?"
     * });
     * console.log(result.markdown);
     * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
     * ```
     *
     * @example
     * Ask from data:
     * ```typescript
     * import { readFile } from "fs/promises";
     *
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.ask({
     *   data: await readFile("document.pdf"),
     *   contentType: "application/pdf",
     *   prompt: "Summarize the main points of this document"
     * });
     * console.log(result.markdown);
     * ```
     */
    ask(request: AskURLRequest | AskDataRequest): Promise<AskResponse>;
    /**
     * Extracts structured data from a PDF/Word document according to a JSON Schema.
     * @param request Extract request parameters (URL or data, plus prompt and schema)
     * @returns Promise resolving to structured data matching the provided schema
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * Extract from URL:
     * ```typescript
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.extract({
     *   url: "https://example.com/invoice.pdf",
     *   prompt: "Extract invoice details from this document",
     *   schema: {
     *     type: "object",
     *     properties: {
     *       invoiceNumber: { type: "string" },
     *       date: { type: "string" },
     *       totalAmount: { type: "number" },
     *       items: {
     *         type: "array",
     *         items: {
     *           type: "object",
     *           properties: {
     *             description: { type: "string" },
     *             quantity: { type: "number" },
     *             price: { type: "number" }
     *           }
     *         }
     *       }
     *     },
     *     required: ["invoiceNumber", "date", "totalAmount", "items"],
     *     additionalProperties: false
     *   }
     * });
     * console.log(result.data);
     * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
     * ```
     *
     * @example
     * Extract from data:
     * ```typescript
     * import { readFile } from "fs/promises";
     *
     * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
     * const result = await client.extract({
     *   data: await readFile("research-paper.pdf"),
     *   contentType: "application/pdf",
     *   prompt: "Extract the title, authors, abstract, and key findings",
     *   schema: {
     *     type: "object",
     *     properties: {
     *       title: { type: "string" },
     *       authors: { type: "array", items: { type: "string" } },
     *       abstract: { type: "string" },
     *       keyFindings: { type: "array", items: { type: "string" } },
     *       publicationDate: { type: "string" }
     *     },
     *     required: ["title", "authors", "abstract", "keyFindings"],
     *     additionalProperties: false
     *   }
     * });
     * console.log(result.data);
     * ```
     */
    extract(request: ExtractURLRequest | ExtractDataRequest): Promise<ExtractResponse>;
    /**
     * Searches academic publications across multiple databases.
     * @param request Search request parameters
     * @returns Promise resolving to the search results
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * ```typescript
     * const results = await client.academicSearch({
     *   query: "machine learning",
     *   providers: ["semantic-scholar", "arxiv"],
     *   limit: 20,
     *   fields: ["title", "authors", "abstract", "year"]
     * });
     * ```
     */
    academicSearch(request: SearchRequest): Promise<AcademicSearchResponse>;
    /**
     * Searches academic publications. Alias for `academicSearch`.
     * @param request Search request parameters
     * @returns Promise resolving to the search results
     * @throws {PDFVectorError} When the API request fails
     */
    search(request: SearchRequest): Promise<AcademicSearchResponse>;
    /**
     * Fetches specific academic publications by ID, auto-detecting the kind of each ID.
     * @param request Fetch request parameters containing the IDs and optional fields
     * @returns Promise resolving to the fetch results and errors
     * @throws {PDFVectorError} When the API request fails
     *
     * @example
     * Auto-detection with mixed ID formats:
     * ```typescript
     * const results = await client.academicFetch({
     *   ids: [
     *     "10.1038/nature12373",                      // DOI auto-detected
     *     "12345678",                                 // PubMed ID auto-detected
     *     "2301.00001",                               // ArXiv ID auto-detected
     *     "arXiv:2507.16298v1",                       // ArXiv with prefix
     *     "ED123456",                                 // ERIC ID auto-detected
     *     "0f40b1f08821e22e859c6050916cec3667778613", // Semantic Scholar
     *     "pubmed:98765432",                          // PDFVector format
     *     "some-unknown-id"                           // Will try all providers
     *   ]
     * });
     * ```
     *
     * @example
     * Fetching specific fields only:
     * ```typescript
     * const results = await client.academicFetch({
     *   ids: ["10.1038/nature12373", "pubmed:12345678"],
     *   fields: ["title", "authors", "year", "abstract"]
     * });
     * ```
     *
     * @example
     * Handling results:
     * ```typescript
     * // Successfully fetched publications
     * results.results.forEach(pub => {
     *   console.log(`Fetched: ${pub.title}`);
     *   console.log(`Provider: ${pub.detectedProvider}`);
     *   console.log(`Requested as: ${pub.id}`);
     * });
     *
     * // Handle errors for IDs that couldn't be fetched
     * results.errors?.forEach(error => {
     *   console.log(`Failed to fetch ${error.id}: ${error.error}`);
     *   if (error.code === "NOT_FOUND") {
     *     console.log("Publication not found in any provider");
     *   }
     * });
     * ```
     */
    academicFetch(request: FetchRequest): Promise<AcademicFetchResponse>;
    /**
     * Fetches specific academic publications by ID. Alias for `academicFetch`.
     * @param request Fetch request parameters containing the IDs and optional fields
     * @returns Promise resolving to the fetch results and errors
     * @throws {PDFVectorError} When the API request fails
     */
    fetch(request: FetchRequest): Promise<AcademicFetchResponse>;
}
//# sourceMappingURL=index.d.ts.map