/*
 * pdfvector
 * Version:
 * Official TypeScript/JavaScript SDK for the PDF Vector API - parse PDFs to Markdown and search academic publications across multiple databases
 * 483 lines • 16.4 kB
 * JavaScript
 */
// Re-export constants for convenience
export { AcademicSearchProviderValues, AcademicSearchPublicationFieldValues, } from "./types.js";
/**
 * Error type thrown by the SDK.
 *
 * Carries the error message plus two optional pieces of context: the HTTP
 * status of the failed response and an error code. Both are `undefined`
 * when the constructor is called with a message only.
 */
export class PDFVectorError extends Error {
  /** HTTP status code */
  status;
  /** Custom error code */
  code;

  /**
   * @param message Human-readable description of the failure
   * @param status HTTP status code (optional)
   * @param code Custom error code (optional)
   */
  constructor(message, status, code) {
    super(message);
    // Assigned in this order so the instance's own keys stay status, code, name.
    Object.assign(this, { status, code, name: "PDFVectorError" });
  }
}
/**
* PDF Vector TypeScript/JavaScript SDK client
*
* @example
* Parse from URL:
* ```typescript
* const client = new PDFVector({ apiKey: "pdfvector_xxx" });
* const result = await client.parse({
* url: "https://example.com/document.pdf",
* useLLM: "auto"
* });
* console.log(result.markdown);
* ```
*
* Parse from data:
* ```typescript
* import { readFile } from "fs/promises";
*
* const client = new PDFVector({ apiKey: "pdfvector_xxx" });
* const result = await client.parse({
* data: await readFile("document.pdf"),
* contentType: "application/pdf",
* useLLM: "auto"
* });
* console.log(result.markdown);
* ```
*
* Ask questions about documents:
* ```typescript
* const client = new PDFVector({ apiKey: "pdfvector_xxx" });
* const result = await client.ask({
* url: "https://example.com/research-paper.pdf",
* prompt: "What are the main findings and methodology?"
* });
* console.log(result.markdown);
* console.log(`Cost: ${result.creditCount} credits`);
* ```
*
* Extract structured data:
* ```typescript
* const result = await client.extract({
* url: "https://example.com/invoice.pdf",
* prompt: "Extract invoice details",
* schema: {
* type: "object",
* properties: {
* invoiceNumber: { type: "string" },
* date: { type: "string" },
* totalAmount: { type: "number" },
* items: {
* type: "array",
* items: {
* type: "object",
* properties: {
* description: { type: "string" },
* quantity: { type: "number" },
* price: { type: "number" }
* }
* }
* }
* },
* required: ["invoiceNumber", "date", "totalAmount", "items"],
* additionalProperties: false
* }
* });
* console.log(result.data);
* ```
*
* Search academic publications:
* ```typescript
* const client = new PDFVector({ apiKey: "pdfvector_xxx" });
* const results = await client.academicSearch({
* query: "machine learning transformers",
* providers: ["semantic-scholar", "arxiv"],
* limit: 10,
* yearFrom: 2020
* });
* console.log(results.results);
* ```
*/
export class PDFVector {
  /** API key for authentication (format: pdfvector_xxx) */
  apiKey;
  /** Base URL for the PDF Vector API. Defaults to https://www.pdfvector.com */
  baseUrl;

  /**
   * Creates a new PDF Vector client
   * @param config Configuration object containing API key and optional base URL
   */
  constructor(config) {
    this.apiKey = config.apiKey;
    this.baseUrl = config.baseUrl ?? "https://www.pdfvector.com";
  }

  /**
   * Handle API response errors (internal)
   *
   * Reads the response body as text and always throws. When the body is JSON,
   * its `message` and `code` fields (if truthy) become the error message and
   * code. When the body is not JSON and is non-empty, the raw text becomes the
   * message. Otherwise the message is `HTTP <status>: <statusText>`.
   *
   * @param response Non-OK fetch response
   * @throws {PDFVectorError} Always
   * @private
   */
  async handleResponseError(response) {
    const errorText = await response.text();
    let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
    let errorCode;
    try {
      const errorData = JSON.parse(errorText);
      if (errorData.message) {
        errorMessage = errorData.message;
      }
      if (errorData.code) {
        errorCode = errorData.code;
      }
    } catch {
      // Body was not usable JSON: fall back to the raw text when there is any.
      if (errorText) {
        errorMessage = errorText;
      }
    }
    throw new PDFVectorError(errorMessage, response.status, errorCode);
  }

  /**
   * Convert data to base64 string (internal)
   *
   * Strings are returned unchanged (they are treated as already base64
   * encoded). Any ArrayBuffer view (Buffer, Uint8Array, other typed arrays,
   * DataView), ArrayBuffer, Blob or ReadableStream is encoded from its raw
   * bytes.
   *
   * @param data Document content
   * @returns Promise resolving to the base64 string
   * @throws {PDFVectorError} When the data type is not supported
   * @private
   */
  async dataToBase64(data) {
    // If already a base64 string, return as is
    if (typeof data === "string") {
      return data;
    }
    // Buffer, typed arrays and DataView: encode exactly the viewed byte range
    // (byteOffset/byteLength), without copying the underlying memory.
    if (ArrayBuffer.isView(data)) {
      return Buffer.from(data.buffer, data.byteOffset, data.byteLength).toString("base64");
    }
    if (data instanceof ArrayBuffer) {
      return Buffer.from(data).toString("base64");
    }
    // Blob is feature-checked because it is not a global in every runtime.
    if (typeof Blob !== "undefined" && data instanceof Blob) {
      return Buffer.from(await data.arrayBuffer()).toString("base64");
    }
    // Drain a ReadableStream by wrapping it in a Response.
    if (typeof ReadableStream !== "undefined" && data instanceof ReadableStream) {
      const drained = await new Response(data).arrayBuffer();
      return Buffer.from(drained).toString("base64");
    }
    throw new PDFVectorError("Unsupported data type for conversion to base64");
  }

  /**
   * POST a JSON body to an API path and return the parsed JSON response (internal)
   *
   * Shared by every endpoint method. Trailing slashes on `baseUrl` are
   * stripped before the path is appended, so a configured base such as
   * "https://host/" does not produce "https://host//v1/...".
   *
   * NOTE(review): errors raised by `fetch` itself (e.g. network failures) and
   * by `response.json()` propagate unchanged; they are not wrapped in
   * PDFVectorError.
   *
   * @param path API path beginning with "/", e.g. "/v1/api/parse"
   * @param body JSON-serializable request body
   * @returns Promise resolving to the parsed JSON response body
   * @throws {PDFVectorError} When the response status is not OK
   * @private
   */
  async postJson(path, body) {
    const url = `${String(this.baseUrl).replace(/\/+$/, "")}${path}`;
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify(body),
    });
    if (!response.ok) {
      // Always throws, so the JSON parsing below only runs for OK responses.
      await this.handleResponseError(response);
    }
    return response.json();
  }

  /**
   * Parse a PDF/Word document from URL or data and convert to markdown
   *
   * When the request has a `data` key, the data is base64-encoded and sent as
   * `file` together with `useLLM`; otherwise the request object is sent as is.
   * NOTE(review): `contentType` is not forwarded in the data branch —
   * presumably the API infers the type from the file; confirm.
   *
   * @param request Parse request parameters (URL or data)
   * @returns Promise resolving to parsed document details
   * @throws {PDFVectorError} When the API request fails
   */
  async parse(request) {
    const body =
      "data" in request
        ? { file: await this.dataToBase64(request.data), useLLM: request.useLLM }
        : request;
    return this.postJson("/v1/api/parse", body);
  }

  /**
   * Ask questions about a PDF/Word document and get AI-powered answers in natural language
   *
   * When the request has a `data` key, the data is base64-encoded and sent as
   * `file` together with `prompt`; otherwise the request object is sent as is.
   *
   * @param request Ask request parameters (URL or data with prompt)
   * @returns Promise resolving to AI-generated answer about the document in markdown format
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Ask from URL:
   * ```typescript
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.ask({
   *   url: "https://example.com/research-paper.pdf",
   *   prompt: "What are the key findings in this research?"
   * });
   * console.log(result.markdown);
   * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
   * ```
   *
   * @example
   * Ask from data:
   * ```typescript
   * import { readFile } from "fs/promises";
   *
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.ask({
   *   data: await readFile("document.pdf"),
   *   contentType: "application/pdf",
   *   prompt: "Summarize the main points of this document"
   * });
   * console.log(result.markdown);
   * ```
   */
  async ask(request) {
    const body =
      "data" in request
        ? { file: await this.dataToBase64(request.data), prompt: request.prompt }
        : request;
    return this.postJson("/v1/api/ask", body);
  }

  /**
   * Extract structured data from a PDF/Word document based on a JSON Schema
   *
   * When the request has a `data` key, the data is base64-encoded and sent as
   * `file` together with `prompt` and `schema`; otherwise the request object
   * is sent as is.
   *
   * @param request Extract request parameters (URL or data with prompt and schema)
   * @returns Promise resolving to structured data matching the provided schema
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Extract from URL:
   * ```typescript
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.extract({
   *   url: "https://example.com/invoice.pdf",
   *   prompt: "Extract invoice details from this document",
   *   schema: {
   *     type: "object",
   *     properties: {
   *       invoiceNumber: { type: "string" },
   *       date: { type: "string" },
   *       totalAmount: { type: "number" },
   *       items: {
   *         type: "array",
   *         items: {
   *           type: "object",
   *           properties: {
   *             description: { type: "string" },
   *             quantity: { type: "number" },
   *             price: { type: "number" }
   *           }
   *         }
   *       }
   *     },
   *     required: ["invoiceNumber", "date", "totalAmount", "items"],
   *     additionalProperties: false
   *   }
   * });
   * console.log(result.data);
   * console.log(`Cost: ${result.creditCount} credits (${result.pageCount} pages)`);
   * ```
   *
   * @example
   * Extract from data:
   * ```typescript
   * import { readFile } from "fs/promises";
   *
   * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
   * const result = await client.extract({
   *   data: await readFile("research-paper.pdf"),
   *   contentType: "application/pdf",
   *   prompt: "Extract the title, authors, abstract, and key findings",
   *   schema: {
   *     type: "object",
   *     properties: {
   *       title: { type: "string" },
   *       authors: { type: "array", items: { type: "string" } },
   *       abstract: { type: "string" },
   *       keyFindings: { type: "array", items: { type: "string" } },
   *       publicationDate: { type: "string" }
   *     },
   *     required: ["title", "authors", "abstract", "keyFindings"],
   *     additionalProperties: false
   *   }
   * });
   * console.log(result.data);
   * ```
   */
  async extract(request) {
    const body =
      "data" in request
        ? {
            file: await this.dataToBase64(request.data),
            prompt: request.prompt,
            schema: request.schema,
          }
        : request;
    return this.postJson("/v1/api/extract", body);
  }

  /**
   * Search academic publications across multiple databases
   *
   * Defaults applied here: `providers` falls back to ["semantic-scholar"]
   * when missing or empty, `offset` to 0 and `limit` to 20. `yearFrom`,
   * `yearTo` and `fields` are only sent when defined.
   *
   * @param request Search request parameters
   * @returns Promise resolving to search results
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * ```typescript
   * const results = await client.academicSearch({
   *   query: "machine learning",
   *   providers: ["semantic-scholar", "arxiv"],
   *   limit: 20,
   *   fields: ["title", "authors", "abstract", "year"]
   * });
   * ```
   */
  async academicSearch(request) {
    const body = {
      query: request.query,
      providers: request.providers?.length ? request.providers : ["semantic-scholar"],
      offset: request.offset ?? 0,
      limit: request.limit ?? 20,
      yearFrom: request.yearFrom,
      yearTo: request.yearTo,
      fields: request.fields,
    };
    // Remove undefined values from body
    const cleanBody = Object.fromEntries(
      Object.entries(body).filter(([, value]) => value !== undefined),
    );
    return this.postJson("/v1/api/academic-search", cleanBody);
  }

  /**
   * Search academic publications (alias for academicSearch)
   * @param request Search request parameters
   * @returns Promise resolving to search results
   * @throws {PDFVectorError} When the API request fails
   */
  async search(request) {
    return this.academicSearch(request);
  }

  /**
   * Fetch specific academic publications by their IDs with auto-detection
   *
   * The request object is sent to the API unchanged.
   *
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Auto-detection with mixed ID formats:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: [
   *     "10.1038/nature12373", // DOI auto-detected
   *     "12345678", // PubMed ID auto-detected
   *     "2301.00001", // ArXiv ID auto-detected
   *     "arXiv:2507.16298v1", // ArXiv with prefix
   *     "ED123456", // ERIC ID auto-detected
   *     "0f40b1f08821e22e859c6050916cec3667778613", // Semantic Scholar
   *     "pubmed:98765432", // PDFVector format
   *     "some-unknown-id" // Will try all providers
   *   ]
   * });
   * ```
   *
   * @example
   * Fetching specific fields only:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: ["10.1038/nature12373", "pubmed:12345678"],
   *   fields: ["title", "authors", "year", "abstract"]
   * });
   * ```
   *
   * @example
   * Handling results:
   * ```typescript
   * // Successfully fetched publications
   * results.results.forEach(pub => {
   *   console.log(`Fetched: ${pub.title}`);
   *   console.log(`Provider: ${pub.detectedProvider}`);
   *   console.log(`Requested as: ${pub.id}`);
   * });
   *
   * // Handle errors for IDs that couldn't be fetched
   * results.errors?.forEach(error => {
   *   console.log(`Failed to fetch ${error.id}: ${error.error}`);
   *   if (error.code === "NOT_FOUND") {
   *     console.log("Publication not found in any provider");
   *   }
   * });
   * ```
   */
  async academicFetch(request) {
    return this.postJson("/v1/api/academic-fetch", request);
  }

  /**
   * Fetch specific academic publications by their IDs (alias for academicFetch)
   *
   * This method is named `fetch`, but the bare `fetch(...)` call used for HTTP
   * requests inside the class still resolves to the global function, because
   * class methods are not in scope as identifiers.
   *
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   */
  async fetch(request) {
    return this.academicFetch(request);
  }
}
//# sourceMappingURL=index.js.map