/*
 * pdfvector
 * Version: (not specified)
 * Official TypeScript/JavaScript SDK for the PDF Vector API: parse PDFs to
 * markdown and search academic publications across multiple databases.
 * 286 lines • 9.83 kB
 * JavaScript
 */
// Re-export constants for convenience
export { AcademicSearchProviderValues, AcademicSearchPublicationFieldValues, } from "./types.js";
/**
 * Error type thrown by the SDK when a PDF Vector API call fails.
 *
 * Besides the message it carries the HTTP status code and the custom error
 * code; either one is `undefined` when the thrower did not supply it.
 */
export class PDFVectorError extends Error {
  /** HTTP status code */
  status;
  /** Custom error code */
  code;

  /**
   * @param message Human-readable description of the failure
   * @param status HTTP status code
   * @param code Custom error code
   */
  constructor(message, status, code) {
    super(message);
    // Populate the declared fields and override the inherited "Error" name in
    // a single step.
    Object.assign(this, { status, code, name: "PDFVectorError" });
  }
}
/**
* PDF Vector TypeScript/JavaScript SDK client
*
* @example
* Parse from URL:
* ```typescript
* const client = new PDFVector({ apiKey: "pdfvector_xxx" });
* const result = await client.parse({
* url: "https://example.com/document.pdf",
* useLLM: "auto"
* });
* console.log(result.markdown);
* ```
*
* Parse from data:
* ```typescript
* import { readFile } from "fs/promises";
*
* const client = new PDFVector({ apiKey: "pdfvector_xxx" });
* const result = await client.parse({
* data: await readFile("document.pdf"),
* contentType: "application/pdf",
* useLLM: "auto"
* });
* console.log(result.markdown);
* ```
*
* Search academic publications:
* ```typescript
* const client = new PDFVector({ apiKey: "pdfvector_xxx" });
* const results = await client.academicSearch({
* query: "machine learning transformers",
* providers: ["semantic-scholar", "arxiv"],
* limit: 10,
* yearFrom: 2020
* });
* console.log(results.results);
* ```
*/
export class PDFVector {
  /** API key for authentication (format: pdfvector_xxx) */
  apiKey;
  /**
   * Base URL for the PDF Vector API, stored without trailing slashes.
   * Defaults to https://www.pdfvector.com
   */
  baseUrl;

  /**
   * Creates a new PDF Vector client
   * @param config Configuration object containing API key and optional base URL
   */
  constructor(config) {
    this.apiKey = config.apiKey;
    const baseUrl = config.baseUrl ?? "https://www.pdfvector.com";
    // Endpoint paths are appended as "/v1/api/...", so a trailing slash on a
    // caller-supplied base URL would otherwise produce "//v1/api/..." URLs.
    this.baseUrl =
      typeof baseUrl === "string" ? baseUrl.replace(/\/+$/, "") : baseUrl;
  }

  /**
   * Handle API response errors (internal)
   *
   * Reads the response body and always throws. When the body is JSON, its
   * `message` and `code` properties (if truthy) are used; when it is not
   * JSON, the raw text (if non-empty) becomes the message; otherwise the
   * message is "HTTP <status>: <statusText>".
   * @param response Non-OK fetch response
   * @throws {PDFVectorError} Always
   * @private
   */
  async handleResponseError(response) {
    const errorText = await response.text();
    let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
    let errorCode;
    try {
      const errorData = JSON.parse(errorText);
      if (errorData.message) errorMessage = errorData.message;
      if (errorData.code) errorCode = errorData.code;
    } catch {
      // Body was not usable JSON: fall back to the raw text when there is any.
      if (errorText) errorMessage = errorText;
    }
    throw new PDFVectorError(errorMessage, response.status, errorCode);
  }

  /**
   * POST a JSON body to an API endpoint with bearer authentication and
   * return the decoded JSON response (internal)
   * @param path Endpoint path appended to the base URL, e.g. "/v1/api/parse"
   * @param body Value serialized with JSON.stringify as the request body
   * @returns Promise resolving to the decoded JSON response body
   * @throws {PDFVectorError} When the response status is not OK
   * @private
   */
  async postJson(path, body) {
    const response = await fetch(`${this.baseUrl}${path}`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify(body),
    });
    if (!response.ok) await this.handleResponseError(response);
    return await response.json();
  }

  /**
   * Create a temporary upload URL for file upload (internal)
   * @param contentType Content type of the file that will be uploaded
   * @returns Promise resolving to the API's JSON response; `parse` reads its
   * `url` and `key` properties
   * @throws {PDFVectorError} When the API request fails
   * @private
   */
  async createTemporaryUpload(contentType) {
    return this.postJson("/v1/api/temporary-upload", { contentType });
  }

  /**
   * Parse a PDF/Word document from URL or data and convert to markdown
   * @param request Parse request parameters (URL or data)
   * @returns Promise resolving to parsed document details
   * @throws {PDFVectorError} When the API request or the file upload fails
   */
  async parse(request) {
    // Handle data input: upload the bytes first, then parse by reference.
    if ("data" in request) {
      const temporaryUpload = await this.createTemporaryUpload(
        request.contentType,
      );
      // The upload goes to the URL returned by the API and carries only the
      // content type header (no API key).
      const uploadResponse = await fetch(temporaryUpload.url, {
        method: "PUT",
        body: request.data,
        headers: {
          "content-type": request.contentType,
        },
      });
      if (!uploadResponse.ok) {
        // Include the HTTP status so callers can inspect `error.status`
        // exactly as they do for API failures.
        throw new PDFVectorError(
          `Failed to upload file: HTTP ${uploadResponse.status}`,
          uploadResponse.status,
        );
      }
      // Convert to a URL request that references the uploaded object's key.
      return this.parse({
        url: `pdfvector-s3://${temporaryUpload.key}`,
        useLLM: request.useLLM,
      });
    }
    // Handle URL input
    return this.postJson("/v1/api/parse", request);
  }

  /**
   * Search academic publications across multiple databases
   *
   * Defaults applied before sending: `providers` falls back to
   * ["semantic-scholar"] when missing or empty, `offset` to 0 and `limit`
   * to 20.
   * @param request Search request parameters
   * @returns Promise resolving to search results
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * ```typescript
   * const results = await client.academicSearch({
   *   query: "machine learning",
   *   providers: ["semantic-scholar", "arxiv"],
   *   limit: 20,
   *   fields: ["title", "authors", "abstract", "year"]
   * });
   * ```
   */
  async academicSearch(request) {
    // JSON.stringify omits properties whose value is undefined, so optional
    // filters that were not supplied never reach the request body.
    return this.postJson("/v1/api/academic-search", {
      query: request.query,
      providers: request.providers?.length
        ? request.providers
        : ["semantic-scholar"],
      offset: request.offset ?? 0,
      limit: request.limit ?? 20,
      yearFrom: request.yearFrom,
      yearTo: request.yearTo,
      fields: request.fields,
    });
  }

  /**
   * Search academic publications (alias for academicSearch)
   * @param request Search request parameters
   * @returns Promise resolving to search results
   * @throws {PDFVectorError} When the API request fails
   */
  async search(request) {
    return this.academicSearch(request);
  }

  /**
   * Fetch specific academic publications by their IDs with auto-detection
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Auto-detection with mixed ID formats:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: [
   *     "10.1038/nature12373", // DOI auto-detected
   *     "12345678", // PubMed ID auto-detected
   *     "2301.00001", // ArXiv ID auto-detected
   *     "arXiv:2507.16298v1", // ArXiv with prefix
   *     "ED123456", // ERIC ID auto-detected
   *     "0f40b1f08821e22e859c6050916cec3667778613", // Semantic Scholar
   *     "pubmed:98765432", // PDFVector format
   *     "some-unknown-id" // Will try all providers
   *   ]
   * });
   * ```
   *
   * @example
   * Fetching specific fields only:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: ["10.1038/nature12373", "pubmed:12345678"],
   *   fields: ["title", "authors", "year", "abstract"]
   * });
   * ```
   *
   * @example
   * Handling results:
   * ```typescript
   * // Successfully fetched publications
   * results.results.forEach(pub => {
   *   console.log(`Fetched: ${pub.title}`);
   *   console.log(`Provider: ${pub.detectedProvider}`);
   *   console.log(`Requested as: ${pub.id}`);
   * });
   *
   * // Handle errors for IDs that couldn't be fetched
   * results.errors?.forEach(error => {
   *   console.log(`Failed to fetch ${error.id}: ${error.error}`);
   *   if (error.code === "NOT_FOUND") {
   *     console.log("Publication not found in any provider");
   *   }
   * });
   * ```
   */
  async academicFetch(request) {
    return this.postJson("/v1/api/academic-fetch", request);
  }

  /**
   * Fetch specific academic publications by their IDs (alias for academicFetch)
   *
   * Note: inside this class a bare `fetch(...)` call still refers to the
   * global fetch function, not to this method.
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   */
  async fetch(request) {
    return this.academicFetch(request);
  }
}
//# sourceMappingURL=index.js.map