UNPKG

pdfvector

Version:

Official TypeScript/JavaScript SDK for PDF Vector API - Parse PDFs to markdown and search academic publications across multiple databases

286 lines 9.83 kB
// Re-export constants for convenience
export {
  AcademicSearchProviderValues,
  AcademicSearchPublicationFieldValues,
} from "./types.js";

/**
 * Custom error class for PDF Vector API errors
 */
export class PDFVectorError extends Error {
  /** HTTP status code of the failed response, when one is available */
  status;
  /** Custom error code taken from the API error payload, when present */
  code;

  /**
   * @param message Human-readable error message
   * @param status HTTP status code
   * @param code Custom error code
   */
  constructor(message, status, code) {
    super(message);
    this.status = status;
    this.code = code;
    this.name = "PDFVectorError";
  }
}

/**
 * Join a base URL and an absolute API path (internal).
 * Trailing slashes on the base URL are dropped so that a base such as
 * "https://host/" does not produce "https://host//v1/api/...".
 * @param baseUrl Base URL of the API
 * @param path API path starting with "/"
 * @returns The full endpoint URL as a string
 */
function buildEndpointUrl(baseUrl, path) {
  return `${String(baseUrl).replace(/\/+$/, "")}${path}`;
}

/**
 * Send an authenticated JSON POST request and return the decoded JSON body (internal).
 * Non-2xx responses are delegated to `client.handleResponseError`, which throws.
 * @param client PDFVector instance supplying `baseUrl`, `apiKey` and the error handler
 * @param path API path starting with "/"
 * @param payload Value serialized with JSON.stringify as the request body
 * @returns Promise resolving to the parsed JSON response
 * @throws {PDFVectorError} When the API responds with a non-OK status
 */
async function postJson(client, path, payload) {
  const response = await fetch(buildEndpointUrl(client.baseUrl, path), {
    method: "POST",
    headers: {
      "content-type": "application/json",
      authorization: `Bearer ${client.apiKey}`,
    },
    body: JSON.stringify(payload),
  });
  if (!response.ok) await client.handleResponseError(response);
  return response.json();
}

/**
 * PDF Vector TypeScript/JavaScript SDK client
 *
 * @example
 * Parse from URL:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   url: "https://example.com/document.pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * Parse from data:
 * ```typescript
 * import { readFile } from "fs/promises";
 *
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const result = await client.parse({
 *   data: await readFile("document.pdf"),
 *   contentType: "application/pdf",
 *   useLLM: "auto"
 * });
 * console.log(result.markdown);
 * ```
 *
 * Search academic publications:
 * ```typescript
 * const client = new PDFVector({ apiKey: "pdfvector_xxx" });
 * const results = await client.academicSearch({
 *   query: "machine learning transformers",
 *   providers: ["semantic-scholar", "arxiv"],
 *   limit: 10,
 *   yearFrom: 2020
 * });
 * console.log(results.results);
 * ```
 */
export class PDFVector {
  /** API key for authentication (format: pdfvector_xxx) */
  apiKey;
  /** Base URL for the PDF Vector API. Defaults to https://www.pdfvector.com */
  baseUrl;

  /**
   * Creates a new PDF Vector client
   * @param config Configuration object containing API key and optional base URL
   */
  constructor(config) {
    this.apiKey = config.apiKey;
    this.baseUrl = config.baseUrl ?? "https://www.pdfvector.com";
  }

  /**
   * Handle API response errors (internal).
   * Reads the response body and always throws: `message` and `code` are taken
   * from a JSON error payload when present; otherwise the raw body text (if
   * non-empty) or "HTTP <status>: <statusText>" is used as the message.
   * @private
   * @param response The non-OK fetch Response
   * @throws {PDFVectorError} Always
   */
  async handleResponseError(response) {
    const errorText = await response.text();
    let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
    let errorCode;
    try {
      const errorData = JSON.parse(errorText);
      if (errorData.message) errorMessage = errorData.message;
      if (errorData.code) errorCode = errorData.code;
    } catch {
      // Body is not a usable JSON error payload: fall back to the raw text, if any.
      if (errorText) errorMessage = errorText;
    }
    throw new PDFVectorError(errorMessage, response.status, errorCode);
  }

  /**
   * Create a temporary upload URL for file upload (internal)
   * @private
   * @param contentType MIME type of the file that will be uploaded
   * @returns Promise resolving to the API response; `parse` reads `url` and `key` from it
   * @throws {PDFVectorError} When the API request fails
   */
  async createTemporaryUpload(contentType) {
    return postJson(this, "/v1/api/temporary-upload", { contentType });
  }

  /**
   * Parse a PDF/Word document from URL or data and convert to markdown
   * @param request Parse request parameters (URL or data)
   * @returns Promise resolving to parsed document details
   * @throws {PDFVectorError} When the API request or the file upload fails
   */
  async parse(request) {
    // Handle URL input
    if (!("data" in request)) {
      return postJson(this, "/v1/api/parse", request);
    }

    // Handle data input: create a temporary upload, PUT the bytes to it,
    // then parse the uploaded object by reference.
    const temporaryUpload = await this.createTemporaryUpload(request.contentType);
    const temporaryResponse = await fetch(temporaryUpload.url, {
      method: "PUT",
      body: request.data,
      headers: {
        "content-type": request.contentType,
      },
    });
    if (!temporaryResponse.ok) {
      // Attach the HTTP status so callers can inspect `error.status`,
      // as with every other failure raised by this client.
      throw new PDFVectorError(
        `Failed to upload file: HTTP ${temporaryResponse.status}`,
        temporaryResponse.status,
      );
    }

    // Convert to URL request for processing
    const urlRequest = {
      url: `pdfvector-s3://${temporaryUpload.key}`,
      useLLM: request.useLLM,
    };
    return this.parse(urlRequest);
  }

  /**
   * Search academic publications across multiple databases
   * @param request Search request parameters
   * @returns Promise resolving to search results
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * ```typescript
   * const results = await client.academicSearch({
   *   query: "machine learning",
   *   providers: ["semantic-scholar", "arxiv"],
   *   limit: 20,
   *   fields: ["title", "authors", "abstract", "year"]
   * });
   * ```
   */
  async academicSearch(request) {
    // Properties left undefined (yearFrom, yearTo, fields) are omitted from the
    // payload by JSON.stringify, so no explicit filtering is required.
    const body = {
      query: request.query,
      // A missing or empty provider list falls back to Semantic Scholar.
      providers: request.providers?.length ? request.providers : ["semantic-scholar"],
      offset: request.offset ?? 0,
      limit: request.limit ?? 20,
      yearFrom: request.yearFrom,
      yearTo: request.yearTo,
      fields: request.fields,
    };
    return postJson(this, "/v1/api/academic-search", body);
  }

  /**
   * Search academic publications (alias for academicSearch)
   * @param request Search request parameters
   * @returns Promise resolving to search results
   * @throws {PDFVectorError} When the API request fails
   */
  async search(request) {
    return this.academicSearch(request);
  }

  /**
   * Fetch specific academic publications by their IDs with auto-detection
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   *
   * @example
   * Auto-detection with mixed ID formats:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: [
   *     "10.1038/nature12373", // DOI auto-detected
   *     "12345678", // PubMed ID auto-detected
   *     "2301.00001", // ArXiv ID auto-detected
   *     "arXiv:2507.16298v1", // ArXiv with prefix
   *     "ED123456", // ERIC ID auto-detected
   *     "0f40b1f08821e22e859c6050916cec3667778613", // Semantic Scholar
   *     "pubmed:98765432", // PDFVector format
   *     "some-unknown-id" // Will try all providers
   *   ]
   * });
   * ```
   *
   * @example
   * Fetching specific fields only:
   * ```typescript
   * const results = await client.academicFetch({
   *   ids: ["10.1038/nature12373", "pubmed:12345678"],
   *   fields: ["title", "authors", "year", "abstract"]
   * });
   * ```
   *
   * @example
   * Handling results:
   * ```typescript
   * // Successfully fetched publications
   * results.results.forEach(pub => {
   *   console.log(`Fetched: ${pub.title}`);
   *   console.log(`Provider: ${pub.detectedProvider}`);
   *   console.log(`Requested as: ${pub.id}`);
   * });
   *
   * // Handle errors for IDs that couldn't be fetched
   * results.errors?.forEach(error => {
   *   console.log(`Failed to fetch ${error.id}: ${error.error}`);
   *   if (error.code === "NOT_FOUND") {
   *     console.log("Publication not found in any provider");
   *   }
   * });
   * ```
   */
  async academicFetch(request) {
    return postJson(this, "/v1/api/academic-fetch", request);
  }

  /**
   * Fetch specific academic publications by their IDs (alias for academicFetch)
   * @param request Fetch request parameters containing IDs and optional fields
   * @returns Promise resolving to fetch results and errors
   * @throws {PDFVectorError} When the API request fails
   */
  async fetch(request) {
    return this.academicFetch(request);
  }
}
//# sourceMappingURL=index.js.map