internetarchive-sdk-js
Version:
NodeJS / Typescript SDK for Internet Archive APIs
458 lines (448 loc) • 18.2 kB
JavaScript
;
var fs = require('fs');
var zod = require('zod');
var qs = require('qs');
var pkgTypes = require('pkg-types');
require('crypto');
require('slugify');
/**
 * Error type used for failures raised by this module itself
 * (as opposed to failures reported by a remote API).
 */
class IaModuleError extends Error {
  /* Own property so the name shows up in `String(err)` and in logs. */
  name = 'IaModuleError';

  /**
   * @param message - Human readable description of the failure.
   */
  constructor(message) {
    super(message);
  }
}
/**
 * Error type that carries an HTTP status code alongside the message.
 */
class IaApiError extends Error {
  /* Own property so the name shows up in `String(err)` and in logs. */
  name = 'IaApiError';

  /**
   * @param message - Human readable description of the failure.
   * @param statusCode - Status code associated with the failure (may be undefined).
   */
  constructor(message, statusCode) {
    super(message);
    this.statusCode = statusCode;
  }
}
/**
 * Reads this SDK's package.json from `./node_modules` (relative to the
 * current working directory).
 *
 * @returns The parsed package.json, or `null` when it cannot be read.
 */
async function getPackageInfo() {
  const manifestPath = './node_modules/internetarchive-sdk-js/package.json';
  let manifest = null;
  try {
    manifest = await pkgTypes.readPackageJSON(manifestPath);
  }
  catch {
    /* Any failure to read the manifest is reported to the caller as `null`. */
  }
  return manifest;
}
/**
 * Tells whether a value, once converted to a string, is non-empty and made
 * up exclusively of code units in the range 0x00-0x7F.
 *
 * @param str - Value to inspect (coerced to a string).
 * @returns `true` for a non-empty, all-ASCII string; `false` otherwise.
 */
function isASCII(str) {
  const text = `${str}`;
  if (text.length === 0) {
    return false;
  }
  for (let i = 0; i < text.length; i += 1) {
    if (text.charCodeAt(i) > 0x7F) {
      return false;
    }
  }
  return true;
}
/**
 * Flattens the issues of a zod validation error into one readable line.
 *
 * Each issue is rendered as `<path> - <message>`, where `<path>` is the full
 * dotted location of the offending value (for example `-patch.0.value`).
 * Issues without a path are rendered as the bare message. Issues are joined
 * with `, `.
 *
 * @param err - Object exposing an `issues` array of `{ path, message }` entries.
 * @returns The combined, human readable description.
 */
function parseZodErrorToString(err) {
  return err.issues
    .map(({ path, message }) => {
      /*
       * Check the length rather than the truthiness of the first segment so
       * that array index 0 is not dropped, and keep every segment so nested
       * failures can be located.
       */
      const hasPath = Array.isArray(path) && path.length > 0;
      const location = hasPath ? `${path.map(String).join('.')} - ` : '';
      return `${location}${message}`;
    })
    .join(', ');
}
/* https://github.com/colinhacks/zod/issues/61 */
/**
 * Builds a refinement callback that accepts an object only when exactly one
 * of the two given keys is defined on it.
 *
 * @param key1 - First property name.
 * @param key2 - Second property name.
 * @returns Callback `(arg, ctx)` that returns `true` when exactly one key is
 * set; otherwise it records a custom issue on `ctx` and returns `false`.
 */
function oneOf(key1, key2) {
  return (arg, ctx) => {
    const firstMissing = arg[key1] === undefined;
    const secondMissing = arg[key2] === undefined;
    /* Exactly one key present <=> the two "missing" flags differ. */
    if (firstMissing !== secondMissing) {
      return true;
    }
    const issue = {
      code: zod.z.ZodIssueCode.custom,
      message: `Either <${key1}> or <${key2}> must be set.`,
    };
    ctx.addIssue(issue);
    return false;
  };
}
/*
 * Validation schemas for the SDK's own configuration.
 * `Options` accepts an object whose `testmode` and `setScanner` keys are both
 * optional booleans.
 */
const ia$1 = {
Options: zod.z.object({
testmode: zod.z.boolean().optional(),
setScanner: zod.z.boolean().optional(),
}),
};
/* The set of strings accepted as a mediatype value. */
const Mediatype = zod.z.enum(['audio', 'collection', 'data', 'etree', 'image', 'movies', 'software', 'texts', 'web']);
/*
 * Header schema for file uploads: a required mediatype and an optional
 * numeric interactive-priority flag.
 */
const UploadFileHeaders = zod.z.object({
'x-archive-interactive-priority': zod.z.number().optional(),
'x-archive-meta-mediatype': Mediatype,
});
/*
 * Header schema for item creation. It is the intersection of:
 *  - an object schema for the known headers, refined with `oneOf` so that
 *    exactly one of `x-archive-meta-collection` and
 *    `x-archive-meta01-collection` is set, and
 *  - a record of string keys to any value, which lets additional headers
 *    through.
 * Identifier and collection values may be non-empty strings or numbers.
 */
const CreateItemRequestHeaders = zod.z.intersection(zod.z.object({
'authorization': zod.z.string(),
'x-amz-auto-make-bucket': zod.z.number(),
'x-archive-interactive-priority': zod.z.number().optional(),
'x-archive-meta-identifier': zod.z.string().min(1).or(zod.z.number()),
'x-archive-meta-collection': zod.z.string().min(1).or(zod.z.number()).optional(),
'x-archive-meta01-collection': zod.z.string().min(1).or(zod.z.number()).optional(),
'x-archive-meta02-collection': zod.z.string().min(1).or(zod.z.number()).optional(),
'x-archive-meta-mediatype': Mediatype,
}).superRefine(oneOf('x-archive-meta-collection', 'x-archive-meta01-collection')), zod.z.record(zod.z.string(), zod.z.any()));
/*
 * One patch operation of a metadata update. Only the `add` operation is
 * accepted, and both `path` and `value` must be strings.
 */
const UpdateItemRequestPatch = zod.z.object({
op: zod.z.enum(['add']),
path: zod.z.string(),
value: zod.z.string(),
});
/*
 * Payload of a metadata update: the target (only `metadata` is accepted)
 * and the list of patch operations to apply.
 */
const UpdateItemRequestData = zod.z.object({
'-target': zod.z.enum(['metadata']),
'-patch': zod.z.array(UpdateItemRequestPatch),
});
/*
 * Frozen, prototype-less namespace object holding every schema defined
 * above, keyed by name.
 */
var schema = /*#__PURE__*/Object.freeze({
__proto__: null,
CreateItemRequestHeaders: CreateItemRequestHeaders,
Mediatype: Mediatype,
UpdateItemRequestData: UpdateItemRequestData,
UpdateItemRequestPatch: UpdateItemRequestPatch,
UploadFileHeaders: UploadFileHeaders,
ia: ia$1
});
/* Local alias for the SDK option schemas. */
const { ia } = schema;
/**
 * Small `fetch` wrapper shared by every API method.
 *
 * It builds the request URL from an endpoint description, adds the
 * `authorization` header for authenticated endpoints, validates the client
 * options and (when the endpoint declares a schema) the outgoing
 * headers/data/body, and converts failures into `IaApiError`s.
 */
class HttpClient {
  /**
   * @param token - API token; required only for endpoints with `auth: true`.
   * @param options - Client options, validated against `ia.Options` on every request.
   */
  constructor(token, options = {}) {
    /**
     * Throws when no token was supplied.
     *
     * @throws IaModuleError when the token is missing.
     */
    this.checkToken = () => {
      if (!this.token) {
        throw new IaModuleError('API token required.');
      }
    };

    /**
     * Performs a request against `endpoint`.
     *
     * @param endpoint - Endpoint description (`method`, `baseUrl`, `auth`, optional `schema` and `emptyBody`).
     * @param options - Optional `path`, `params` (query string), `headers`, and either `body` (sent as JSON) or `data` (sent as-is, or form-encoded for `qs` schemas).
     * @returns `{ status }` for `emptyBody` endpoints, otherwise the parsed JSON response.
     * @throws IaModuleError when the token is missing or the client options are invalid.
     * @throws IaApiError for every other failure; `statusCode` is set when the server answered.
     */
    this.makeRequest = async (endpoint, options) => {
      const { path, params, body, data } = options ?? {};
      if (endpoint.auth) {
        this.checkToken();
      }
      const pathPart = path ? `/${path}` : '';
      const queryPart = params ? `?${new URLSearchParams(params)}` : '';
      const apiUrl = `${endpoint.baseUrl}${pathPart}${queryPart}`;
      const headers = {
        ...(endpoint.auth && { authorization: `LOW ${this.token}` }),
        ...options?.headers,
      };
      try {
        ia.Options.parse(this.options);
      }
      catch (err) {
        /* Only validation errors are translated; other failures are ignored here. */
        if (err instanceof zod.ZodError) {
          throw new IaModuleError(`Invalid options args: ${parseZodErrorToString(err)}`);
        }
      }
      try {
        if (data && body) {
          throw new IaModuleError('Cannot pass data and body data at the same time.');
        }
        if (endpoint?.schema) {
          try {
            const { type, name } = endpoint.schema;
            /* Which part of the request each schema type validates. */
            const payloadByType = { headers, data, qs: data, body };
            if (Object.hasOwn(payloadByType, type)) {
              schema?.[name].parse(payloadByType[type]);
            }
          }
          catch (err) {
            /* Only validation errors are translated; other failures are ignored here. */
            if (err instanceof zod.ZodError) {
              console.error(err);
              throw new IaModuleError(`Invalid ${endpoint.schema.name} - ${parseZodErrorToString(err)}`);
            }
          }
        }
        const response = await fetch(apiUrl, {
          headers,
          method: endpoint.method,
          ...(body && { body: JSON.stringify(body) }),
          /* `schema` is optional on endpoints, so guard the lookup. */
          ...(data && { body: endpoint.schema?.type === 'qs' ? qs.stringify(data) : data }),
        });
        if (!response.ok) {
          let message;
          if (response.status === 403) {
            message = 'archive.org token is incorrect or you do not have access to this collection.';
          }
          else if (endpoint?.emptyBody) {
            message = response.statusText;
          }
          else {
            /*
             * Error bodies are not guaranteed to be JSON. A parse failure must
             * not replace the real HTTP failure (and its status code) with a
             * parser error, so fall back to the status text instead.
             */
            const rawBody = await response.text();
            let parsedBody = null;
            try {
              parsedBody = JSON.parse(rawBody);
            }
            catch {
              parsedBody = null;
            }
            message = parsedBody?.error ?? response.statusText;
          }
          throw new IaApiError(message, response.status);
        }
        if (endpoint?.emptyBody) {
          return { status: response?.status };
        }
        return await response.json();
      }
      catch (err) {
        /* Everything raised while validating or sending is reported as an IaApiError. */
        throw new IaApiError(err?.cause?.message ?? err?.message, err?.statusCode);
      }
    };
    this.token = token;
    this.options = options;
  }
}
/**
 * Endpoint descriptions consumed by `HttpClient.makeRequest`.
 *
 * Each entry declares the HTTP `method`, the `baseUrl`, whether the request
 * needs the `authorization` header (`auth`), an optional `schema`
 * (`type` = which part of the request is validated, `name` = schema to use)
 * and whether the response body is ignored (`emptyBody`).
 *
 * Every endpoint uses HTTPS: authenticated requests carry the account's
 * access key and secret in the `authorization` header, which must never be
 * sent over plain HTTP.
 */
const endpoints = {
  /**
   * @see {@link https://archive.org/developers/ias3.html Archive.org - ias3 Internet archive S3-like API}
   */
  createItem: {
    method: 'PUT',
    baseUrl: 'https://s3.us.archive.org',
    schema: {
      type: 'headers',
      name: 'CreateItemRequestHeaders',
    },
    auth: true,
    emptyBody: true,
  },
  /**
   * @see {@link https://archive.org/developers/metadata.html Archive.org - Item Metadata API}
   */
  getItem: {
    method: 'GET',
    baseUrl: 'https://archive.org/metadata',
    auth: false,
  },
  /**
   * @see {@link https://archive.org/developers/metadata.html Archive.org - Item Metadata API}
   */
  updateItem: {
    method: 'POST',
    baseUrl: 'https://archive.org/metadata',
    schema: {
      type: 'qs',
      name: 'UpdateItemRequestData',
    },
    auth: true,
  },
  /**
   * @see {@link https://archive.org/advancedsearch.php Archive.org - Advanced Search API}
   */
  getItems: {
    method: 'GET',
    baseUrl: 'https://archive.org/advancedsearch.php',
    auth: false,
  },
  /**
   * @see {@link https://archive.org/developers/ias3.html Archive.org - ias3 Internet archive S3-like API}
   */
  uploadFile: {
    method: 'PUT',
    baseUrl: 'https://s3.us.archive.org',
    schema: {
      type: 'headers',
      name: 'UploadFileHeaders',
    },
    auth: true,
    emptyBody: true,
  },
  /**
   * @see {@link https://archive.org/developers/ias3.html Archive.org - ias3 Internet archive S3-like API}
   */
  deleteFile: {
    method: 'DELETE',
    baseUrl: 'https://s3.us.archive.org',
    auth: true,
    emptyBody: true,
  },
  /**
   * @see {@link https://archive.org/developers/tasks.html Archive.org - Tasks API}
   */
  getTask: {
    method: 'GET',
    baseUrl: 'https://archive.org/services/tasks.php',
    auth: true,
  },
};
/** Values used for options the caller does not provide. */
const defaultIaOptions = {
  testmode: false,
  setScanner: true,
};

class InternetArchive {
  /**
   * Provides access to Internet Archive APIs through methods
   *
   * @param token - {@link https://archive.org/developers/tutorial-get-ia-credentials.html S3-like API Key} formatted as "accesskey:secretkey" (required for all methods except getItem or getItems)
   * @param options - InternetArchive API options
   * @param options.testmode - Option to add item to {@link https://archive.org/details/test_collection Test Collection} (auto deletes in 30 days) - default FALSE
   * @param options.setScanner - option to add scanner metadata for internetarchive-sdk-js - default TRUE
   * @see {@link https://archive.org/developers/tutorial-get-ia-credentials.html Archive.org - Get your Internet Archive credentials}
   * @see {@link https://archive.org/details/test_collection Archive.org - Test Collection}
   */
  constructor(token, options = {}) {
    this.token = token ?? null;
    this.options = {
      testmode: options?.testmode ?? defaultIaOptions.testmode,
      setScanner: options?.setScanner ?? defaultIaOptions.setScanner,
    };
    this.httpClient = new HttpClient(token, this.options);
  }

  /**
   * Creates an Item in a Collection (Uploads a file and adds metadata).
   *
   * @param item - identifier, collection, mediatype, upload, metadata.
   * @param item.identifier - The unique identifier for the item.
   * @param item.collection - The collection that the item belongs to.
   * @param item.mediatype - The item mediatype.
   * @param item.upload - The item upload (`data`, or a `path` to read, plus an optional `filename`).
   * @param item.metadata - The item metadata (optional).
   * @returns The item identifier, metadata, and upload filename.
   * @throws Error when a metadata value contains non-ASCII characters.
   *
   * @see {@link https://archive.org/developers/ias3.html Archive.org - ias3 Internet archive S3-like API}
   */
  async createItem(item) {
    const { identifier, collection, mediatype, upload, metadata } = item;
    const packageInfo = await getPackageInfo();
    /*
     * In test mode the item keeps its own collection and is additionally
     * listed in `test_collection`, unless that already is its collection.
     */
    const alsoAddToTestCollection = Boolean(this.options?.testmode) && collection !== 'test_collection';
    const headers = {
      'x-amz-auto-make-bucket': 1,
      'x-archive-interactive-priority': 1,
      'x-archive-meta-identifier': identifier,
      'x-archive-meta-mediatype': mediatype,
      ...(alsoAddToTestCollection
        ? { 'x-archive-meta01-collection': collection, 'x-archive-meta02-collection': 'test_collection' }
        : { 'x-archive-meta-collection': collection }),
      ...(this.options?.setScanner && packageInfo && { 'x-archive-meta-scanner': `${packageInfo.name}-${packageInfo.version}` }),
    };
    if (metadata) {
      /* These fields are set from dedicated arguments/options, never from metadata. */
      const reservedKeys = ['identifier', 'mediatype', 'collection', 'scanner'];
      for (const [key, val] of Object.entries(metadata)) {
        if (reservedKeys.includes(key)) {
          continue;
        }
        /* Metadata travels in request headers, so only ASCII values are accepted. */
        if (val !== '' && !isASCII(val)) {
          throw new Error(`Metadata values cannot include non-ASCII characters on Item Create requests. Field <${key}> contains value '${val}'.`);
        }
        headers[`x-archive-meta-${key}`] = val;
      }
    }
    /* Read from disk without blocking the event loop. */
    const data = upload?.data ?? (upload?.path ? await fs.promises.readFile(upload.path) : null);
    const path = upload?.filename ? `${identifier}/${upload?.filename}` : identifier;
    await this.httpClient.makeRequest(endpoints.createItem, { data, path, headers });
    /* returns id and metadata */
    return {
      identifier,
      metadata,
      ...(upload && {
        upload: {
          filename: upload.filename,
          path,
        },
      }),
    };
  }

  /**
   * Returns Items based on filters and options.
   *
   * @param items - filters (collection, subject, creator) and options (fields, rows).
   * @param items.filters - Filter by collection, subject, creator.
   * @param items.options - Options to specify fields returned and amount of items.
   * @returns The responseHeader and response with items as docs.
   * @throws Error when none of collection, subject, or creator is given.
   *
   * @see {@link https://archive.org/advancedsearch.php Archive.org - Advanced Search API}
   */
  async getItems(items) {
    const { filters, options } = items || {};
    const { fields, rows } = options ?? {};
    const { collection, subject, creator } = filters ?? {};
    /*
     * Clauses are joined with `&`. A subject is quoted only when a collection
     * precedes it, and a creator only when some other clause precedes it;
     * this reproduces the query strings this method has always produced.
     */
    const clauses = [];
    if (collection) {
      clauses.push(`collection:(${collection})`);
    }
    if (subject) {
      clauses.push(collection ? `subject:("${subject}")` : `subject:(${subject})`);
    }
    if (creator) {
      clauses.push(clauses.length > 0 ? `creator:("${creator}")` : `creator:(${creator})`);
    }
    if (clauses.length === 0) {
      throw new Error('collection, subject, or creator required');
    }
    const params = {
      'q': clauses.join('&'),
      ...(fields && { 'fl[]': fields.replace(/ /g, '') }),
      'rows': Number(rows) || 50,
      'output': 'json',
      'sort[]': 'date desc',
    };
    return await this.httpClient.makeRequest(endpoints.getItems, { params });
  }

  /**
   * Returns an Item by identifier.
   *
   * @param identifier - The unique identifier for the item.
   * @returns Item metadata, file paths, and other info.
   *
   * @see {@link https://archive.org/developers/metadata.html Archive.org - Item Metadata API}
   */
  async getItem(identifier) {
    return await this.httpClient.makeRequest(endpoints.getItem, { path: identifier });
  }

  /**
   * Updates an Item by identifier and metadata.
   *
   * @param identifier - The unique identifier for the item.
   * @param metadata - The item metadata (left untouched; a copy is sent).
   * @returns Update response (success, error, task_id, log).
   * @throws Error when `metadata` is not an object.
   *
   * @see {@link https://archive.org/developers/metadata.html Archive.org - Item Metadata API}
   */
  async updateItem(identifier, metadata) {
    if (metadata === null || typeof metadata !== 'object') {
      throw new Error('metadata required');
    }
    const packageInfo = await getPackageInfo();
    /* Work on a copy so the caller's object does not gain a `scanner` key. */
    const fields = { ...metadata };
    if (this.options?.setScanner && packageInfo) {
      fields.scanner = `${packageInfo.name}-${packageInfo.version}`;
    }
    const patch = Object.entries(fields).map(([key, value]) => ({
      op: 'add',
      path: `/${key}`,
      value,
    }));
    const data = {
      '-target': 'metadata',
      '-patch': patch,
    };
    const headers = {
      'content-type': 'application/x-www-form-urlencoded;',
    };
    return await this.httpClient.makeRequest(endpoints.updateItem, { path: identifier, data, headers });
  }

  /**
   * Uploads a File to a parent Item.
   *
   * @param upload - identifier, mediatype, file.
   * @param upload.identifier - The unique identifier of the parent item.
   * @param upload.mediatype - The upload mediatype.
   * @param upload.file - The upload file (`filename` plus either `data` or a `path` to read).
   * @throws Error when the filename, or both the data and the path, are missing.
   *
   * @see {@link https://archive.org/developers/ias3.html Archive.org - ias3 Internet archive S3-like API}
   */
  async uploadFile(upload) {
    const { identifier, mediatype, file } = upload;
    const { path, filename, data: buffer } = file || {};
    const headers = {
      'x-archive-interactive-priority': 1,
      'x-archive-meta-mediatype': mediatype,
    };
    if (!filename) {
      throw new Error('filename required');
    }
    /* Read from disk without blocking the event loop. */
    const data = buffer ?? (path ? await fs.promises.readFile(path) : null);
    if (!data) {
      throw new Error('buffer or path required');
    }
    return await this.httpClient.makeRequest(endpoints.uploadFile, { data, path: `${identifier}/${filename}`, headers });
  }

  /**
   * Deletes a File from an Item.
   *
   * @param path - The path of the file [identifier/filename].
   *
   * @see {@link https://archive.org/developers/ias3.html Archive.org - ias3 Internet archive S3-like API}
   */
  async deleteFile(path) {
    const headers = {
      'x-archive-cascade-delete': 1,
    };
    return await this.httpClient.makeRequest(endpoints.deleteFile, { path, headers });
  }

  /**
   * Returns Tasks from an Item.
   *
   * @param identifier - The unique identifier for the item.
   * @param criteria - Parameters to filter item tasks (sent as additional query parameters).
   *
   * @see {@link https://archive.org/developers/tasks.html Archive.org - Tasks API}
   */
  async getItemTasks(identifier, criteria) {
    const params = {
      identifier,
      ...criteria,
    };
    return await this.httpClient.makeRequest(endpoints.getTask, { params });
  }
}
module.exports = InternetArchive;