iajs
Version:
Internet Archive JavaScript Client
776 lines (661 loc) • 21.3 kB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory(require('node-fetch'), require('xmldom')) : typeof define === 'function' && define.amd ? define(['node-fetch', 'xmldom'], factory) : (global = global || self, global.ia = factory(global.fetch, global.xmldom));
})(this, function (fetch, xmldom) {
// Run the factory body in strict mode (comments may precede the directive).
'use strict';
// If the injected `fetch` dependency is an object with its own `default`
// property (e.g. an ES-module namespace wrapper), unwrap it so that `fetch`
// is the callable itself; otherwise leave it untouched (including undefined).
fetch = fetch && Object.prototype.hasOwnProperty.call(fetch, 'default') ? fetch['default'] : fetch;
// First global object that exists, probed in the order globalThis, window,
// global, self; falls back to a fresh empty object when none is defined.
var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {};
/**
 * Run a CommonJS-style module body and hand back whatever it exported.
 *
 * @param {function(object, object): void} fn - Module body; invoked as
 *   `fn(module, module.exports)`.
 * @param {object} [module] - Ignored on input; always replaced by a fresh
 *   `{ exports: {} }` record before `fn` runs.
 * @returns {*} The final value of `module.exports` after `fn` has executed
 *   (so reassigning `module.exports` inside `fn` is honoured).
 */
function createCommonjsModule(fn, module) {
  module = { exports: {} };
  fn(module, module.exports);
  return module.exports;
}
// Vendored JSONP helper. The module body is executed through
// createCommonjsModule, so the local `fetchJsonp` variable ends up holding the
// function assigned to `module.exports` at the bottom of this block.
// The implementation reads `window` and `document` directly, so it can only be
// called where those globals exist.
var fetchJsonp = createCommonjsModule(function (module, exports) {
(function (global, factory) {
{
factory(exports, module);
}
})(commonjsGlobal, function (exports, module) {
// Fallbacks used when the caller's options omit a value.
// - timeout: delay handed to setTimeout before the request is rejected.
// - jsonpCallback: name of the query-string parameter that carries the
//   callback function name.
// - jsonpCallbackFunction: listed here but never read from this object; only
//   `options.jsonpCallbackFunction` is consulted below.
var defaultOptions = {
timeout: 5000,
jsonpCallback: 'callback',
jsonpCallbackFunction: null
};
// Build a callback name of the form `jsonp_<Date.now()>_<random integer>`.
function generateCallbackFunction() {
return 'jsonp_' + Date.now() + '_' + Math.ceil(Math.random() * 100000);
}
// Remove the named callback from `window`, falling back to assigning
// undefined when `delete` throws.
function clearFunction(functionName) {
// IE8 throws an exception when you try to delete a property on window
// http://stackoverflow.com/a/1824228/751089
try {
delete window[functionName];
} catch (e) {
window[functionName] = undefined;
}
}
// Detach the <script> element with the given id from the first <head>
// element, if such a script is present in the document.
function removeScript(scriptId) {
var script = document.getElementById(scriptId);
if (script) {
document.getElementsByTagName('head')[0].removeChild(script);
}
}
/**
 * Perform a JSONP request by injecting a <script> tag.
 *
 * @param {string} _url - Target URL; the callback parameter is appended to it.
 * @param {object} [options] - Optional `timeout`, `jsonpCallback`,
 *   `jsonpCallbackFunction` and `charset` settings.
 * @returns {Promise<{ok: boolean, json: function(): Promise<*>}>} Resolves
 *   with a minimal fetch-like response once the callback fires; rejects with
 *   an Error on timeout or when the script element reports an error.
 */
function fetchJsonp(_url) {
var options = arguments.length <= 1 || arguments[1] === undefined ? {} : arguments[1]; // to avoid param reassign
var url = _url;
var timeout = options.timeout || defaultOptions.timeout;
var jsonpCallback = options.jsonpCallback || defaultOptions.jsonpCallback;
var timeoutId = undefined;
return new Promise(function (resolve, reject) {
var callbackFunction = options.jsonpCallbackFunction || generateCallbackFunction();
var scriptId = jsonpCallback + '_' + callbackFunction;
// Global callback invoked by the injected script: resolve, then tear down
// the timer, the script element and the callback itself.
window[callbackFunction] = function (response) {
resolve({
ok: true,
// keep consistent with fetch API
json: function json() {
return Promise.resolve(response);
}
});
if (timeoutId) clearTimeout(timeoutId);
removeScript(scriptId);
clearFunction(callbackFunction);
}; // Check if the user set their own params, and if not add a ? to start a list of params
url += url.indexOf('?') === -1 ? '?' : '&';
var jsonpScript = document.createElement('script');
jsonpScript.setAttribute('src', '' + url + jsonpCallback + '=' + callbackFunction);
if (options.charset) {
jsonpScript.setAttribute('charset', options.charset);
}
jsonpScript.id = scriptId;
document.getElementsByTagName('head')[0].appendChild(jsonpScript);
timeoutId = setTimeout(function () {
reject(new Error('JSONP request to ' + _url + ' timed out'));
clearFunction(callbackFunction);
removeScript(scriptId);
// Re-install a stub under the same name so that a script which still
// executes after the timeout finds a function to call; the stub only
// removes itself.
window[callbackFunction] = function () {
clearFunction(callbackFunction);
};
}, timeout); // Caught if got 404/500
jsonpScript.onerror = function () {
reject(new Error('JSONP request to ' + _url + ' failed'));
clearFunction(callbackFunction);
removeScript(scriptId);
if (timeoutId) clearTimeout(timeoutId);
};
});
} // export as global function
/*
let local;
if (typeof global !== 'undefined') {
local = global;
} else if (typeof self !== 'undefined') {
local = self;
} else {
try {
local = Function('return this')();
} catch (e) {
throw new Error('polyfill failed because global object is unavailable in this environment');
}
}
local.fetchJsonp = fetchJsonp;
*/
module.exports = fetchJsonp;
});
});
// Base URL that is prefixed to request URLs when a CORS workaround is needed.
let CORS_PROXY = "https://iajs-cors.rchrd2.workers.dev";

// Short alias for percent-encoding a single URI component.
const enc = encodeURIComponent;

/**
 * Serialise a plain object into an `application/x-www-form-urlencoded` string.
 * @param {object} obj - Key/value pairs accepted by URLSearchParams.
 * @returns {string} The encoded query string (without a leading `?`).
 */
const paramify = obj => {
  const searchParams = new URLSearchParams(obj);
  return searchParams.toString();
};

/**
 * Normalise a value to an array.
 * @param {*} v - Any value.
 * @returns {Array} `v` itself when it is already an array, else `[v]`.
 */
const str2arr = v => {
  if (Array.isArray(v)) {
    return v;
  }
  return [v];
};
/**
 * Report whether a global `window` binding exists.
 * @returns {boolean} true when `typeof window` is anything but "undefined".
 */
const isInBrowser = () => typeof window !== "undefined";
/**
 * Route a URL through the CORS proxy when running in a browser.
 * @param {string} url - Absolute URL of the real endpoint.
 * @returns {string} `${CORS_PROXY}/${url}` in a browser, otherwise `url`
 *   unchanged.
 */
const corsWorkAround = url => (isInBrowser() ? `${CORS_PROXY}/${url}` : url);
/**
 * Fetch a URL and parse the response body as JSON.
 * @param {string} url - Request URL.
 * @param {object} [options] - Passed straight through to `fetch`.
 * @returns {Promise<*>} The parsed JSON body. The HTTP status is not
 *   inspected; a body that is not valid JSON rejects.
 */
const fetchJson = async function (url, options) {
  const response = await fetch(url, options);
  const parsed = await response.json();
  return parsed;
};
/**
 * Build the S3-style Authorization header for an auth object.
 * @param {object} auth - Auth object; `auth.values.s3.access` and
 *   `auth.values.s3.secret` are read.
 * @returns {object} `{ Authorization: "LOW <access>:<secret>" }` when both
 *   keys are truthy, otherwise an empty object.
 */
const authToHeaderS3 = function (auth) {
  const { access, secret } = auth.values.s3;
  if (!access || !secret) {
    return {};
  }
  return {
    Authorization: `LOW ${access}:${secret}`,
  };
};
/**
 * Build cookie headers for an auth object.
 *
 * A missing auth object (or one without `values.cookies`) is treated as an
 * anonymous session and yields no headers instead of raising a TypeError.
 *
 * @param {object|null|undefined} auth - Auth object; the `logged-in-sig` and
 *   `logged-in-user` entries of `auth.values.cookies` are read.
 * @returns {object} `{ Cookie }` (plus a duplicate `X-Cookie-Cors` header when
 *   running in a browser) when both cookies are truthy, otherwise `{}`.
 */
const authToHeaderCookies = function (auth) {
  const cookies = auth && auth.values && auth.values.cookies;
  if (!cookies) {
    return {};
  }
  const sig = cookies["logged-in-sig"];
  const user = cookies["logged-in-user"];
  if (!sig || !user) {
    return {};
  }
  const cookieStr = `logged-in-sig=${sig}; logged-in-user=${user}`;
  const headers = {
    Cookie: cookieStr,
  };
  if (isInBrowser()) {
    // Same cookie string under a second header name for the browser path.
    headers["X-Cookie-Cors"] = cookieStr;
  }
  return headers;
};
/**
 * Create a blank auth object.
 *
 * Every call builds a brand-new object graph, so callers may mutate the
 * result freely without affecting other auth objects.
 *
 * @returns {object} Auth object with `success: false`, `version: 1` and all
 *   credential / identity fields under `values` set to null.
 */
const newEmptyAuth = function () {
  const cookies = {
    "logged-in-sig": null,
    "logged-in-user": null,
  };
  const s3 = {
    access: null,
    secret: null,
  };
  const values = {
    cookies,
    email: null,
    itemname: null,
    s3,
    screenname: null,
  };
  return {
    success: false,
    values,
    version: 1,
  };
};
/**
 * Helpers that produce auth objects (same shape as `newEmptyAuth()`), either
 * from an email/password login, from S3 keys, or from session cookies.
 */
class Auth {
  constructor() {
    this.XAUTH_BASE = corsWorkAround("https://archive.org/services/xauthn/");
  }

  /**
   * Log in with email and password.
   *
   * @param {string} email
   * @param {string} password
   * @returns {Promise<object>} The parsed JSON response. When the response's
   *   `success` is falsy, its `values` are overlaid with the blank auth
   *   values. If the request or JSON parsing throws, a blank auth object is
   *   returned instead (best-effort: this method does not reject).
   */
  async login(email, password) {
    try {
      const fetchOptions = {
        method: "POST",
        body: `email=${enc(email)}&password=${enc(password)}`,
        headers: {
          "Content-Type": "application/x-www-form-urlencoded",
        },
      };
      const response = await fetch(`${this.XAUTH_BASE}?op=login`, fetchOptions);
      const data = await response.json();
      if (!data.success) {
        data.values = {
          ...data.values,
          ...newEmptyAuth().values,
        };
      }
      return data;
    } catch (e) {
      // TODO figure out syntax for catching error response
      return newEmptyAuth();
    }
  }

  /**
   * Build an auth object from S3 keys and fill in identity fields from the
   * `check_auth` endpoint.
   *
   * @param {string} access - S3 access key.
   * @param {string} secret - S3 secret key.
   * @param {object} [newAuth] - Auth object to populate (mutated in place).
   * @returns {Promise<object>} `newAuth`, with `success` set to 1, the S3 keys
   *   stored, and `email` / `itemname` / `screenname` copied from the
   *   response's `username` / `itemname` / `screenname`.
   */
  async fromS3(access, secret, newAuth = newEmptyAuth()) {
    newAuth.success = 1;
    newAuth.values.s3.access = access;
    newAuth.values.s3.secret = secret;
    const info = await fetchJson("https://s3.us.archive.org?check_auth=1", {
      headers: authToHeaderS3(newAuth),
    });
    newAuth.values.email = info.username;
    newAuth.values.itemname = info.itemname;
    newAuth.values.screenname = info.screenname;
    // Note the auth object is missing cookie fields.
    // It is still TBD if those are needed
    return newAuth;
  }

  /**
   * Build an auth object from session cookies by first exchanging them for S3
   * keys, then delegating to `fromS3`.
   *
   * @param {string} loggedInSig - Value of the `logged-in-sig` cookie.
   * @param {string} loggedInUser - Value of the `logged-in-user` cookie.
   * @param {object} [newAuth] - Auth object to populate (mutated in place).
   * @returns {Promise<object>} The populated auth object.
   * @throws {Error} When the S3-key lookup response has a falsy `success`.
   */
  async fromCookies(loggedInSig, loggedInUser, newAuth = newEmptyAuth()) {
    newAuth.values.cookies["logged-in-sig"] = loggedInSig;
    newAuth.values.cookies["logged-in-user"] = loggedInUser;
    const s3response = await fetch(corsWorkAround("https://archive.org/account/s3.php?output_json=1"), {
      headers: authToHeaderCookies(newAuth),
    });
    const s3 = await s3response.json();
    if (!s3.success) {
      // Previously an Error with an empty message, which gave callers nothing
      // to log or display.
      throw new Error("Could not retrieve S3 keys with the supplied cookies");
    }
    return await this.fromS3(s3.key.s3accesskey, s3.key.s3secretkey, newAuth);
  }
}
// Placeholder class: it declares no methods or fields yet.
class BookReaderAPI {}
/**
 * Client for the bookmarks ("favorites") endpoint.
 */
class FavoritesAPI {
  constructor() {
    this.API_BASE = corsWorkAround("https://archive.org/bookmarks.php");
    // TODO support this non-json explore endpoint
    this.EXPLORE_API_BASE = "https://archive.org/bookmarks-explore.php";
  }

  /**
   * Fetch the bookmarks of a user.
   *
   * @param {object} [args]
   * @param {string|null} [args.screenname] - User to look up; falls back to
   *   `auth.values.screenname` when omitted.
   * @param {object} [args.auth] - Auth object, used only for the fallback.
   * @returns {Promise<*>} Parsed JSON response.
   * @throws {Error} When no screenname can be determined.
   */
  async get({
    screenname = null,
    auth = newEmptyAuth()
  } = {}) {
    if (!screenname && auth && auth.values && auth.values.screenname) {
      screenname = auth.values.screenname;
    }
    if (!screenname) {
      throw new Error("Neither screenname or auth provided for bookmarks lookup");
    }
    const params = {
      output: "json",
      screenname
    };
    return await fetchJson(`${this.API_BASE}?${paramify(params)}`);
  }

  /**
   * Add an item to the authenticated user's bookmarks.
   *
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier.
   * @param {string} [args.comments] - Accepted for interface compatibility;
   *   currently not sent with the request.
   * @param {object} [args.auth] - Auth object carrying session cookies.
   * @returns {Promise<*>} See `modify`.
   */
  async add({
    identifier = null,
    comments = "",
    auth = newEmptyAuth()
  } = {}) {
    return await this.modify({
      identifier,
      add_bookmark: 1
    }, auth);
  }

  /**
   * Remove an item from the authenticated user's bookmarks.
   *
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier.
   * @param {object|null} [args.auth] - Auth object carrying session cookies;
   *   defaults to a blank auth (anonymous) like `add`.
   * @returns {Promise<*>} See `modify`.
   */
  async remove({
    identifier = null,
    auth = newEmptyAuth()
  } = {}) {
    return await this.modify({
      identifier,
      del_bookmark: identifier
    }, auth);
  }

  /**
   * Shared implementation of `add` / `remove`: looks up the item's title and
   * mediatype, then POSTs the bookmark change.
   *
   * @param {object} params - Query parameters; must contain `identifier`.
   *   The object is not modified.
   * @param {object|null} auth - Auth object; a null/undefined value is
   *   replaced by a blank auth.
   * @returns {Promise<*>} Parsed JSON response, or `{ error }` when the
   *   response body cannot be parsed as JSON.
   * @throws {Error} When the metadata lookup fails (original error attached
   *   as `cause`).
   */
  async modify(params, auth) {
    let title;
    let mediatype;
    try {
      const mdResponse = await iajs.MetadataAPI.get({
        identifier: params.identifier,
        path: "/metadata"
      });
      title = str2arr(mdResponse.result.title).join(", ");
      mediatype = mdResponse.result.mediatype;
    } catch (e) {
      const lookupError = new Error(`Metadata lookup failed for: ${params.identifier}`);
      lookupError.cause = e;
      throw lookupError;
    }
    // Work on a copy so the caller's object is left untouched.
    const reqParams = {
      ...params,
      title,
      mediatype,
      output: "json"
    };
    const response = await fetch(`${this.API_BASE}?${paramify(reqParams)}`, {
      method: "POST",
      headers: authToHeaderCookies(auth || newEmptyAuth())
    });
    return await response.json().catch(e => {
      return {
        error: e
      };
    });
  }
}
/**
 * Client for the GifCities search endpoint.
 */
class GifcitiesAPI {
  constructor() {
    this.API_BASE = "https://gifcities.archive.org/api/v1/gifsearch";
  }

  /**
   * Search for GIFs.
   * @param {object} [args]
   * @param {string|null} [args.q] - Search text.
   * @returns {Promise<*>} An empty array when `q` is null (or omitted),
   *   otherwise the parsed JSON response.
   */
  async get({ q = null } = {}) {
    if (q !== null) {
      const endpoint = `${this.API_BASE}?q=${enc(q)}`;
      return fetchJson(endpoint);
    }
    return [];
  }

  /**
   * Positional-argument shorthand for `get({ q })`.
   * @param {string} q - Search text.
   * @returns {Promise<*>} Same as `get`.
   */
  async search(q) {
    const args = { q };
    return this.get(args);
  }
}
/**
 * Client for the item metadata endpoint (read and write).
 */
class MetadataAPI {
  constructor() {
    this.READ_API_BASE = "https://archive.org/metadata";
    this.WRITE_API_BASE = corsWorkAround("https://archive.org/metadata");
  }

  /**
   * Read an item's metadata, or a sub-path of it.
   *
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier.
   * @param {string} [args.path] - Optional sub-path such as `"metadata"` or
   *   `"files/0"`. Leading slashes are stripped, so `"/metadata"` and
   *   `"metadata"` produce the same URL; an empty path adds no trailing slash.
   * @param {object} [args.auth] - Auth object; its S3 keys are sent as an
   *   Authorization header when present.
   * @returns {Promise<*>} Parsed JSON response.
   */
  async get({
    identifier = null,
    path = "",
    auth = newEmptyAuth()
  } = {}) {
    const options = {
      headers: authToHeaderS3(auth)
    };
    // Normalise the sub-path so the URL never contains "//" or ends in "/".
    const subPath = path == null ? "" : String(path).replace(/^\/+/, "");
    const itemUrl = `${this.READ_API_BASE}/${identifier}`;
    const url = subPath ? `${itemUrl}/${subPath}` : itemUrl;
    return fetchJson(url, options);
  }

  /**
   * Submit a JSON patch against one target of an item.
   *
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier.
   * @param {string} [args.target] - Sent as `-target`.
   * @param {number} [args.priority] - Sent as `priority`.
   * @param {*} [args.patch] - JSON-serialised and sent as `-patch`.
   * @param {object} [args.auth] - Auth object; its S3 access/secret keys are
   *   sent as form fields.
   * @returns {Promise<*>} Parsed JSON response.
   */
  async patch({
    identifier = null,
    target = "metadata",
    priority = -5,
    patch = {},
    auth = newEmptyAuth()
  } = {}) {
    // https://archive.org/services/docs/api/metadata.html#targets
    const reqParams = {
      "-target": target,
      "-patch": JSON.stringify(patch),
      priority,
      secret: auth.values.s3.secret,
      access: auth.values.s3.access
    };
    const url = `${this.WRITE_API_BASE}/${identifier}`;
    const body = paramify(reqParams);
    const response = await fetch(url, {
      method: "POST",
      body,
      headers: {
        "Content-Type": "application/x-www-form-urlencoded"
      }
    });
    return await response.json();
  }
}
/**
 * Client for the related-items endpoint.
 */
class RelatedAPI {
  constructor() {
    this.API_BASE = "https://be-api.us.archive.org/mds/v1";
  }

  /**
   * Fetch items related to the given item.
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier.
   * @returns {Promise<*>} Parsed JSON response.
   */
  async get({ identifier = null } = {}) {
    const endpoint = `${this.API_BASE}/get_related/all/${identifier}`;
    return fetchJson(endpoint);
  }
}
/**
 * Client for reading and posting item reviews.
 */
class ReviewsAPI {
  constructor() {
    this.WRITE_API_BASE = corsWorkAround("https://archive.org/services/reviews.php?identifier=");
    this.READ_API_BASE = "https://archive.org/metadata";
  }

  /**
   * Read the reviews of an item.
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier.
   * @returns {Promise<*>} Parsed JSON response.
   */
  async get({ identifier = null } = {}) {
    const endpoint = `${this.READ_API_BASE}/${identifier}/reviews`;
    return fetchJson(endpoint);
  }

  /**
   * Post a review for an item.
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier.
   * @param {string|null} [args.title] - Review title.
   * @param {string|null} [args.body] - Review text.
   * @param {number|null} [args.stars] - Star rating.
   * @param {object} [args.auth] - Auth object; its S3 keys are sent as an
   *   Authorization header when present.
   * @returns {Promise<*>} Parsed JSON response.
   */
  async add({
    identifier = null,
    title = null,
    body = null,
    stars = null,
    auth = newEmptyAuth()
  } = {}) {
    const payload = JSON.stringify({ title, body, stars });
    const requestHeaders = Object.assign(
      { "Content-Type": "application/json" },
      authToHeaderS3(auth)
    );
    const response = await fetch(`${this.WRITE_API_BASE}${identifier}`, {
      method: "POST",
      body: payload,
      headers: requestHeaders
    });
    return response.json();
  }
}
/**
 * Client for the S3-like upload endpoint.
 */
class S3API {
  constructor() {
    this.API_BASE = "https://s3.us.archive.org";
  }

  /**
   * Fetch the raw listing of an item.
   *
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier (required).
   * @param {object} [args.auth] - Accepted for interface compatibility;
   *   currently not sent with the request.
   * @returns {Promise<string>} The response body as text (not parsed).
   * @throws {Error} When `identifier` is missing.
   */
  async ls({
    identifier = null,
    auth = newEmptyAuth()
  } = {}) {
    // throw new Error("TODO parse that XML");
    if (!identifier) {
      throw new Error("Missing required args");
    }
    return await (await fetch(`${this.API_BASE}/${identifier}`)).text();
  }

  /**
   * Create an item without uploading a file: an `upload` call with
   * `autocreate: true` and no key/body.
   *
   * @param {object} [args] - Same meaning as the matching `upload` arguments.
   * @returns {Promise<*>} See `upload`.
   */
  async createEmptyItem({
    identifier = null,
    testItem = false,
    metadata = {},
    headers = {},
    wait = true,
    auth = newEmptyAuth()
  } = {}) {
    return await this.upload({
      identifier,
      testItem,
      metadata,
      headers,
      wait,
      auth,
      autocreate: true
    });
  }

  /**
   * PUT a file (or just item-level headers) to an item.
   *
   * @param {object} [args]
   * @param {string|null} [args.identifier] - Item identifier (required).
   * @param {string|null} [args.key] - File name within the item; when falsy
   *   the request targets the item URL itself.
   * @param {*} [args.body] - Request body.
   * @param {boolean} [args.autocreate] - Adds `x-archive-auto-make-bucket: 1`.
   * @param {boolean} [args.skipDerive] - Adds `x-archive-queue-derive: 0`.
   * @param {boolean} [args.testItem] - Sets the `collection` metadata to
   *   `test_collection` for this request (the caller's object is not changed).
   * @param {boolean} [args.keepOldVersions] - Sets
   *   `x-archive-keep-old-version` to 1 or 0.
   * @param {object} [args.metadata] - Each key becomes one
   *   `x-archive-meta<index>-<key>` header per value, with `_` in the key
   *   replaced by `--`.
   * @param {object} [args.headers] - Extra headers; override metadata headers.
   * @param {boolean} [args.wait] - When false, return the raw response
   *   instead of its text.
   * @param {object} [args.auth] - Auth object; its S3 keys are sent as an
   *   Authorization header when present.
   * @returns {Promise<*>} The response text, or the response object when
   *   `wait` is false.
   * @throws {Error} When `identifier` is missing or the status is not 200.
   */
  async upload({
    identifier = null,
    key = null,
    body = "",
    autocreate = false,
    skipDerive = false,
    testItem = false,
    keepOldVersions = true,
    metadata = {},
    headers = {},
    wait = true,
    auth = newEmptyAuth()
  } = {}) {
    if (!identifier) {
      throw new Error("Missing required args");
    }
    // Copy before overriding so the caller's metadata object is untouched.
    const effectiveMetadata = testItem ? {
      ...metadata,
      collection: "test_collection"
    } : metadata;
    const requestHeaders = {};
    Object.keys(effectiveMetadata).forEach(name => {
      const headerName = name.replace(/_/g, "--");
      // NOTE(review): the index is emitted without zero padding (meta0, meta1,
      // ...) exactly as before — confirm this is what the service expects.
      str2arr(effectiveMetadata[name]).forEach((value, idx) => {
        requestHeaders[`x-archive-meta${idx}-${headerName}`] = value;
      });
    });
    Object.assign(requestHeaders, headers, authToHeaderS3(auth));
    if (autocreate) {
      requestHeaders["x-archive-auto-make-bucket"] = 1;
    }
    if (skipDerive) {
      requestHeaders["x-archive-queue-derive"] = 0;
    }
    requestHeaders["x-archive-keep-old-version"] = keepOldVersions ? 1 : 0;
    const requestUrl = key ? `${this.API_BASE}/${identifier}/${key}` : `${this.API_BASE}/${identifier}`;
    const response = await fetch(requestUrl, {
      method: "PUT",
      headers: requestHeaders,
      body
    });
    if (response.status !== 200) {
      // NOTE this may not be the right thing to check.
      // Maybe different codes are okay
      throw new Error(`Response: ${response.status}`);
    }
    if (!wait) {
      return response;
    }
    // The finished response seems to be empty
    return await response.text();
  }
}
/**
 * Client for the advanced search endpoint.
 */
class SearchAPI {
  constructor() {
    this.API_BASE = "https://archive.org/advancedsearch.php";
  }

  /**
   * Run a search.
   * @param {object} [args]
   * @param {string|object|null} [args.q] - Query string, or an object that is
   *   converted with `buildQueryFromObject`.
   * @param {number} [args.page] - Sent as `page`.
   * @param {string[]} [args.fields] - Sent as `fl`.
   * @param {...*} [args.options] - Any other keys are forwarded as query
   *   parameters; `output` is always forced to "json".
   * @returns {Promise<*>} Parsed JSON response.
   * @throws {Error} When `q` is falsy.
   */
  async get({
    q = null,
    page = 1,
    fields = ["identifier"],
    ...options
  } = {}) {
    if (!q) {
      throw new Error("Missing required arg 'q'");
    }
    const query = typeof q === "object" ? this.buildQueryFromObject(q) : q;
    const reqParams = {
      q: query,
      page,
      fl: fields,
      ...options,
      output: "json"
    };
    return fetchJson(`${this.API_BASE}?${paramify(reqParams)}`);
  }

  /**
   * Positional-argument shorthand for `get({ q })`.
   * @param {string|object} q - Query string or query object.
   * @returns {Promise<*>} Same as `get`.
   */
  async search(q) {
    return this.get({ q });
  }

  /**
   * Turn `{ field: value }` pairs into a query string.
   *
   * Scalar values become `field:"value"`; arrays become
   * `field:( "a" OR "b" )`. Clauses are joined with " AND ".
   *
   * @param {object} qObject - Field/value pairs.
   * @returns {string} The assembled query.
   */
  buildQueryFromObject(qObject) {
    // Map dictionary to a key=val search query
    const clauses = [];
    for (const field of Object.keys(qObject)) {
      const value = qObject[field];
      if (Array.isArray(value)) {
        const alternatives = value.map(v => `"${v}"`).join(" OR ");
        clauses.push(`${field}:( ${alternatives} )`);
      } else {
        clauses.push(`${field}:"${value}"`);
      }
    }
    return clauses.join(" AND ");
  }
}
// Placeholder class: it declares no methods or fields yet.
class SearchTextAPI {}
/**
 * Client for the short view-count endpoint.
 */
class ViewsAPI {
  constructor() {
    // https://be-api.us.archive.org/views/v1/short/<identifier>[,<identifier>,...]
    this.API_BASE = "https://be-api.us.archive.org/views/v1/short";
  }

  /**
   * Fetch view counts for one or more items.
   * @param {object} [args]
   * @param {string|string[]|null} [args.identifier] - A single identifier, or
   *   an array that is joined with commas.
   * @returns {Promise<*>} Parsed JSON response.
   */
  async get({ identifier = null } = {}) {
    let idList = identifier;
    if (Array.isArray(identifier)) {
      idList = identifier.join(",");
    }
    return fetchJson(`${this.API_BASE}/${idList}`);
  }
}
/**
 * Client for the Wayback Machine endpoints (availability, CDX, Save Page Now).
 */
class WaybackAPI {
  constructor() {
    this.AVAILABLE_API_BASE = "https://archive.org/wayback/available";
    this.CDX_API_BASE = corsWorkAround("https://web.archive.org/cdx/search/");
    this.SAVE_API_BASE = corsWorkAround("https://web.archive.org/save/");
  }

  /**
   * Look up the closest archived snapshot of a URL.
   *
   * Uses JSONP in a browser and `fetch` elsewhere.
   *
   * @see https://archive.org/help/wayback_api.php
   * @param {object} [args]
   * @param {string|null} [args.url] - URL to look up.
   * @param {string|null} [args.timestamp] - Sent only when not null.
   * @returns {Promise<*>} Parsed JSON response.
   */
  async available({
    url = null,
    timestamp = null
  } = {}) {
    const params = {
      url
    };
    if (timestamp !== null) {
      params.timestamp = timestamp;
    }
    const searchParams = paramify(params);
    const fetchFunction = isInBrowser() ? fetchJsonp : fetch;
    const response = await fetchFunction(`${this.AVAILABLE_API_BASE}?${searchParams}`);
    return await response.json();
  }

  /**
   * Query the CDX server.
   *
   * @see https://github.com/internetarchive/wayback/tree/master/wayback-cdx-server
   * @param {object} [options] - Query parameters; `output` is always forced
   *   to "json". The object is not modified.
   * @returns {Promise<*>} The parsed JSON body, or `{ error: <trimmed body> }`
   *   when the body is not valid JSON.
   */
  async cdx(options = {}) {
    // Copy instead of writing `output` into the caller's object.
    const query = {
      ...options,
      output: "json"
    };
    const searchParams = paramify(query);
    const response = await fetch(`${this.CDX_API_BASE}?${searchParams}`);
    const raw = await response.text();
    try {
      return JSON.parse(raw);
    } catch (e) {
      // The body is not JSON: surface the text as the error.
      return {
        error: raw.trim()
      };
    }
  }

  /**
   * Submit a URL to Save Page Now.
   *
   * @see https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA/edit
   * @param {object} [args]
   * @param {string} args.url - URL to capture (required); a leading
   *   `http://` or `https://` is stripped before sending.
   * @param {number} [args.captureOutlinks] - Sent as `capture_outlinks`.
   * @param {boolean} [args.captureAll] - Sent as `capture_all` ("1"/"0").
   * @param {boolean} [args.captureScreenshot] - Sent as `capture_screenshot`.
   * @param {boolean} [args.skipFirstArchive] - Sent as `skip_first_archive`.
   * @param {*} [args.ifNotArchivedWithin] - Sent as `if_not_archived_within`
   *   only when truthy.
   * @param {object} [args.auth] - Auth object; its S3 keys are sent as an
   *   Authorization header when present.
   * @returns {Promise<*>} Parsed JSON response.
   * @throws {Error} When `url` is missing.
   */
  async savePageNow({
    url = null,
    captureOutlinks = 0,
    captureAll = true,
    captureScreenshot = false,
    skipFirstArchive = true,
    ifNotArchivedWithin = null,
    auth = newEmptyAuth()
  } = {}) {
    if (!url) {
      // Previously surfaced as a TypeError from calling `.replace` on null.
      throw new Error("Missing required arg 'url'");
    }
    url = url.replace(/^https?\:\/\//, "");
    const params = {
      url,
      capture_outlinks: captureOutlinks,
      capture_all: captureAll ? "1" : "0",
      capture_screenshot: captureScreenshot ? "1" : "0",
      skip_first_archive: skipFirstArchive ? "1" : "0"
    };
    if (ifNotArchivedWithin) {
      params.if_not_archived_within = ifNotArchivedWithin;
    }
    const response = await fetch(this.SAVE_API_BASE, {
      credentials: "omit",
      method: "POST",
      body: paramify(params),
      headers: {
        Accept: "application/json",
        "Content-Type": "application/x-www-form-urlencoded",
        ...authToHeaderS3(auth)
      }
    });
    return await response.json();
  }
}
/**
 * Client for browsing the contents of archive files stored inside an item.
 */
class ZipFileAPI {
  /**
   * List the contents of a zip file in an item
   * Eg: https://archive.org/download/goodytwoshoes00newyiala/goodytwoshoes00newyiala_jp2.zip/
   *
   * @param {string} identifier - Item identifier.
   * @param {string} zipPath - Path of the archive within the item; must end
   *   in one of .7z .cbr .cbz .cdr .iso .rar .tar .zip.
   * @param {object} [auth] - Auth object; its session cookies are sent when
   *   present.
   * @returns {Promise<Array<{key: string, href: string, jpegUrl: (string|null),
   *   timestamp: string, size: string}>>} One entry per table row that has
   *   exactly four cells; rows that cannot be read are skipped.
   * @throws {Error} When the extension is not supported, the response status
   *   is not 200, or the listing table is missing from the page.
   */
  async ls(identifier, zipPath, auth = newEmptyAuth()) {
    if (!zipPath.match(/\.(7z|cbr|cbz|cdr|iso|rar|tar|zip)$/)) {
      throw new Error("Invalid zip type");
    }
    const requestUrl = corsWorkAround(`https://archive.org/download/${identifier}/${enc(zipPath)}/`);
    const response = await fetch(requestUrl, {
      headers: authToHeaderCookies(auth)
    });
    if (response.status !== 200) {
      // Error() stringifies its argument, so passing an object here used to
      // produce the useless message "[object Object]".
      throw new Error(`not found (HTTP ${response.status})`);
    }
    const html = await response.text();
    // This page has <td>'s without closing el tags (took a while to
    // figure this out). This breaks the DOMparser, so I added a workaround
    // to add closing tags
    const tableMatch = html.match(/(<table class="archext">[\w\W]*<\/table>)/g);
    if (!tableMatch) {
      throw new Error("Zip listing table not found in response");
    }
    const tableHtml = tableMatch[0].replace(/(<td[^>]*>[\w\W]*?)(?=<(?:td|\/tr))/g, "$1</td>");
    const table = new xmldom.DOMParser().parseFromString(tableHtml);
    const rows = table.getElementsByTagName("tr");
    const results = [];
    for (let i = 0; i < rows.length; i++) {
      const cells = rows.item(i).getElementsByTagName("td");
      if (cells.length !== 4) continue;
      try {
        const a = cells.item(0).getElementsByTagName("a").item(0);
        results.push({
          key: a.textContent,
          href: "https:" + a.getAttribute("href"),
          jpegUrl: (() => {
            try {
              return "https:" + cells.item(1).getElementsByTagName("a").item(0).getAttribute("href");
            } catch (e) {
              // No link in the second cell: there is no JPEG rendition.
              return null;
            }
          })(),
          timestamp: cells.item(2).textContent,
          size: cells.item(3).textContent
        });
      } catch (e) {
        // Deliberate best-effort: a row without the expected link is skipped
        // rather than failing the whole listing.
      }
    }
    return results;
  }
}
// Public API object: one shared instance of each client class, keyed by the
// class name. Other code in this file reaches sibling clients through it
// (e.g. `iajs.MetadataAPI`).
const iajs = {
Auth: new Auth(),
BookReaderAPI: new BookReaderAPI(),
GifcitiesAPI: new GifcitiesAPI(),
FavoritesAPI: new FavoritesAPI(),
MetadataAPI: new MetadataAPI(),
RelatedAPI: new RelatedAPI(),
ReviewsAPI: new ReviewsAPI(),
SearchAPI: new SearchAPI(),
SearchTextAPI: new SearchTextAPI(),
S3API: new S3API(),
ViewsAPI: new ViewsAPI(),
WaybackAPI: new WaybackAPI(),
ZipFileAPI: new ZipFileAPI()
};
return iajs;
});