UNPKG

suttacentral-api

Version:

SuttaCentral API cached proxy

588 lines (554 loc) 23.1 kB
(function(exports) {
    const fs = require('fs');
    const path = require('path');
    const http = require('http');
    const https = require('https');
    const Definitions = require('./definitions');
    const SuttaCentralId = require('./sutta-central-id');
    const { Memoizer, GuidStore, Files } = require('memo-again');
    const { MerkleJson } = require('merkle-json');
    const LOCAL = Files.LOCAL_DIR;
    const EXPANSION_PATH = path.join(LOCAL, 'expansion.json');
    const UID_EXPANSION_PATH = path.join(LOCAL, 'uid_expansion.json');
    const DEFAULT_LANGUAGE = 'en';
    const DEFAULT_API_URL = 'https://suttacentral.net/api';
    const DEFAULT_API2_URL = 'https://staging.suttacentral.net/api';
    const UID_EXPANSION_URL = 'https://raw.githubusercontent.com/'+
        'suttacentral/sc-data/master/misc/uid_expansion.json';
    const ANY_LANGUAGE = '*';
    const ANY_TRANSLATOR = '*';
    const PO_SUFFIX_LENGTH = '.po'.length;
    const { logger } = require('log-instance');
    const APP_DIR = path.join(__dirname, '..');
    const API_DIR = path.join(APP_DIR, 'api');
    const DAY_SECONDS = 24*60*60;

    // Lazily created instance backing the static ScApi.loadJson()
    let singleton;
    // Number of HTTP GET requests started but not yet answered/failed
    let httpMonitor = 0;
    // Instance counter used only to build default instance names
    let scapi_instances = 0;

    /**
     * Client for the SuttaCentral REST API that caches JSON responses on
     * disk and converts API responses into segment lists.
     *
     * The logging methods used throughout (this.debug/info/log/warn) are
     * not defined in this class; presumably they are installed by
     * logInstance() in the constructor -- TODO confirm against log-instance.
     */
    class ScApi {
        /**
         * @param {object} [opts]
         * @param {object} [opts.logger] - logger whose logInstance() is
         *      applied to this instance (default: log-instance logger)
         * @param {string} [opts.name] - instance name
         * @param {string} [opts.language] - default language ('en')
         * @param {string} [opts.translator] - default translator
         * @param {Array}  [opts.expansion] - abbreviation expansion table
         * @param {object} [opts.memoizer] - replaces the default Memoizer
         * @param {object} [opts.apiStore] - replaces the default GuidStore
         * @param {number} [opts.apiCacheSeconds] - maximum age of a cached
         *      response in seconds (default: 7 days)
         * @param {string} [opts.apiUrl] - primary API base URL
         * @param {string} [opts.apiUrl2] - fallback API base URL
         */
        constructor(opts={}) {
            (opts.logger || logger).logInstance(this, opts);
            scapi_instances++;
            this.name = opts.name || `ScApi@${scapi_instances}`;
            this.language = opts.language || DEFAULT_LANGUAGE;
            this.translator = opts.translator;
            this.expansion = opts.expansion || [{}];
            this.memoizer = opts.memoizer || new Memoizer({
                storeName: 'scapi_memo',
                storePath: API_DIR,
                readFile: opts.readFile,
                writeFile: opts.writeFile,
                writeMem: opts.writeMem,
                logger: this,
            });
            this.apiStore = opts.apiStore || new GuidStore({
                type: 'ApiStore',
                suffix: '.json',
                storeName: 'api',
            });
            this.apiCacheSeconds = opts.apiCacheSeconds || 7*DAY_SECONDS;
            this.mj = new MerkleJson({
                hashTag: 'guid',
            });
            this.apiUrl = opts.apiUrl || DEFAULT_API_URL;
            this.apiUrl2 = opts.apiUrl2 || DEFAULT_API2_URL;
        }

        /**
         * Load JSON via a shared, lazily created ScApi instance.
         * @param {string} url
         * @returns {Promise<*>} parsed JSON
         */
        static loadJson(url) {
            singleton = singleton || new ScApi({name:'ScApi@singleton'});
            return singleton.loadJson(url);
        }

        /**
         * Return the JSON for url, reading it from the on-disk cache file
         * when that file is younger than apiCacheSeconds and otherwise
         * fetching it with loadJsonRest() and rewriting the cache file.
         * @param {string} url
         * @returns {Promise<*>} parsed JSON
         * @throws rethrows any read/fetch/parse error after logging it
         */
        async loadJson(url) {
            try {
                const guid = this.mj.hash({
                    method: 'get',
                    url,
                });
                const cachedPath = this.apiStore.guidPath(guid);
                const stat = fs.existsSync(cachedPath) &&
                    fs.statSync(cachedPath);
                // A missing file is given the maximum age so that the
                // freshness test below fails and a fetch is forced.
                const age = stat && (Date.now() - stat.ctimeMs)/1000 ||
                    this.apiCacheSeconds;
                let result;
                if (age < this.apiCacheSeconds) {
                    result = JSON.parse(
                        await fs.promises.readFile(cachedPath));
                    this.debug(`ScApi.loadJson(${url}) => cached:${guid}`);
                } else {
                    result = await this.loadJsonRest(url);
                    fs.writeFileSync(cachedPath,
                        JSON.stringify(result,null,2));
                    this.log(`loadJson(${url}) => fresh:${guid}`);
                }
                return result;
            } catch (e) {
                this.warn(`loadJson(${url})`,e);
                throw e;
            }
        }

        /**
         * Fetch JSON from url. URLs containing "suttaplex" go through a
         * memoized fetch. If the fetch fails and apiUrl2 is set, the
         * request is retried once with apiUrl replaced by apiUrl2.
         * @param {string} url
         * @returns {Promise<*>} parsed JSON
         * @throws the retry error, or the original error if no apiUrl2
         */
        async loadJsonRest(url) {
            let { apiUrl, apiUrl2, memoizer, } = this;
            try {
                if (/suttaplex/.test(url)) {
                    if (!this.memo_suttaplex) {
                        // The next two statements are kept textually
                        // verbatim: the memoizer may derive its cache keys
                        // from the function's name and/or source text, so
                        // restyling could orphan existing cache entries
                        // -- TODO confirm against memo-again.
                        let that = this;
                        let suttaplex = async u=>{ return await that.loadJsonRestMaybe(u); }
                        this.memo_suttaplex =
                            memoizer.memoize(suttaplex, 'sc');
                    }
                    return await this.memo_suttaplex(url);
                }
                return await this.loadJsonRestMaybe(url);
            } catch(e) {
                if (apiUrl2) {
                    const writeFile = memoizer.writeFile;
                    const url2 = url.replace(apiUrl, apiUrl2);
                    this.warn(`loadJsonRest(RETRY) ${url2}`, e.message);
                    try {
                        memoizer.writeFile = false; // don't cache apiUrl2 data
                        return await this.loadJsonRestMaybe(url2);
                    } catch(e2) {
                        this.warn(`loadJsonRest(URL2-FAILED) ${url2}`,
                            e2.message);
                        throw e2;
                    } finally {
                        // always restore the caller's memoizer setting
                        memoizer.writeFile = writeFile;
                    }
                } else {
                    this.warn(`loadJsonRest(FAILED) ${url}`, e.message);
                }
                throw e;
            }
        }

        /**
         * Perform a single HTTP(S) GET and parse the body as JSON.
         * Rejects on a non-200 status, on a content-type other than
         * application/json or text/plain, on request errors, on timeout
         * and on unparseable bodies.
         * @param {string} url
         * @returns {Promise<*>} parsed JSON
         */
        loadJsonRestMaybe(url) {
            const that = this;
            const pbody = (resolve, reject) => {
                try {
                    const httpx = url.startsWith('https') ? https : http;
                    if (++httpMonitor > 2) {
                        // We are overwhelming SuttaCentralApi
                        // implement throttling using Queue
                        // (see abstract-tts.js)
                        that.warn(`ScApi.loadJsonRestMaybe() `+
                            `httpMonitor:${httpMonitor} ${url}`);
                    }
                    that.info(`loadJsonRestMaybe() ${url}`);
                    const req = httpx.get(url, res => {
                        httpMonitor--;
                        const { statusCode } = res;
                        const contentType = res.headers['content-type'];
                        let error;
                        if (statusCode !== 200) {
                            error = new Error(`Request Failed.\n` +
                                `Status Code: ${statusCode}`);
                        } else if (/^application\/json/.test(contentType)) {
                            // OK
                        } else if (/^text\/plain/.test(contentType)) {
                            // OK
                        } else {
                            error = new Error(
                                `Invalid content-type:${contentType}\n` +
                                `Expected application/json for url:${url}`);
                        }
                        if (error) {
                            // consume response data to free up memory
                            res.resume();
                            reject(error);
                            return;
                        }

                        res.setEncoding('utf8');
                        let rawData = '';
                        res.on('data', (chunk) => { rawData += chunk; });
                        res.on('end', () => {
                            try {
                                const result = JSON.parse(rawData);
                                that.info(`loadJsonRestMaybe()`,
                                    `${url} => ${rawData.length}C`);
                                resolve(result);
                            } catch (e) {
                                reject(e);
                            }
                        });
                    }).on('error', (e) => {
                        that.warn(`loadJsonRestMaybe(ERROR)`, e.message);
                        httpMonitor--;
                        reject(e);
                    }).on('timeout', (e) => {
                        that.warn(`loadJsonRestMaybe(TIMEOUT)`,
                            e&&e.message);
                        req.abort();
                        // The 'timeout' event carries no argument, so
                        // reject with a real Error: callers read
                        // e.message in their catch blocks.
                        reject(e || new Error(
                            `loadJsonRestMaybe(TIMEOUT) ${url}`));
                    });
                } catch(e) {
                    reject(e);
                }
            };
            return new Promise(pbody);
        }

        /**
         * Return the uid expansion table, reading UID_EXPANSION_PATH when
         * it exists and otherwise downloading url and saving it there.
         * @param {string} [url=UID_EXPANSION_URL]
         * @returns {Promise<*>} parsed JSON
         */
        static async loadUidExpansion(url=UID_EXPANSION_URL) {
            try {
                if (fs.existsSync(UID_EXPANSION_PATH)) {
                    return JSON.parse(
                        await fs.promises.readFile(UID_EXPANSION_PATH));
                }
                const res = await ScApi.loadJson(url);
                logger.info(`${url}`);
                fs.writeFileSync(UID_EXPANSION_PATH,
                    JSON.stringify(res,null,2));
                return res;
            } catch(e) {
                logger.error(`${url} ${e.message}`);
                throw e;
            }
        }

        /**
         * Return the abbreviation expansion table, reading EXPANSION_PATH
         * when it exists and otherwise downloading `${apiUrl}/expansion`
         * and saving it there.
         * @param {string} [apiUrl=DEFAULT_API_URL]
         * @returns {Promise<*>} parsed JSON
         */
        static async loadExpansion(apiUrl=DEFAULT_API_URL) {
            const url = `${apiUrl}/expansion`;
            try {
                if (fs.existsSync(EXPANSION_PATH)) {
                    const buf = await fs.promises.readFile(EXPANSION_PATH);
                    return JSON.parse(buf);
                }
                const res = await ScApi.loadJson(url);
                logger.info(`${url}`);
                fs.writeFileSync(EXPANSION_PATH,
                    JSON.stringify(res,null,2));
                return res;
            } catch(e) {
                logger.error(`${url}`, e);
                throw e;
            }
        }

        /**
         * Load the expansion tables. this.initialized is undefined before
         * the first call, false while loading and true when done.
         * @returns {Promise<ScApi>} this
         * @throws {Error} if called while a previous call is in progress
         *      (or after a previous call failed, since the flag then
         *      remains false)
         */
        async initialize() {
            try {
                if (this.initialized === false) {
                    throw new Error("initialize() in progress");
                }
                if (this.initialized === true) {
                    return this;
                }
                this.initialized = false;
                this.log(`initialize() apiUrl:${this.apiUrl}`);
                this.expansion = await ScApi.loadExpansion(this.apiUrl);
                this.uid_expansion = await ScApi.loadUidExpansion();
                this.initialized = true;
                return this;
            } catch(e) {
                this.warn(e);
                throw e;
            }
        }

        /**
         * Look up abbr in the first element of the expansion table.
         * @param {string} abbr
         * @returns {*} the table entry, or undefined if absent
         * @throws {Error} if initialize() has not completed
         */
        expandAbbreviation(abbr) {
            if (!this.initialized) {
                throw new Error('initialize() must be called');
            }
            return this.expansion[0][abbr];
        }

        /**
         * Wrap raw HTML in an API-text shaped object, filling in
         * placeholder values for anything opts does not supply, and
         * convert it with suttaFromApiText().
         * @param {string} html
         * @param {object} [opts] - may provide lang, author_uid, uid,
         *      translation and suttaplex overrides
         * @returns {object} see suttaFromApiText()
         * @throws {Error} if initialize() has not completed
         */
        suttaFromHtml(html, opts={}) {
            if (!this.initialized) {
                throw new Error('initialize() must be called');
            }
            const lang = opts.lang || 'en';
            const author_uid = opts.author_uid || 'sujato';
            this.info(`suttaFromHtml(${JSON.stringify(opts)})`);
            const apiText = Object.assign({
                lang,
                uid: "uid?",
            }, opts);
            apiText.translation = Object.assign({
                title: "title?",
                text: html,
                lang,
                author_uid,
            }, opts.translation);
            apiText.suttaplex = Object.assign({
                uid: "suttaplex.uid?",
                root_lang: "pli",
                original_title: "suttaplex.original_title?",
            }, opts.suttaplex);

            return this.suttaFromApiText(apiText);
        }

        /**
         * Convert an API response carrying HTML translation text into a
         * list of segments: two header segments followed by one segment
         * per line of the cleaned-up HTML with all tags removed.
         * @param {object} apiJson - object with translation
         *      ({lang, author_uid, text, title}), suttaplex
         *      ({uid, root_lang, original_title}) and segmented
         * @returns {{author_uid, sutta_uid, lang, segmented, metaarea,
         *      segments, translation}}
         * @throws {Error} if initialize() has not completed
         */
        suttaFromApiText(apiJson) {
            if (!this.initialized) {
                throw new Error('initialize() must be called');
            }
            const { translation, segmented, suttaplex, } = apiJson;
            const lang = translation.lang;
            const uid = suttaplex.uid;
            const author_uid = translation.author_uid;
            let html = translation.text.trim();

            // Pull the content of the first <aside> out as metaarea and
            // strip all <aside> markup from the text. metaarea is the
            // empty string when there is no <aside>.
            let metaarea = '';
            const resultAside = (/<aside/um).exec(html);
            if (resultAside) {
                const start = html.indexOf('>', resultAside.index)+1;
                const end = html.indexOf('</aside', start);
                metaarea = html.substring(start, end);
                const reAside = new RegExp('<aside[^]*</aside>', 'gum');
                html = html.replace(reAside, '');
            }

            // NOTE(review): match() returns an array or null, never an
            // index, so `>= 0` is true only when NO heading matched (null
            // coerces to 0; a match array coerces to NaN). As a result,
            // text with a heading is trimmed up to its first <p>, and
            // text without one is left untrimmed. Preserved as-is because
            // changing it would alter the generated scids -- confirm the
            // intent before fixing.
            const iH = html.match(/<h[0-9]/um);
            if (iH >= 0) {
                html = html.replace(/[^]*?(<h[0-9][^>]*)>/um, '$1');
            } else {
                html = html.replace(/[^]*?<p[^>]*>/um, '<p>');
            }
            html = html.replace(/<\/?div[^>]*>\n*/gum,'');
            html = html.replace(/<\/?blockquote[^>]*>\n*/gum,'');
            html = html.replace(/<\/?br[^>]*>\n*/gum,' ');

            // Drop anything following the last closing paragraph tag
            const pEnd = '</p>';
            const ipEnd = html.lastIndexOf(pEnd);
            if (ipEnd >= 0) {
                html = html.substring(0, ipEnd+pEnd.length);
            }
            const lines = html.split('\n');

            // section and id are running state carried across lines
            let section = 1;
            let id = '.0';
            const textSegments = lines.map((line,i) => {
                if (line.indexOf('id="sc') > 0) {
                    id = line.replace(/.*"sc([0-9]+)[^]*/u,'.$1');
                }
                if (i) {
                    section = line.match(/^<h[2-9]/u)
                        ? section+1 : section;
                }
                const scid = `${uid}:${section}${id}.${i+1}`;
                line = line.replace(/<\/?[^>]*>/gu,'');
                return {
                    scid,
                    [lang]: line,
                };
            });

            const collId = uid.replace(/[0-9.-]+/u, '');
            const collNum = uid.replace(/[a-z]*/iu, '');
            const collNames = this.expandAbbreviation(collId);
            const collName = collNames && collNames[collNames.length-1];
            const headerSegments = [{
                scid:`${uid}:0.1`,
                [lang]: `${collName || collId} ${collNum}`,
            },{
                scid:`${uid}:0.2`,
                [lang]: `${translation.title}`,
                [suttaplex.root_lang]: `${suttaplex.original_title}`,
            }];
            const segments = headerSegments.concat(textSegments);
            const suttaRef = `${uid}/${lang}/${author_uid}`;
            this.info(`suttaFromApiText(${suttaRef}) `+
                `segs:${segments.length}`);
            return {
                author_uid,
                sutta_uid: uid,
                lang,
                segmented,
                metaarea,
                segments,
                translation,
            };
        }

        /**
         * DEPRECATED. Normalize a sutta reference.
         * @param {*} id - passed to SuttaCentralId.normalizeSuttaId()
         * @returns {{support, scid}} Legacy support level and the
         *      normalized id
         * @throws {Error} if id is null/undefined or cannot be normalized
         */
        normalizeScid(id) { // DEPRECATED
            if (id == null) {
                throw new Error('Sutta reference identifier is required');
            }
            const scid = SuttaCentralId.normalizeSuttaId(id);
            if (scid == null) {
                throw new Error(
                    `Keyword search is not yet implemented:${id}`);
            }
            return {
                support: Definitions.SUPPORT_LEVELS.Legacy,
                scid,
            };
        }

        /**
         * Load `${apiUrl}/suttas/<scid>[/<translator>][?lang=<language>]`
         * through the cache and, when a specific language is requested,
         * narrow suttaplex.translations to that language.
         * @param {string} id
         * @param {string} [language] - ANY_LANGUAGE ('*') disables both
         *      the query parameter and the filter
         * @param {string} [translator] - ANY_TRANSLATOR ('*') omits the
         *      translator path element
         * @returns {Promise<object>} API result with added support field
         * @throws {Error} if the result has no suttaplex translations
         */
        async loadSuttaJson(id, language, translator) {
            try {
                // NOTE(review): normalizeScid() receives an object here,
                // not a string -- presumably normalizeSuttaId() accepts
                // that; verify against sutta-central-id.
                const { scid, support, } = this.normalizeScid({
                    scid: id,
                    language,
                    translator,
                });
                const apiSuttas = `${this.apiUrl}/suttas`;
                let request = `${apiSuttas}/${scid}`;
                if (translator && translator !== ANY_TRANSLATOR) {
                    request += `/${translator}`;
                }
                if (language && language !== ANY_LANGUAGE) {
                    request += `?lang=${language}`;
                }
                this.debug(`loadSuttaJson() ${request}`);

                const result = await this.loadJson(request);
                result.support = support;
                const suttaplex = result.suttaplex;
                const translations = suttaplex && suttaplex.translations;
                if (translations == null || translations.length === 0) {
                    throw new Error(
                        `loadSuttaJson() no sutta found for id:${scid}`);
                }
                if (translations && language &&
                    language !== ANY_LANGUAGE) {
                    suttaplex.translations =
                        translations.filter(t => t.lang === language);
                }
                return result;
            } catch(e) {
                this.warn(`loadSuttaJson()`,
                    {id,language,translator}, e);
                throw e;
            }
        }

        /**
         * Load the suttaplex for scid and return a private copy of its
         * first element with translations narrowed to lang/author_uid
         * when either is given.
         * @param {string} scid
         * @param {string} [lang] - ANY_LANGUAGE ('*') matches every
         *      language
         * @param {string} [author_uid]
         * @returns {Promise<object>} suttaplex copy
         * @throws {Error} if the response has no first element or that
         *      element has no translations
         */
        async loadSuttaplexJson(scid, lang, author_uid) {
            try {
                const sutta_uid = SuttaCentralId.normalizeSuttaId(scid);
                const request = `${this.apiUrl}/suttaplex/${sutta_uid}`;
                const result = await this.loadJsonRest(request);

                // Guard BEFORE copying: JSON.stringify(undefined) yields
                // undefined and JSON.parse(undefined) throws a
                // SyntaxError, which would hide the error raised here.
                const first = result && result[0];
                if (!first) {
                    throw new Error(`loadSuttaplexJson() no suttaplex`);
                }
                // copy, so the (possibly memoized) result is not mutated
                const splx = JSON.parse(JSON.stringify(first));
                const translations = splx.translations;
                if (translations == null || translations.length === 0) {
                    throw new Error(
                        `loadSuttaplexJson() no translations`);
                }
                if (lang || author_uid) {
                    splx.translations = translations.filter(t =>
                        (!lang || lang === ANY_LANGUAGE ||
                            t.lang === lang)
                        &&
                        (!author_uid || t.author_uid === author_uid)
                    );
                    // NOTE(review): this sorts the pre-filter array,
                    // which splx no longer references, so the returned
                    // order is unaffected. Sorting splx.translations
                    // instead would change which translation
                    // loadLegacySutta() picks first -- confirm intent.
                    translations.sort((a,b) => {
                        if (a.segmented === b.segmented) {
                            return (a.author_uid||'')
                                .localeCompare(b.author_uid||'');
                        }
                        return a.segmented ? 1 : -1;
                    });
                    this.debug(`ScApi.loadSuttaplexJson`+
                        `(${scid}, ${lang}, ${author_uid}) `+
                        `${JSON.stringify(splx,null,2)}`);
                }
                return splx;
            } catch(e) {
                this.warn(`loadSuttaplexJson()`,
                    {scid, lang, author_uid}, e);
                throw e;
            }
        }

        /**
         * DEPRECATED alias of loadLegacySutta(); prints a stack trace.
         */
        async loadSutta(...args) {
            console.trace(`deprecated`);
            return this.loadLegacySutta(...args);
        }

        /**
         * Load a sutta and convert it to a segment list.
         *
         * Accepts either (scid, language, translator) with instance
         * defaults for the last two, or a single options object
         * {scid, language, translator}.
         *
         * @returns {Promise<object|null>} null when the suttaplex lists
         *      no matching translations; the raw API result when it has
         *      no translation; otherwise a sutta object with segments
         *      plus deprecated acronym/original_title/translations
         *      accessors that log on every read.
         */
        async loadLegacySutta(...args) {
            try {
                const opts = typeof args[0] === "string"
                    ? {
                        scid: args[0],
                        language: args[1] || this.language,
                        translator: args[2] || this.translator,
                    }
                    : args[0];
                const sutta_uid =
                    SuttaCentralId.normalizeSuttaId(opts.scid);
                const language = opts.language;
                let author_uid = opts.translator;
                const suttaplex = await this.loadSuttaplexJson(
                    sutta_uid, language, author_uid);
                const translations = suttaplex.translations;
                if (translations == null || translations.length == 0) {
                    this.log(`loadSutta(${sutta_uid},${language}) `+
                        `=> no translations`);
                    return null;
                }

                author_uid = translations[0].author_uid;
                let result = await this.loadSuttaJson(
                    sutta_uid, language, author_uid);
                if (result.translation == null &&
                    translations.length>0) {
                    // No translation came back for the first author:
                    // prefer a segmented translation, else the first one.
                    let trans = translations.filter(t=>t.segmented)[0];
                    if (trans == null) {
                        this.info([
                            `loadSutta() `+
                                `${sutta_uid}/${language}/${author_uid}`,
                            `=> legacy unsegmented text`,
                        ].join(' '));
                        trans = translations[0];
                    }
                    const uid = result.suttaplex.uid;
                    // multiple translations found, using first
                    result = await this.loadSuttaJson(
                        uid, trans.lang, trans.author_uid);
                }

                const translation = result.translation;
                if (!translation) { // no unique translation
                    return result;
                }

                let sutta;
                if (translation.text) {
                    sutta = this.suttaFromApiText(result);
                } else {
                    // Merge root and translation strings by scid
                    const rootStrings = result.root_text.strings || {};
                    const segObj = {};
                    Object.keys(rootStrings).forEach(scid => {
                        segObj[scid] = segObj[scid] || { scid };
                        segObj[scid].pli = rootStrings[scid];
                        segObj[scid].en = "";
                    });
                    const transStrings = translation.strings || {};
                    Object.keys(transStrings).forEach(scid => {
                        segObj[scid] = segObj[scid] || { scid };
                        const text = transStrings[scid]
                            .replace(/<\/?i>/gum, '');
                        segObj[scid][translation.lang] = text;
                    });
                    sutta = {
                        sutta_uid: result.suttaplex.uid,
                        segmented: result.segmented,
                        segments: Object.values(segObj),
                        translation,
                    };
                }
                sutta.author_uid = translation.author_uid;
                sutta.suttaplex = result.suttaplex;

                const LEGACY_PROPS = [
                    'acronym', 'original_title', 'translations',
                ];
                for (const lp of LEGACY_PROPS) {
                    if (!sutta.hasOwnProperty(lp)) {
                        const value = sutta.suttaplex[lp];
                        const dbgValue = value instanceof Array
                            ? value.map(v=>v.id)
                            : value;
                        Object.defineProperty(sutta, lp, {
                            get: ()=>{
                                console.log(
                                    `DEPRECATED: loadLegacySutta.${lp}:`,
                                    dbgValue);
                                return value;
                            }
                        });
                    }
                }
                sutta.segments = sutta.segments || [];
                return sutta;
            } catch(e) {
                this.warn(`loadSutta() args:`, JSON.stringify(args), e);
                throw e;
            }
        }

    }

    module.exports = exports.ScApi = ScApi;
})(typeof exports === "object" ? exports : (exports = {}));