UNPKG

scv-bilara

Version:

SuttaCentral bilara-data library

397 lines (374 loc) • 13.1 kB
(function(exports) {
  const fs = require('fs');
  const path = require('path');
  const { logger } = require('log-instance');
  const { Files } = require('memo-again');
  const { AuthorsV2, BilaraPath, SuttaCentralId } = require("scv-esm");

  // Size threshold (bytes) used to recognize placeholder ("stub") JSON files.
  const STUBFILESIZE = 5;

  /**
   * Maps sutta ids (suids) to the bilara JSON files found under a data
   * root directory. The index ("suidMap") has the shape
   *
   *   { [suid]: { [key]: value } }
   *
   * where `key` is `<type>/<lang>/<author>` (e.g. `root/pli/ms`) and
   * `value` is the directory of the file relative to that key
   * (e.g. `sutta/dn`). The index is cached in `suidMapFile` as JSON.
   */
  class BilaraPathMap {
    /**
     * @param {object} [opts]
     * @param {object} [opts.logger] - object providing logInstance(); defaults
     *   to the log-instance logger
     * @param {string} [opts.root] - data root; defaults to
     *   `<Files.LOCAL_DIR>/ebt-data`
     * @param {string} [opts.rootLang='pli']
     * @param {string} [opts.rootAuthor='ms']
     * @param {string} [opts.suidMapFile] - explicit location of the index cache
     * @param {function} [opts.validatePath] - `(key, value, suid) => boolean`
     *   filter applied while indexing; defaults to accepting everything
     * @param {object} [opts.publication] - optional object whose
     *   `isPublishedPath(filePath)` decides whether a file is indexed
     */
    constructor(opts = {}) {
      (opts.logger || logger).logInstance(this);
      this.root = opts.root || path.join(Files.LOCAL_DIR, "ebt-data");
      const rootDir = path.dirname(this.root);
      // basename() is used instead of string-stripping rootDir from root:
      // the latter breaks for relative roots ("ebt-data"), roots directly
      // under "/" and roots with a trailing slash.
      const rootName = path.basename(this.root);
      this.rootLang = opts.rootLang || 'pli';
      this.rootAuthor = opts.rootAuthor || 'ms';
      if (opts.suidMapFile) {
        this.suidMapFile = opts.suidMapFile;
      } else if (rootName === 'ebt-data') {
        this.suidMapFile = path.join(__dirname, "auto", "suidmap.json");
      } else {
        this.suidMapFile = path.join(rootDir, `suidmap-${rootName}.json`);
      }
      this.validatePath = opts.validatePath || ((key, value, suid) => true);
      this.publication = opts.publication;
      this.initialized = false;
    }

    /** All bilara document types known to this class. */
    static get ALL_TYPES() {
      return [
        'root',
        'translation',
        'html',
        'variant',
        'reference',
        'comment',
      ];
    }

    /**
     * Build a case-insensitive pattern matching the `-<lang>-<author>.json`
     * tail of a bilara file name.
     * @param {string} lang
     * @param {string} author
     * @returns {RegExp|undefined} undefined when either argument is missing
     */
    static langAuthorRegExp(lang, author) {
      return lang && author
        ? new RegExp(`-${lang}-${author}\\.json`, "i")
        : undefined;
    }

    /**
     * Load the index from `suidMapFile`, rebuilding it with buildSuidMap()
     * when the file is missing or cannot be parsed.
     * @returns {Promise<BilaraPathMap>} this instance
     * @throws rethrows any error after logging a warning
     */
    async initialize() {
      const { suidMapFile } = this;
      try {
        let suidMap;
        if (fs.existsSync(suidMapFile)) {
          try {
            suidMap = JSON.parse(
              await fs.promises.readFile(suidMapFile, 'utf8'));
          } catch (e) {
            // An unreadable cache is not fatal: fall through and rebuild.
            this.warn(`initialize() ${e.message} => rebuilding ${suidMapFile}`);
          }
        }
        if (!suidMap) {
          this.info(`initialize() building ${suidMapFile}`);
          suidMap = await this.buildSuidMap();
        }
        this.suidMap = suidMap;
        this.initialized = true;
        return this;
      } catch (e) {
        this.warn(`initialize()`, suidMapFile, e.message);
        throw e;
      }
    }

    /**
     * List the sub-directories (relative paths, sorted) found beneath
     * `<root>/root/<lang>/<author>` when `lang` is the root language, or
     * beneath `<root>/translation/<lang>/<author>` otherwise.
     * Directories whose relative path contains a digit or "playground"
     * are skipped together with everything below them.
     * @param {object} [opts]
     * @param {string} [opts.lang] - defaults to this.rootLang
     * @param {string} [opts.author] - defaults to this.rootAuthor
     * @returns {Promise<string[]>}
     */
    async tipitakaPaths(opts = {}) {
      try {
        const { root, rootLang, rootAuthor } = this;
        const { lang = rootLang, author = rootAuthor } = opts;
        const takaPath = path.join(
          root,
          lang === rootLang ? 'root' : 'translation',
          lang,
          author,
        );
        const readOpts = { withFileTypes: true };
        const exclude = [
          '[0-9]',
          'playground', // Blake
        ].join('|');
        const reExclude = new RegExp(`(${exclude})`, "ui");
        const pathStack = [];
        const pathList = [];
        if (fs.existsSync(takaPath)) {
          pathStack.push(takaPath);
        }
        while (pathStack.length) {
          const dirPath = pathStack.pop();
          const relativePath = path.relative(takaPath, dirPath);
          // Exclusions are matched on the relative path only; matching the
          // absolute path would discard everything whenever the location of
          // the data root happens to contain a digit.
          if (reExclude.test(relativePath)) {
            this.info(`tipitakaPaths() exclude:`, dirPath);
            continue;
          }
          const dirKids = fs.readdirSync(dirPath, readOpts);
          if (relativePath) {
            pathList.push(relativePath);
          }
          for (const kid of dirKids) {
            if (kid.isDirectory()) {
              pathStack.push(path.join(dirPath, kid.name));
            }
          }
        }
        return pathList.sort();
      } catch (e) {
        this.warn(`tipitakaPaths()`, e.message);
        throw e;
      }
    }

    /**
     * Return the BilaraPath objects indexed for a suid, optionally filtered.
     * @param {object|string} [opts] - options, or the suid itself
     * @param {string} opts.suid
     * @param {string|string[]} [opts.lang] - keep paths containing `/<lang>/`
     * @param {string} [opts.author] - keep paths ending in `-<author>.json`
     * @param {string[]} [opts.types=['root','translation']] - keep paths
     *   starting with one of these types
     * @returns {BilaraPath[]} empty when the suid is not indexed
     * @throws {Error} via suidPaths() when uninitialized or suid is missing
     */
    bilaraPaths(opts = {}) {
      const msg = 'BilaraPathMap.bilaraPaths()';
      const dbg = 0;
      const options = typeof opts === 'string' ? { suid: opts } : opts;
      const { suid, lang, author } = options;
      const types = options.types || ['root', 'translation'];
      const reTypes = new RegExp(`^(${types.join('|')})`, 'u');
      const paths = this.suidPaths(suid) || {};
      dbg && console.log(msg, { paths });
      let bps = Object.keys(paths)
        .map(k => paths[k])
        .filter(bp => reTypes.test(bp));
      if (typeof lang === 'string') {
        const re = new RegExp(`/${lang}/`, 'u');
        bps = bps.filter(bp => re.test(bp));
      } else if (lang instanceof Array) {
        const re = new RegExp(`/(${lang.join('|')})/`, 'u');
        bps = bps.filter(bp => re.test(bp));
      }
      if (author) {
        const re = new RegExp(`.*-${author}\\.json`, 'u');
        bps = bps.filter(bp => re.test(bp));
      }
      return bps.map(bp => new BilaraPath(bp));
    }

    /**
     * Derive the translation path corresponding to a `root/pli/ms/...`
     * path and verify that the file exists under the data root.
     *
     * Only the leading `root/pli/ms/` directory prefix and the trailing
     * `_root-pli-ms.json` file suffix are rewritten. (Replacing every
     * occurrence of "root", "pli" and "ms" would corrupt paths that contain
     * those letters elsewhere, e.g. ids beginning with `pli-tv-`.)
     *
     * Note: the size check here is a strict `<`, whereas _loadPaths()
     * skips files with `size <= STUBFILESIZE`.
     *
     * @param {string} rootPath - root-relative bilara path of the root text
     * @param {string} lang - translation language
     * @param {string} author - translation author
     * @returns {string|null} root-relative translation path, or null when
     *   lang/author is missing, the file does not exist, or it is smaller
     *   than STUBFILESIZE bytes
     */
    transPath(rootPath, lang, author) {
      const msg = "BilaraPathMap.transPath()";
      const dbg = 0;
      if (!lang || !author) {
        return null;
      }
      // Function replacers keep "$" in lang/author from being interpreted
      // as a replacement pattern.
      const bilaraPath = rootPath
        .replace(/(^|\/)root\/pli\/ms\//,
          (m, lead) => `${lead}translation/${lang}/${author}/`)
        .replace(/_root-pli-ms\.json$/,
          () => `_translation-${lang}-${author}.json`);
      const fullPath = path.join(this.root, bilaraPath);
      if (!fs.existsSync(fullPath)) {
        dbg && console.log(msg, '[1]!existsSync', { fullPath });
        return null;
      }
      if (fs.statSync(fullPath).size < STUBFILESIZE) {
        dbg && console.log(msg, '[2]!size', { fullPath });
        return null;
      }
      dbg && console.log(msg, '[3]ok', bilaraPath);
      return bilaraPath;
    }

    /**
     * Collect the paths for a suid in up to three languages: every indexed
     * path matching the root lang/author, followed by the document
     * translation and the reference translation derived from the first of
     * those (when they exist and differ).
     * @param {object} [opts]
     * @param {string} opts.suid
     * @param {string} [opts.rootLang='pli']
     * @param {string} [opts.rootAuthor='ms']
     * @param {string} [opts.refLang] - defaults to 'en' when neither
     *   refLang nor refAuthor is given
     * @param {string} [opts.refAuthor] - defaults to 'sujato' when neither
     *   refLang nor refAuthor is given
     * @param {string} [opts.docLang]
     * @param {string} [opts.docAuthor]
     * @returns {string[]} empty when the suid is not indexed
     * @throws {Error} via suidPaths() when uninitialized or suid is missing
     */
    trilingualPaths(opts = {}) {
      const msg = "BilaraPathMap.trilingualPaths() ";
      const dbg = 0;
      const {
        suid,
        rootLang = 'pli',
        rootAuthor = 'ms',
        docLang,
        docAuthor,
      } = opts;
      let { refLang, refAuthor } = opts;
      if (!refAuthor && !refLang) {
        refLang = 'en';
        refAuthor = 'sujato';
      }
      // suidPaths() yields undefined for an unknown suid; treat that as
      // "no paths" rather than failing on Object.keys(undefined).
      const pathMap = this.suidPaths(suid) || {};
      const paths = Object.keys(pathMap).map(k => pathMap[k]);
      const reRoot = BilaraPathMap.langAuthorRegExp(rootLang, rootAuthor);
      const mldPaths = paths.filter(p => reRoot && reRoot.test(p));
      const rootPath = mldPaths[0];
      if (rootPath) {
        const docPath = this.transPath(rootPath, docLang, docAuthor);
        const refPath = this.transPath(rootPath, refLang, refAuthor);
        if (docPath) {
          mldPaths.push(docPath);
        }
        if (refPath && refPath !== docPath) {
          mldPaths.push(refPath);
        }
        dbg && console.log(msg, mldPaths);
      }
      dbg && console.log(msg, { mldPaths });
      return mldPaths;
    }

    /**
     * Expand the index entry of a suid into full root-relative file paths.
     * Only the part of `suid` before the first "/" is used for the lookup.
     * @param {string} suid - e.g. "dn1" or "dn1/en/sujato"
     * @returns {Object<string,string>|undefined} map of
     *   `<type>/<lang>/<author>` to
     *   `<key>/<dir>/<suid>_<type>-<lang>-<author>.json`,
     *   or a falsy value when the suid is not indexed
     * @throws {Error} when initialize() has not completed or suid is falsy
     */
    suidPaths(suid) {
      if (!this.initialized) {
        throw new Error(`initialize() has not been called`);
      }
      if (!suid) {
        throw new Error('BilaraPathMap.suidPaths() suid is required');
      }
      const suidKey = suid.split('/')[0];
      const map = this.suidMap[suidKey];
      return map && Object.keys(map).reduce((a, k) => {
        a[k] = `${k}/${map[k]}/${suidKey}_${k.split('/').join('-')}.json`;
        return a;
      }, {});
    }

    /**
     * Describe the root and translation documents available for a suid.
     * @param {string} suid
     * @returns {Array<{suid:string, lang:string, author:string,
     *   category:string, nikaya:string, bilaraPath:string}>|undefined}
     *   entries ordered by sorted index key; undefined when the suid is
     *   not indexed
     */
    suidLanguages(suid) {
      const suidPaths = this.suidPaths(suid);
      if (!suidPaths) {
        return undefined;
      }
      return Object.keys(suidPaths)
        .sort()
        .reduce((a, k) => {
          const bilaraPath = suidPaths[k];
          if (/^(translation|root)/.test(bilaraPath)) {
            // Path segments: type/lang/author/category/nikaya/...
            const [, lang, author, category, nikaya] = bilaraPath.split('/');
            a.push({
              suid: suid.split('/')[0],
              lang,
              author,
              category,
              nikaya,
              bilaraPath,
            });
          }
          return a;
        }, []);
    }

    /**
     * Return the single path selected by a suid reference: a bare suid
     * ("dn1") selects `root/pli/ms`; a qualified one ("dn1/en/sujato")
     * selects `translation/<lang>/<author>`.
     * @param {string} suid
     * @returns {string|undefined}
     * @throws {Error} when suid is falsy or the map is uninitialized
     */
    suidPath(suid) {
      if (!suid) {
        throw new Error('BilaraPathMap.suidPath() suid is required');
      }
      const pathInfo = this.suidPaths(suid);
      const suidParts = suid.split('/');
      const key = suidParts.length === 1
        ? `root/pli/ms`
        : `translation/${suidParts[1]}/${suidParts[2]}`;
      return pathInfo && pathInfo[key];
    }

    /**
     * Walk `<root>/<key>` recursively and record every JSON file in
     * this.suidMap as `suidMap[suid][key] = <directory relative to key>`.
     * Skipped: excluded directories, files of at most STUBFILESIZE bytes,
     * files rejected by `publication.isPublishedPath()` and entries
     * rejected by `validatePath()`.
     * @param {string} key - `<type>/<lang>/<author>`
     * @returns {Promise<object>} the (mutated) suidMap
     */
    async _loadPaths(key) {
      try {
        const { root, suidMap, validatePath, publication } = this;
        const keyRoot = path.join(root, key);
        const readOpts = { withFileTypes: true };
        // These are regular-expression sources, so the word-boundary
        // assertion must be written "\\b"; a bare "\b" inside a string
        // literal is a backspace character and would never match.
        const exclude = [
          'abhidhamma', // (later)
          '\\bma\\b', // Chinese
          '\\bsa\\b', // Chinese
          'site', 'sc-page', 'blurb', 'name', // metadata
          'playground', // Blake
        ].join('|');
        const reExclude = new RegExp(`(${exclude})`, "ui");
        const traverse = (dirPath) => {
          // Match exclusions against the root-relative path so that the
          // location of the data root itself (e.g. ".../website/...")
          // cannot exclude the whole tree.
          if (reExclude.test(path.relative(root, dirPath))) {
            this.debug(`_loadPaths(${key}) exclude:`, dirPath);
            return;
          }
          for (const kid of fs.readdirSync(dirPath, readOpts)) {
            const kidPath = path.join(dirPath, kid.name);
            if (kid.isDirectory()) {
              traverse(kidPath);
              continue;
            }
            if (!kid.name.endsWith('.json')) {
              continue;
            }
            if (fs.statSync(kidPath).size <= STUBFILESIZE) {
              continue; // ignore stub file
            }
            if (publication && !publication.isPublishedPath(kidPath)) {
              this.debug(`skipping ${kidPath}`);
              continue;
            }
            // File names look like "<suid>_<type>-<lang>-<author>.json".
            const suid = kid.name.replace(/_.*/, '');
            const value = path.relative(keyRoot, dirPath)
              .split(path.sep)
              .join('/');
            if (validatePath(key, value, suid)) {
              (suidMap[suid] = suidMap[suid] || {})[key] = value;
            }
          }
        };
        fs.existsSync(keyRoot) && traverse(keyRoot);
        return suidMap;
      } catch (e) {
        this.warn(`_loadPaths(${key})`, e.message);
        throw e;
      }
    }

    /**
     * Rebuild the index by scanning the data root, store it in
     * this.suidMap and write it to `suidMapFile` (tab-indented JSON).
     *
     * Translations are scanned first and their suids sorted with
     * SuttaCentralId.compareLow; `root/pli/ms` (and the optional types)
     * are loaded afterwards into the same object.
     *
     * @param {object} [opts]
     * @param {boolean} [opts.loadHtml=false] - also index `html/pli/ms`
     * @param {boolean} [opts.loadVariant=false] - also index `variant/pli/ms`
     * @param {boolean} [opts.loadReference=false] - also index
     *   `reference/pli/ms`
     * @param {boolean} [opts.loadComment=false] - also index
     *   `comment/<lang>/<author>` for each translation lang/author
     * @returns {Promise<object>} the new suidMap
     */
    async buildSuidMap(opts = {}) {
      const msStart = Date.now();
      const {
        loadHtml = false,
        loadVariant = false,
        loadReference = false,
        loadComment = false,
      } = opts;
      let suidMap = this.suidMap = {};
      const transPath = path.join(this.root, "translation");
      const rdOpts = { withFileTypes: true };
      // Names of the immediate sub-directories of dir.
      const subDirs = async (dir) =>
        (await fs.promises.readdir(dir, rdOpts))
          .filter(e => e.isDirectory())
          .map(e => e.name);

      for (const lang of await subDirs(transPath)) {
        for (const author of await subDirs(path.join(transPath, lang))) {
          const key = `translation/${lang}/${author}`;
          if (/en\/(patton|comm-team)/.test(key)) {
            continue; // ignore lzh->en translations
          }
          await this._loadPaths(key);
          if (loadComment) {
            await this._loadPaths(`comment/${lang}/${author}`);
          }
        }
      }
      suidMap = Object.keys(suidMap)
        .sort(SuttaCentralId.compareLow)
        .reduce((a, k) => {
          a[k] = suidMap[k];
          return a;
        }, {});
      this.suidMap = suidMap;
      await this._loadPaths("root/pli/ms");
      loadHtml && await this._loadPaths("html/pli/ms");
      loadReference && await this._loadPaths("reference/pli/ms");
      loadVariant && await this._loadPaths("variant/pli/ms");
      const { suidMapFile } = this;
      await fs.promises.writeFile(suidMapFile,
        JSON.stringify(suidMap, null, '\t'));
      this.info(`buildSuidMap() ${suidMapFile} ${Date.now() - msStart}ms`);
      return suidMap;
    }
  }

  module.exports = exports.BilaraPathMap = BilaraPathMap;
})(typeof exports === "object" ? exports : (exports = {}));