UNPKG

@nvif1989/groups

Version:

Categorization of food by their common names.

81 lines (70 loc) 2.07 kB
const Sql = require('sql-extra'); const lunr = require('lunr'); const path = require('path'); var corpus = new Map(); var ranges = new Map(); var exacts = new Map(); var index = null; var ready = false; function loadCorpus() { for(var [k, v] of require('./corpus')) corpus.set(k, v); }; function setupIndex() { index = lunr(function() { this.ref('code'); this.field('code'); this.field('group'); this.field('tags'); this.pipeline.remove(lunr.stopWordFilter); for(var r of corpus.values()) this.add(r); }); }; function setupRanges() { for(var {code, entries} of corpus.values()) { for(var entry of entries.split(';')) { if(!entry.includes('-')) exacts.set(entry, code); else ranges.set(entry.split('-').map(v => parseInt(v, 10)), code); } } } function csv() { return path.join(__dirname, 'index.csv'); }; function sql(tab='groups', opt={}) { return Sql.setupTable(tab, {code: 'TEXT', group: 'TEXT', entries: 'INT', tags: 'TEXT'}, require('./corpus').values(), Object.assign({pk: 'code', index: true, tsvector: {code: 'A', group: 'B', tags: 'C'}}, opt)); }; function load() { if(ready) return true; loadCorpus(); setupIndex(); setupRanges(); return ready = true; }; function findEntry(entry) { if(exacts.has(entry)) return exacts.get(entry); var n = parseInt(entry, 10); for(var [[bgn, end], code] of ranges) if(bgn<=n && n<=end) return code; return null; } function groups(txt) { if(index==null) return []; var z = [], txt = txt.replace(/\W/g, ' '); var mats = index.search(txt), max = 0; for(var mat of mats) max = Math.max(max, Object.keys(mat.matchData.metadata).length); for(var mat of mats) if(Object.keys(mat.matchData.metadata).length===max) z.push(corpus.get(mat.ref)); var code = findEntry(txt); if(!code) return z; var mat = corpus.get(code); if(z.includes(mat)) z.splice(z.indexOf(math), 1); z.unshift(mat); return z; }; groups.csv = csv; groups.sql = sql; groups.load = load; groups.corpus = corpus; module.exports = groups;