@ifct2017/about
Version:
On the history of malnutrition, current status, and data details.
246 lines (228 loc) • 5.69 kB
JavaScript
const T = {
TEXT: 0x00,
NORMAL: 0x01,
QUOTED: 0x02,
NUMBER: 0x10,
CARDINAL: 0x11,
ORDINAL: 0x12,
UNIT: 0x20,
MASS: 0x21,
ENTITY: 0x30,
TABLE: 0x31,
COLUMN: 0x32,
ROW: 0x33,
BRACKET: 0x40,
OPEN: 0x41,
CLOSE: 0x42,
OPERATOR: 0x50,
UNARY: 0x51,
BINARY: 0x52,
TERNARY: 0x53,
FUNCTION: 0x60,
KEYWORD: 0x70,
EXPRESSION: 0x80,
VALUE: 0x81,
BOOLEAN: 0x82,
};
const DECIMAL = new Set(['dot', 'point', 'decimal']);
const SPECIAL = new Map([
['infinity', Infinity],
['infinite', Infinity],
['inf', Infinity],
['∞', Infinity],
['not-a-number', NaN],
['not-number', NaN],
['nan', NaN]
]);
const CARDINAL = new Map([
['oh', 0],
['nil', 0],
['zero', 0],
['nought', 0],
['naught', 0],
['one', 1],
['two', 2],
['three', 3],
['four', 4],
['five', 5],
['six', 6],
['seven', 7],
['eight', 8],
['nine', 9],
['ten', 10],
['eleven', 11],
['twelve', 12],
['thirteen', 13],
['fourteen', 14],
['fifteen', 15],
['sixteen', 16],
['seventeen', 17],
['eighteen', 18],
['nineteen', 19],
['twenty', 20],
['thirty', 30],
['forty', 40],
['fifty', 50],
['sixty', 60],
['seventy', 70],
['eighty', 80],
['ninety', 90],
['hundred', 1e+2],
['thousand', 1e+3],
['lakh', 1e+5],
['million', 1e+6],
['crore', 1e+7],
['billion', 1e+9],
['trillion', 1e+12],
['quadrillion', 1e+15],
['quintillion', 1e+18],
['sextillion', 1e+21],
['septillion', 1e+24],
['octillion', 1e+27],
['nonillion', 1e+30],
['decillion', 1e+33]
]);
const ORDINAL = new Map([
['zeroth', 0],
['first', 1],
['second', 2],
['third', 3],
['fourth', 4],
['fifth', 5],
['sixth', 6],
['seventh', 7],
['eighth', 8],
['ninth', 9],
['tenth', 10],
['eleventh', 11],
['twelfth', 12],
['thirteenth', 13],
['fourteenth', 14],
['fifteenth', 15],
['sixteenth', 16],
['seventeenth', 17],
['eighteenth', 18],
['nineteenth', 19],
['twentieth', 20],
['thirtieth', 30],
['fortieth', 40],
['fiftieth', 50],
['sixtieth', 60],
['seventieth', 70],
['eightieth', 80],
['ninetieth', 90],
['hundredth', 1e+2],
['thousandth', 1e+3],
['lakhth', 1e+5],
['millionth', 1e+6],
['croreth', 1e+7],
['billionth', 1e+9],
['trillionth', 1e+12],
['quadrillionth', 1e+15],
['quintillionth', 1e+18],
['sextillionth', 1e+21],
['septillionth', 1e+24],
['octillionth', 1e+27],
['nonillionth', 1e+30],
['decillionth', 1e+33]
]);
function trailingZeros(num) {
for (var a=0, num=Math.floor(num); num%10===0; a++)
num = Math.floor(num/10);
return a;
}
function digitCount(num) {
return num>0? Math.floor(Math.log10(num)) + 1 : 1;
}
function round(num) {
var p = 10 ** (15 - digitCount(num));
return Math.round(num*p)/p;
}
function merge(n1, n2) {
var a1 = n2!==0? trailingZeros(n1) : 0;
var d2 = digitCount(n2);
return a1<d2? n1*(10**d2) + n2 : n1 + n2;
}
function mergeAll(arr) {
var l = arr.length;
var a = l>0? arr[l-1] : 0;
for (var i=l-2; i>=0; i--)
a = merge(arr[i], a);
return a;
}
function addExp(arr, exp) {
var l = arr.length;
var a = l>0? arr[l-1] : 0;
for (var i=l-2; i>=0 && arr[i]<=a*exp; i--)
a = merge(arr[i], a);
arr[i+1] = round(a*exp);
arr.length = i+2;
return arr;
}
function process(s, txt) {
var l = s.arr.length;
var has = true, v = NaN;
if (CARDINAL.has(txt)) v = CARDINAL.get(txt);
else if (ORDINAL.has(txt)) v = ORDINAL.get(txt);
else { v = parseFloat(txt); has = false; }
if (Number.isNaN(v)) { s.end = true; return false; }
if (!has || l===0 || v<100 || v<s.exp) {
if (s.exp>0) { s.arr[l] = v; s.exp = has && v>=100? v : 0; }
else s.arr[l-1] = merge(s.arr[l-1], v);
}
else { addExp(s.arr, v); s.exp = v; }
s.end = s.ord = ORDINAL.has(txt);
return true;
}
function has(s) {
return s.end && s.arr.length>0;
}
function get(s) {
var a = mergeAll(s.arr);
s.arr.length = 0;
s.end = false;
s.ord = false;
s.exp = 1;
return a;
}
function decimal(s, dec, pre) {
var type = s.ord? T.ORDINAL : T.CARDINAL, v = get(s);
var value = round(dec? pre + v*10**(-digitCount(v)) : v);
return {type, value};
}
function number(tkns) {
var dec = false, pre = NaN, p = false, a = [];
var s = {arr: [], end: false, ord: false, exp: 1};
for(var tkn of tkns) {
var txt = tkn.type===T.TEXT? tkn.value.toLowerCase().replace(/[\s,]/g, '') : null;
if (txt!=null && (p = process(s, txt)) && !s.end) continue;
if (DECIMAL.has(txt)) { pre = get(s); dec = true; p = true; }
else if (dec || has(s)) { a.push(decimal(s, dec, pre)); dec = false; pre = NaN; }
if (SPECIAL.has(txt)) { a.push({type: T.CARDINAL, value: SPECIAL.get(txt)}); p = true; }
if (!p) a.push(tkn);
}
return a;
}
function token(type, value) {
return {type, value};
}
function tokenize(txt) {
var quo = null, y = '', a = [];
for (var c of txt) {
if ((quo!=null && quo!=c) || /\w/.test(c)) { y += c; continue; }
if (y) { a.push(token(quo!=null? T.QUOTED : T.TEXT, y)); y = ''; }
if (/[\'\"\`]/.test(c)) quo = quo==null? c:null;
else if (/\S/g.test(c)) a.push(token(T.TEXT, c));
}
if (y) a.push(token(quo!=null? T.QUOTED : T.TEXT, y));
return a;
}
function nlp(txt) {
var tkns = tokenize(txt), a = '';
tkns.push(token(T.TEXT, ''));
tkns = number(tkns);
for (var tkn of tkns)
a += tkn.value+' ';
return a.trim();
}
module.exports = nlp;