UNPKG

concepts-parser

Version:
254 lines (253 loc) 8.96 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const base_1 = require("./base"); const utils = require("../utils"); const concepts_1 = require("../concepts"); const POINT = "."; const SPACE = " "; class Parser extends base_1.BaseParser { parse(context) { const input = context.text; let p = 0; let pivot = 0; let self = this; let start = 0; const concepts = new concepts_1.Concepts(context); function addConcept(input, i, start) { let concept = self.formatConcept(context, input, i, start); p = 0; start = 0; concepts.add(concept); } for (let i = 0; i < input.length; i++) { let c = input[i]; switch (p) { case 0: if (utils.isLetter(c)) { start = i; p = utils.isUpper(c) ? 1 : 11; } pivot = 0; break; case 1: if (c === POINT) { p = 21; } else if (utils.isLower(c)) { p = 2; } else if (utils.isUpper(c) || utils.isDigit(c) || self.isInConnectChars(c)) { p = 31; } else if (c === SPACE && self.isLowerStartUpperWord(input, i)) { p = 2; } else { if (i - start < 3) { p = 0; } else { addConcept(input, i + 1, start); } } break; case 2: if (i === input.length - 1) { addConcept(input, i + 2, start); } else if (utils.isLetterOrDigit(c)) { p = 2; } else if (self.isInConnectChars(c)) { p = -2; } else if (c === SPACE) { p = 3; } else { if (i - start < 3) { p = 0; } else { addConcept(input, i + 1, start); } } break; case -2: if (utils.isLetterOrDigit(c)) { p = 2; } else { let prefix = input.substr(start, i - start); if (!self.isInPrefixes(prefix.toLowerCase())) { addConcept(input, i, start); } } break; case 3: if (utils.isDigit(c)) { p = 41; } else if (utils.isLower(c) || self.isInConceptWords(c)) { if (self.isLowerStartUpperWord(input, i)) { p = 1; } else { p = 4; pivot = i; } } else if (utils.isUpper(c)) { p = 1; } else if (self.isInStartQuotes(c)) { p = 51; pivot = i; } else { addConcept(input, i, start); } break; case 4: let startInput = input.substr(pivot); let startConnectWord = self.getStartConceptWord(startInput); if (startConnectWord) { p = 5; i += startConnectWord.length - 1; continue; } if (p === 4) { addConcept(input, pivot, start); pivot = 0; } break; case 5: if (utils.isUpper(c)) { p = 1; } else { addConcept(input, pivot, start); pivot = 0; } break; case 11: if (utils.isLower(c) || self.isInConnectChars(c)) { p = 11; } else if (utils.isUpper(c)) { p = 31; } else { p = 0; } break; case 21: if (utils.isUpper(c)) { p = 22; } else { p = 3; } break; case 22: if (c === POINT) { p = 21; } else if (utils.isLetter(c)) { p = 2; } else { p = 0; } break; case 31: if (c === SPACE) { p = 32; } else if (utils.isLetterOrDigit(c)) { p = 31; } else if (self.isInConnectChars(c)) { p = -31; } else { addConcept(input, i + 1, start); } break; case -31: if (utils.isLetterOrDigit(c)) { p = 31; } else { addConcept(input, i, start); } break; case 32: if (utils.isUpper(c)) { p = 1; } else if (utils.isDigit(c)) { p = 41; } else if (self.isInStartQuotes(c)) { p = 51; pivot = i; } else { addConcept(input, i, start); } break; case 41: if (!utils.isLetterOrDigit(c)) { if (["-", ":"].indexOf(c) >= 0) { let spacei = input.substr(start, i - start).lastIndexOf(" "); if (spacei > 1) { addConcept(input, spacei + start + 1, start); } else { start = p = 0; } } else { addConcept(input, i + 1, start); } } break; case 51: if (utils.isUpper(c)) { p = 52; } else { addConcept(input, pivot, start); } break; case 52: if (c === SPACE) { p = 53; } else if (utils.isLetterOrDigit(c) || self.isInConnectChars(c)) { p = 52; } else if (self.isInEndQuotes(c)) { addConcept(input, i + 2, start); } else { addConcept(input, pivot, start); } break; case 53: if (utils.isUpper(c)) { p = 52; } else { addConcept(input, pivot, start); } break; } } return concepts; } } exports.Parser = Parser;