UNPKG

string-punctuation-tokenizer

Version:

Small library that provides functions to tokenize a string into an array of words with or without punctuation

239 lines (192 loc) 23.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.classifyTokens = exports.tokenize = exports.tokenizeOrigLang = exports.number_ = exports.greedyNumber = exports.number = exports.whitespace = exports.punctuation = exports.origGreedyWord = exports.greedyWord = exports.origWord = exports.word = exports._greedyNumber = exports._origGreedyWord = exports._greedyWord = exports._origWordOrNumber = exports._wordOrNumber = exports._number = exports._origWord = exports._word = void 0; var _xregexp = _interopRequireDefault(require("xregexp")); var _occurrences2 = require("./occurrences"); var _normalizers = require("./normalizers"); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; } function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; } function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } // constants var _word = "[\\pL\\pM\\u200D\\u2060]+"; // TRICKY: original languages do not use single quotes so u2019 is considered part of a word exports._word = _word; var _origWord = "[\\pL\\pM\\u200D\\u2060\\u2019]+"; exports._origWord = _origWord; var _number = '[\\pN\\pNd\\pNl\\pNo]+'; exports._number = _number; var _wordOrNumber = '(' + _word + '|' + _number + ')'; exports._wordOrNumber = _wordOrNumber; var _origWordOrNumber = '(' + _origWord + '|' + _number + ')'; exports._origWordOrNumber = _origWordOrNumber; var _greedyWord = '(' + _wordOrNumber + '([-\'’]' + _word + ')+|' + _word + '’?)'; exports._greedyWord = _greedyWord; var _origGreedyWord = '(' + _origWordOrNumber + '([-\'’]' + _origWord + ')+|' + _origWord + '’?)'; exports._origGreedyWord = _origGreedyWord; var _greedyNumber = '(' + _number + '([:.,]?' + _number + ')+|' + _number + ')'; exports._greedyNumber = _greedyNumber; var word = (0, _xregexp["default"])(_word, ''); exports.word = word; var origWord = (0, _xregexp["default"])(_origWord, ''); exports.origWord = origWord; var greedyWord = (0, _xregexp["default"])(_greedyWord, ''); exports.greedyWord = greedyWord; var origGreedyWord = (0, _xregexp["default"])(_origGreedyWord, ''); exports.origGreedyWord = origGreedyWord; var punctuation = (0, _xregexp["default"])('(^\\p{P}|[<>]{2})', ''); exports.punctuation = punctuation; var whitespace = /\s+/; exports.whitespace = whitespace; var number = (0, _xregexp["default"])(_number); exports.number = number; var greedyNumber = (0, _xregexp["default"])(_greedyNumber); // /(\d+([:.,]?\d)+|\d+)/; exports.greedyNumber = greedyNumber; var number_ = (0, _xregexp["default"])(number); exports.number_ = number_; var tokenizeOrigLang = function tokenizeOrigLang(params) { return tokenize(_objectSpread({ parsers: { word: origWord, greedyWord: origGreedyWord, whitespace: whitespace, punctuation: punctuation, number: number } }, params)); }; /** * Tokenize a string into an array of words * @param {Object} params - string to be tokenized * @return {Array} - array of tokenized words/strings */ exports.tokenizeOrigLang = tokenizeOrigLang; var tokenize = function tokenize(_ref) { var _ref$text = _ref.text, text = _ref$text === void 0 ? '' : _ref$text, _ref$includeWords = _ref.includeWords, includeWords = _ref$includeWords === void 0 ? true : _ref$includeWords, _ref$includeNumbers = _ref.includeNumbers, includeNumbers = _ref$includeNumbers === void 0 ? true : _ref$includeNumbers, _ref$includePunctuati = _ref.includePunctuation, includePunctuation = _ref$includePunctuati === void 0 ? false : _ref$includePunctuati, _ref$includeWhitespac = _ref.includeWhitespace, includeWhitespace = _ref$includeWhitespac === void 0 ? false : _ref$includeWhitespac, _ref$includeUnknown = _ref.includeUnknown, includeUnknown = _ref$includeUnknown === void 0 ? false : _ref$includeUnknown, _ref$greedy = _ref.greedy, greedy = _ref$greedy === void 0 ? false : _ref$greedy, _ref$verbose = _ref.verbose, verbose = _ref$verbose === void 0 ? false : _ref$verbose, _ref$occurrences = _ref.occurrences, occurrences = _ref$occurrences === void 0 ? false : _ref$occurrences, _ref$parsers = _ref.parsers, parsers = _ref$parsers === void 0 ? { word: word, greedyWord: greedyWord, whitespace: whitespace, punctuation: punctuation, number: number } : _ref$parsers, _ref$normalize = _ref.normalize, normalize = _ref$normalize === void 0 ? false : _ref$normalize, _ref$normalizations = _ref.normalizations, normalizations = _ref$normalizations === void 0 ? null : _ref$normalizations; var string = text.slice(0); if (normalize) string = (0, _normalizers.normalizer)(string); if (normalize && normalizations) { string = (0, _normalizers.normalizerDestructive)(string, normalizations); } var greedyParsers = _objectSpread({}, parsers, { word: parsers.greedyWord, number: greedyNumber }); var _parsers = greedy ? greedyParsers : parsers; delete _parsers.greedyWord; var tokens = classifyTokens(string, _parsers, 'unknown'); var types = []; if (includeWords) types.push('word'); if (includeNumbers) types.push('number'); if (includeWhitespace) types.push('whitespace'); if (includePunctuation) types.push('punctuation'); if (includeUnknown) types.push('unknown'); tokens = tokens.filter(function (token) { return types.includes(token.type); }); if (occurrences) { tokens = tokens.map(function (token, index) { var _occurrences = (0, _occurrences2.occurrencesInTokens)(tokens, token.token); var _occurrence = (0, _occurrences2.occurrenceInTokens)(tokens, index, token.token); return _objectSpread({}, token, { occurrence: _occurrence, occurrences: _occurrences }); }); } if (verbose) { tokens = tokens.map(function (token) { delete token.matches; return token; }); } else { tokens = tokens.map(function (token) { return token.token; }); } return tokens; }; /** * Tiny tokenizer - https://gist.github.com/borgar/451393 * @param {String} string - string to be tokenized * @param {Object} parsers - { word:/\w+/, whitespace:/\s+/, punctuation:/[^\w\s]/ } * @param {String} deftok - type to label tokens that are not classified with the above parsers * @return {Array} - array of objects => [{ token:"this", type:"word" },{ token:" ", type:"whitespace" }, Object { token:"is", type:"word" }, ... ] **/ exports.tokenize = tokenize; var classifyTokens = function classifyTokens(string, parsers, deftok) { string = !string ? '' : string; // if string is undefined, make it an empty string if (typeof string !== 'string') { throw new Error("tokenizer.tokenize() string is not String: ".concat(string)); } var m; var r; var t; var tokens = []; while (string) { t = null; m = string.length; var key = void 0; for (key in parsers) { if (Object.prototype.hasOwnProperty.call(parsers, key)) { r = parsers[key].exec(string); // try to choose the best match if there are several // where "best" is the closest to the current starting point if (r && r.index < m) { var token = r[0]; t = { token: token, type: key, matches: r.slice(1) }; m = r.index; } } } if (m) { // there is text between last token and currently // matched token - push that out as default or "unknown" tokens.push({ token: string.substr(0, m), type: deftok || 'unknown' }); } if (t) { // push current token onto sequence tokens.push(t); } string = string.substr(m + (t ? t.token.length : 0)); } return tokens; }; exports.classifyTokens = classifyTokens; //# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJzb3VyY2VzIjpbIi4uL3NyYy90b2tlbml6ZXJzLmpzIl0sIm5hbWVzIjpbIl93b3JkIiwiX29yaWdXb3JkIiwiX251bWJlciIsIl93b3JkT3JOdW1iZXIiLCJfb3JpZ1dvcmRPck51bWJlciIsIl9ncmVlZHlXb3JkIiwiX29yaWdHcmVlZHlXb3JkIiwiX2dyZWVkeU51bWJlciIsIndvcmQiLCJvcmlnV29yZCIsImdyZWVkeVdvcmQiLCJvcmlnR3JlZWR5V29yZCIsInB1bmN0dWF0aW9uIiwid2hpdGVzcGFjZSIsIm51bWJlciIsImdyZWVkeU51bWJlciIsIm51bWJlcl8iLCJ0b2tlbml6ZU9yaWdMYW5nIiwicGFyYW1zIiwidG9rZW5pemUiLCJwYXJzZXJzIiwidGV4dCIsImluY2x1ZGVXb3JkcyIsImluY2x1ZGVOdW1iZXJzIiwiaW5jbHVkZVB1bmN0dWF0aW9uIiwiaW5jbHVkZVdoaXRlc3BhY2UiLCJpbmNsdWRlVW5rbm93biIsImdyZWVkeSIsInZlcmJvc2UiLCJvY2N1cnJlbmNlcyIsIm5vcm1hbGl6ZSIsIm5vcm1hbGl6YXRpb25zIiwic3RyaW5nIiwic2xpY2UiLCJncmVlZHlQYXJzZXJzIiwiX3BhcnNlcnMiLCJ0b2tlbnMiLCJjbGFzc2lmeVRva2VucyIsInR5cGVzIiwicHVzaCIsImZpbHRlciIsInRva2VuIiwiaW5jbHVkZXMiLCJ0eXBlIiwibWFwIiwiaW5kZXgiLCJfb2NjdXJyZW5jZXMiLCJfb2NjdXJyZW5jZSIsIm9jY3VycmVuY2UiLCJtYXRjaGVzIiwiZGVmdG9rIiwiRXJyb3IiLCJtIiwiciIsInQiLCJsZW5ndGgiLCJrZXkiLCJPYmplY3QiLCJwcm90b3R5cGUiLCJoYXNPd25Qcm9wZXJ0eSIsImNhbGwiLCJleGVjIiwic3Vic3RyIl0sIm1hcHBpbmdzIjoiOzs7Ozs7O0FBQUE7O0FBQ0E7O0FBQ0E7Ozs7Ozs7Ozs7QUFFQTtBQUNPLElBQU1BLEtBQUssR0FBRywyQkFBZCxDLENBQ1A7OztBQUNPLElBQU1DLFNBQVMsR0FBRyxrQ0FBbEI7O0FBQ0EsSUFBTUMsT0FBTyxHQUFHLHdCQUFoQjs7O0FBQ0EsSUFBTUMsYUFBYSxHQUFHLE1BQU1ILEtBQU4sR0FBYyxHQUFkLEdBQW9CRSxPQUFwQixHQUE4QixHQUFwRDs7OztBQUNBLElBQU1FLGlCQUFpQixHQUFHLE1BQU1ILFNBQU4sR0FBa0IsR0FBbEIsR0FBd0JDLE9BQXhCLEdBQWtDLEdBQTVEOzs7O0FBQ0EsSUFBTUcsV0FBVyxHQUFHLE1BQU1GLGFBQU4sR0FBc0IsU0FBdEIsR0FBa0NILEtBQWxDLEdBQTBDLEtBQTFDLEdBQWtEQSxLQUFsRCxHQUEwRCxLQUE5RTs7OztBQUNBLElBQU1NLGVBQWUsR0FBRyxNQUFNRixpQkFBTixHQUEwQixTQUExQixHQUFzQ0gsU0FBdEMsR0FBa0QsS0FBbEQsR0FBMERBLFNBQTFELEdBQXNFLEtBQTlGOzs7O0FBQ0EsSUFBTU0sYUFBYSxHQUFHLE1BQU1MLE9BQU4sR0FBZ0IsU0FBaEIsR0FBNEJBLE9BQTVCLEdBQXNDLEtBQXRDLEdBQThDQSxPQUE5QyxHQUF3RCxHQUE5RTs7O0FBQ0EsSUFBTU0sSUFBSSxHQUFHLHlCQUFRUixLQUFSLEVBQWUsRUFBZixDQUFiOztBQUNBLElBQU1TLFFBQVEsR0FBRyx5QkFBUVIsU0FBUixFQUFtQixFQUFuQixDQUFqQjs7QUFDQSxJQUFNUyxVQUFVLEdBQUcseUJBQVFMLFdBQVIsRUFBcUIsRUFBckIsQ0FBbkI7O0FBQ0EsSUFBTU0sY0FBYyxHQUFHLHlCQUFRTCxlQUFSLEVBQXlCLEVBQXpCLENBQXZCOztBQUNBLElBQU1NLFdBQVcsR0FBRyx5QkFBUSxtQkFBUixFQUE2QixFQUE3QixDQUFwQjs7QUFDQSxJQUFNQyxVQUFVLEdBQUcsS0FBbkI7O0FBQ0EsSUFBTUMsTUFBTSxHQUFHLHlCQUFRWixPQUFSLENBQWY7O0FBQ0EsSUFBTWEsWUFBWSxHQUFHLHlCQUFRUixhQUFSLENBQXJCLEMsQ0FBNkM7OztBQUM3QyxJQUFNUyxPQUFPLEdBQUcseUJBQVFGLE1BQVIsQ0FBaEI7OztBQUdBLElBQU1HLGdCQUFnQixHQUFHLFNBQW5CQSxnQkFBbUIsQ0FBQ0MsTUFBRDtBQUFBLFNBQVlDLFFBQVE7QUFDbERDLElBQUFBLE9BQU8sRUFBRTtBQUNQWixNQUFBQSxJQUFJLEVBQUVDLFFBREM7QUFFUEMsTUFBQUEsVUFBVSxFQUFFQyxjQUZMO0FBR1BFLE1BQUFBLFVBQVUsRUFBVkEsVUFITztBQUlQRCxNQUFBQSxXQUFXLEVBQVhBLFdBSk87QUFLUEUsTUFBQUEsTUFBTSxFQUFOQTtBQUxPO0FBRHlDLEtBUS9DSSxNQVIrQyxFQUFwQjtBQUFBLENBQXpCO0FBV1A7Ozs7Ozs7OztBQUtPLElBQU1DLFFBQVEsR0FBRyxTQUFYQSxRQUFXLE9BYWxCO0FBQUEsdUJBWkpFLElBWUk7QUFBQSxNQVpKQSxJQVlJLDBCQVpHLEVBWUg7QUFBQSwrQkFYSkMsWUFXSTtBQUFBLE1BWEpBLFlBV0ksa0NBWFcsSUFXWDtBQUFBLGlDQVZKQyxjQVVJO0FBQUEsTUFWSkEsY0FVSSxvQ0FWYSxJQVViO0FBQUEsbUNBVEpDLGtCQVNJO0FBQUEsTUFUSkEsa0JBU0ksc0NBVGlCLEtBU2pCO0FBQUEsbUNBUkpDLGlCQVFJO0FBQUEsTUFSSkEsaUJBUUksc0NBUmdCLEtBUWhCO0FBQUEsaUNBUEpDLGNBT0k7QUFBQSxNQVBKQSxjQU9JLG9DQVBhLEtBT2I7QUFBQSx5QkFOSkMsTUFNSTtBQUFBLE1BTkpBLE1BTUksNEJBTkssS0FNTDtBQUFBLDBCQUxKQyxPQUtJO0FBQUEsTUFMSkEsT0FLSSw2QkFMTSxLQUtOO0FBQUEsOEJBSkpDLFdBSUk7QUFBQSxNQUpKQSxXQUlJLGlDQUpVLEtBSVY7QUFBQSwwQkFISlQsT0FHSTtBQUFBLE1BSEpBLE9BR0ksNkJBSE07QUFBQ1osSUFBQUEsSUFBSSxFQUFKQSxJQUFEO0FBQU9FLElBQUFBLFVBQVUsRUFBVkEsVUFBUDtBQUFtQkcsSUFBQUEsVUFBVSxFQUFWQSxVQUFuQjtBQUErQkQsSUFBQUEsV0FBVyxFQUFYQSxXQUEvQjtBQUE0Q0UsSUFBQUEsTUFBTSxFQUFOQTtBQUE1QyxHQUdOO0FBQUEsNEJBRkpnQixTQUVJO0FBQUEsTUFGSkEsU0FFSSwrQkFGUSxLQUVSO0FBQUEsaUNBREpDLGNBQ0k7QUFBQSxNQURKQSxjQUNJLG9DQURhLElBQ2I7QUFDSixNQUFJQyxNQUFNLEdBQUdYLElBQUksQ0FBQ1ksS0FBTCxDQUFXLENBQVgsQ0FBYjtBQUNBLE1BQUlILFNBQUosRUFBZUUsTUFBTSxHQUFHLDZCQUFXQSxNQUFYLENBQVQ7O0FBQ2YsTUFBSUYsU0FBUyxJQUFJQyxjQUFqQixFQUFpQztBQUMvQkMsSUFBQUEsTUFBTSxHQUFHLHdDQUFzQkEsTUFBdEIsRUFBOEJELGNBQTlCLENBQVQ7QUFDRDs7QUFFRCxNQUFNRyxhQUFhLHFCQUNkZCxPQURjO0FBRWpCWixJQUFBQSxJQUFJLEVBQUVZLE9BQU8sQ0FBQ1YsVUFGRztBQUdqQkksSUFBQUEsTUFBTSxFQUFFQztBQUhTLElBQW5COztBQUtBLE1BQU1vQixRQUFRLEdBQUdSLE1BQU0sR0FBR08sYUFBSCxHQUFtQmQsT0FBMUM7O0FBQ0EsU0FBT2UsUUFBUSxDQUFDekIsVUFBaEI7QUFDQSxNQUFJMEIsTUFBTSxHQUFHQyxjQUFjLENBQUNMLE1BQUQsRUFBU0csUUFBVCxFQUFtQixTQUFuQixDQUEzQjtBQUNBLE1BQU1HLEtBQUssR0FBRyxFQUFkO0FBQ0EsTUFBSWhCLFlBQUosRUFBa0JnQixLQUFLLENBQUNDLElBQU4sQ0FBVyxNQUFYO0FBQ2xCLE1BQUloQixjQUFKLEVBQW9CZSxLQUFLLENBQUNDLElBQU4sQ0FBVyxRQUFYO0FBQ3BCLE1BQUlkLGlCQUFKLEVBQXVCYSxLQUFLLENBQUNDLElBQU4sQ0FBVyxZQUFYO0FBQ3ZCLE1BQUlmLGtCQUFKLEVBQXdCYyxLQUFLLENBQUNDLElBQU4sQ0FBVyxhQUFYO0FBQ3hCLE1BQUliLGNBQUosRUFBb0JZLEtBQUssQ0FBQ0MsSUFBTixDQUFXLFNBQVg7QUFDcEJILEVBQUFBLE1BQU0sR0FBR0EsTUFBTSxDQUFDSSxNQUFQLENBQWMsVUFBQ0MsS0FBRDtBQUFBLFdBQVdILEtBQUssQ0FBQ0ksUUFBTixDQUFlRCxLQUFLLENBQUNFLElBQXJCLENBQVg7QUFBQSxHQUFkLENBQVQ7O0FBQ0EsTUFBSWQsV0FBSixFQUFpQjtBQUNmTyxJQUFBQSxNQUFNLEdBQUdBLE1BQU0sQ0FBQ1EsR0FBUCxDQUFXLFVBQUNILEtBQUQsRUFBUUksS0FBUixFQUFrQjtBQUNwQyxVQUFNQyxZQUFZLEdBQUcsdUNBQW9CVixNQUFwQixFQUE0QkssS0FBSyxDQUFDQSxLQUFsQyxDQUFyQjs7QUFDQSxVQUFNTSxXQUFXLEdBQUcsc0NBQW1CWCxNQUFuQixFQUEyQlMsS0FBM0IsRUFBa0NKLEtBQUssQ0FBQ0EsS0FBeEMsQ0FBcEI7O0FBQ0EsK0JBQVdBLEtBQVg7QUFBa0JPLFFBQUFBLFVBQVUsRUFBRUQsV0FBOUI7QUFBMkNsQixRQUFBQSxXQUFXLEVBQUVpQjtBQUF4RDtBQUNELEtBSlEsQ0FBVDtBQUtEOztBQUNELE1BQUlsQixPQUFKLEVBQWE7QUFDWFEsSUFBQUEsTUFBTSxHQUFHQSxNQUFNLENBQUNRLEdBQVAsQ0FBVyxVQUFDSCxLQUFELEVBQVc7QUFDN0IsYUFBT0EsS0FBSyxDQUFDUSxPQUFiO0FBQ0EsYUFBT1IsS0FBUDtBQUNELEtBSFEsQ0FBVDtBQUlELEdBTEQsTUFLTztBQUNMTCxJQUFBQSxNQUFNLEdBQUdBLE1BQU0sQ0FBQ1EsR0FBUCxDQUFXLFVBQUNILEtBQUQ7QUFBQSxhQUFXQSxLQUFLLENBQUNBLEtBQWpCO0FBQUEsS0FBWCxDQUFUO0FBQ0Q7O0FBQ0QsU0FBT0wsTUFBUDtBQUNELENBbkRNO0FBcURQOzs7Ozs7Ozs7OztBQU9PLElBQU1DLGNBQWMsR0FBRyxTQUFqQkEsY0FBaUIsQ0FBQ0wsTUFBRCxFQUFTWixPQUFULEVBQWtCOEIsTUFBbEIsRUFBNkI7QUFDekRsQixFQUFBQSxNQUFNLEdBQUksQ0FBQ0EsTUFBRixHQUFZLEVBQVosR0FBaUJBLE1BQTFCLENBRHlELENBQ3ZCOztBQUNsQyxNQUFJLE9BQU9BLE1BQVAsS0FBa0IsUUFBdEIsRUFBZ0M7QUFDOUIsVUFBTSxJQUFJbUIsS0FBSixzREFBd0RuQixNQUF4RCxFQUFOO0FBQ0Q7O0FBQ0QsTUFBSW9CLENBQUo7QUFDQSxNQUFJQyxDQUFKO0FBQ0EsTUFBSUMsQ0FBSjtBQUNBLE1BQUlsQixNQUFNLEdBQUcsRUFBYjs7QUFDQSxTQUFPSixNQUFQLEVBQWU7QUFDYnNCLElBQUFBLENBQUMsR0FBRyxJQUFKO0FBQ0FGLElBQUFBLENBQUMsR0FBR3BCLE1BQU0sQ0FBQ3VCLE1BQVg7QUFDQSxRQUFJQyxHQUFHLFNBQVA7O0FBQ0EsU0FBS0EsR0FBTCxJQUFZcEMsT0FBWixFQUFxQjtBQUNuQixVQUFJcUMsTUFBTSxDQUFDQyxTQUFQLENBQWlCQyxjQUFqQixDQUFnQ0MsSUFBaEMsQ0FBcUN4QyxPQUFyQyxFQUE4Q29DLEdBQTlDLENBQUosRUFBd0Q7QUFDdERILFFBQUFBLENBQUMsR0FBR2pDLE9BQU8sQ0FBQ29DLEdBQUQsQ0FBUCxDQUFhSyxJQUFiLENBQWtCN0IsTUFBbEIsQ0FBSixDQURzRCxDQUV0RDtBQUNBOztBQUNBLFlBQUlxQixDQUFDLElBQUtBLENBQUMsQ0FBQ1IsS0FBRixHQUFVTyxDQUFwQixFQUF3QjtBQUN0QixjQUFJWCxLQUFLLEdBQUdZLENBQUMsQ0FBQyxDQUFELENBQWI7QUFDQUMsVUFBQUEsQ0FBQyxHQUFHO0FBQ0ZiLFlBQUFBLEtBQUssRUFBTEEsS0FERTtBQUVGRSxZQUFBQSxJQUFJLEVBQUVhLEdBRko7QUFHRlAsWUFBQUEsT0FBTyxFQUFFSSxDQUFDLENBQUNwQixLQUFGLENBQVEsQ0FBUjtBQUhQLFdBQUo7QUFLQW1CLFVBQUFBLENBQUMsR0FBR0MsQ0FBQyxDQUFDUixLQUFOO0FBQ0Q7QUFDRjtBQUNGOztBQUNELFFBQUlPLENBQUosRUFBTztBQUNMO0FBQ0E7QUFDQWhCLE1BQUFBLE1BQU0sQ0FBQ0csSUFBUCxDQUFZO0FBQ1ZFLFFBQUFBLEtBQUssRUFBRVQsTUFBTSxDQUFDOEIsTUFBUCxDQUFjLENBQWQsRUFBaUJWLENBQWpCLENBREc7QUFFVlQsUUFBQUEsSUFBSSxFQUFFTyxNQUFNLElBQUk7QUFGTixPQUFaO0FBSUQ7O0FBQ0QsUUFBSUksQ0FBSixFQUFPO0FBQ0w7QUFDQWxCLE1BQUFBLE1BQU0sQ0FBQ0csSUFBUCxDQUFZZSxDQUFaO0FBQ0Q7O0FBQ0R0QixJQUFBQSxNQUFNLEdBQUdBLE1BQU0sQ0FBQzhCLE1BQVAsQ0FBY1YsQ0FBQyxJQUFJRSxDQUFDLEdBQUdBLENBQUMsQ0FBQ2IsS0FBRixDQUFRYyxNQUFYLEdBQW9CLENBQXpCLENBQWYsQ0FBVDtBQUNEOztBQUNELFNBQU9uQixNQUFQO0FBQ0QsQ0E1Q00iLCJzb3VyY2VzQ29udGVudCI6WyJpbXBvcnQgeFJlZ0V4cCBmcm9tICd4cmVnZXhwJztcbmltcG9ydCB7b2NjdXJyZW5jZUluVG9rZW5zLCBvY2N1cnJlbmNlc0luVG9rZW5zfSBmcm9tICcuL29jY3VycmVuY2VzJztcbmltcG9ydCB7bm9ybWFsaXplciwgbm9ybWFsaXplckRlc3RydWN0aXZlfSBmcm9tICcuL25vcm1hbGl6ZXJzJztcblxuLy8gY29uc3RhbnRzXG5leHBvcnQgY29uc3QgX3dvcmQgPSAnW1xcXFxwTFxcXFxwTVxcXFx1MjAwRFxcXFx1MjA2MF0rJztcbi8vIFRSSUNLWTogb3JpZ2luYWwgbGFuZ3VhZ2VzIGRvIG5vdCB1c2Ugc2luZ2xlIHF1b3RlcyBzbyB1MjAxOSBpcyBjb25zaWRlcmVkIHBhcnQgb2YgYSB3b3JkXG5leHBvcnQgY29uc3QgX29yaWdXb3JkID0gJ1tcXFxccExcXFxccE1cXFxcdTIwMERcXFxcdTIwNjBcXFxcdTIwMTldKyc7XG5leHBvcnQgY29uc3QgX251bWJlciA9ICdbXFxcXHBOXFxcXHBOZFxcXFxwTmxcXFxccE5vXSsnO1xuZXhwb3J0IGNvbnN0IF93b3JkT3JOdW1iZXIgPSAnKCcgKyBfd29yZCArICd8JyArIF9udW1iZXIgKyAnKSc7XG5leHBvcnQgY29uc3QgX29yaWdXb3JkT3JOdW1iZXIgPSAnKCcgKyBfb3JpZ1dvcmQgKyAnfCcgKyBfbnVtYmVyICsgJyknO1xuZXhwb3J0IGNvbnN0IF9ncmVlZHlXb3JkID0gJygnICsgX3dvcmRPck51bWJlciArICcoWy1cXCfigJldJyArIF93b3JkICsgJykrfCcgKyBfd29yZCArICfigJk/KSc7XG5leHBvcnQgY29uc3QgX29yaWdHcmVlZHlXb3JkID0gJygnICsgX29yaWdXb3JkT3JOdW1iZXIgKyAnKFstXFwn4oCZXScgKyBfb3JpZ1dvcmQgKyAnKSt8JyArIF9vcmlnV29yZCArICfigJk/KSc7XG5leHBvcnQgY29uc3QgX2dyZWVkeU51bWJlciA9ICcoJyArIF9udW1iZXIgKyAnKFs6LixdPycgKyBfbnVtYmVyICsgJykrfCcgKyBfbnVtYmVyICsgJyknO1xuZXhwb3J0IGNvbnN0IHdvcmQgPSB4UmVnRXhwKF93b3JkLCAnJyk7XG5leHBvcnQgY29uc3Qgb3JpZ1dvcmQgPSB4UmVnRXhwKF9vcmlnV29yZCwgJycpO1xuZXhwb3J0IGNvbnN0IGdyZWVkeVdvcmQgPSB4UmVnRXhwKF9ncmVlZHlXb3JkLCAnJyk7XG5leHBvcnQgY29uc3Qgb3JpZ0dyZWVkeVdvcmQgPSB4UmVnRXhwKF9vcmlnR3JlZWR5V29yZCwgJycpO1xuZXhwb3J0IGNvbnN0IHB1bmN0dWF0aW9uID0geFJlZ0V4cCgnKF5cXFxccHtQfXxbPD5dezJ9KScsICcnKTtcbmV4cG9ydCBjb25zdCB3aGl0ZXNwYWNlID0gL1xccysvO1xuZXhwb3J0IGNvbnN0IG51bWJlciA9IHhSZWdFeHAoX251bWJlcik7XG5leHBvcnQgY29uc3QgZ3JlZWR5TnVtYmVyID0geFJlZ0V4cChfZ3JlZWR5TnVtYmVyKTsgLy8gIC8oXFxkKyhbOi4sXT9cXGQpK3xcXGQrKS87XG5leHBvcnQgY29uc3QgbnVtYmVyXyA9IHhSZWdFeHAobnVtYmVyKTtcblxuXG5leHBvcnQgY29uc3QgdG9rZW5pemVPcmlnTGFuZyA9IChwYXJhbXMpID0+IHRva2VuaXplKHtcbiAgcGFyc2Vyczoge1xuICAgIHdvcmQ6IG9yaWdXb3JkLFxuICAgIGdyZWVkeVdvcmQ6IG9yaWdHcmVlZHlXb3JkLFxuICAgIHdoaXRlc3BhY2UsXG4gICAgcHVuY3R1YXRpb24sXG4gICAgbnVtYmVyLFxuICB9LFxuICAuLi5wYXJhbXMsXG59KTtcblxuLyoqXG4gKiBUb2tlbml6ZSBhIHN0cmluZyBpbnRvIGFuIGFycmF5IG9mIHdvcmRzXG4gKiBAcGFyYW0ge09iamVjdH0gcGFyYW1zIC0gc3RyaW5nIHRvIGJlIHRva2VuaXplZFxuICogQHJldHVybiB7QXJyYXl9IC0gYXJyYXkgb2YgdG9rZW5pemVkIHdvcmRzL3N0cmluZ3NcbiAqL1xuZXhwb3J0IGNvbnN0IHRva2VuaXplID0gKHtcbiAgdGV4dCA9ICcnLFxuICBpbmNsdWRlV29yZHMgPSB0cnVlLFxuICBpbmNsdWRlTnVtYmVycyA9IHRydWUsXG4gIGluY2x1ZGVQdW5jdHVhdGlvbiA9IGZhbHNlLFxuICBpbmNsdWRlV2hpdGVzcGFjZSA9IGZhbHNlLFxuICBpbmNsdWRlVW5rbm93biA9IGZhbHNlLFxuICBncmVlZHkgPSBmYWxzZSxcbiAgdmVyYm9zZSA9IGZhbHNlLFxuICBvY2N1cnJlbmNlcyA9IGZhbHNlLFxuICBwYXJzZXJzID0ge3dvcmQsIGdyZWVkeVdvcmQsIHdoaXRlc3BhY2UsIHB1bmN0dWF0aW9uLCBudW1iZXJ9LFxuICBub3JtYWxpemUgPSBmYWxzZSxcbiAgbm9ybWFsaXphdGlvbnMgPSBudWxsLFxufSkgPT4ge1xuICBsZXQgc3RyaW5nID0gdGV4dC5zbGljZSgwKTtcbiAgaWYgKG5vcm1hbGl6ZSkgc3RyaW5nID0gbm9ybWFsaXplcihzdHJpbmcpO1xuICBpZiAobm9ybWFsaXplICYmIG5vcm1hbGl6YXRpb25zKSB7XG4gICAgc3RyaW5nID0gbm9ybWFsaXplckRlc3RydWN0aXZlKHN0cmluZywgbm9ybWFsaXphdGlvbnMpO1xuICB9XG5cbiAgY29uc3QgZ3JlZWR5UGFyc2VycyA9IHtcbiAgICAuLi5wYXJzZXJzLFxuICAgIHdvcmQ6IHBhcnNlcnMuZ3JlZWR5V29yZCxcbiAgICBudW1iZXI6IGdyZWVkeU51bWJlcixcbiAgfTtcbiAgY29uc3QgX3BhcnNlcnMgPSBncmVlZHkgPyBncmVlZHlQYXJzZXJzIDogcGFyc2VycztcbiAgZGVsZXRlIF9wYXJzZXJzLmdyZWVkeVdvcmQ7XG4gIGxldCB0b2tlbnMgPSBjbGFzc2lmeVRva2VucyhzdHJpbmcsIF9wYXJzZXJzLCAndW5rbm93bicpO1xuICBjb25zdCB0eXBlcyA9IFtdO1xuICBpZiAoaW5jbHVkZVdvcmRzKSB0eXBlcy5wdXNoKCd3b3JkJyk7XG4gIGlmIChpbmNsdWRlTnVtYmVycykgdHlwZXMucHVzaCgnbnVtYmVyJyk7XG4gIGlmIChpbmNsdWRlV2hpdGVzcGFjZSkgdHlwZXMucHVzaCgnd2hpdGVzcGFjZScpO1xuICBpZiAoaW5jbHVkZVB1bmN0dWF0aW9uKSB0eXBlcy5wdXNoKCdwdW5jdHVhdGlvbicpO1xuICBpZiAoaW5jbHVkZVVua25vd24pIHR5cGVzLnB1c2goJ3Vua25vd24nKTtcbiAgdG9rZW5zID0gdG9rZW5zLmZpbHRlcigodG9rZW4pID0+IHR5cGVzLmluY2x1ZGVzKHRva2VuLnR5cGUpKTtcbiAgaWYgKG9jY3VycmVuY2VzKSB7XG4gICAgdG9rZW5zID0gdG9rZW5zLm1hcCgodG9rZW4sIGluZGV4KSA9PiB7XG4gICAgICBjb25zdCBfb2NjdXJyZW5jZXMgPSBvY2N1cnJlbmNlc0luVG9rZW5zKHRva2VucywgdG9rZW4udG9rZW4pO1xuICAgICAgY29uc3QgX29jY3VycmVuY2UgPSBvY2N1cnJlbmNlSW5Ub2tlbnModG9rZW5zLCBpbmRleCwgdG9rZW4udG9rZW4pO1xuICAgICAgcmV0dXJuIHsuLi50b2tlbiwgb2NjdXJyZW5jZTogX29jY3VycmVuY2UsIG9jY3VycmVuY2VzOiBfb2NjdXJyZW5jZXN9O1xuICAgIH0pO1xuICB9XG4gIGlmICh2ZXJib3NlKSB7XG4gICAgdG9rZW5zID0gdG9rZW5zLm1hcCgodG9rZW4pID0+IHtcbiAgICAgIGRlbGV0ZSB0b2tlbi5tYXRjaGVzO1xuICAgICAgcmV0dXJuIHRva2VuO1xuICAgIH0pO1xuICB9IGVsc2Uge1xuICAgIHRva2VucyA9IHRva2Vucy5tYXAoKHRva2VuKSA9PiB0b2tlbi50b2tlbik7XG4gIH1cbiAgcmV0dXJuIHRva2Vucztcbn07XG5cbi8qKlxuICogVGlueSB0b2tlbml6ZXIgLSBodHRwczovL2dpc3QuZ2l0aHViLmNvbS9ib3JnYXIvNDUxMzkzXG4gKiBAcGFyYW0ge1N0cmluZ30gc3RyaW5nIC0gc3RyaW5nIHRvIGJlIHRva2VuaXplZFxuICogQHBhcmFtIHtPYmplY3R9IHBhcnNlcnMgLSB7IHdvcmQ6L1xcdysvLCB3aGl0ZXNwYWNlOi9cXHMrLywgcHVuY3R1YXRpb246L1teXFx3XFxzXS8gfVxuICogQHBhcmFtIHtTdHJpbmd9IGRlZnRvayAtIHR5cGUgdG8gbGFiZWwgdG9rZW5zIHRoYXQgYXJlIG5vdCBjbGFzc2lmaWVkIHdpdGggdGhlIGFib3ZlIHBhcnNlcnNcbiAqIEByZXR1cm4ge0FycmF5fSAtIGFycmF5IG9mIG9iamVjdHMgPT4gW3sgdG9rZW46XCJ0aGlzXCIsIHR5cGU6XCJ3b3JkXCIgfSx7IHRva2VuOlwiIFwiLCB0eXBlOlwid2hpdGVzcGFjZVwiIH0sIE9iamVjdCB7IHRva2VuOlwiaXNcIiwgdHlwZTpcIndvcmRcIiB9LCAuLi4gXVxuKiovXG5leHBvcnQgY29uc3QgY2xhc3NpZnlUb2tlbnMgPSAoc3RyaW5nLCBwYXJzZXJzLCBkZWZ0b2spID0+IHtcbiAgc3RyaW5nID0gKCFzdHJpbmcpID8gJycgOiBzdHJpbmc7IC8vIGlmIHN0cmluZyBpcyB1bmRlZmluZWQsIG1ha2UgaXQgYW4gZW1wdHkgc3RyaW5nXG4gIGlmICh0eXBlb2Ygc3RyaW5nICE9PSAnc3RyaW5nJykge1xuICAgIHRocm93IG5ldyBFcnJvcihgdG9rZW5pemVyLnRva2VuaXplKCkgc3RyaW5nIGlzIG5vdCBTdHJpbmc6ICR7c3RyaW5nfWApO1xuICB9XG4gIGxldCBtO1xuICBsZXQgcjtcbiAgbGV0IHQ7XG4gIGxldCB0b2tlbnMgPSBbXTtcbiAgd2hpbGUgKHN0cmluZykge1xuICAgIHQgPSBudWxsO1xuICAgIG0gPSBzdHJpbmcubGVuZ3RoO1xuICAgIGxldCBrZXk7XG4gICAgZm9yIChrZXkgaW4gcGFyc2Vycykge1xuICAgICAgaWYgKE9iamVjdC5wcm90b3R5cGUuaGFzT3duUHJvcGVydHkuY2FsbChwYXJzZXJzLCBrZXkpKSB7XG4gICAgICAgIHIgPSBwYXJzZXJzW2tleV0uZXhlYyhzdHJpbmcpO1xuICAgICAgICAvLyB0cnkgdG8gY2hvb3NlIHRoZSBiZXN0IG1hdGNoIGlmIHRoZXJlIGFyZSBzZXZlcmFsXG4gICAgICAgIC8vIHdoZXJlIFwiYmVzdFwiIGlzIHRoZSBjbG9zZXN0IHRvIHRoZSBjdXJyZW50IHN0YXJ0aW5nIHBvaW50XG4gICAgICAgIGlmIChyICYmIChyLmluZGV4IDwgbSkpIHtcbiAgICAgICAgICBsZXQgdG9rZW4gPSByWzBdO1xuICAgICAgICAgIHQgPSB7XG4gICAgICAgICAgICB0b2tlbixcbiAgICAgICAgICAgIHR5cGU6IGtleSxcbiAgICAgICAgICAgIG1hdGNoZXM6IHIuc2xpY2UoMSksXG4gICAgICAgICAgfTtcbiAgICAgICAgICBtID0gci5pbmRleDtcbiAgICAgICAgfVxuICAgICAgfVxuICAgIH1cbiAgICBpZiAobSkge1xuICAgICAgLy8gdGhlcmUgaXMgdGV4dCBiZXR3ZWVuIGxhc3QgdG9rZW4gYW5kIGN1cnJlbnRseVxuICAgICAgLy8gbWF0Y2hlZCB0b2tlbiAtIHB1c2ggdGhhdCBvdXQgYXMgZGVmYXVsdCBvciBcInVua25vd25cIlxuICAgICAgdG9rZW5zLnB1c2goe1xuICAgICAgICB0b2tlbjogc3RyaW5nLnN1YnN0cigwLCBtKSxcbiAgICAgICAgdHlwZTogZGVmdG9rIHx8ICd1bmtub3duJyxcbiAgICAgIH0pO1xuICAgIH1cbiAgICBpZiAodCkge1xuICAgICAgLy8gcHVzaCBjdXJyZW50IHRva2VuIG9udG8gc2VxdWVuY2VcbiAgICAgIHRva2Vucy5wdXNoKHQpO1xuICAgIH1cbiAgICBzdHJpbmcgPSBzdHJpbmcuc3Vic3RyKG0gKyAodCA/IHQudG9rZW4ubGVuZ3RoIDogMCkpO1xuICB9XG4gIHJldHVybiB0b2tlbnM7XG59O1xuXG4iXX0=