UNPKG

@kermank/nldp

Version:

A modular date/time parser for converting natural language into dates and times

55 lines 1.47 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.tokenize = tokenize; const DEFAULT_OPTIONS = { preserveQuotes: false, normalizeSpaces: true, lowercaseTokens: true, }; /** * Tokenizes an input string into an array of tokens */ function tokenize(input, options = DEFAULT_OPTIONS) { if (!input) return []; let processed = input; // Normalize spaces if requested if (options.normalizeSpaces) { processed = processed.replace(/\s+/g, ' ').trim(); } // Convert to lowercase if requested if (options.lowercaseTokens) { processed = processed.toLowerCase(); } // Handle quoted strings const tokens = []; let currentToken = ''; let inQuotes = false; for (let i = 0; i < processed.length; i++) { const char = processed[i]; if (char === '"' || char === "'") { if (options.preserveQuotes) { currentToken += char; } inQuotes = !inQuotes; continue; } if (inQuotes) { currentToken += char; } else if (char === ' ') { if (currentToken) { tokens.push(currentToken); currentToken = ''; } } else { currentToken += char; } } if (currentToken) { tokens.push(currentToken); } return tokens; } //# sourceMappingURL=tokenizer.js.map