@kermank/nldp
Version:
A modular date/time parser for converting natural language into dates and times
55 lines • 1.47 kB
JavaScript
;
// Flag the CommonJS exports object with `__esModule: true`, the marker
// conventionally emitted for modules transpiled from ES module syntax.
Object.defineProperty(exports, "__esModule", { value: true });
// Expose the tokenizer. This works ahead of the definition because function
// declarations are hoisted.
exports.tokenize = tokenize;
/**
 * Defaults applied to every option the caller leaves out.
 * Frozen so the shared object can never be altered by accident.
 */
const DEFAULT_OPTIONS = Object.freeze({
    preserveQuotes: false,
    normalizeSpaces: true,
    lowercaseTokens: true,
});
/**
 * Reports whether `char` is an ASCII digit or a cased letter.
 *
 * @param {string|undefined} char - A single character, or undefined when the
 *   caller indexed past either end of the string.
 * @returns {boolean} True for digits and letters, false otherwise.
 */
function isWordChar(char) {
    if (char === undefined) {
        return false;
    }
    if (char >= '0' && char <= '9') {
        return true;
    }
    // A cased letter differs between its lower- and upper-case forms;
    // punctuation, whitespace and quote characters do not.
    return char.toLowerCase() !== char.toUpperCase();
}
/**
 * Tokenizes an input string into an array of tokens.
 *
 * Tokens are separated by single space characters. Text wrapped in a matching
 * pair of double or single quotes is kept together as one token, spaces
 * included. A quote that is never closed runs to the end of the input.
 *
 * @param {string} input - Text to split. Any falsy value yields an empty array.
 * @param {object} [options] - Optional overrides; omitted keys fall back to
 *   DEFAULT_OPTIONS.
 * @param {boolean} [options.preserveQuotes=false] - Keep quote and apostrophe
 *   characters in the emitted tokens instead of dropping them.
 * @param {boolean} [options.normalizeSpaces=true] - Collapse every whitespace
 *   run to one space and trim the ends before splitting. This happens before
 *   quotes are parsed, so it also affects whitespace inside quoted text.
 * @param {boolean} [options.lowercaseTokens=true] - Lowercase the input before
 *   splitting.
 * @returns {string[]} The tokens, in input order; empty tokens are never emitted.
 */
function tokenize(input, options = DEFAULT_OPTIONS) {
    if (!input) {
        return [];
    }
    // Merge rather than replace, so a partial options object (or null) keeps
    // the defaults for every key it does not mention.
    const settings = Object.assign({}, DEFAULT_OPTIONS, options);
    let processed = input;
    if (settings.normalizeSpaces) {
        processed = processed.replace(/\s+/g, ' ').trim();
    }
    if (settings.lowercaseTokens) {
        processed = processed.toLowerCase();
    }
    const tokens = [];
    let currentToken = '';
    // The quote character that opened the current quoted section, or null when
    // outside quotes. Remembering it lets `"it's"` stay one quoted section.
    let openQuote = null;
    for (let i = 0; i < processed.length; i++) {
        const char = processed[i];
        if (openQuote !== null) {
            if (char === openQuote) {
                if (settings.preserveQuotes) {
                    currentToken += char;
                }
                openQuote = null;
            }
            else {
                // Everything else, including spaces and the other quote
                // character, is literal while inside quotes.
                currentToken += char;
            }
            continue;
        }
        if (char === '"' || char === "'") {
            if (settings.preserveQuotes) {
                currentToken += char;
            }
            // A single quote squeezed between two word characters (o'clock,
            // don't) is an apostrophe and must not open a quoted section.
            const isApostrophe = char === "'"
                && isWordChar(processed[i - 1])
                && isWordChar(processed[i + 1]);
            if (!isApostrophe) {
                openQuote = char;
            }
            continue;
        }
        if (char === ' ') {
            if (currentToken) {
                tokens.push(currentToken);
                currentToken = '';
            }
            continue;
        }
        currentToken += char;
    }
    if (currentToken) {
        tokens.push(currentToken);
    }
    return tokens;
}
//# sourceMappingURL=tokenizer.js.map