UNPKG

@kermank/nldp

Version:

A modular date/time parser for converting natural language into dates and times

306 lines 12.6 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.resolveComponents = exports.resolveGroup = exports.groupCompatibleComponents = exports.hasOverlap = exports.sortComponents = void 0; /** * Sort components by: * 1. Span length (longer matches first) * 2. Confidence (higher confidence first) * 3. Position in text (earlier matches first) */ const sortComponents = (components) => { const sorted = [...components].sort((a, b) => { // First by span length const aLength = a.span.end - a.span.start; const bLength = b.span.end - b.span.start; if (aLength !== bLength) return bLength - aLength; // Then by confidence if (a.confidence !== b.confidence) return b.confidence - a.confidence; // Finally by position return a.span.start - b.span.start; }); return sorted; }; exports.sortComponents = sortComponents; /** * Check if two components have overlapping spans in the original text */ const hasOverlap = (a, b) => { const overlaps = !(a.span.end <= b.span.start || b.span.end <= a.span.start); return overlaps; }; exports.hasOverlap = hasOverlap; /** * Group components that can be combined together (non-overlapping spans and compatible types) */ const groupCompatibleComponents = (components) => { const groups = []; const sorted = (0, exports.sortComponents)(components); for (const component of sorted) { // Try to add to existing group let added = false; for (const group of groups) { const canAddToGroup = group.every(existing => !(0, exports.hasOverlap)(existing, component)); if (canAddToGroup) { group.push(component); added = true; break; } } // Create new group if couldn't add to existing if (!added) { groups.push([component]); } } return groups; }; exports.groupCompatibleComponents = groupCompatibleComponents; /** * Combine a date component with a time component */ const combineDateAndTime = (date, time) => { return date.set({ hour: time.hour, minute: time.minute, second: time.second, millisecond: 0 }); }; /** * Resolve a group of compatible components into a final parse result */ const resolveGroup = (components) => { var _a, _b, _c, _d, _e; // Sort components by type priority: date > time > range > modifier const sortedByType = [...components].sort((a, b) => { var _a, _b; const priority = { date: 4, time: 3, range: 2, modifier: 1 }; // If one component is a time and the other is a range with timeOfDay type, // prioritize the specific time if (a.type === 'time' && b.type === 'range' && ((_a = b.metadata) === null || _a === void 0 ? void 0 : _a.rangeType) === 'timeOfDay') { return -1; } if (b.type === 'time' && a.type === 'range' && ((_b = a.metadata) === null || _b === void 0 ? void 0 : _b.rangeType) === 'timeOfDay') { return 1; } return priority[b.type] - priority[a.type]; }); let baseDate = null; let baseTime = null; let dateRange = null; let timeRange = null; // Process each component for (const component of sortedByType) { switch (component.type) { case 'date': baseDate = component.value; break; case 'time': baseTime = component.value; break; case 'range': // Skip time-of-day ranges if we have a specific time if (((_a = component.metadata) === null || _a === void 0 ? void 0 : _a.rangeType) === 'timeOfDay' && baseTime) { continue; } const range = component.value; // If this is a date-based range (like next week), store it separately if (((_b = component.metadata) === null || _b === void 0 ? void 0 : _b.rangeType) === 'relativeWeek' || ((_c = component.metadata) === null || _c === void 0 ? void 0 : _c.rangeType) === 'ordinalWeek' || ((_d = component.metadata) === null || _d === void 0 ? void 0 : _d.dateType) === 'relative') { dateRange = range; } else if (((_e = component.metadata) === null || _e === void 0 ? void 0 : _e.rangeType) === 'timeOfDay') { // If this is a time-of-day range, preserve the hours but use the base date if (baseDate) { const start = combineDateAndTime(baseDate, range.start); const end = combineDateAndTime(baseDate, range.end); timeRange = { start, end }; } else { timeRange = range; } } else { // Assume it's a time range timeRange = range; } break; } } // Get the combined span from all components const span = { start: Math.min(...components.map(c => c.span.start)), end: Math.max(...components.map(c => c.span.end)) }; // Combine components into final result if (dateRange && timeRange) { // If we have both date range and time range, apply time to the date range's start const start = dateRange.start.set({ hour: timeRange.start.hour, minute: timeRange.start.minute, second: 0, millisecond: 0 }); const end = dateRange.end.set({ hour: timeRange.end.hour, minute: timeRange.end.minute, second: 0, millisecond: 0 }); // Preserve original timezone if available const startZone = timeRange.start.zoneName || 'UTC'; const endZone = timeRange.end.zoneName || 'UTC'; return { type: 'range', span, confidence: Math.min(...components.map(c => c.confidence)), value: { start: start.setZone(startZone), end: end.setZone(endZone) }, metadata: { originalText: components.map(c => { var _a; return (_a = c.metadata) === null || _a === void 0 ? void 0 : _a.originalText; }).join(' '), dateType: 'relative', rangeType: 'time' } }; } else if (timeRange) { // If we have a date, apply it to the range if (baseDate) { const start = combineDateAndTime(baseDate, timeRange.start); const end = combineDateAndTime(baseDate, timeRange.end); // Preserve original timezone if available const startZone = timeRange.start.zoneName || 'UTC'; const endZone = timeRange.end.zoneName || 'UTC'; return { type: 'range', value: { start: start.setZone(startZone), end: end.setZone(endZone) }, span, confidence: components.reduce((acc, c) => acc * c.confidence, 1), metadata: { originalText: components.map(c => { var _a; return (_a = c.metadata) === null || _a === void 0 ? void 0 : _a.originalText; }).join(' '), ...components.reduce((acc, c) => ({ ...acc, ...c.metadata }), {}) } }; } // Otherwise just return the range with its original timezone const startZone = timeRange.start.zoneName || 'UTC'; const endZone = timeRange.end.zoneName || 'UTC'; return { type: 'range', value: { start: timeRange.start.setZone(startZone), end: timeRange.end.setZone(endZone) }, span, confidence: components.reduce((acc, c) => acc * c.confidence, 1), metadata: { originalText: components.map(c => { var _a; return (_a = c.metadata) === null || _a === void 0 ? void 0 : _a.originalText; }).join(' '), ...components.reduce((acc, c) => ({ ...acc, ...c.metadata }), {}) } }; } else if (dateRange) { // If we have just a date range (like ordinal weeks), return it as is return { type: 'range', value: dateRange, span, confidence: components.reduce((acc, c) => acc * c.confidence, 1), metadata: { originalText: components.map(c => { var _a; return (_a = c.metadata) === null || _a === void 0 ? void 0 : _a.originalText; }).join(' '), ...components.reduce((acc, c) => ({ ...acc, ...c.metadata }), {}) } }; } // If we have both date and time if (baseDate && baseTime) { const datetime = combineDateAndTime(baseDate, baseTime); return { type: 'date', value: datetime, span, confidence: components.reduce((acc, c) => acc * c.confidence, 1), metadata: { originalText: components.map(c => { var _a; return (_a = c.metadata) === null || _a === void 0 ? void 0 : _a.originalText; }).filter(Boolean).join(' '), ...components.reduce((acc, c) => ({ ...acc, ...c.metadata }), {}) } }; } // If we just have a date if (baseDate) { // Only consider range types that weren't skipped const hasRangeType = components.some(c => { var _a; return c.type === 'range' && !(((_a = c.metadata) === null || _a === void 0 ? void 0 : _a.rangeType) === 'timeOfDay' && baseTime); }); return { type: hasRangeType ? 'range' : 'date', value: hasRangeType ? { start: baseDate, end: baseDate.plus({ hours: 1 }) } : baseDate, span, confidence: components.reduce((acc, c) => acc * c.confidence, 1), metadata: { originalText: components.map(c => { var _a; return (_a = c.metadata) === null || _a === void 0 ? void 0 : _a.originalText; }).join(' '), ...components.reduce((acc, c) => ({ ...acc, ...c.metadata }), {}) } }; } // If we just have a time if (baseTime) { return { type: 'time', value: baseTime, span, confidence: components.reduce((acc, c) => acc * c.confidence, 1), metadata: { originalText: components.map(c => { var _a; return (_a = c.metadata) === null || _a === void 0 ? void 0 : _a.originalText; }).filter(Boolean).join(' '), ...components.reduce((acc, c) => ({ ...acc, ...c.metadata }), {}) } }; } return null; }; exports.resolveGroup = resolveGroup; /** * Calculate coverage score for a parse result based on how much of the input text it explains */ const calculateCoverage = (result, inputLength) => { var _a; const text = ((_a = result.metadata) === null || _a === void 0 ? void 0 : _a.originalText) || ''; const words = text.split(' '); const coveredLength = words.reduce((acc, word) => acc + word.length, 0); return coveredLength / inputLength; }; /** * Main resolution function that takes all parsed components and returns the best parse result */ const resolveComponents = (components, inputText) => { // Group compatible components const groups = (0, exports.groupCompatibleComponents)(components); // console.log(JSON.stringify(groups, null, 2)); // Resolve each group const results = groups .map(group => (0, exports.resolveGroup)(group)) .filter((result) => result !== null); // console.log(JSON.stringify(results, null, 2)); if (results.length === 0) return null; // Select best result based on: // 1. Coverage of input text // 2. Confidence score // 3. Number of components combined return results.reduce((best, current) => { const bestScore = calculateCoverage(best, inputText.length) * best.confidence; const currentScore = calculateCoverage(current, inputText.length) * current.confidence; return currentScore > bestScore ? current : best; }, results[0]); }; exports.resolveComponents = resolveComponents; //# sourceMappingURL=resolution-engine.js.map