/*
 * @kermank/nldp
 * Version: (not recorded)
 * A modular date/time parser for converting natural language into dates and times.
 * 306 lines • 12.6 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.resolveComponents = exports.resolveGroup = exports.groupCompatibleComponents = exports.hasOverlap = exports.sortComponents = void 0;
/**
 * Return a sorted copy of `components`; the input array itself is not modified.
 *
 * Ordering, most significant key first:
 * 1. Span length (`span.end - span.start`), longest first
 * 2. Confidence, highest first
 * 3. Span start, earliest first
 *
 * @param {Array<{span: {start: number, end: number}, confidence: number}>} components
 * @returns {Array} A new array holding the same component objects in priority order.
 */
const sortComponents = (components) => {
  const spanLength = (component) => component.span.end - component.span.start;
  const byPriority = (left, right) => {
    const leftLength = spanLength(left);
    const rightLength = spanLength(right);
    if (leftLength !== rightLength) {
      return rightLength - leftLength;
    }
    // Equal lengths: fall back to confidence, then to position in the text.
    return left.confidence === right.confidence
      ? left.span.start - right.span.start
      : right.confidence - left.confidence;
  };
  return [...components].sort(byPriority);
};
exports.sortComponents = sortComponents;
/**
 * Report whether the spans of two components share any part of the original text.
 *
 * A component that ends exactly where the other one starts is NOT considered
 * overlapping (the comparisons use `<=`).
 *
 * @param {{span: {start: number, end: number}}} a
 * @param {{span: {start: number, end: number}}} b
 * @returns {boolean} `true` when the spans overlap.
 */
const hasOverlap = (a, b) => {
  if (a.span.end <= b.span.start) {
    // `a` is finished before `b` begins.
    return false;
  }
  if (b.span.end <= a.span.start) {
    // `b` is finished before `a` begins.
    return false;
  }
  return true;
};
exports.hasOverlap = hasOverlap;
/**
 * Partition components into groups whose members never overlap in the text.
 *
 * Components are visited in `sortComponents` order. Each one joins the first
 * existing group in which it overlaps no member; if there is no such group it
 * starts a new one. Only span overlap is checked here - component types are
 * not compared.
 *
 * @param {Array} components Parsed components (each with `span` and `confidence`).
 * @returns {Array<Array>} The groups, in the order they were created.
 */
const groupCompatibleComponents = (components) => {
  const groups = [];
  const fitsInto = (group, candidate) =>
    !group.some((member) => (0, exports.hasOverlap)(member, candidate));
  for (const candidate of (0, exports.sortComponents)(components)) {
    const home = groups.find((group) => fitsInto(group, candidate));
    if (home === undefined) {
      groups.push([candidate]);
    } else {
      home.push(candidate);
    }
  }
  return groups;
};
exports.groupCompatibleComponents = groupCompatibleComponents;
/**
 * Overlay a time of day onto a date.
 *
 * Calls `date.set(...)` with the hour, minute and second read from `time` and
 * a millisecond of 0, and returns whatever `set` returns.
 *
 * @param {{set: Function}} date Object providing the date part.
 * @param {{hour: number, minute: number, second: number}} time Object providing the time part.
 * @returns {*} The result of `date.set(...)`.
 */
const combineDateAndTime = (date, time) => {
  const { hour, minute, second } = time;
  return date.set({ hour, minute, second, millisecond: 0 });
};
/**
 * Ordering weight per component type; higher weights are processed first.
 */
const COMPONENT_TYPE_PRIORITY = { date: 4, time: 3, range: 2, modifier: 1 };
/**
 * Read `metadata.rangeType`, yielding `undefined` when the component has no metadata.
 */
const rangeTypeOf = (component) => (component.metadata || {}).rangeType;
/**
 * Join the `metadata.originalText` of every component with single spaces,
 * skipping components that do not carry one.
 */
const joinOriginalText = (components) => components
  .map((component) => (component.metadata || {}).originalText)
  .filter(Boolean)
  .join(' ');
/**
 * Shallow-merge the metadata of all components (later components win on key
 * clashes), then set `originalText` to the text of the whole group. The text
 * is written last so that one component's own `originalText` cannot replace it.
 */
const mergeGroupMetadata = (components) => ({
  ...components.reduce((merged, component) => ({ ...merged, ...component.metadata }), {}),
  originalText: joinOriginalText(components)
});
/**
 * Multiply the confidences of all components together.
 */
const multiplyConfidences = (components) => components.reduce((product, component) => product * component.confidence, 1);
/**
 * Call `setZone` on both ends of `range`, using the `zoneName` found on the
 * matching end of `zoneSource` and falling back to 'UTC' when it is empty.
 */
const applySourceZones = (range, zoneSource) => ({
  start: range.start.setZone(zoneSource.start.zoneName || 'UTC'),
  end: range.end.setZone(zoneSource.end.zoneName || 'UTC')
});
/**
 * Resolve a group of compatible components into a final parse result.
 *
 * The components are folded into up to four slots (date, time, date range,
 * time range) and the first matching rule below decides the result:
 * 1. date range + time range -> 'range': the time range's hour/minute applied to
 *    the date range's ends (confidence is the minimum over the components)
 * 2. time range              -> 'range': placed on the base date when there is one
 * 3. date range              -> 'range': returned unchanged
 * 4. date + time             -> 'date'
 * 5. date                    -> 'date'
 * 6. time                    -> 'time'
 * Unless noted, confidence is the product of the component confidences and
 * metadata is the merged component metadata plus the group's `originalText`.
 *
 * NOTE(review): date/time values are used through `set`, `setZone`, `plus` and
 * `zoneName`, which looks like Luxon's DateTime - confirm against the parsers.
 *
 * @param {Array} components Non-overlapping components (`type`, `value`, `span`,
 *   `confidence`, optional `metadata`).
 * @returns {Object|null} `{ type, value, span, confidence, metadata }`, or `null`
 *   when the group holds no date, time or range.
 */
const resolveGroup = (components) => {
  // 'time' (3) outranks 'range' (2), so a specific time is always seen before a
  // time-of-day range and the loop below can drop the vaguer range.
  const ordered = [...components].sort((a, b) => COMPONENT_TYPE_PRIORITY[b.type] - COMPONENT_TYPE_PRIORITY[a.type]);
  let baseDate = null;
  let baseTime = null;
  let dateRange = null;
  let timeRange = null;
  for (const component of ordered) {
    switch (component.type) {
      case 'date':
        baseDate = component.value;
        break;
      case 'time':
        baseTime = component.value;
        break;
      case 'range': {
        const rangeType = rangeTypeOf(component);
        if (rangeType === 'timeOfDay' && baseTime) {
          // A specific time was given; ignore the time-of-day range.
          break;
        }
        const range = component.value;
        const isDateBased = rangeType === 'relativeWeek' ||
          rangeType === 'ordinalWeek' ||
          (component.metadata || {}).dateType === 'relative';
        if (isDateBased) {
          // Date-based ranges (like "next week") are kept apart from time ranges.
          dateRange = range;
        }
        else if (rangeType === 'timeOfDay' && baseDate) {
          // Keep the hours of the time-of-day range but move them onto the base date.
          timeRange = {
            start: combineDateAndTime(baseDate, range.start),
            end: combineDateAndTime(baseDate, range.end)
          };
        }
        else {
          // Anything else is treated as a time range.
          timeRange = range;
        }
        break;
      }
      default:
        // 'modifier' and unknown types contribute only span/confidence/metadata.
        break;
    }
  }
  // The result spans everything from the earliest start to the latest end.
  const span = {
    start: Math.min(...components.map(c => c.span.start)),
    end: Math.max(...components.map(c => c.span.end))
  };
  if (dateRange && timeRange) {
    // Apply the time range's hour/minute to the ends of the date range.
    const start = dateRange.start.set({
      hour: timeRange.start.hour,
      minute: timeRange.start.minute,
      second: 0,
      millisecond: 0
    });
    const end = dateRange.end.set({
      hour: timeRange.end.hour,
      minute: timeRange.end.minute,
      second: 0,
      millisecond: 0
    });
    return {
      type: 'range',
      span,
      confidence: Math.min(...components.map(c => c.confidence)),
      value: applySourceZones({ start, end }, timeRange),
      metadata: {
        originalText: joinOriginalText(components),
        dateType: 'relative',
        rangeType: 'time'
      }
    };
  }
  if (timeRange) {
    // Place the range on the base date when there is one; otherwise keep it as is.
    const placed = baseDate
      ? {
        start: combineDateAndTime(baseDate, timeRange.start),
        end: combineDateAndTime(baseDate, timeRange.end)
      }
      : timeRange;
    return {
      type: 'range',
      value: applySourceZones(placed, timeRange),
      span,
      confidence: multiplyConfidences(components),
      metadata: mergeGroupMetadata(components)
    };
  }
  if (dateRange) {
    // Just a date range (like ordinal weeks): return it as is.
    return {
      type: 'range',
      value: dateRange,
      span,
      confidence: multiplyConfidences(components),
      metadata: mergeGroupMetadata(components)
    };
  }
  if (baseDate && baseTime) {
    return {
      type: 'date',
      value: combineDateAndTime(baseDate, baseTime),
      span,
      confidence: multiplyConfidences(components),
      metadata: mergeGroupMetadata(components)
    };
  }
  if (baseDate) {
    // No time was found here, so no range was skipped in the loop. A range
    // component can only still be present if its value was empty; in that
    // case fall back to a one-hour range starting at the date.
    const hasRange = components.some(c => c.type === 'range');
    return {
      type: hasRange ? 'range' : 'date',
      value: hasRange ? { start: baseDate, end: baseDate.plus({ hours: 1 }) } : baseDate,
      span,
      confidence: multiplyConfidences(components),
      metadata: mergeGroupMetadata(components)
    };
  }
  if (baseTime) {
    return {
      type: 'time',
      value: baseTime,
      span,
      confidence: multiplyConfidences(components),
      metadata: mergeGroupMetadata(components)
    };
  }
  return null;
};
exports.resolveGroup = resolveGroup;
/**
 * Calculate the coverage score of a parse result: how much of the input text
 * it accounts for.
 *
 * The covered length is the number of characters in
 * `result.metadata.originalText`, not counting the spaces that separate words;
 * a missing text counts as empty.
 *
 * @param {{metadata?: {originalText?: string}}} result Parse result to score.
 * @param {number} inputLength Length of the full input text.
 * @returns {number} Covered length divided by `inputLength`, or 0 when
 *   `inputLength` is not a positive number (avoids NaN/Infinity from a
 *   division by zero).
 */
const calculateCoverage = (result, inputLength) => {
  if (!(inputLength > 0)) {
    return 0;
  }
  const text = (result.metadata || {}).originalText || '';
  const coveredLength = text
    .split(' ')
    .reduce((total, word) => total + word.length, 0);
  return coveredLength / inputLength;
};
/**
 * Main resolution function: turn all parsed components into the single best
 * parse result.
 *
 * The components are grouped with `groupCompatibleComponents`, each group is
 * resolved with `resolveGroup`, and groups that resolve to `null` are dropped.
 * Every remaining result is scored as `coverage * confidence`, where coverage
 * comes from `calculateCoverage` and `inputText.length`. The highest score
 * wins; on a tie the earlier result is kept.
 *
 * @param {Array} components All parsed components.
 * @param {string} inputText The text the components were parsed from.
 * @returns {Object|null} The best result, or `null` when no group resolves.
 */
const resolveComponents = (components, inputText) => {
  const candidates = [];
  for (const group of (0, exports.groupCompatibleComponents)(components)) {
    const resolved = (0, exports.resolveGroup)(group);
    if (resolved !== null) {
      candidates.push(resolved);
    }
  }
  if (candidates.length === 0) {
    return null;
  }
  const scoreOf = (candidate) => calculateCoverage(candidate, inputText.length) * candidate.confidence;
  let winner = candidates[0];
  let winnerScore = scoreOf(winner);
  for (const candidate of candidates.slice(1)) {
    const score = scoreOf(candidate);
    // Strictly greater: an equal score never displaces the earlier result.
    if (score > winnerScore) {
      winner = candidate;
      winnerScore = score;
    }
  }
  return winner;
};
exports.resolveComponents = resolveComponents;
//# sourceMappingURL=resolution-engine.js.map