compromise
Version:
modest natural language processing
51 lines (45 loc) • 2.28 kB
JavaScript
/**
 * Regex rules for tokens that contain digits: times, dates, timezones,
 * phone numbers, money, plain numbers, percentages, fractions and ranges.
 *
 * Each rule is a tuple `[pattern, tags, label]`:
 *  - `pattern` {RegExp}          anchored expression tested against one token
 *  - `tags`    {string|string[]} tag name, or list of tag names, for a match
 *  - `label`   {string}          optional short example/identifier for the rule.
 *                                Labels are loose: some are names ('iso-date'),
 *                                some are not literal matches of their own
 *                                pattern ('3pm', '.73th'), and one rule has none.
 *
 * NOTE(review): rules are listed from specific to generic, so the consumer
 * presumably stops at the first matching rule — confirm against the caller
 * before reordering anything here.
 */
const numericPatterns = [
  // '@'-prefixed clock times
  [/^@1?[0-9](am|pm)$/i, 'Time', '3pm'],
  [/^@1?[0-9]:[0-9]{2}(am|pm)?$/i, 'Time', '3:30pm'],
  // apostrophe + two-digit year, like '97 (this rule carries no label)
  [/^'[0-9]{2}$/, 'Year'],

  // times
  [/^[012]?[0-9](:[0-5][0-9])(:[0-5][0-9])$/, 'Time', '3:12:31'],
  [/^[012]?[0-9](:[0-5][0-9])?(:[0-5][0-9])? ?(am|pm)$/i, 'Time', '1:12pm'],
  // can remove? - not as-is: this is the only rule here that matches a bare
  // 'h:mm' such as '13:45' (no seconds, no am/pm)
  [/^[012]?[0-9](:[0-5][0-9])(:[0-5][0-9])? ?(am|pm)?$/i, 'Time', '1:12:31pm'],

  // iso-dates
  // (no trailing '$': anything may follow the 'hh:mm' part of the timestamp)
  [/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}/i, 'Date', 'iso-date'],
  [/^[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,4}$/, 'Date', 'iso-dash'],
  [/^[0-9]{1,4}\/[0-9]{1,2}\/([0-9]{4}|[0-9]{2})$/, 'Date', 'iso-slash'],
  [/^[0-9]{1,4}\.[0-9]{1,2}\.[0-9]{1,4}$/, 'Date', 'iso-dot'],
  [/^[0-9]{1,4}-[a-z]{2,9}-[0-9]{1,4}$/i, 'Date', '12-dec-2019'],

  // timezones
  [/^utc ?[+-]?[0-9]+$/, 'Timezone', 'utc-9'],
  [/^(gmt|utc)[+-][0-9]{1,2}$/i, 'Timezone', 'gmt-3'],

  // phone numbers
  [/^[0-9]{3}-[0-9]{4}$/, 'PhoneNumber', '421-0029'],
  [/^(\+?[0-9][ -])?[0-9]{3}[ -]?[0-9]{3}-[0-9]{4}$/, 'PhoneNumber', '1-800-'],

  // money
  // leading currency symbol, like $5.30 (optional k/m/b/bn suffix and trailing '+')
  [/^[-+]?\p{Currency_Symbol}[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?([kmb]|bn)?\+?$/u, ['Money', 'Value'], '$5.30'],
  // trailing currency symbol, like 5.30£
  [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?\p{Currency_Symbol}\+?$/u, ['Money', 'Value'], '5.30£'],
  // trailing currency code, like $400usd | 5usd
  // ('*' rather than '+' after the first digit, so single-digit amounts match too)
  [/^[-+]?[$£]?[0-9]([0-9,.])*(usd|eur|jpy|gbp|cad|aud|chf|cny|hkd|nzd|kr|rub)$/i, ['Money', 'Value'], '$400usd'],

  // numbers
  // 50 | -50 | 3.23 | 5,999.0 | 10+
  [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?\+?$/, ['Cardinal', 'NumericValue'], '5,999'],
  [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?(st|nd|rd|r?th)$/, ['Ordinal', 'NumericValue'], '53rd'],
  // leading-dot decimals: .73 | .73+
  // (the '.73th' label is kept as-is; the pattern itself accepts no 'th' suffix)
  [/^\.[0-9]+\+?$/, ['Cardinal', 'NumericValue'], '.73th'],

  // percent
  [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?%\+?$/, ['Percent', 'Cardinal', 'NumericValue'], '-4%'],
  [/^\.[0-9]+%$/, ['Percent', 'Cardinal', 'NumericValue'], '.3%'],

  // fraction
  [/^[0-9]{1,4}\/[0-9]{1,4}(st|nd|rd|th)?s?$/, ['Fraction', 'NumericValue'], '2/3rds'],

  // range (hyphen, en-dash or em-dash between two short numbers)
  [/^[0-9.]{1,3}[a-z]{0,2}[-–—][0-9]{1,3}[a-z]{0,2}$/, ['Value', 'NumberRange'], '3-4'],

  // time-range
  // NOTE(review): a plain '3-4pm' also matches the range rule above; only forms
  // with minutes or a leading am/pm (e.g. '3:00-4pm') are unique to this rule.
  [/^[0-9]{1,2}(:[0-9][0-9])?(am|pm)? ?[-–—] ?[0-9]{1,2}(:[0-9][0-9])?(am|pm)$/, ['Time', 'NumberRange'], '3-4pm'],

  // number with unit (1-4 letter or degree-sign suffix)
  [/^[0-9.]+([a-z°]{1,4})$/, 'NumericValue', '9km'],
]

export default numericPatterns