UNPKG

interweave-autolink

Version:

URL, IP, email, and hashtag autolinking support for Interweave.

473 lines (446 loc) 9.84 kB
/* eslint-disable unicorn/better-regex, unicorn/no-unsafe-regex */

/** Options controlling how `combinePatterns` assembles its result. */
export interface CombinePatternsOptions {
  /** Wrap the joined source in a capturing group `(...)`. Takes precedence over `nonCapture`. */
  capture?: boolean;
  /** Flags passed to the resulting `RegExp` (defaults to none). */
  flags?: string;
  /** Raw regex text inserted between each pattern source (defaults to an empty string). */
  join?: string;
  /** Raw regex text (typically a quantifier such as `?` or `*`) appended after the optional group. */
  match?: string;
  /** Wrap the joined source in a non-capturing group `(?:...)`; ignored when `capture` is set. */
  nonCapture?: boolean;
}

/**
 * Build a single regular expression by concatenating the `source` of each
 * given pattern. Flags on the input patterns are discarded; only
 * `options.flags` is applied to the result.
 *
 * @param patterns - Patterns whose sources are joined, in order.
 * @param options - Grouping, join text, trailing quantifier and flags.
 * @returns A new `RegExp` compiled from the combined source.
 */
export function combinePatterns(
  patterns: readonly RegExp[],
  options: CombinePatternsOptions = {},
): RegExp {
  const { capture, nonCapture, match, join = '', flags = '' } = options;
  let source = patterns.map((pattern) => pattern.source).join(join);

  if (capture) {
    source = `(${source})`;
  } else if (nonCapture) {
    source = `(?:${source})`;
  }

  // The quantifier applies to the group (when one was added), so it goes last.
  if (match) {
    source += match;
  }

  return new RegExp(source, flags);
}

// https://www.ietf.org/rfc/rfc3986.txt
// https://blog.codinghorror.com/the-problem-with-urls/
// http://www.regular-expressions.info/email.html

/** A single lowercase letter or digit (callers add the `i` flag when combining). */
export const VALID_ALNUM_CHARS = /[a-z0-9]/;

/** Zero or more characters accepted inside a query string or fragment. */
export const VALID_PATH_CHARS = /(?:[a-zA-Z\u0400-\u04FF0-9\-_~!$&'()[\]\\/*+,;=.%]*)/;

/** Optional `http://` or `https://` prefix (captured). */
export const URL_SCHEME = /(https?:\/\/)?/;

/** Optional `user:password@` style prefix (captured, including the trailing `@`). */
export const URL_AUTH = combinePatterns(
  [
    /[a-z\u0400-\u04FF0-9\-_~!$&'()*+,;=.:]+/, // Includes colon
    /@/,
  ],
  {
    capture: true,
    match: '?',
  },
);

/** Host name: any number of subdomains, one domain label and a TLD label (captured as one group). */
export const URL_HOST = combinePatterns(
  [
    /(?:(?:[a-z0-9](?:[-a-z0-9_]*[a-z0-9])?)\.)*/, // Subdomain
    /(?:(?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?)\.)/, // Domain
    /(?:[a-z](?:[-a-z0-9]*[a-z0-9])?)/, // TLD
  ],
  {
    capture: true,
  },
);

/** Optional `:port` suffix; only the 1-5 digits are captured. */
export const URL_PORT = /(?::(\d{1,5}))?/;

/** Optional path starting with `/` (captured). */
export const URL_PATH = combinePatterns(
  [
    /\//,
    combinePatterns(
      [
        /[-+a-z0-9!*';:=,.$/%[\]_~@|&]*/,
        /[-+a-z0-9/]/, // Valid ending chars
      ],
      {
        match: '*',
        nonCapture: true,
      },
    ),
  ],
  {
    capture: true,
    match: '?',
  },
);

/** Optional query string starting with `?` (captured). */
export const URL_QUERY = combinePatterns(
  [
    /\?/,
    combinePatterns(
      [
        VALID_PATH_CHARS,
        /[a-z0-9_&=]/, // Valid ending chars
      ],
      {
        match: '?',
        nonCapture: true,
      },
    ),
  ],
  {
    capture: true,
    match: '?',
  },
);

/** Optional fragment starting with `#` (captured). */
export const URL_FRAGMENT = combinePatterns(
  [
    /#/,
    combinePatterns(
      [
        VALID_PATH_CHARS,
        /[a-z0-9]/, // Valid ending chars
      ],
      {
        match: '?',
        nonCapture: true,
      },
    ),
  ],
  {
    capture: true,
    match: '?',
  },
);

/**
 * Case-insensitive URL matcher. Capture groups, in order:
 * scheme, auth, host, port (digits only), path, query, fragment.
 */
export const URL_PATTERN = combinePatterns(
  [URL_SCHEME, URL_AUTH, URL_HOST, URL_PORT, URL_PATH, URL_QUERY, URL_FRAGMENT],
  {
    flags: 'i',
  },
);

/** One IPv4 octet written in decimal, 0 through 255. */
export const IP_V4_PART = /(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)/;

/** Dotted-quad IPv4 address (captured as one group). */
export const IP_V4 = combinePatterns([IP_V4_PART, IP_V4_PART, IP_V4_PART, IP_V4_PART], {
  capture: true,
  join: '\\.',
});

/** Same group layout as `URL_PATTERN`, with an IPv4 address in place of the host name. */
export const IP_PATTERN = combinePatterns(
  [URL_SCHEME, URL_AUTH, IP_V4, URL_PORT, URL_PATH, URL_QUERY, URL_FRAGMENT],
  {
    flags: 'i',
  },
);

// Per-script character classes for hashtag bodies; they are tried in this order.
const NON_LATIN = [
  // Chinese
  /[\d_\u4E00-\u9FFF-]+/,
  // Japanese
  /[\d_\u3000-\u30FF-]+/,
  // Korean
  /[\d_\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7FF-]+/,
  // Thai
  /[\d_\u0E00-\u0E7F-]+/,
  // Russian, Ukrainian
  // eslint-disable-next-line no-misleading-character-class
  /[\d_a-z\u0400-\u052F\u1C80-\u1C8F\u2DE0-\u2DFF\uA640-\uA69F-]+/,
  // Latin based
  /[\d_a-z\u0080-\u00FF\u0100-\u017F\u0180-\u024F-]+/,
];

/** Case-insensitive `#tag` matcher; the tag body (without `#`) is capture group 1. */
export const HASHTAG_PATTERN = combinePatterns(
  [
    /#/,
    combinePatterns(NON_LATIN, {
      capture: true,
      join: '|',
    }),
  ],
  {
    flags: 'i',
  },
);

/**
 * `@name` matcher; the name (without `@`) is capture group 1.
 *
 * The ranges are spelled out as `A-Za-z` on purpose: a single `A-z` range
 * also covers the punctuation between `Z` and `a` (square brackets,
 * backslash, caret and backtick), which would leak into the captured name.
 */
export const MENTION_PATTERN = /@([\dA-Za-z_-]+)/;

/** Characters allowed between the first and last character of an email local part. */
export const EMAIL_USERNAME_PART = /[.a-z0-9!#$%&?*+=_{|}~-]*/;

/**
 * Email local part (captured). It must start and end with an alphanumeric;
 * the middle-plus-last-character tail is optional so that one-character
 * local parts such as `a@example.com` are accepted.
 */
export const EMAIL_USERNAME = combinePatterns(
  [
    VALID_ALNUM_CHARS,
    combinePatterns([EMAIL_USERNAME_PART, VALID_ALNUM_CHARS], {
      match: '?',
      nonCapture: true,
    }),
  ],
  {
    capture: true,
  },
);

/** Case-insensitive email matcher: group 1 is the local part, group 2 the host. */
export const EMAIL_PATTERN = combinePatterns([EMAIL_USERNAME, URL_HOST], {
  flags: 'i',
  join: '@',
});

/** `EMAIL_PATTERN` anchored at both ends, for testing that a whole string is an email. */
export const EMAIL_DISTINCT_PATTERN = new RegExp(`^${EMAIL_PATTERN.source}$`, EMAIL_PATTERN.flags);

// Properly and efficiently detecting URLs + all TLDs is nigh impossible,
// instead we will only support the most common top-level TLDs.
// https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
//
// Supported top-level domains, written lowercase without a leading dot and
// grouped as: original, sponsored, miscellaneous, then country codes.
export const TOP_LEVEL_TLDS: string[] = [
  // Original
  'com',
  'org',
  'net',
  'int',
  'edu',
  'gov',
  'mil',
  // Sponsored
  'aero',
  'asia',
  'biz',
  'cat',
  'coop',
  'jobs',
  'mobi',
  'museum',
  'post',
  'tel',
  'travel',
  'xxx',
  // Misc
  'app',
  'arpa',
  'test',
  // Countries
  'ac', // Ascension Island
  'ad', // Andorra
  'ae', // United Arab Emirates
  'af', // Afghanistan
  'ag', // Antigua and Barbuda
  'ai', // Anguilla
  'al', // Albania
  'am', // Armenia
  'an', // Netherlands Antilles
  'ao', // Angola
  'aq', // Antarctica
  'ar', // Argentina
  'as', // American Samoa
  'at', // Austria
  'au', // Australia
  'aw', // Aruba
  'ax', // Aland Islands
  'az', // Azerbaijan
  'ba', // Bosnia and Herzegovina
  'bb', // Barbados
  'bd', // Bangladesh
  'be', // Belgium
  'bf', // Burkina Faso
  'bg', // Bulgaria
  'bh', // Bahrain
  'bi', // Burundi
  'bj', // Benin
  'bl', // Saint Barthelemy
  'bm', // Bermuda
  'bn', // Brunei Darussalam
  'bo', // Bolivia
  'bq', // Bonaire, Sint Eustatius and Saba
  'br', // Brazil
  'bs', // Bahamas
  'bt', // Bhutan
  'bv', // Bouvet Island
  'bw', // Botswana
  'by', // Belarus
  'bz', // Belize
  'ca', // Canada
  'cc', // Cocos (Keeling) Islands
  'cd', // Congo, The Democratic Republic of the
  'cf', // Central African Republic
  'cg', // Congo
  'ch', // Switzerland
  'ci', // Cote d'Ivoire
  'ck', // Cook Islands
  'cl', // Chile
  'cm', // Cameroon
  'cn', // China
  'co', // Colombia
  'cr', // Costa Rica
  'cu', // Cuba
  'cv', // Cape Verde
  'cw', // Curaçao
  'cx', // Christmas Island
  'cy', // Cyprus
  'cz', // Czech Republic
  'de', // Germany
  'dj', // Djibouti
  'dk', // Denmark
  'dm', // Dominica
  'do', // Dominican Republic
  'dz', // Algeria
  'ec', // Ecuador
  'ee', // Estonia
  'eg', // Egypt
  'eh', // Western Sahara
  'er', // Eritrea
  'es', // Spain
  'et', // Ethiopia
  'eu', // European Union
  'fi', // Finland
  'fj', // Fiji
  'fk', // Falkland Islands (Malvinas)
  'fm', // Micronesia, Federated States of
  'fo', // Faroe Islands
  'fr', // France
  'ga', // Gabon
  'gb', // United Kingdom
  'gd', // Grenada
  'ge', // Georgia
  'gf', // French Guiana
  'gg', // Guernsey
  'gh', // Ghana
  'gi', // Gibraltar
  'gl', // Greenland
  'gm', // Gambia
  'gn', // Guinea
  'gp', // Guadeloupe
  'gq', // Equatorial Guinea
  'gr', // Greece
  'gs', // South Georgia and the South Sandwich Islands
  'gt', // Guatemala
  'gu', // Guam
  'gw', // Guinea-Bissau
  'gy', // Guyana
  'hk', // Hong Kong
  'hm', // Heard Island and McDonald Islands
  'hn', // Honduras
  'hr', // Croatia
  'ht', // Haiti
  'hu', // Hungary
  'id', // Indonesia
  'ie', // Ireland
  'il', // Israel
  'im', // Isle of Man
  'in', // India
  'io', // British Indian Ocean Territory
  'iq', // Iraq
  'ir', // Iran, Islamic Republic of
  'is', // Iceland
  'it', // Italy
  'je', // Jersey
  'jm', // Jamaica
  'jo', // Jordan
  'jp', // Japan
  'ke', // Kenya
  'kg', // Kyrgyzstan
  'kh', // Cambodia
  'ki', // Kiribati
  'km', // Comoros
  'kn', // Saint Kitts and Nevis
  'kp', // Korea, Democratic People's Republic of
  'kr', // Korea, Republic of
  'kw', // Kuwait
  'ky', // Cayman Islands
  'kz', // Kazakhstan
  'la', // Lao People's Democratic Republic
  'lb', // Lebanon
  'lc', // Saint Lucia
  'li', // Liechtenstein
  'lk', // Sri Lanka
  'lr', // Liberia
  'ls', // Lesotho
  'lt', // Lithuania
  'lu', // Luxembourg
  'lv', // Latvia
  'ly', // Libyan Arab Jamahiriya
  'ma', // Morocco
  'mc', // Monaco
  'md', // Moldova, Republic of
  'me', // Montenegro
  'mf', // Saint Martin (French part)
  'mg', // Madagascar
  'mh', // Marshall Islands
  'mk', // Macedonia, The Former Yugoslav Republic of
  'ml', // Mali
  'mm', // Myanmar
  'mn', // Mongolia
  'mo', // Macao
  'mp', // Northern Mariana Islands
  'mq', // Martinique
  'mr', // Mauritania
  'ms', // Montserrat
  'mt', // Malta
  'mu', // Mauritius
  'mv', // Maldives
  'mw', // Malawi
  'mx', // Mexico
  'my', // Malaysia
  'mz', // Mozambique
  'na', // Namibia
  'nc', // New Caledonia
  'ne', // Niger
  'nf', // Norfolk Island
  'ng', // Nigeria
  'ni', // Nicaragua
  'nl', // Netherlands
  'no', // Norway
  'np', // Nepal
  'nr', // Nauru
  'nu', // Niue
  'nz', // New Zealand
  'om', // Oman
  'pa', // Panama
  'pe', // Peru
  'pf', // French Polynesia
  'pg', // Papua New Guinea
  'ph', // Philippines
  'pk', // Pakistan
  'pl', // Poland
  'pm', // Saint Pierre and Miquelon
  'pn', // Pitcairn
  'pr', // Puerto Rico
  'ps', // Palestinian Territory, Occupied
  'pt', // Portugal
  'pw', // Palau
  'py', // Paraguay
  'qa', // Qatar
  're', // Reunion
  'ro', // Romania
  'rs', // Serbia
  'ru', // Russian Federation
  'rw', // Rwanda
  'sa', // Saudi Arabia
  'sb', // Solomon Islands
  'sc', // Seychelles
  'sd', // Sudan
  'se', // Sweden
  'sg', // Singapore
  'sh', // Saint Helena
  'si', // Slovenia
  'sj', // Svalbard and Jan Mayen
  'sk', // Slovakia
  'sl', // Sierra Leone
  'sm', // San Marino
  'sn', // Senegal
  'so', // Somalia
  'sr', // Suriname
  'st', // Sao Tome and Principe
  'su', // Soviet Union (being phased out)
  'sv', // El Salvador
  'sx', // Sint Maarten (Dutch part)
  'sy', // Syrian Arab Republic
  'sz', // Swaziland
  'tc', // Turks and Caicos Islands
  'td', // Chad
  'tf', // French Southern Territories
  'tg', // Togo
  'th', // Thailand
  'tj', // Tajikistan
  'tk', // Tokelau
  'tl', // Timor-Leste
  'tm', // Turkmenistan
  'tn', // Tunisia
  'to', // Tonga
  'tp', // Portuguese Timor (being phased out)
  'tr', // Turkey
  'tt', // Trinidad and Tobago
  'tv', // Tuvalu
  'tw', // Taiwan, Province of China
  'tz', // Tanzania, United Republic of
  'ua', // Ukraine
  'ug', // Uganda
  'uk', // United Kingdom
  'um', // United States Minor Outlying Islands
  'us', // United States
  'uy', // Uruguay
  'uz', // Uzbekistan
  'va', // Holy See (Vatican City State)
  'vc', // Saint Vincent and the Grenadines
  've', // Venezuela, Bolivarian Republic of
  'vg', // Virgin Islands, British
  'vi', // Virgin Islands, U.S.
  'vn', // Viet Nam
  'vu', // Vanuatu
  'wf', // Wallis and Futuna
  'ws', // Samoa
  'ye', // Yemen
  'yt', // Mayotte
  'za', // South Africa
  'zm', // Zambia
  'zw', // Zimbabwe
];