@wordpress/wordcount
Version:
WordPress word count utility.
71 lines (70 loc) • 1.99 kB
JavaScript
/**
* Internal dependencies
*/
/**
* Default settings for word counting operations.
*/
export const defaultSettings = {
HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
HTMLcommentRegExp: /<!--[\s\S]*?-->/g,
spaceRegExp: / | /gi,
HTMLEntityRegExp: /&\S+?;/g,
// \u2014 = em-dash.
connectorRegExp: /--|\u2014/g,
// Characters to be removed from input text.
removeRegExp: new RegExp(['[',
// Basic Latin (extract)
'\u0021-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E',
// Latin-1 Supplement (extract)
'\u0080-\u00BF\u00D7\u00F7',
/*
* The following range consists of:
* General Punctuation
* Superscripts and Subscripts
* Currency Symbols
* Combining Diacritical Marks for Symbols
* Letterlike Symbols
* Number Forms
* Arrows
* Mathematical Operators
* Miscellaneous Technical
* Control Pictures
* Optical Character Recognition
* Enclosed Alphanumerics
* Box Drawing
* Block Elements
* Geometric Shapes
* Miscellaneous Symbols
* Dingbats
* Miscellaneous Mathematical Symbols-A
* Supplemental Arrows-A
* Braille Patterns
* Supplemental Arrows-B
* Miscellaneous Mathematical Symbols-B
* Supplemental Mathematical Operators
* Miscellaneous Symbols and Arrows
*/
'\u2000-\u2BFF',
// Supplemental Punctuation.
'\u2E00-\u2E7F', ']'].join(''), 'g'),
// Remove UTF-16 surrogate points, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
wordsRegExp: /\S\s+/g,
characters_excluding_spacesRegExp: /\S/g,
/*
* Match anything that is not a formatting character, excluding:
* \f = form feed
* \n = new line
* \r = carriage return
* \t = tab
* \v = vertical tab
* \u00AD = soft hyphen
* \u2028 = line separator
* \u2029 = paragraph separator
*/
characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,
l10n: {
type: 'words'
}
};
//# sourceMappingURL=defaultSettings.js.map