UNPKG

hexo-generator-readtime

Version:

HEXO package that provides analytics on the read time to review a post. Generates word, character, image, video, and codeblock counts in the front-matter of the markdown file. It also provides read-time estimates for a given language profile. Supports 48 languages.

608 lines (518 loc) 18.2 kB
/* eslint-env es6 */
'use strict';

const { sprintf } = require("sprintf-js");

/**
 * Seconds added to every estimate to account for the reader settling in
 * while the web page loads.
 * @private
 */
const BASE_TIME_SECONDS = 5;

/**
 * Length of each supported time unit in seconds, ordered from smallest to
 * largest. Keys are the plural `time_unit` names used by language profiles.
 * @private
 */
const SECONDS_PER_UNIT = Object.freeze({
    seconds: 1, // base unit
    minutes: 60,
    hours: 3600, // 60*60
    days: 86400, // 60*60*24
    months: 2592000, // 60*60*24*30
    years: 31557600 // 60*60*24*365.25
});

/**
 * @author Richie Bartlett (Rich @ RichieBartlett.com)
 * @version 1.4.0
 * @class readTime
 * @classdesc parses hexo `post.content` and provides analytics on the read time to review the document.
 *
 * supports UTF-8 characters - includes Arabic, Chinese, Japanese, Korean, and Vietnamese
 *
 * @example
 * `let readTime = (new readTime(langProfile, post.content)).calculate(); //fuzzy time string`
 *
 * @example
 * ```js
 * let post = {content: "柏泰感切独台団碁度似難札精終南立愛配日容。島切負返検上孟彩密携終第連校価権戦。導防学覧相薦東基士口載契。横乗属産家載法結蘇支徴開幌夜。"};
 * let langProfile = {
 *     "name": "Chinese (Taiwan)",
 *     "nativeName": "中文 (繁體中文)",
 *     "family": "Sino-Tibetan",
 *     "region": ["Taiwan"],
 *     "unicodeRange": ["\u0021-\u007E", "\u0021-\u007E", "\u4E00-\u9FFF"],
 *     "charPerMin": 285,
 *     "wordsPerMin": 161,
 *     "fuzzyTime": {
 *         "pattern": "%(approx)s%(count)d%(time_unit)s",
 *         "approx": "約",
 *         "time_unit": {
 *             "second": "秒", "seconds": "秒",
 *             "minute": "分鐘", "minutes": "分鐘",
 *             "hour": "小時", "hours": "小時",
 *             "day": "日", "days": "日",
 *             "month": "月", "months": "月",
 *             "year": "年", "years": "年"
 *         }
 *     }
 * };
 * const rtObj = new readTime(langProfile, post.content, {defaultTime: "minutes"});
 * let rtString = rtObj.calculate();
 * let rtWordCount = rtObj.wordCount;
 * let rtCharCount = rtObj.charCount;
 * let rtImgCount = rtObj.imgCount;
 * let rtVidCount = rtObj.vidCount;
 * console.log(`rtString: ${rtString}`);
 * ```
 */
class readTime {
    /**
     * @property {number} wordCount
     * @description total count of words in document
     * @memberof readTime
     */
    wordCount = 0;

    /**
     * @property {number} charCount
     * @description total (UTF-8) characters count in document
     * @memberof readTime
     */
    charCount = 0;

    /**
     * @property {number} imgCount
     * @description number of image tags (HTML, markdown, or HEXO) found in document
     * @memberof readTime
     */
    imgCount = 0;

    /**
     * @property {number} vidCount
     * @description number of `<video>`, `<object>`, and `{% video|videojs|vimeo|youtube|youtuber %}` tags found in document
     * @memberof readTime
     */
    vidCount = 0;

    /**
     * @property {number} wsCount
     * @description number of unicode whitespace characters in document
     * @memberof readTime
     */
    wsCount = 0;

    /**
     * @property {number} codeBlockCount
     * @description total number of code blocks; populated by `codeBlocksCounter()` (which `calculate()` calls)
     * @memberof readTime
     */
    codeBlockCount = 0;

    /**
     * @property {number} time
     * @description estimated time to read the document. Holds seconds until `calculate()` converts it
     * to a count of `timeUnit`. Preset assumes that the reader will take time to adjust as the webpage loads.
     * @memberof readTime
     */
    time = BASE_TIME_SECONDS;

    /**
     * @property {string} fuzzyTime
     * @description string in given langProfile for estimated time to read the document
     * @memberof readTime
     */
    fuzzyTime = "";

    /**
     * @property {string} timeUnit
     * @description selected time unit from `this.langProfile.fuzzyTime.time_unit` to estimate reading time
     * @memberof readTime
     */
    timeUnit = "auto";

    /**
     * @property {string} text
     * @description HTML/document string to process
     * @private
     * @memberof readTime
     */
    text = "";

    /**
     * @property {string} _markupText
     * @description copy of the document taken after hidden comments are removed but before HTML tags
     * are stripped, so that `<code>` blocks can still be detected by `codeBlocksCounter()`
     * @private
     * @memberof readTime
     */
    _markupText = "";

    /**
     * @property {RegExp} _HTMLregEx
     * @description HTML tag regex to filter from `this.text`
     * @private
     * @memberof readTime
     */
    _HTMLregEx = /(<([^>]+)>)/gi;

    /**
     * @property {RegExp} _commentsRegEx
     * @description matches fenced/inline code (capture group 1, which is preserved) or an HTML comment
     * (no capture group, which is removed). Code is matched first so comments inside code survive.
     * @private
     * @memberof readTime
     */
    _commentsRegEx = /(```[\s\S]*?```|`[\s\S]*?`)|<!--[\s\S]*?-->/g;

    /**
     * @property {RegExp} _wordRegEx
     * @description regex to match word boundaries.
     * @private
     * @memberof readTime
     */
    _wordRegEx = /\b\S+\b/gm;

    /**
     * @property {RegExp} _wsRegEx
     * @description Unicode whitespace definition for regular expression. Includes tabs, newlines, and uncommon whitespace characters (even within word boundaries) across all documented glyphs.
     * Note: supports UTF-8
     * @private
     * @memberof readTime
     */
    _wsRegEx = /\b[\f\n\r\t\v\u00a0\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]\b/g;

    /**
     * @property {RegExp} _imgRegEx
     * @description regex to match image tags in the HTML, markdown text, and HEXO tags `{% img [...] %}`, `{% image [...] %}`, and `{% inlineimage [...] %}` tags.
     * @private
     * @memberof readTime
     */
    _imgRegEx = /<img[^>]+>|<image[^>]+>|!\[(.*?)\]\((.*?)\)|\{\%\s*(?:img|image|inlineimage)\s+[^%]*\%\}/gi;

    /**
     * @property {RegExp} _vidRegEx
     * @description regex to match video tags in the HTML and HEXO tags
     * @private
     * @memberof readTime
     */
    _vidRegEx = /<video[^>]+>|<object[^>]+>|\{\%\s*(?:video|videojs|vimeo|youtube|youtuber)\s+[^%]*\%\}/gi;

    /**
     * @property {RegExp} _codeBlockRegex
     * @description regex to match fenced markdown code blocks (content in group 1) and HTML `<code>` elements (content in group 2)
     * @private
     * @memberof readTime
     */
    _codeBlockRegex = /```([\s\S]*?)```|<code\b[^>]*>([\s\S]*?)<\/code>/gi;

    /**
     * @property {object} langProfile
     * @description profile of the language to build metrics. Default is English.
     * @private
     * @memberof readTime
     */
    langProfile = {
        /**
         * @property {string} name
         * @description language name
         */
        name: "English",

        /**
         * @property {array} unicodeRange
         * @description character ranges (regex character-class fragments) that define the characters used in the language
         */
        unicodeRange: ["\u0021-\u007E", "\u00A1-\u024F", "\u2C60-\u2C7F"],

        /**
         * @property {number} charPerMin
         * @description characters per minute, on average, the typical reader can process
         */
        charPerMin: 987,

        /**
         * @property {number} wordsPerMin
         * @description words per minute, on average, the typical reader can process
         */
        wordsPerMin: 228,

        /**
         * @property {object} fuzzyTime
         * @description properties for making human readable text for given read time
         */
        fuzzyTime: {
            /**
             * @property {string} pattern
             * @description defines how to print the human readable time; requires `sprintf()` string pattern
             * using the named arguments `approx`, `count`, and `time_unit`
             */
            pattern: "%(approx)s %(count)d %(time_unit)s",

            /**
             * @property {string} approx
             * @description text for the word "about"
             */
            approx: "About",

            /**
             * @property {object} time_unit
             * @description properties for words that define units of time in language
             */
            time_unit: {
                second: "second",
                seconds: "seconds",
                minute: "minute",
                minutes: "minutes",
                hour: "hour",
                hours: "hours",
                day: "day",
                days: "days",
                month: "month",
                months: "months",
                year: "year",
                years: "years"
            }
        }
    };

    /**
     * @property {object} option
     * @description options for output
     * @private
     * @memberof readTime
     */
    option = {
        /**
         * @property defaultTime
         * @description option to reference langProfile timeUnit for estimated time to read the document;
         * Can be set to any of the `this.langProfile.fuzzyTime.time_unit` properties.
         * Setting this to `auto` enables human readable output string
         */
        defaultTime: "auto",

        /**
         * @property {number} imgReadTime
         * @description Assumed average time, in seconds, for reader to review the first image.
         */
        imgReadTime: 12,

        /**
         * @property {number} vidReadTime
         * @description Assumed average time, in seconds, for viewer to watch the first video.
         */
        vidReadTime: 60
    };

    /**
     * constructor
     *
     * Stores the inputs, strips hidden comments and HTML tags from the text,
     * and populates the image, video, whitespace, character, and word counts.
     *
     * @param {object} _langProfile imported profile data from ../Settings/ folder; replaces the default English profile when it is a non-null object
     * @param {string} _text post.content data from HEXO; `null`/`undefined` are treated as an empty document
     * @param {object} _option override options from ../Settings/ folder; merged over the defaults
     * @memberof readTime
     */
    constructor(_langProfile, _text, _option) {
        // Coerce so that a missing or non-string content cannot make the regex helpers throw.
        this.text = typeof _text === "string" ? _text : String(_text ?? "");

        if (typeof _langProfile === "object" && _langProfile !== null) {
            this.langProfile = _langProfile;
        }
        if (typeof _option === "object" && _option !== null) {
            this.option = Object.assign(this.option, _option);
        }

        this.filterHiddenComments();
        // Snapshot before tags are stripped: <code> elements are only detectable here.
        this._markupText = this.text;
        this.imgCounter();
        this.vidCounter();
        this.filterHTML();
        this.countWhitespace();
        this.countCharacters();
        this.countWords();
    }

    /**
     * @method countCharacters
     * @desc Counts the characters of `this.text` that fall inside the
     * language profile's `unicodeRange`. A missing or empty range list yields 0.
     * @private
     * @memberof readTime
     */
    countCharacters() {
        const ranges = this.langProfile.unicodeRange;
        if (!Array.isArray(ranges) || ranges.length === 0) {
            this.charCount = 0;
            return;
        }
        // The "u" flag makes each match a full code point rather than a UTF-16 unit.
        const charClass = new RegExp(`[${ranges.join("")}]`, "gu");
        const matches = this.text.match(charClass);
        this.charCount = matches ? matches.length : 0;
    }

    /**
     * @method countWords
     * @desc Counts the number of words in `this.text`
     * Note: supports UTF-8
     *
     * TODO: change logic for Asian languages that don't use (white)spaces for word boundaries.
     * @private
     * @memberof readTime
     */
    countWords() {
        const matchedWords = this.text.trim().match(this._wordRegEx);
        this.wordCount = matchedWords ? matchedWords.length : 0;
    }

    /**
     * @method countWhitespace
     * @desc Counts the whitespace characters in `this.text` that sit between two word characters
     * @public
     * @memberof readTime
     */
    countWhitespace() {
        const wsMatches = this.text.trim().match(this._wsRegEx);
        this.wsCount = wsMatches ? wsMatches.length : 0;
    }

    /**
     * @method imgCounter
     * @desc count number of HTML, markdown, and HEXO image tags
     * @private
     * @memberof readTime
     */
    imgCounter() {
        const imgMatches = this.text.match(this._imgRegEx);
        this.imgCount = imgMatches ? imgMatches.length : 0;
    }

    /**
     * @method imgTime
     * @description calculates the estimated read time for the images counted in `this.imgCount`.
     * Up to 10 images: the first image costs `option.imgReadTime` seconds and each following image
     * one second less (arithmetic series). More than 10 images: `n/2 * (imgReadTime + 3)` plus
     * 3 seconds for every image beyond the tenth. The result is never below 3 seconds per image.
     * @returns {number} - the estimated read time for the images in seconds
     * @memberof readTime
     */
    imgTime() {
        const count = this.imgCount;
        if (count <= 0) {
            return 0;
        }

        const first = this.option.imgReadTime;
        const seconds = count > 10
            ? ((count / 2) * (first + 3)) + (count - 10) * 3 // n/2(a+b) + 3 sec/image
            : (count / 2) * (2 * first + (1 - count)); // n/2[2a+(n-1)d]

        // ensure a minimum of 3 seconds per image
        return Math.max(seconds, count * 3);
    }

    /**
     * @method vidCounter
     * @desc count number of video tags in the HTML and HEXO video tags
     * @private
     * @memberof readTime
     */
    vidCounter() {
        const vidMatches = this.text.match(this._vidRegEx);
        this.vidCount = vidMatches ? vidMatches.length : 0;
    }

    /**
     * @method vidTime
     * @description calculates the estimated viewing time for the videos counted in `this.vidCount`.
     * Up to 3 videos: the first video costs `option.vidReadTime` seconds and each following video
     * one second less (arithmetic series). More than 3 videos: `n/2 * (vidReadTime + 15)` plus
     * 15 seconds for every video beyond the third. The result is never below 15 seconds per video.
     * @returns {number} - the estimated time for videos in seconds
     * @memberof readTime
     */
    vidTime() {
        const count = this.vidCount;
        if (count <= 0) {
            return 0;
        }

        const first = this.option.vidReadTime;
        const seconds = count > 3
            ? ((count / 2) * (first + 15)) + (count - 3) * 15 // n/2(a+b) + 15 sec/video
            : (count / 2) * (2 * first + (1 - count)); // n/2[2a+(n-1)d]

        // ensure a minimum of 15 seconds per video
        return Math.max(seconds, count * 15);
    }

    /**
     * @method codeBlocksCounter
     * @desc counts the code blocks in the document and the characters they enclose
     * (fences and `<code>` tags excluded). Resets `this.codeBlockCount` on every call,
     * so repeated calls do not accumulate.
     * @returns {number} cbCharCount - total codeblock characters
     * @private
     * @memberof readTime
     */
    codeBlocksCounter() {
        let cbCharCount = 0;
        this.codeBlockCount = 0;

        // matchAll works on a clone of the regex, so the shared /g regex keeps no lastIndex state.
        for (const match of this._markupText.matchAll(this._codeBlockRegex)) {
            // group 1: fenced block content, group 2: <code> element content
            const codeBlockContent = match[1] ?? match[2] ?? "";
            this.codeBlockCount++;
            cbCharCount += codeBlockContent.length;
        }
        return cbCharCount;
    }

    /**
     * @method filterHTML
     * @desc Filters out the HTML tags from `this.text`
     * @private
     * @memberof readTime
     */
    filterHTML() {
        // Strings are immutable: replace() returns the result, which must be stored.
        this.text = this.text.replace(this._HTMLregEx, "");
    }

    /**
     * @method filterHiddenComments
     * @desc Filters out the HTML comments from `this.text`, leaving fenced and inline code untouched
     * @private
     * @memberof readTime
     */
    filterHiddenComments() {
        this.text = this.text.replace(
            this._commentsRegEx,
            // `code` is defined only when the match is a code span; comments collapse to "".
            (match, code) => (code === undefined ? "" : code)
        );
    }

    /**
     * @method getPluralStr
     * @desc Get the correct form of the unit (singular/plural)
     * in different languages based on the given number.
     * Plural is the default; singular is used when `this.time` is below 2, and for the
     * "Arabic" profile also when `this.time` is above 10. Does nothing while
     * `this.timeUnit` is still `auto`.
     * @private
     * @memberof readTime
     */
    getPluralStr() {
        if (this.timeUnit === "auto") {
            return; // no unit has been selected yet
        }

        const singular = this.timeUnit.endsWith("s")
            ? this.timeUnit.slice(0, -1)
            : this.timeUnit;

        // In arabic, use plural form only with numbers [2-10]
        const arabicSingular = this.langProfile.name === "Arabic" && this.time > 10;
        const usePlural = this.time >= 2 && !arabicSingular;

        this.timeUnit = usePlural ? `${singular}s` : singular;
    }

    /**
     * @method getFuzzyTime
     * @private
     * @desc Calculate the fuzzy time based on the least "count" time for the {this.time}. Ex, 89sec becomes "About 1 minute"; 3663sec becomes "About 1 hour"
     * @memberof readTime
     */
    getFuzzyTime() {
        // Pick the largest unit that still fits into the time at least once.
        let unit = "seconds";
        for (const [name, seconds] of Object.entries(SECONDS_PER_UNIT)) {
            if (this.time < seconds) {
                break;
            }
            unit = name;
        }

        this.time = Math.round(this.time / SECONDS_PER_UNIT[unit]);
        this.timeUnit = unit;
    }

    /**
     * @method getUnitTime
     * @desc Convert `this.time` (seconds) into the unit named by `this.option.defaultTime`
     * and record that unit in `this.timeUnit`. Singular and plural names are both accepted;
     * an unrecognised name falls back to seconds. The count is rounded and never below 1.
     * @private
     * @memberof readTime
     */
    getUnitTime() {
        const requested = String(this.option.defaultTime).toLowerCase();
        const plural = requested.endsWith("s") ? requested : `${requested}s`;
        const unit = Object.prototype.hasOwnProperty.call(SECONDS_PER_UNIT, plural)
            ? plural
            : "seconds";

        // Clamp to 1 so a short post never renders as "0 minutes".
        this.time = Math.max(1, Math.round(this.time / SECONDS_PER_UNIT[unit]));
        this.timeUnit = unit;
    }

    /**
     * @method calculate
     * @public
     * @description builds the analytic metrics. Safe to call more than once: the estimate is
     * rebuilt from the counts each time instead of being accumulated.
     * @returns {string} readTime
     * @memberof readTime
     */
    calculate() {
        const { charPerMin, wordsPerMin, fuzzyTime } = this.langProfile;

        // Seconds needed for `count` items at `rate` items per minute; a missing rate contributes nothing.
        const toSeconds = (count, rate) => (rate > 0 ? Math.round((count * 60) / rate) : 0);
        const charSeconds = toSeconds(this.charCount, charPerMin);
        const wordSeconds = toSeconds(this.wordCount, wordsPerMin);

        // Always run the counter so `codeBlockCount` is populated.
        const codeChars = this.codeBlocksCounter();
        // NOTE(review): this term is kept as originally written; its intended units are unclear — confirm.
        const codeSeconds = charPerMin > 0 ? Math.ceil(codeChars / charPerMin) * 2 : 0;

        // text time (whichever of word/char based is longer) + image + video + code block times, all in seconds
        this.time = BASE_TIME_SECONDS
            + Math.max(charSeconds, wordSeconds)
            + this.imgTime()
            + this.vidTime()
            + codeSeconds;

        // make human readable?
        if (this.option.defaultTime == "auto") {
            this.getFuzzyTime();
        } else {
            this.getUnitTime();
        }

        // get the correct time_unit string per language
        this.getPluralStr();

        // build string pattern
        const unitNames = fuzzyTime.time_unit || {};
        this.fuzzyTime = sprintf(fuzzyTime.pattern, {
            approx: fuzzyTime.approx,
            // alias: earlier default profiles spelled the placeholder `%(about)s`
            about: fuzzyTime.approx,
            count: this.time,
            // fall back to the raw unit key when the profile has no translation for it
            time_unit: unitNames[this.timeUnit] ?? this.timeUnit
        });

        return this.fuzzyTime;
    }
}

module.exports = readTime;