UNPKG

@lunarisapp/readability

Version:

A library for calculating readability scores of texts

664 lines (645 loc) 18.2 kB
"use strict";

// ---------------------------------------------------------------------------
// CommonJS/ESM interop helpers. These are kept in their generated shape
// (including `var`) so hoisting and lazy-getter export semantics are unchanged.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines each entry of `all` on `target` as an enumerable getter, so exports
// are resolved lazily (the exported bindings may be assigned later in the file).
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies own properties of `from` onto `to` as getters, skipping keys already
// present on `to` and the optional `except` key.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wraps a namespace object in a fresh object flagged with `__esModule: true`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var index_exports = {};
__export(index_exports, {
  TextReadability: () => TextReadability,
  VARIANTS: () => VARIANTS,
  automatedReadabilityIndex: () => automatedReadabilityIndex,
  colemanLiauIndex: () => colemanLiauIndex,
  crawford: () => crawford,
  fleschKincaidGrade: () => fleschKincaidGrade,
  fleschReadingEase: () => fleschReadingEase,
  gulpeaseIndex: () => gulpeaseIndex,
  gutierrezPolini: () => gutierrezPolini,
  linsearWriteFormula: () => linsearWriteFormula,
  lix: () => lix,
  mcalpineEflaw: () => mcalpineEflaw,
  rix: () => rix,
  smogIndex: () => smogIndex,
  wienerSachtextformel: () => wienerSachtextformel
});
module.exports = __toCommonJS(index_exports);
var import_stats = require("@lunarisapp/stats");

// src/utils/config.ts
// Per-language settings, keyed by the base language code (the part of the
// locale before "_"). The `fre_*` entries are the Flesch reading ease
// coefficients passed to `fleschReadingEase`; `syllable_threshold` is not read
// anywhere in this file.
// NOTE(review): "es" and "it" use 0.6 for `fre_syllables_per_word`; these two
// languages are also the ones for which `computeFleschReadingEase` passes an
// interval of 100 to `avgSyllablesPerWord` — presumably the two belong together.
var langs = {
  en: {
    fre_base: 206.835,
    fre_sentence_length: 1.015,
    fre_syllables_per_word: 84.6,
    syllable_threshold: 3
  },
  de: {
    fre_base: 180,
    fre_sentence_length: 1,
    fre_syllables_per_word: 58.5
  },
  es: {
    fre_base: 206.84,
    fre_sentence_length: 1.02,
    fre_syllables_per_word: 0.6
  },
  fr: {
    fre_base: 207,
    fre_sentence_length: 1.015,
    fre_syllables_per_word: 73.6
  },
  it: {
    fre_base: 217,
    fre_sentence_length: 1.3,
    fre_syllables_per_word: 0.6
  },
  nl: {
    fre_base: 206.835,
    fre_sentence_length: 0.93,
    fre_syllables_per_word: 77
  },
  // Polish carries no usable Flesch coefficients; the class methods reject it.
  pl: {
    fre_base: 0,
    fre_sentence_length: 0,
    fre_syllables_per_word: 0,
    syllable_threshold: 4
  },
  ru: {
    fre_base: 206.835,
    fre_sentence_length: 1.3,
    fre_syllables_per_word: 60.1
  },
  hu: {
    fre_base: 206.835,
    fre_sentence_length: 1.015,
    fre_syllables_per_word: 58.5,
    syllable_threshold: 5
  }
};

// src/formulas/fleschReadingEase.ts
var DEFAULT_BASE_COEF = 206.835;
var DEFAULT_SENTENCES_COEF = 1.015;
var DEFAULT_SYLLABLES_PER_WORD_COEF = 84.6;
/**
 * Flesch reading ease: `base - sentencesCoef * sentences - syllablesCoef * syllablesPerWord`.
 * @param params
 * @param params.sentences Sentence-length term (the class passes `avgSentenceLength(text)`).
 * @param params.syllablesPerWord Syllables-per-word term.
 * @param params.coefficients Optional overrides `{ base, sentences, syllablesPerWord }`;
 *   any entry that is `undefined` falls back to the default above.
 * @returns The score.
 */
function fleschReadingEase(params) {
  const { sentences, syllablesPerWord, coefficients } = params;
  const {
    base: baseCoef = DEFAULT_BASE_COEF,
    sentences: sentencesCoef = DEFAULT_SENTENCES_COEF,
    syllablesPerWord: syllablesPerWordCoef = DEFAULT_SYLLABLES_PER_WORD_COEF
  } = coefficients ?? {};
  return baseCoef - sentencesCoef * sentences - syllablesPerWordCoef * syllablesPerWord;
}

// src/formulas/fleschKincaidGrade.ts
var BASE_COEF = -15.59;
var SENTENCES_COEF = 0.39;
var SYLLABLES_COEF = 11.8;
/**
 * Flesch-Kincaid grade level: `0.39 * sentences + 11.8 * syllablesPerWord - 15.59`.
 * @param params
 * @param params.sentences Sentence-length term (the class passes `avgSentenceLength(text)`).
 * @param params.syllablesPerWord Syllables-per-word term.
 * @returns The grade level.
 */
function fleschKincaidGrade(params) {
  const { sentences, syllablesPerWord } = params;
  return SENTENCES_COEF * sentences + SYLLABLES_COEF * syllablesPerWord + BASE_COEF;
}

// src/formulas/smogIndex.ts
var BASE_COEF2 = 3.1291;
var POLY_COEF = 1.043;
var POLY_MULT = 30;
/**
 * SMOG index: `1.043 * sqrt(polysyllables * 30 / sentences) + 3.1291`.
 * No guard is applied here; with `sentences === 0` the result is not finite.
 * The class method returns 0 for texts with fewer than 3 sentences before calling this.
 * @param params
 * @param params.sentences Number of sentences.
 * @param params.polysyllables Number of polysyllabic words.
 * @returns The SMOG index.
 */
function smogIndex(params) {
  const { sentences, polysyllables } = params;
  return POLY_COEF * Math.sqrt(polysyllables * POLY_MULT / sentences) + BASE_COEF2;
}

// src/formulas/colemanLiauIndex.ts
var BASE_COEF3 = -15.8;
// NOTE(review): the published Coleman-Liau formula uses 0.0588 for this
// coefficient; 0.058 is kept here to avoid silently changing scores — confirm
// whether the shorter value is intentional.
var LETTERS_COEF = 0.058;
var SENTENCES_COEF2 = -0.296;
/**
 * Coleman-Liau index: `LETTERS_COEF * letters + SENTENCES_COEF2 * sentences + BASE_COEF3`.
 * @param params
 * @param params.letters Letters term (the class passes `avgLettersPerWord(text) * 100`).
 * @param params.sentences Sentences term (the class passes `avgSentencesPerWord(text) * 100`).
 * @returns The index.
 */
function colemanLiauIndex(params) {
  return LETTERS_COEF * params.letters + SENTENCES_COEF2 * params.sentences + BASE_COEF3;
}

// src/formulas/automatedReadabilityIndex.ts
var BASE_COEF4 = -21.43;
var CHARS_PER_WORDS_COEF = 4.71;
var WORDS_PER_SENTENCES_COEF = 0.5;
/**
 * Automated readability index:
 * `4.71 * (chars / words) + 0.5 * (words / sentences) - 21.43`.
 * @param params
 * @param params.chars Number of characters.
 * @param params.words Number of words.
 * @param params.sentences Number of sentences.
 * @returns The index, or 0 when `words` or `sentences` is 0.
 */
function automatedReadabilityIndex(params) {
  const { chars, words, sentences } = params;
  // Division by zero does not throw in JavaScript (it yields Infinity/NaN),
  // so the zero case must be checked explicitly to get the 0 fallback.
  if (words === 0 || sentences === 0) {
    return 0;
  }
  return CHARS_PER_WORDS_COEF * (chars / words) + WORDS_PER_SENTENCES_COEF * (words / sentences) + BASE_COEF4;
}

// src/formulas/linsearWriteFormula.ts
/**
 * Linsear Write formula. Words with fewer than 3 syllables count 1 point,
 * all others 3 points; the total is divided by the sentence count. A raw
 * score of at most 20 yields `(score - 2) / 2`, otherwise `score / 2`.
 * @param params
 * @param params.sentences Number of sentences in the sample.
 * @param params.syllablesPerWords Iterable of per-word syllable counts.
 * @returns The score, or 0 when `sentences` is 0.
 */
function linsearWriteFormula(params) {
  const { sentences, syllablesPerWords } = params;
  // Division by zero yields Infinity/NaN rather than throwing; guard explicitly.
  if (sentences === 0) {
    return 0;
  }
  let easyWords = 0;
  let difficultWords = 0;
  for (const syllables of syllablesPerWords) {
    if (syllables < 3) {
      easyWords += 1;
    } else {
      difficultWords += 1;
    }
  }
  const score = (easyWords + difficultWords * 3) / sentences;
  return score <= 20 ? (score - 2) / 2 : score / 2;
}

// src/formulas/gutierrezPolini.ts
var BASE_COEF5 = 95.2;
var WORDS_PER_SENTENCES_COEF2 = -0.35;
var LETTERS_PER_WORDS_COEF = -9.7;
/**
 * Gutierrez de Polini's formula:
 * `95.2 - 0.35 * (words / sentences) - 9.7 * (letters / words)`.
 * @param params
 * @param params.words Number of words.
 * @param params.sentences Number of sentences.
 * @param params.letters Number of letters.
 * @returns The score, or 0 when `words` or `sentences` is 0.
 */
function gutierrezPolini(params) {
  const { words, sentences, letters } = params;
  // Division by zero yields Infinity/NaN rather than throwing; guard explicitly.
  if (words === 0 || sentences === 0) {
    return 0;
  }
  const wordsPerSentences = words / sentences;
  const lettersPerWords = letters / words;
  return BASE_COEF5 + WORDS_PER_SENTENCES_COEF2 * wordsPerSentences + LETTERS_PER_WORDS_COEF * lettersPerWords;
}

// src/formulas/crawford.ts
var BASE_COEF6 = -3.407;
var SENTENCE_PER_WORDS_COEF = -0.205;
var SYLLABLES_PER_WORDS_COEF = 0.049;
/**
 * Crawford's formula, using sentences and syllables per 100 words:
 * `-0.205 * (100 * sentences / words) + 0.049 * (100 * syllables / words) - 3.407`.
 * @param params
 * @param params.words Number of words.
 * @param params.sentences Number of sentences.
 * @param params.syllables Number of syllables.
 * @returns The score, or 0 when `words` is 0.
 */
function crawford(params) {
  const { words, sentences, syllables } = params;
  // Division by zero yields Infinity/NaN rather than throwing; guard explicitly.
  if (words === 0) {
    return 0;
  }
  const sentencesPerWords = 100 * (sentences / words);
  const syllablesPerWords = 100 * (syllables / words);
  return SENTENCE_PER_WORDS_COEF * sentencesPerWords + SYLLABLES_PER_WORDS_COEF * syllablesPerWords + BASE_COEF6;
}

// src/formulas/gulpeaseIndex.ts
var BASE_COEF7 = 89;
var SENTENCES_COEF3 = 300;
var CHARS_COEF = 10;
/**
 * Gulpease index: `(300 * sentences - 10 * chars) / words + 89`.
 * No guard is applied here; with `words === 0` the result is not finite.
 * @param params
 * @param params.sentences Number of sentences.
 * @param params.chars Number of characters.
 * @param params.words Number of words.
 * @returns The index.
 */
function gulpeaseIndex(params) {
  const { sentences, chars, words } = params;
  return (SENTENCES_COEF3 * sentences - CHARS_COEF * chars) / words + BASE_COEF7;
}

// src/formulas/wienerSachtextformel.ts
// Coefficient sets for the four Wiener Sachtextformel variants:
// `ms` weights the polysyllable percentage, `sl` the words per sentence,
// `iw` the long-word percentage and `es` the monosyllable percentage.
var VARIANTS = {
  1: {
    ms: 0.1935,
    sl: 0.1672,
    iw: 0.1297,
    es: -0.0327,
    base: -0.875
  },
  2: {
    ms: 0.2007,
    sl: 0.1682,
    iw: 0.1373,
    es: 0,
    base: -2.779
  },
  3: {
    ms: 0.2963,
    sl: 0.1905,
    iw: 0,
    es: 0,
    base: -1.1144
  },
  4: {
    ms: 0.2744,
    sl: 0.2656,
    iw: 0,
    es: 0,
    base: -1.693
  }
};
/**
 * Wiener Sachtextformel:
 * `base + ms * (100 * polysyllables / words) + sl * (words / sentences)
 *  + iw * (100 * longWords / words) + es * (100 * monosyllables / words)`
 * with the coefficients taken from `VARIANTS[variant]`.
 * No zero guard is applied to `words` or `sentences`.
 * @param params
 * @param params.words Number of words.
 * @param params.sentences Number of sentences.
 * @param params.longWords Number of long words.
 * @param params.polysyllables Number of polysyllabic words.
 * @param params.monosyllables Number of monosyllabic words.
 * @param params.variant Key into `VARIANTS` (1-4).
 * @returns The score.
 * @throws {TypeError} When `variant` is not a key of `VARIANTS`.
 */
function wienerSachtextformel(params) {
  const {
    words,
    sentences,
    longWords,
    polysyllables,
    monosyllables,
    variant
  } = params;
  // Fail with a descriptive message instead of an opaque destructuring error.
  if (!Object.prototype.hasOwnProperty.call(VARIANTS, variant)) {
    throw new TypeError(`Unknown Wiener Sachtextformel variant: ${String(variant)}`);
  }
  const { ms, sl, iw, es, base } = VARIANTS[variant];
  const msVal = 100 * polysyllables / words;
  const slVal = words / sentences;
  const iwVal = 100 * longWords / words;
  const esVal = 100 * monosyllables / words;
  return base + ms * msVal + sl * slVal + iw * iwVal + es * esVal;
}

// src/formulas/mcalpineEflaw.ts
/**
 * McAlpine EFLAW score: `(words + miniWords) / sentences`.
 * No guard is applied here; with `sentences === 0` the result is not finite.
 * @param params
 * @param params.words Number of words.
 * @param params.sentences Number of sentences.
 * @param params.miniWords Number of mini words.
 * @returns The score.
 */
function mcalpineEflaw(params) {
  const { words, sentences, miniWords } = params;
  return (words + miniWords) / sentences;
}

// src/formulas/lix.ts
/**
 * LIX: `wordsPerSentence + longWords * 100 / words`.
 * @param params
 * @param params.words Number of words.
 * @param params.longWords Number of long words.
 * @param params.wordsPerSentence Average number of words per sentence.
 * @returns The LIX value, or 0 when `words` is 0.
 */
function lix(params) {
  const { words, longWords, wordsPerSentence } = params;
  if (words === 0) {
    return 0;
  }
  const ratio = longWords * 100 / words;
  return wordsPerSentence + ratio;
}

// src/formulas/rix.ts
/**
 * RIX: `longWords / sentences`.
 * @param params
 * @param params.longWords Number of long words.
 * @param params.sentences Number of sentences.
 * @returns The RIX value, or 0 when `sentences` is 0.
 */
function rix(params) {
  const { longWords, sentences } = params;
  if (sentences === 0) {
    return 0;
  }
  return longWords / sentences;
}

// src/index.ts
var import_lru_cache = require("lru-cache");

// src/utils/utils.ts
/**
 * Memoizes `fn(...values)` in `cache` under a key built from `key` and the
 * JSON-serialized `values`.
 * @param cache Cache object exposing `has`, `get` and `set`.
 * @param key Name that namespaces the entry (one per formula).
 * @param values Arguments for `fn`; must be JSON-serializable.
 * @param fn Function computing the result on a cache miss.
 * @param enabled When false the cache is neither read nor written.
 * @returns The cached or freshly computed result.
 */
function lruCache(cache, key, values, fn, enabled = true) {
  if (!enabled) {
    // Caching is off: do not serialize a key or store results nobody will read.
    return fn(...values);
  }
  const cacheKey = `${key}:${JSON.stringify(values)}`;
  if (cache.has(cacheKey)) {
    return cache.get(cacheKey);
  }
  const result = fn(...values);
  cache.set(cacheKey, result);
  return result;
}
/**
 * Returns the base language of a locale string, i.e. the part before the
 * first "_" ("en_US" -> "en", "es" -> "es").
 * @param lang Locale string.
 */
function baseLanguage(lang) {
  return lang.split("_")[0];
}

// src/index.ts
/**
 * Computes readability scores for texts in a configurable language. Text
 * statistics come from `TextStats` (`@lunarisapp/stats`); results are
 * memoized per formula and arguments in an LRU cache of at most 512 entries
 * unless caching is disabled.
 */
var TextReadability = class {
  /**
   * @param props Optional settings.
   * @param props.lang Locale such as "en_US" or "de" (default "en_US").
   * @param props.cache Whether results are cached (default true).
   */
  constructor(props) {
    this.cache = new import_lru_cache.LRUCache({ max: 512 });
    this.lang = "en_US";
    const { lang, cache } = props ?? {};
    this.cacheEnabled = cache ?? true;
    this.setLang(lang ?? this.lang);
  }
  /**
   * Look up a language-specific setting from `langs` for the current base language.
   * @param key Setting name, e.g. "fre_base".
   * @returns The value, or `undefined` when the language or the key is not configured.
   */
  getCfg(key) {
    // Unknown languages yield undefined so callers can fall back to defaults
    // instead of failing on a property access of undefined.
    return langs[baseLanguage(this.lang)]?.[key];
  }
  /**
   * Set the language for the text statistics.
   * Creates a new `TextStats` instance and clears the cache, since cached
   * scores were computed for the previous language.
   * @param lang
   */
  setLang(lang) {
    this.lang = lang;
    this.textStats = new import_stats.TextStats({ lang });
    this.cache.clear();
  }
  /**
   * Calculate the Flesch reading ease test for text.
   * https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
   * @param text
   * @throws {Error} When the language is Polish.
   */
  fleschReadingEase(text) {
    return lruCache(
      this.cache,
      "fleschReadingEase",
      [text],
      (text2) => this.computeFleschReadingEase(text2),
      this.cacheEnabled
    );
  }
  computeFleschReadingEase(text) {
    // Compare the base language so locales such as "pl_PL" or "es_ES" behave
    // like "pl" / "es", matching how getCfg resolves coefficients.
    const lang = baseLanguage(this.lang);
    if (lang === "pl") {
      throw new Error(
        "Flesch reading ease test does not support Polish language."
      );
    }
    // Spanish and Italian pass an interval of 100 to avgSyllablesPerWord.
    const sInterval = ["es", "it"].includes(lang) ? 100 : undefined;
    const sentences = this.textStats.avgSentenceLength(text);
    const syllablesPerWord = this.textStats.avgSyllablesPerWord(
      text,
      sInterval
    );
    return fleschReadingEase({
      sentences,
      syllablesPerWord,
      coefficients: {
        base: this.getCfg("fre_base"),
        sentences: this.getCfg("fre_sentence_length"),
        syllablesPerWord: this.getCfg("fre_syllables_per_word")
      }
    });
  }
  /**
   * Calculate the Flesch-Kincaid grade level for text.
   * https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch%E2%80%93Kincaid_grade_level
   * TODO: can we support multiple languages?
   * @param text
   * @throws {Error} When the language is Polish.
   */
  fleschKincaidGrade(text) {
    return lruCache(
      this.cache,
      "fleschKincaidGrade",
      [text],
      (text2) => this.computeFleschKincaidGrade(text2),
      this.cacheEnabled
    );
  }
  computeFleschKincaidGrade(text) {
    if (baseLanguage(this.lang) === "pl") {
      throw new Error(
        "Flesch-Kincaid grade level does not support Polish language."
      );
    }
    const sentences = this.textStats.avgSentenceLength(text);
    const syllablesPerWord = this.textStats.avgSyllablesPerWord(text);
    return fleschKincaidGrade({ sentences, syllablesPerWord });
  }
  /**
   * Calculate the SMOG index for text.
   * Returns 0 for texts with fewer than 3 sentences.
   * https://en.wikipedia.org/wiki/SMOG
   * @param text
   */
  smogIndex(text) {
    return lruCache(
      this.cache,
      "smogIndex",
      [text],
      (text2) => this.computeSmogIndex(text2),
      this.cacheEnabled
    );
  }
  computeSmogIndex(text) {
    const sentences = this.textStats.sentenceCount(text);
    if (sentences < 3) {
      return 0;
    }
    const polysyllables = this.textStats.polysyllableCount(text);
    return smogIndex({ sentences, polysyllables });
  }
  /**
   * Calculate the Coleman-Liau index for text.
   * https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
   * @param text
   */
  colemanLiauIndex(text) {
    return lruCache(
      this.cache,
      "colemanLiauIndex",
      [text],
      (text2) => this.computeColemanLiauIndex(text2),
      this.cacheEnabled
    );
  }
  computeColemanLiauIndex(text) {
    // Both averages are scaled by 100 before being fed to the formula.
    const letters = this.textStats.avgLettersPerWord(text) * 100;
    const sentences = this.textStats.avgSentencesPerWord(text) * 100;
    return colemanLiauIndex({ letters, sentences });
  }
  /**
   * Calculate the automated readability index for text.
   * https://en.wikipedia.org/wiki/Automated_readability_index
   * @param text
   */
  automatedReadabilityIndex(text) {
    return lruCache(
      this.cache,
      "automatedReadabilityIndex",
      [text],
      (text2) => this.computeAutomatedReadabilityIndex(text2),
      this.cacheEnabled
    );
  }
  computeAutomatedReadabilityIndex(text) {
    const chars = this.textStats.charCount(text);
    const words = this.textStats.wordCount(text);
    const sentences = this.textStats.sentenceCount(text);
    return automatedReadabilityIndex({ chars, words, sentences });
  }
  /**
   * Calculate the Linsear Write formula for text.
   * https://en.wikipedia.org/wiki/Linsear_Write
   * @param text
   * @param sample Number of words to sample from the text
   */
  linsearWriteFormula(text, sample = 100) {
    return lruCache(
      this.cache,
      "linsearWriteFormula",
      [text, sample],
      (text2, sample2) => this.computeLinsearWriteFormula(text2, sample2),
      this.cacheEnabled
    );
  }
  computeLinsearWriteFormula(text, sample) {
    // Drop empty tokens (produced by leading/trailing whitespace) before
    // taking the sample, so the sample really holds up to `sample` words.
    const words = text.split(/\s+/).filter((word) => word).slice(0, sample);
    const newText = words.join(" ");
    const sentences = this.textStats.sentenceCount(newText);
    const syllablesPerWords = words.map(
      (word) => this.textStats.syllableCount(word)
    );
    return linsearWriteFormula({ sentences, syllablesPerWords });
  }
  /**
   * Calculate Gutierrez Polini's readability formula for text (Spanish only).
   * Logs a warning when the current language is not Spanish; returns 0 for empty text.
   * https://www.spanishreadability.com/gutierrez-de-polinis-readability-formula
   * @param text
   */
  gutierrezPolini(text) {
    return lruCache(
      this.cache,
      "gutierrezPolini",
      [text],
      (text2) => this.computeGutierrezPolini(text2),
      this.cacheEnabled
    );
  }
  computeGutierrezPolini(text) {
    if (baseLanguage(this.lang) !== "es") {
      console.warn(`Gutierrez Polini's formula only supports Spanish language. Textstat language is set to '${this.lang}'.`);
    }
    if (!text) {
      return 0;
    }
    const words = this.textStats.wordCount(text);
    const sentences = this.textStats.sentenceCount(text);
    const letters = this.textStats.letterCount(text);
    return gutierrezPolini({ words, sentences, letters });
  }
  /**
   * Calculate Crawford's formula for text (Spanish only).
   * Logs a warning when the current language is not Spanish; returns 0 for empty text.
   * https://www.spanishreadability.com/the-crawford-score-for-spanish-texts
   * @param text
   */
  crawford(text) {
    return lruCache(
      this.cache,
      "crawford",
      [text],
      (text2) => this.computeCrawford(text2),
      this.cacheEnabled
    );
  }
  computeCrawford(text) {
    if (baseLanguage(this.lang) !== "es") {
      console.warn(`Crawford's formula only supports Spanish language. Textstat language is set to '${this.lang}'.`);
    }
    if (!text) {
      return 0;
    }
    const sentences = this.textStats.sentenceCount(text);
    const words = this.textStats.wordCount(text);
    const syllables = this.textStats.syllableCount(text);
    return crawford({ words, sentences, syllables });
  }
  /**
   * Calculate the Gulpease index for text (Italian only).
   * Logs a warning when the current language is not Italian; returns 0 for empty text.
   * https://it.wikipedia.org/wiki/Indice_Gulpease
   * @param text
   */
  gulpeaseIndex(text) {
    return lruCache(
      this.cache,
      "gulpeaseIndex",
      [text],
      (text2) => this.computeGulpeaseIndex(text2),
      this.cacheEnabled
    );
  }
  computeGulpeaseIndex(text) {
    if (baseLanguage(this.lang) !== "it") {
      console.warn(`Gulpease index only supports Italian language. Textstat language is set to '${this.lang}'.`);
    }
    if (!text) {
      return 0;
    }
    return gulpeaseIndex({
      words: this.textStats.wordCount(text),
      sentences: this.textStats.sentenceCount(text),
      chars: this.textStats.charCount(text)
    });
  }
  /**
   * Calculate the RIX ratio.
   * Returns 0 for empty text.
   * https://readable.com/readability/lix-rix-readability-formulas/
   * @param text
   */
  rix(text) {
    return lruCache(
      this.cache,
      "rix",
      [text],
      (text2) => this.computeRix(text2),
      this.cacheEnabled
    );
  }
  computeRix(text) {
    if (!text) {
      return 0;
    }
    return rix({
      longWords: this.textStats.longWordCount(text),
      sentences: this.textStats.sentenceCount(text)
    });
  }
  /**
   * Calculate the LIX ratio.
   * Returns 0 for empty text.
   * https://readable.com/readability/lix-rix-readability-formulas/
   * @param text
   */
  lix(text) {
    return lruCache(
      this.cache,
      "lix",
      [text],
      (text2) => this.computeLix(text2),
      this.cacheEnabled
    );
  }
  computeLix(text) {
    if (!text) {
      return 0;
    }
    return lix({
      words: this.textStats.wordCount(text),
      longWords: this.textStats.longWordCount(text),
      wordsPerSentence: this.textStats.avgWordsPerSentence(text)
    });
  }
  /**
   * Calculate the Wiener Sachtextformel for text (German only).
   * Logs a warning when the current language is not German; returns 0 for empty text.
   * https://de.wikipedia.org/wiki/Lesbarkeitsindex#Wiener_Sachtextformel
   * @param text
   * @param variant Key into `VARIANTS` (1-4, default 1)
   * @throws {TypeError} When `variant` is not a key of `VARIANTS` (non-empty text only).
   */
  wienerSachtextformel(text, variant = 1) {
    return lruCache(
      this.cache,
      "wienerSachtextformel",
      [text, variant],
      (text2, variant2) => this.computeWienerSachtextformel(text2, variant2),
      this.cacheEnabled
    );
  }
  computeWienerSachtextformel(text, variant) {
    if (baseLanguage(this.lang) !== "de") {
      console.warn(`Wiener Sachtextformel only supports German language. Textstat language is set to '${this.lang}'.`);
    }
    if (!text) {
      return 0;
    }
    return wienerSachtextformel({
      words: this.textStats.wordCount(text),
      sentences: this.textStats.sentenceCount(text),
      longWords: this.textStats.longWordCount(text),
      polysyllables: this.textStats.polysyllableCount(text),
      monosyllables: this.textStats.monosyllableCount(text),
      variant
    });
  }
  /**
   * Calculate the McAlpine EFLAW score for text.
   * Returns 0 for empty text.
   * https://www.angelfire.com/nd/nirmaldasan/journalismonline/fpetge.html
   * @param text
   */
  mcalpineEflaw(text) {
    return lruCache(
      this.cache,
      "mcalpineEflaw",
      [text],
      (text2) => this.computeMcalpineEflaw(text2),
      this.cacheEnabled
    );
  }
  computeMcalpineEflaw(text) {
    if (!text) {
      return 0;
    }
    const words = this.textStats.wordCount(text);
    const sentences = this.textStats.sentenceCount(text);
    const miniWords = this.textStats.miniWordCount(text);
    return mcalpineEflaw({ words, sentences, miniWords });
  }
};
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  TextReadability,
  VARIANTS,
  automatedReadabilityIndex,
  colemanLiauIndex,
  crawford,
  fleschKincaidGrade,
  fleschReadingEase,
  gulpeaseIndex,
  gutierrezPolini,
  linsearWriteFormula,
  lix,
  mcalpineEflaw,
  rix,
  smogIndex,
  wienerSachtextformel
});