UNPKG

@lunarisapp/readability

Version:

A library for calculating readability scores of texts

768 lines (748 loc) 20.7 kB
// src/index.ts import { TextStats } from "@lunarisapp/stats"; import { LRUCache } from "lru-cache"; // src/formulas/automated-readability-index.ts var BASE_COEF = -21.43; var CHARS_PER_WORDS_COEF = 4.71; var WORDS_PER_SENTENCES_COEF = 0.5; function automatedReadabilityIndex(params) { const { chars, words, sentences } = params; if (words === 0 || sentences === 0) { return 0; } return CHARS_PER_WORDS_COEF * (chars / words) + WORDS_PER_SENTENCES_COEF * (words / sentences) + BASE_COEF; } // src/formulas/coleman-liau-index.ts var BASE_COEF2 = -15.8; var LETTERS_COEF = 0.058; var SENTENCES_COEF = -0.296; function colemanLiauIndex(params) { return LETTERS_COEF * params.letters + SENTENCES_COEF * params.sentences + BASE_COEF2; } // src/formulas/crawford.ts var BASE_COEF3 = -3.407; var SENTENCE_PER_WORDS_COEF = -0.205; var SYLLABLES_PER_WORDS_COEF = 0.049; function crawford(params) { const { words, sentences, syllables } = params; if (words === 0) { return 0; } const sentencesPerWords = 100 * (sentences / words); const syllablesPerWords = 100 * (syllables / words); return SENTENCE_PER_WORDS_COEF * sentencesPerWords + SYLLABLES_PER_WORDS_COEF * syllablesPerWords + BASE_COEF3; } // src/formulas/fernandez-huerta.ts var BASE = 206.84; var SYLLABLES_PER_WORD_COEF = -60; var WORDS_PER_SENTENCE_COEF = -1.02; function fernandezHuerta(params) { const { words, sentences, syllables } = params; if (words === 0) { return 0; } const avgSyllablesPerWord = syllables / words; const avgWordsPerSentence = words / sentences; return BASE + SYLLABLES_PER_WORD_COEF * avgSyllablesPerWord + WORDS_PER_SENTENCE_COEF * avgWordsPerSentence; } // src/formulas/flesch-kincaid-grade.ts var BASE_COEF4 = -15.59; var SENTENCES_COEF2 = 0.39; var SYLLABLES_COEF = 11.8; function fleschKincaidGrade(params) { const { sentences, syllablesPerWord } = params; return SENTENCES_COEF2 * sentences + SYLLABLES_COEF * syllablesPerWord + BASE_COEF4; } // src/formulas/flesch-reading-ease.ts var DEFAULT_BASE_COEF = 206.835; var DEFAULT_SENTENCES_COEF = 1.015; var DEFAULT_SYLLABLES_PER_WORD_COEF = 84.6; function fleschReadingEase(params) { const { sentences, syllablesPerWord, coefficients } = params; const { base: baseCoef = DEFAULT_BASE_COEF, sentences: sentencesCoef = DEFAULT_SENTENCES_COEF, syllablesPerWord: syllablesPerWordCoef = DEFAULT_SYLLABLES_PER_WORD_COEF } = coefficients ?? {}; return baseCoef - sentencesCoef * sentences - syllablesPerWordCoef * syllablesPerWord; } // src/formulas/gulpease-index.ts var BASE_COEF5 = 89; var SENTENCES_COEF3 = 300; var CHARS_COEF = 10; function gulpeaseIndex(params) { const { sentences, chars, words } = params; if (words === 0) { return 0; } return (SENTENCES_COEF3 * sentences - CHARS_COEF * chars) / words + BASE_COEF5; } // src/formulas/gunning-fog.ts function gunningFog(params) { const { avgWordsPerSentence, difficultWords, totalWords } = params; if (totalWords === 0) { return 0; } const pdw = difficultWords / totalWords * 100; return 0.4 * (avgWordsPerSentence + pdw); } // src/formulas/gutierrez-polini.ts var BASE_COEF6 = 95.2; var WORDS_PER_SENTENCES_COEF2 = -0.35; var LETTERS_PER_WORDS_COEF = -9.7; function gutierrezPolini(params) { const { words, sentences, letters } = params; if (words === 0 || sentences === 0) { return 0; } const wordsPerSentences = words / sentences; const lettersPerWords = letters / words; return BASE_COEF6 + WORDS_PER_SENTENCES_COEF2 * wordsPerSentences + LETTERS_PER_WORDS_COEF * lettersPerWords; } // src/formulas/linsear-write-formula.ts function linsearWriteFormula(params) { const { sentences, syllablesPerWords } = params; let easyWords = 0; let difficultWords = 0; for (const syllables of syllablesPerWords) { if (syllables < 3) { easyWords += 1; } else { difficultWords += 1; } } if (sentences === 0) { return 0; } const score = (easyWords + difficultWords * 3) / sentences; if (score <= 20) { return (score - 2) / 2; } return score / 2; } // src/formulas/lix.ts function lix(params) { const { words, longWords, wordsPerSentence } = params; if (words === 0) { return 0; } const ratio = longWords * 100 / words; return wordsPerSentence + ratio; } // src/formulas/mcalpine-eflaw.ts function mcalpineEflaw(params) { const { words, sentences, miniWords } = params; if (sentences === 0) { return 0; } return (words + miniWords) / sentences; } // src/formulas/rix.ts function rix(params) { const { longWords, sentences } = params; if (sentences === 0) { return 0; } return longWords / sentences; } // src/formulas/smog-index.ts var BASE_COEF7 = 3.1291; var POLY_COEF = 1.043; var POLY_MULT = 30; function smogIndex(params) { const { sentences, polysyllables } = params; return POLY_COEF * Math.sqrt(polysyllables * POLY_MULT / sentences) + BASE_COEF7; } // src/formulas/szigriszt-pazos.ts function szigrisztPazos(params) { const { totalSyllables, totalWords, totalSentences, freBase } = params; if (totalWords === 0 || totalSentences === 0) { return 0; } return freBase - 62.3 * (totalSyllables / totalWords) - totalWords / totalSentences; } // src/formulas/wiener-sachtextformel.ts var VARIANTS = { 1: { ms: 0.1935, sl: 0.1672, iw: 0.1297, es: -0.0327, base: -0.875 }, 2: { ms: 0.2007, sl: 0.1682, iw: 0.1373, es: 0, base: -2.779 }, 3: { ms: 0.2963, sl: 0.1905, iw: 0, es: 0, base: -1.1144 }, 4: { ms: 0.2744, sl: 0.2656, iw: 0, es: 0, base: -1.693 } }; function wienerSachtextformel(params) { const { words, sentences, longWords, polysyllables, monosyllables, variant } = params; const { ms, sl, iw, es, base } = VARIANTS[variant]; if (words === 0 || sentences === 0) { return 0; } const msVal = 100 * polysyllables / words; const slVal = words / sentences; const iwVal = 100 * longWords / words; const esVal = 100 * monosyllables / words; return base + ms * msVal + sl * slVal + iw * iwVal + es * esVal; } // src/utils/config.ts var langs = { en: { fre_base: 206.835, fre_sentence_length: 1.015, fre_syllables_per_word: 84.6 }, de: { fre_base: 180, fre_sentence_length: 1, fre_syllables_per_word: 58.5 }, es: { fre_base: 206.84, fre_sentence_length: 1.02, fre_syllables_per_word: 0.6 }, fr: { fre_base: 207, fre_sentence_length: 1.015, fre_syllables_per_word: 73.6 }, it: { fre_base: 217, fre_sentence_length: 1.3, fre_syllables_per_word: 0.6 }, nl: { fre_base: 206.835, fre_sentence_length: 0.93, fre_syllables_per_word: 77 }, pl: { fre_base: 0, fre_sentence_length: 0, fre_syllables_per_word: 0 }, ru: { fre_base: 206.835, fre_sentence_length: 1.3, fre_syllables_per_word: 60.1 }, hu: { fre_base: 206.835, fre_sentence_length: 1.015, fre_syllables_per_word: 58.5 } }; // src/utils/utils.ts function lruCache(cache, key, values, fn, enabled = true) { const valuesStr = JSON.stringify(values); const cacheKey = `${key}:${valuesStr}`; if (enabled && cache.has(cacheKey)) { return cache.get(cacheKey); } const result = fn(...values); cache.set(cacheKey, result); return result; } // src/index.ts var WHITESPACE_RE = /\s+/; var TextReadability = class { constructor(props) { this.cache = new LRUCache({ max: 512 }); this.lang = "en_US"; const { lang, cache } = props ?? {}; this.cacheEnabled = cache ?? true; this.setLang(lang ?? this.lang); } getCfg(key) { const lang = this.lang.split("_")[0]; const config = langs[lang]; if (!config) { throw new Error( `Language "${lang}" is not supported. Supported languages: ${Object.keys(langs).join(", ")}` ); } return config[key]; } /** * Set the language for the text statistics. * @param lang */ setLang(lang) { this.lang = lang; this.textStats = new TextStats({ lang }); this.cache.clear(); } /** * Calculate the Flesch reading ease test for text. * https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease * @param text */ fleschReadingEase(text) { return lruCache( this.cache, "fleschReadingEase", [text], (text2) => this.computeFleschReadingEase(text2), this.cacheEnabled ); } computeFleschReadingEase(text) { if (!text) { return 0; } if (this.lang === "pl") { throw new Error( "Flesch reading ease test does not support Polish language." ); } const sInterval = ["es", "it"].includes(this.lang) ? 100 : void 0; const sentences = this.textStats.avgSentenceLength(text); const syllablesPerWord = this.textStats.avgSyllablesPerWord( text, sInterval ); return fleschReadingEase({ sentences, syllablesPerWord, coefficients: { base: this.getCfg("fre_base"), sentences: this.getCfg("fre_sentence_length"), syllablesPerWord: this.getCfg("fre_syllables_per_word") } }); } /** * Calculate the Flesch-Kincaid grade level for text. * https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch%E2%80%93Kincaid_grade_level * TODO: can we support multiple languages? * @param text */ fleschKincaidGrade(text) { return lruCache( this.cache, "fleschKincaidGrade", [text], (text2) => this.computeFleschKincaidGrade(text2), this.cacheEnabled ); } computeFleschKincaidGrade(text) { if (!text) { return 0; } if (this.lang === "pl") { throw new Error( "Flesch-Kincaid grade level does not support Polish language." ); } const sentences = this.textStats.avgSentenceLength(text); const syllablesPerWord = this.textStats.avgSyllablesPerWord(text); return fleschKincaidGrade({ sentences, syllablesPerWord }); } /** * Calculate the SMOG index for text. * https://en.wikipedia.org/wiki/SMOG * @param text */ smogIndex(text) { return lruCache( this.cache, "smogIndex", [text], (text2) => this.computeSmogIndex(text2), this.cacheEnabled ); } computeSmogIndex(text) { if (!text) { return 0; } const sentences = this.textStats.sentenceCount(text); if (sentences < 3) { return 0; } const polysyllables = this.textStats.polysyllableCount(text); return smogIndex({ sentences, polysyllables }); } /** * Calculate the Gunning Fog index for text. * https://en.wikipedia.org/wiki/Gunning_fog_index * @param text */ gunningFog(text) { return lruCache( this.cache, "gunningFog", [text], (text2) => this.computeGunningFog(text2), this.cacheEnabled ); } computeGunningFog(text) { if (!text) { return 0; } const avgWordsPerSentence = this.textStats.avgWordsPerSentence(text); const difficultWords = this.textStats.polysyllableCount(text); const totalWords = this.textStats.wordCount(text); return gunningFog({ avgWordsPerSentence, difficultWords, totalWords }); } /** * Calculate the Coleman-Liau index for text. * https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index * @param text */ colemanLiauIndex(text) { return lruCache( this.cache, "colemanLiauIndex", [text], (text2) => this.computeColemanLiauIndex(text2), this.cacheEnabled ); } computeColemanLiauIndex(text) { if (!text) { return 0; } const letters = this.textStats.avgLettersPerWord(text) * 100; const sentences = this.textStats.avgSentencesPerWord(text) * 100; return colemanLiauIndex({ letters, sentences }); } /** * Calculate the automated readability index for text. * https://en.wikipedia.org/wiki/Automated_readability_index * @param text */ automatedReadabilityIndex(text) { return lruCache( this.cache, "automatedReadabilityIndex", [text], (text2) => this.computeAutomatedReadabilityIndex(text2), this.cacheEnabled ); } computeAutomatedReadabilityIndex(text) { if (!text) { return 0; } const chars = this.textStats.charCount(text); const words = this.textStats.wordCount(text); const sentences = this.textStats.sentenceCount(text); return automatedReadabilityIndex({ chars, words, sentences }); } /** * Calculate the Linsear Write formula for text. * https://en.wikipedia.org/wiki/Linsear_Write * @param text * @param sample Number of words to sample from the text */ linsearWriteFormula(text, sample = 100) { return lruCache( this.cache, "linsearWriteFormula", [text, sample], (text2, sample2) => this.computeLinsearWriteFormula(text2, sample2), this.cacheEnabled ); } computeLinsearWriteFormula(text, sample) { if (!text) { return 0; } const words = text.split(WHITESPACE_RE).slice(0, sample).filter((word) => word); const newText = words.join(" "); const sentences = this.textStats.sentenceCount(newText); const syllablesPerWords = words.map( (word) => this.textStats.syllableCount(word) ); return linsearWriteFormula({ sentences, syllablesPerWords }); } /** * Calculate Gutierrez Polini's readability formula for text (Spanish only). * https://www.spanishreadability.com/gutierrez-de-polinis-readability-formula * @param text */ gutierrezPolini(text) { return lruCache( this.cache, "gutierrezPolini", [text], (text2) => this.computeGutierrezPolini(text2), this.cacheEnabled ); } computeGutierrezPolini(text) { if (this.lang !== "es") { console.warn(`Gutierrez Polini's formula only supports Spanish language. Textstat language is set to '${this.lang}'.`); } if (!text) { return 0; } const words = this.textStats.wordCount(text); const sentences = this.textStats.sentenceCount(text); const letters = this.textStats.letterCount(text); return gutierrezPolini({ words, sentences, letters }); } /** * Calculate Crawford's formula for text (Spanish only). * https://www.spanishreadability.com/the-crawford-score-for-spanish-texts * @param text */ crawford(text) { return lruCache( this.cache, "crawford", [text], (text2) => this.computeCrawford(text2), this.cacheEnabled ); } computeCrawford(text) { if (this.lang !== "es") { console.warn(`Crawford's formula only supports Spanish language. Textstat language is set to '${this.lang}'.`); } if (!text) { return 0; } const sentences = this.textStats.sentenceCount(text); const words = this.textStats.wordCount(text); const syllables = this.textStats.syllableCount(text); return crawford({ words, sentences, syllables }); } /** * Calculate the Fernandez Huerta readability formula for text (Spanish only). * https://www.spanishreadability.com/the-fernandez-huerta-readability-formula * @param text */ fernandezHuerta(text) { return lruCache( this.cache, "fernandezHuerta", [text], (text2) => this.computeFernandezHuerta(text2), this.cacheEnabled ); } computeFernandezHuerta(text) { if (this.lang !== "es") { console.warn(`Fernandez Huerta's formula only supports Spanish language. Textstat language is set to '${this.lang}'.`); } if (!text) { return 0; } const sentences = this.textStats.sentenceCount(text); const words = this.textStats.wordCount(text); const syllables = this.textStats.syllableCount(text); return fernandezHuerta({ words, sentences, syllables }); } /** * Calculate the Szigriszt Pazos readability formula for text (Spanish only). * @param text */ szigrisztPazos(text) { return lruCache( this.cache, "szigrisztPazos", [text], (text2) => this.computeSzigrisztPazos(text2), this.cacheEnabled ); } computeSzigrisztPazos(text) { if (this.lang !== "es") { console.warn(`Szigriszt Pazos is designed for Spanish text. Textstat language is set to '${this.lang}'.`); } if (!text) { return 0; } const totalSyllables = this.textStats.syllableCount(text); const totalWords = this.textStats.wordCount(text); const totalSentences = this.textStats.sentenceCount(text); const freBase = this.getCfg("fre_base"); return szigrisztPazos({ totalSyllables, totalWords, totalSentences, freBase }); } /** * Calculate the Gulpease index for text (Italian only). * https://it.wikipedia.org/wiki/Indice_Gulpease * @param text */ gulpeaseIndex(text) { return lruCache( this.cache, "gulpeaseIndex", [text], (text2) => this.computeGulpeaseIndex(text2), this.cacheEnabled ); } computeGulpeaseIndex(text) { if (this.lang !== "it") { console.warn(`Gulpease index only supports Italian language. Textstat language is set to '${this.lang}'.`); } if (!text) { return 0; } return gulpeaseIndex({ words: this.textStats.wordCount(text), sentences: this.textStats.sentenceCount(text), chars: this.textStats.charCount(text) }); } /** * Calculate the RIX ratio. * https://readable.com/readability/lix-rix-readability-formulas/ * @param text */ rix(text) { return lruCache( this.cache, "rix", [text], (text2) => this.computeRix(text2), this.cacheEnabled ); } computeRix(text) { if (!text) { return 0; } return rix({ longWords: this.textStats.longWordCount(text), sentences: this.textStats.sentenceCount(text) }); } /** * Calculate the LIX ratio. * https://readable.com/readability/lix-rix-readability-formulas/ * @param text */ lix(text) { return lruCache( this.cache, "lix", [text], (text2) => this.computeLix(text2), this.cacheEnabled ); } computeLix(text) { if (!text) { return 0; } return lix({ words: this.textStats.wordCount(text), longWords: this.textStats.longWordCount(text), wordsPerSentence: this.textStats.avgWordsPerSentence(text) }); } /** * Calculate the Wiener Sachtextformel for text (german). * https://de.wikipedia.org/wiki/Lesbarkeitsindex#Wiener_Sachtextformel * @param text * @param variant */ wienerSachtextformel(text, variant = 1) { return lruCache( this.cache, "wienerSachtextformel", [text, variant], (text2, variant2) => this.computeWienerSachtextformel(text2, variant2), this.cacheEnabled ); } computeWienerSachtextformel(text, variant) { if (this.lang !== "de") { console.warn(`Wiener Sachtextformel only supports German language. Textstat language is set to '${this.lang}'.`); } if (!text) { return 0; } return wienerSachtextformel({ words: this.textStats.wordCount(text), sentences: this.textStats.sentenceCount(text), longWords: this.textStats.longWordCount(text), polysyllables: this.textStats.polysyllableCount(text), monosyllables: this.textStats.monosyllableCount(text), variant }); } /** * Calculate the McAlpine EFLAW score for text. * https://www.angelfire.com/nd/nirmaldasan/journalismonline/fpetge.html * @param text */ mcalpineEflaw(text) { return lruCache( this.cache, "mcalpineEflaw", [text], (text2) => this.computeMcalpineEflaw(text2), this.cacheEnabled ); } computeMcalpineEflaw(text) { if (!text) { return 0; } const words = this.textStats.wordCount(text); const sentences = this.textStats.sentenceCount(text); const miniWords = this.textStats.miniWordCount(text); return mcalpineEflaw({ words, sentences, miniWords }); } }; export { TextReadability, VARIANTS, automatedReadabilityIndex, colemanLiauIndex, crawford, fernandezHuerta, fleschKincaidGrade, fleschReadingEase, gulpeaseIndex, gunningFog, gutierrezPolini, linsearWriteFormula, lix, mcalpineEflaw, rix, smogIndex, szigrisztPazos, wienerSachtextformel };