@lunarisapp/readability
Version:
A library for calculating readability scores of texts
768 lines (748 loc) • 20.7 kB
JavaScript
// src/index.ts
import { TextStats } from "@lunarisapp/stats";
import { LRUCache } from "lru-cache";
// src/formulas/automated-readability-index.ts
var BASE_COEF = -21.43;
var CHARS_PER_WORDS_COEF = 4.71;
var WORDS_PER_SENTENCES_COEF = 0.5;
/**
 * Automated Readability Index:
 * 4.71 * (chars / words) + 0.5 * (words / sentences) - 21.43.
 *
 * @param params Object with the raw counts `chars`, `words` and `sentences`.
 * @returns The index, or 0 when there are no words or no sentences.
 */
function automatedReadabilityIndex(params) {
  const hasWords = params.words !== 0;
  const hasSentences = params.sentences !== 0;
  // Either ratio would divide by zero; report a neutral score instead.
  if (!(hasWords && hasSentences)) {
    return 0;
  }
  const charsPerWord = params.chars / params.words;
  const wordsPerSentence = params.words / params.sentences;
  return CHARS_PER_WORDS_COEF * charsPerWord + WORDS_PER_SENTENCES_COEF * wordsPerSentence + BASE_COEF;
}
// src/formulas/coleman-liau-index.ts
var BASE_COEF2 = -15.8;
var LETTERS_COEF = 0.058;
var SENTENCES_COEF = -0.296;
/**
 * Coleman-Liau index: 0.058 * letters - 0.296 * sentences - 15.8.
 *
 * @param params Object with `letters` and `sentences`. The TextReadability
 * class in this file passes both as per-100-words averages.
 * @returns The Coleman-Liau index.
 */
function colemanLiauIndex(params) {
  const { letters, sentences } = params;
  const lettersTerm = LETTERS_COEF * letters;
  const sentencesTerm = SENTENCES_COEF * sentences;
  return lettersTerm + sentencesTerm + BASE_COEF2;
}
// src/formulas/crawford.ts
var BASE_COEF3 = -3.407;
var SENTENCE_PER_WORDS_COEF = -0.205;
var SYLLABLES_PER_WORDS_COEF = 0.049;
/**
 * Crawford's formula:
 * -0.205 * (sentences per 100 words) + 0.049 * (syllables per 100 words) - 3.407.
 *
 * @param params Object with the raw counts `words`, `sentences` and `syllables`.
 * @returns The Crawford score, or 0 when there are no words.
 */
function crawford(params) {
  if (params.words === 0) {
    return 0;
  }
  // Both inputs of the formula are rates per 100 words.
  const per100Words = (count) => 100 * (count / params.words);
  return SENTENCE_PER_WORDS_COEF * per100Words(params.sentences) + SYLLABLES_PER_WORDS_COEF * per100Words(params.syllables) + BASE_COEF3;
}
// src/formulas/fernandez-huerta.ts
var BASE = 206.84;
var SYLLABLES_PER_WORD_COEF = -60;
var WORDS_PER_SENTENCE_COEF = -1.02;
/**
 * Fernandez Huerta readability formula:
 * 206.84 - 60 * (syllables / words) - 1.02 * (words / sentences).
 *
 * @param params Object with the raw counts `words`, `sentences` and `syllables`.
 * @returns The score, or 0 when there are no words or no sentences.
 */
function fernandezHuerta(params) {
  const { words, sentences, syllables } = params;
  // Guard both denominators: a zero sentence count would otherwise turn
  // words / sentences into Infinity and the score into -Infinity.
  if (words === 0 || sentences === 0) {
    return 0;
  }
  const avgSyllablesPerWord = syllables / words;
  const avgWordsPerSentence = words / sentences;
  return BASE + SYLLABLES_PER_WORD_COEF * avgSyllablesPerWord + WORDS_PER_SENTENCE_COEF * avgWordsPerSentence;
}
// src/formulas/flesch-kincaid-grade.ts
var BASE_COEF4 = -15.59;
var SENTENCES_COEF2 = 0.39;
var SYLLABLES_COEF = 11.8;
/**
 * Flesch-Kincaid grade level:
 * 0.39 * sentences + 11.8 * syllablesPerWord - 15.59.
 *
 * @param params Object with `sentences` (the TextReadability class in this
 * file passes the average sentence length) and `syllablesPerWord`.
 * @returns The grade level.
 */
function fleschKincaidGrade(params) {
  const sentenceTerm = SENTENCES_COEF2 * params.sentences;
  const syllableTerm = SYLLABLES_COEF * params.syllablesPerWord;
  return sentenceTerm + syllableTerm + BASE_COEF4;
}
// src/formulas/flesch-reading-ease.ts
var DEFAULT_BASE_COEF = 206.835;
var DEFAULT_SENTENCES_COEF = 1.015;
var DEFAULT_SYLLABLES_PER_WORD_COEF = 84.6;
/**
 * Flesch reading ease:
 * base - sentencesCoef * sentences - syllablesPerWordCoef * syllablesPerWord.
 *
 * @param params Object with `sentences`, `syllablesPerWord` and an optional
 * `coefficients` object (`base`, `sentences`, `syllablesPerWord`). Any
 * coefficient that is missing or `undefined` falls back to the defaults
 * 206.835, 1.015 and 84.6 respectively.
 * @returns The reading-ease score.
 */
function fleschReadingEase(params) {
  const overrides = params.coefficients ?? {};
  // Only `undefined` triggers a default; explicit zeros are honoured.
  const baseCoef = overrides.base === undefined ? DEFAULT_BASE_COEF : overrides.base;
  const sentencesCoef = overrides.sentences === undefined ? DEFAULT_SENTENCES_COEF : overrides.sentences;
  const syllablesPerWordCoef = overrides.syllablesPerWord === undefined ? DEFAULT_SYLLABLES_PER_WORD_COEF : overrides.syllablesPerWord;
  return baseCoef - sentencesCoef * params.sentences - syllablesPerWordCoef * params.syllablesPerWord;
}
// src/formulas/gulpease-index.ts
var BASE_COEF5 = 89;
var SENTENCES_COEF3 = 300;
var CHARS_COEF = 10;
/**
 * Gulpease index: 89 + (300 * sentences - 10 * chars) / words.
 *
 * @param params Object with the raw counts `sentences`, `chars` and `words`.
 * @returns The index, or 0 when there are no words.
 */
function gulpeaseIndex(params) {
  const wordTotal = params.words;
  if (wordTotal === 0) {
    return 0;
  }
  const numerator = SENTENCES_COEF3 * params.sentences - CHARS_COEF * params.chars;
  return numerator / wordTotal + BASE_COEF5;
}
// src/formulas/gunning-fog.ts
/**
 * Gunning Fog index:
 * 0.4 * (avgWordsPerSentence + 100 * difficultWords / totalWords).
 *
 * @param params Object with `avgWordsPerSentence`, `difficultWords` and
 * `totalWords`.
 * @returns The index, or 0 when there are no words.
 */
function gunningFog(params) {
  if (params.totalWords === 0) {
    return 0;
  }
  // Percentage of difficult words in the text.
  const difficultPercent = params.difficultWords / params.totalWords * 100;
  const combined = params.avgWordsPerSentence + difficultPercent;
  return 0.4 * combined;
}
// src/formulas/gutierrez-polini.ts
var BASE_COEF6 = 95.2;
var WORDS_PER_SENTENCES_COEF2 = -0.35;
var LETTERS_PER_WORDS_COEF = -9.7;
/**
 * Gutierrez Polini's formula:
 * 95.2 - 0.35 * (words / sentences) - 9.7 * (letters / words).
 *
 * @param params Object with the raw counts `words`, `sentences` and `letters`.
 * @returns The score, or 0 when there are no words or no sentences.
 */
function gutierrezPolini(params) {
  const wordTotal = params.words;
  const sentenceTotal = params.sentences;
  if (wordTotal === 0 || sentenceTotal === 0) {
    return 0;
  }
  const sentenceLengthTerm = WORDS_PER_SENTENCES_COEF2 * (wordTotal / sentenceTotal);
  const wordLengthTerm = LETTERS_PER_WORDS_COEF * (params.letters / wordTotal);
  return BASE_COEF6 + sentenceLengthTerm + wordLengthTerm;
}
// src/formulas/linsear-write-formula.ts
/**
 * Linsear Write formula.
 *
 * Each word with fewer than 3 syllables scores 1 point and every other word
 * scores 3. The total is divided by the sentence count; a provisional score
 * of at most 20 yields (score - 2) / 2, anything higher yields score / 2.
 *
 * @param params Object with `sentences` (count) and `syllablesPerWords`
 * (iterable of per-word syllable counts).
 * @returns The Linsear Write grade, or 0 when there are no sentences.
 */
function linsearWriteFormula(params) {
  let weightedWords = 0;
  for (const syllableCount of params.syllablesPerWords) {
    weightedWords += syllableCount < 3 ? 1 : 3;
  }
  if (params.sentences === 0) {
    return 0;
  }
  const provisional = weightedWords / params.sentences;
  return provisional <= 20 ? (provisional - 2) / 2 : provisional / 2;
}
// src/formulas/lix.ts
/**
 * LIX readability measure: wordsPerSentence + 100 * longWords / words.
 *
 * @param params Object with `words` (count), `longWords` (count) and
 * `wordsPerSentence` (average).
 * @returns The LIX value, or 0 when there are no words.
 */
function lix(params) {
  if (params.words === 0) {
    return 0;
  }
  // Percentage of long words in the text.
  const longWordPercent = params.longWords * 100 / params.words;
  return params.wordsPerSentence + longWordPercent;
}
// src/formulas/mcalpine-eflaw.ts
/**
 * McAlpine EFLAW score: (words + miniWords) / sentences.
 *
 * @param params Object with the raw counts `words`, `sentences` and `miniWords`.
 * @returns The score, or 0 when there are no sentences.
 */
function mcalpineEflaw(params) {
  const sentenceTotal = params.sentences;
  if (sentenceTotal === 0) {
    return 0;
  }
  const weightedWords = params.words + params.miniWords;
  return weightedWords / sentenceTotal;
}
// src/formulas/rix.ts
/**
 * RIX ratio: longWords / sentences.
 *
 * @param params Object with the raw counts `longWords` and `sentences`.
 * @returns The ratio, or 0 when there are no sentences.
 */
function rix(params) {
  return params.sentences === 0 ? 0 : params.longWords / params.sentences;
}
// src/formulas/smog-index.ts
var BASE_COEF7 = 3.1291;
var POLY_COEF = 1.043;
var POLY_MULT = 30;
/**
 * SMOG index: 1.043 * sqrt(polysyllables * 30 / sentences) + 3.1291.
 *
 * @param params Object with the raw counts `sentences` and `polysyllables`.
 * @returns The SMOG grade, or 0 when there are no sentences.
 */
function smogIndex(params) {
  const { sentences, polysyllables } = params;
  // Without sentences the ratio is a division by zero (Infinity or NaN);
  // return the same neutral 0 the other formulas use for empty input.
  if (sentences === 0) {
    return 0;
  }
  return POLY_COEF * Math.sqrt(polysyllables * POLY_MULT / sentences) + BASE_COEF7;
}
// src/formulas/szigriszt-pazos.ts
/**
 * Szigriszt Pazos readability formula:
 * freBase - 62.3 * (totalSyllables / totalWords) - totalWords / totalSentences.
 *
 * @param params Object with `totalSyllables`, `totalWords`, `totalSentences`
 * and `freBase` (the constant term of the formula).
 * @returns The score, or 0 when there are no words or no sentences.
 */
function szigrisztPazos(params) {
  const wordTotal = params.totalWords;
  const sentenceTotal = params.totalSentences;
  if (wordTotal === 0 || sentenceTotal === 0) {
    return 0;
  }
  const syllablesPerWord = params.totalSyllables / wordTotal;
  return params.freBase - 62.3 * syllablesPerWord - wordTotal / sentenceTotal;
}
// src/formulas/wiener-sachtextformel.ts
var VARIANTS = {
1: {
ms: 0.1935,
sl: 0.1672,
iw: 0.1297,
es: -0.0327,
base: -0.875
},
2: {
ms: 0.2007,
sl: 0.1682,
iw: 0.1373,
es: 0,
base: -2.779
},
3: {
ms: 0.2963,
sl: 0.1905,
iw: 0,
es: 0,
base: -1.1144
},
4: {
ms: 0.2744,
sl: 0.2656,
iw: 0,
es: 0,
base: -1.693
}
};
function wienerSachtextformel(params) {
const { words, sentences, longWords, polysyllables, monosyllables, variant } = params;
const { ms, sl, iw, es, base } = VARIANTS[variant];
if (words === 0 || sentences === 0) {
return 0;
}
const msVal = 100 * polysyllables / words;
const slVal = words / sentences;
const iwVal = 100 * longWords / words;
const esVal = 100 * monosyllables / words;
return base + ms * msVal + sl * slVal + iw * iwVal + es * esVal;
}
// src/utils/config.ts
/**
 * Per-language Flesch reading ease coefficients, keyed by bare language code.
 *
 * `fre_base` is the constant term, `fre_sentence_length` weights the average
 * sentence length and `fre_syllables_per_word` weights the syllable average.
 * The Polish entry is all zeros; the TextReadability class in this file
 * refuses to compute Flesch scores for "pl".
 */
var langs = (() => {
  // Builds one table row from (base, sentence-length coef, syllable coef).
  const fre = (base, sentenceLength, syllablesPerWord) => ({
    fre_base: base,
    fre_sentence_length: sentenceLength,
    fre_syllables_per_word: syllablesPerWord
  });
  return {
    en: fre(206.835, 1.015, 84.6),
    de: fre(180, 1, 58.5),
    es: fre(206.84, 1.02, 0.6),
    fr: fre(207, 1.015, 73.6),
    it: fre(217, 1.3, 0.6),
    nl: fre(206.835, 0.93, 77),
    pl: fre(0, 0, 0),
    ru: fre(206.835, 1.3, 60.1),
    hu: fre(206.835, 1.015, 58.5)
  };
})();
// src/utils/utils.ts
function lruCache(cache, key, values, fn, enabled = true) {
const valuesStr = JSON.stringify(values);
const cacheKey = `${key}:${valuesStr}`;
if (enabled && cache.has(cacheKey)) {
return cache.get(cacheKey);
}
const result = fn(...values);
cache.set(cacheKey, result);
return result;
}
// src/index.ts
var WHITESPACE_RE = /\s+/;
/**
 * Computes readability scores for a text in a configurable language.
 *
 * Every public scoring method is a thin wrapper that memoizes the matching
 * `compute*` method through `lruCache`; the `compute*` methods gather the
 * statistics from `TextStats` and delegate to the standalone formula
 * functions. All scoring methods return 0 for empty text.
 *
 * Language tags may carry a region suffix (the default is "en_US"); every
 * language-dependent decision is made on the part before the first "_".
 */
var TextReadability = class {
  /**
   * @param props Optional settings: `lang` (defaults to "en_US") and `cache`
   * (defaults to true; pass false to bypass result caching).
   */
  constructor(props) {
    this.cache = new LRUCache({ max: 512 });
    this.lang = "en_US";
    const { lang, cache } = props ?? {};
    this.cacheEnabled = cache ?? true;
    this.setLang(lang ?? this.lang);
  }
  /**
   * Return the bare language code of the configured language, i.e. the part
   * of `this.lang` before the first "_" ("es_ES" -> "es", "es" -> "es").
   */
  getLangRoot() {
    return this.lang.split("_")[0];
  }
  /**
   * Look up a Flesch coefficient for the configured language.
   * @param key One of the keys of a `langs` entry (e.g. "fre_base").
   * @throws {Error} When the language has no entry in `langs`.
   */
  getCfg(key) {
    const lang = this.getLangRoot();
    const config = langs[lang];
    if (!config) {
      throw new Error(
        `Language "${lang}" is not supported. Supported languages: ${Object.keys(langs).join(", ")}`
      );
    }
    return config[key];
  }
  /**
   * Emit a console warning unless the configured language root is `expected`.
   * @param expected Bare language code the formula was designed for.
   * @param message First line of the warning.
   */
  warnUnlessLang(expected, message) {
    if (this.getLangRoot() !== expected) {
      console.warn(`${message}\nTextstat language is set to '${this.lang}'.`);
    }
  }
  /**
   * Set the language for the text statistics.
   * Replaces the `TextStats` instance and drops all cached scores, since
   * cache keys do not include the language.
   * @param lang
   */
  setLang(lang) {
    this.lang = lang;
    this.textStats = new TextStats({ lang });
    this.cache.clear();
  }
  /**
   * Calculate the Flesch reading ease test for text.
   * https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
   * @param text
   */
  fleschReadingEase(text) {
    return lruCache(
      this.cache,
      "fleschReadingEase",
      [text],
      (text2) => this.computeFleschReadingEase(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Flesch reading ease using the coefficients of the configured
   * language.
   * @param text
   * @throws {Error} For Polish, or when the language has no coefficients.
   */
  computeFleschReadingEase(text) {
    if (!text) {
      return 0;
    }
    // Compare the language root so region-tagged values such as "pl_PL" or
    // "es_ES" are treated like their bare codes, matching getCfg().
    const langRoot = this.getLangRoot();
    if (langRoot === "pl") {
      throw new Error(
        "Flesch reading ease test does not support Polish language."
      );
    }
    // Spanish and Italian pass an interval of 100 to avgSyllablesPerWord
    // (presumably yielding syllables per 100 words, which fits their 0.6
    // coefficient in `langs` - confirm against TextStats).
    const sInterval = ["es", "it"].includes(langRoot) ? 100 : void 0;
    const sentences = this.textStats.avgSentenceLength(text);
    const syllablesPerWord = this.textStats.avgSyllablesPerWord(
      text,
      sInterval
    );
    return fleschReadingEase({
      sentences,
      syllablesPerWord,
      coefficients: {
        base: this.getCfg("fre_base"),
        sentences: this.getCfg("fre_sentence_length"),
        syllablesPerWord: this.getCfg("fre_syllables_per_word")
      }
    });
  }
  /**
   * Calculate the Flesch-Kincaid grade level for text.
   * https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch%E2%80%93Kincaid_grade_level
   * TODO: can we support multiple languages?
   * @param text
   */
  fleschKincaidGrade(text) {
    return lruCache(
      this.cache,
      "fleschKincaidGrade",
      [text],
      (text2) => this.computeFleschKincaidGrade(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Flesch-Kincaid grade level.
   * @param text
   * @throws {Error} For Polish.
   */
  computeFleschKincaidGrade(text) {
    if (!text) {
      return 0;
    }
    if (this.getLangRoot() === "pl") {
      throw new Error(
        "Flesch-Kincaid grade level does not support Polish language."
      );
    }
    const sentences = this.textStats.avgSentenceLength(text);
    const syllablesPerWord = this.textStats.avgSyllablesPerWord(text);
    return fleschKincaidGrade({ sentences, syllablesPerWord });
  }
  /**
   * Calculate the SMOG index for text.
   * https://en.wikipedia.org/wiki/SMOG
   * @param text
   */
  smogIndex(text) {
    return lruCache(
      this.cache,
      "smogIndex",
      [text],
      (text2) => this.computeSmogIndex(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached SMOG index. Returns 0 for texts with fewer than 3 sentences.
   * @param text
   */
  computeSmogIndex(text) {
    if (!text) {
      return 0;
    }
    const sentences = this.textStats.sentenceCount(text);
    if (sentences < 3) {
      return 0;
    }
    const polysyllables = this.textStats.polysyllableCount(text);
    return smogIndex({ sentences, polysyllables });
  }
  /**
   * Calculate the Gunning Fog index for text.
   * https://en.wikipedia.org/wiki/Gunning_fog_index
   * @param text
   */
  gunningFog(text) {
    return lruCache(
      this.cache,
      "gunningFog",
      [text],
      (text2) => this.computeGunningFog(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Gunning Fog index; the polysyllable count is used as the number
   * of difficult words.
   * @param text
   */
  computeGunningFog(text) {
    if (!text) {
      return 0;
    }
    const avgWordsPerSentence = this.textStats.avgWordsPerSentence(text);
    const difficultWords = this.textStats.polysyllableCount(text);
    const totalWords = this.textStats.wordCount(text);
    return gunningFog({ avgWordsPerSentence, difficultWords, totalWords });
  }
  /**
   * Calculate the Coleman-Liau index for text.
   * https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
   * @param text
   */
  colemanLiauIndex(text) {
    return lruCache(
      this.cache,
      "colemanLiauIndex",
      [text],
      (text2) => this.computeColemanLiauIndex(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Coleman-Liau index; both averages are scaled to per-100-words
   * values before being passed to the formula.
   * @param text
   */
  computeColemanLiauIndex(text) {
    if (!text) {
      return 0;
    }
    const letters = this.textStats.avgLettersPerWord(text) * 100;
    const sentences = this.textStats.avgSentencesPerWord(text) * 100;
    return colemanLiauIndex({ letters, sentences });
  }
  /**
   * Calculate the automated readability index for text.
   * https://en.wikipedia.org/wiki/Automated_readability_index
   * @param text
   */
  automatedReadabilityIndex(text) {
    return lruCache(
      this.cache,
      "automatedReadabilityIndex",
      [text],
      (text2) => this.computeAutomatedReadabilityIndex(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached automated readability index.
   * @param text
   */
  computeAutomatedReadabilityIndex(text) {
    if (!text) {
      return 0;
    }
    const chars = this.textStats.charCount(text);
    const words = this.textStats.wordCount(text);
    const sentences = this.textStats.sentenceCount(text);
    return automatedReadabilityIndex({ chars, words, sentences });
  }
  /**
   * Calculate the Linsear Write formula for text.
   * https://en.wikipedia.org/wiki/Linsear_Write
   * @param text
   * @param sample Number of words to sample from the text
   */
  linsearWriteFormula(text, sample = 100) {
    return lruCache(
      this.cache,
      "linsearWriteFormula",
      [text, sample],
      (text2, sample2) => this.computeLinsearWriteFormula(text2, sample2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Linsear Write formula over the first `sample` words of `text`.
   * @param text
   * @param sample Number of words to sample from the text
   */
  computeLinsearWriteFormula(text, sample) {
    if (!text) {
      return 0;
    }
    // Drop empty tokens BEFORE sampling: leading whitespace makes split()
    // emit an empty first token, which must not use up a sample slot.
    const words = text.split(WHITESPACE_RE).filter((word) => word).slice(0, sample);
    const newText = words.join(" ");
    const sentences = this.textStats.sentenceCount(newText);
    const syllablesPerWords = words.map(
      (word) => this.textStats.syllableCount(word)
    );
    return linsearWriteFormula({
      sentences,
      syllablesPerWords
    });
  }
  /**
   * Calculate Gutierrez Polini's readability formula for text (Spanish only).
   * https://www.spanishreadability.com/gutierrez-de-polinis-readability-formula
   * @param text
   */
  gutierrezPolini(text) {
    return lruCache(
      this.cache,
      "gutierrezPolini",
      [text],
      (text2) => this.computeGutierrezPolini(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Gutierrez Polini score; warns when the language is not Spanish.
   * @param text
   */
  computeGutierrezPolini(text) {
    this.warnUnlessLang("es", "Gutierrez Polini's formula only supports Spanish language.");
    if (!text) {
      return 0;
    }
    const words = this.textStats.wordCount(text);
    const sentences = this.textStats.sentenceCount(text);
    const letters = this.textStats.letterCount(text);
    return gutierrezPolini({ words, sentences, letters });
  }
  /**
   * Calculate Crawford's formula for text (Spanish only).
   * https://www.spanishreadability.com/the-crawford-score-for-spanish-texts
   * @param text
   */
  crawford(text) {
    return lruCache(
      this.cache,
      "crawford",
      [text],
      (text2) => this.computeCrawford(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Crawford score; warns when the language is not Spanish.
   * @param text
   */
  computeCrawford(text) {
    this.warnUnlessLang("es", "Crawford's formula only supports Spanish language.");
    if (!text) {
      return 0;
    }
    const sentences = this.textStats.sentenceCount(text);
    const words = this.textStats.wordCount(text);
    const syllables = this.textStats.syllableCount(text);
    return crawford({ words, sentences, syllables });
  }
  /**
   * Calculate the Fernandez Huerta readability formula for text (Spanish only).
   * https://www.spanishreadability.com/the-fernandez-huerta-readability-formula
   * @param text
   */
  fernandezHuerta(text) {
    return lruCache(
      this.cache,
      "fernandezHuerta",
      [text],
      (text2) => this.computeFernandezHuerta(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Fernandez Huerta score; warns when the language is not Spanish.
   * @param text
   */
  computeFernandezHuerta(text) {
    this.warnUnlessLang("es", "Fernandez Huerta's formula only supports Spanish language.");
    if (!text) {
      return 0;
    }
    const sentences = this.textStats.sentenceCount(text);
    const words = this.textStats.wordCount(text);
    const syllables = this.textStats.syllableCount(text);
    return fernandezHuerta({ words, sentences, syllables });
  }
  /**
   * Calculate the Szigriszt Pazos readability formula for text (Spanish only).
   * @param text
   */
  szigrisztPazos(text) {
    return lruCache(
      this.cache,
      "szigrisztPazos",
      [text],
      (text2) => this.computeSzigrisztPazos(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Szigriszt Pazos score; warns when the language is not Spanish.
   * Uses the configured language's `fre_base` as the constant term.
   * @param text
   * @throws {Error} When the language has no coefficients in `langs`.
   */
  computeSzigrisztPazos(text) {
    this.warnUnlessLang("es", "Szigriszt Pazos is designed for Spanish text.");
    if (!text) {
      return 0;
    }
    const totalSyllables = this.textStats.syllableCount(text);
    const totalWords = this.textStats.wordCount(text);
    const totalSentences = this.textStats.sentenceCount(text);
    const freBase = this.getCfg("fre_base");
    return szigrisztPazos({
      totalSyllables,
      totalWords,
      totalSentences,
      freBase
    });
  }
  /**
   * Calculate the Gulpease index for text (Italian only).
   * https://it.wikipedia.org/wiki/Indice_Gulpease
   * @param text
   */
  gulpeaseIndex(text) {
    return lruCache(
      this.cache,
      "gulpeaseIndex",
      [text],
      (text2) => this.computeGulpeaseIndex(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Gulpease index; warns when the language is not Italian.
   * @param text
   */
  computeGulpeaseIndex(text) {
    this.warnUnlessLang("it", "Gulpease index only supports Italian language.");
    if (!text) {
      return 0;
    }
    return gulpeaseIndex({
      words: this.textStats.wordCount(text),
      sentences: this.textStats.sentenceCount(text),
      chars: this.textStats.charCount(text)
    });
  }
  /**
   * Calculate the RIX ratio.
   * https://readable.com/readability/lix-rix-readability-formulas/
   * @param text
   */
  rix(text) {
    return lruCache(
      this.cache,
      "rix",
      [text],
      (text2) => this.computeRix(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached RIX ratio.
   * @param text
   */
  computeRix(text) {
    if (!text) {
      return 0;
    }
    return rix({
      longWords: this.textStats.longWordCount(text),
      sentences: this.textStats.sentenceCount(text)
    });
  }
  /**
   * Calculate the LIX ratio.
   * https://readable.com/readability/lix-rix-readability-formulas/
   * @param text
   */
  lix(text) {
    return lruCache(
      this.cache,
      "lix",
      [text],
      (text2) => this.computeLix(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached LIX ratio.
   * @param text
   */
  computeLix(text) {
    if (!text) {
      return 0;
    }
    return lix({
      words: this.textStats.wordCount(text),
      longWords: this.textStats.longWordCount(text),
      wordsPerSentence: this.textStats.avgWordsPerSentence(text)
    });
  }
  /**
   * Calculate the Wiener Sachtextformel for text (german).
   * https://de.wikipedia.org/wiki/Lesbarkeitsindex#Wiener_Sachtextformel
   * @param text
   * @param variant
   */
  wienerSachtextformel(text, variant = 1) {
    return lruCache(
      this.cache,
      "wienerSachtextformel",
      [text, variant],
      (text2, variant2) => this.computeWienerSachtextformel(text2, variant2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached Wiener Sachtextformel; warns when the language is not German.
   * @param text
   * @param variant Key of the coefficient set in `VARIANTS`.
   */
  computeWienerSachtextformel(text, variant) {
    this.warnUnlessLang("de", "Wiener Sachtextformel only supports German language.");
    if (!text) {
      return 0;
    }
    return wienerSachtextformel({
      words: this.textStats.wordCount(text),
      sentences: this.textStats.sentenceCount(text),
      longWords: this.textStats.longWordCount(text),
      polysyllables: this.textStats.polysyllableCount(text),
      monosyllables: this.textStats.monosyllableCount(text),
      variant
    });
  }
  /**
   * Calculate the McAlpine EFLAW score for text.
   * https://www.angelfire.com/nd/nirmaldasan/journalismonline/fpetge.html
   * @param text
   */
  mcalpineEflaw(text) {
    return lruCache(
      this.cache,
      "mcalpineEflaw",
      [text],
      (text2) => this.computeMcalpineEflaw(text2),
      this.cacheEnabled
    );
  }
  /**
   * Uncached McAlpine EFLAW score.
   * @param text
   */
  computeMcalpineEflaw(text) {
    if (!text) {
      return 0;
    }
    const words = this.textStats.wordCount(text);
    const sentences = this.textStats.sentenceCount(text);
    const miniWords = this.textStats.miniWordCount(text);
    return mcalpineEflaw({ words, sentences, miniWords });
  }
};
export {
TextReadability,
VARIANTS,
automatedReadabilityIndex,
colemanLiauIndex,
crawford,
fernandezHuerta,
fleschKincaidGrade,
fleschReadingEase,
gulpeaseIndex,
gunningFog,
gutierrezPolini,
linsearWriteFormula,
lix,
mcalpineEflaw,
rix,
smogIndex,
szigrisztPazos,
wienerSachtextformel
};