UNPKG

@anthonyjclark/proofreader

Version:

Simple text proofreader based on 'write-good' (hemingway-app-like suggestions) and 'nodehun' (spelling).

138 lines (117 loc) 3.81 kB
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');
const writeGood = require('write-good');
const nodehun = require('nodehun');
const spellcheck = require('nodehun-sentences');

// Configuration that proofread() applies on every call.
const DEFAULT_WHITELIST = 'p, li, h1, h2, h3, h4, th, td, dl, figcaption';
const DEFAULT_BLACKLIST = 'pre, code';
const DEFAULT_DIC_PATH = path.join(__dirname, '../dictionaries/en_US.dic');
const DEFAULT_AFF_PATH = path.join(__dirname, '../dictionaries/en_US.aff');

module.exports = Proofreader;

/**
 * Proofreader that combines write-good suggestions with nodehun spell checking
 * for the text content of selected HTML elements.
 *
 * @constructor
 */
function Proofreader() {
  /**
   * @type nodehun
   * @private
   */
  this._dictionary = null;
  this._whitelist = '';
  this._blacklist = '';
  this._writeGoodSettings = null;
  // Tracks whether proofread() has already added the bundled en_US dictionary,
  // so repeated proofread() calls do not re-read and re-add the same file.
  this._defaultDictionaryLoaded = false;
}

/**
 * Collapses line breaks and runs of whitespace into single spaces, replaces the
 * typographic apostrophe with a plain one, and trims the result.
 *
 * @param {String} text
 * @returns {String}
 */
function normalizeText(text) {
  return text
    // remove linebreaks from text
    .replace(/(\r\n|\n|\r)+/gm, ' ')
    // replace the right single quotation mark (U+2019) with '
    .replace(/\u2019/g, "'")
    // replace multiple spaces with a single one
    .replace(/\s{2,}/g, ' ')
    .trim();
}

/**
 * Adds a dictionary. First dictionary has to provide both dic (words) and aff (grammar) filepaths, all subsequent ones are
 * expected to provide only dic filepath. Files are read synchronously.
 *
 * @param {String} dicFilePath
 * @param {String} affFilePath
 */
Proofreader.prototype.addDictionary = function (dicFilePath, affFilePath) {
  if (!this._dictionary) {
    this._dictionary = new nodehun(fs.readFileSync(affFilePath), fs.readFileSync(dicFilePath));
  } else {
    this._dictionary.addDictionary(fs.readFileSync(dicFilePath));
  }
};

/**
 * Sets a selector with whitelisted elements
 *
 * @param {String} value
 * @throws {Error} when value is not a string
 */
Proofreader.prototype.setWhitelist = function (value) {
  if (typeof value !== 'string') {
    throw new Error('Whitelist must be a string.');
  }

  this._whitelist = value;
};

/**
 * Sets a selector with blacklisted elements
 *
 * @param {String} value
 * @throws {Error} when value is not a string
 */
Proofreader.prototype.setBlacklist = function (value) {
  if (typeof value !== 'string') {
    throw new Error('Blacklist must be a string.');
  }

  this._blacklist = value;
};

/**
 * Sets write-good settings object.
 *
 * @see https://github.com/btford/write-good#checks
 * @param {Object} settings
 * @throws {Error} when settings is neither undefined nor an object
 */
Proofreader.prototype.setWriteGoodSettings = function (settings) {
  if (settings !== undefined && typeof settings !== 'object') {
    throw new Error('Write-good settings must be an object.');
  }

  this._writeGoodSettings = settings;
};

/**
 * Returns an array of writeGood and spelling suggestions (via promise) for provided HTML string.
 *
 * Each entry has the shape `{ text, suggestions: { writeGood, spelling } }`, one per
 * whitelisted element with non-empty text, in the order the elements are visited.
 *
 * NOTE: every call overwrites the whitelist and blacklist with the built-in defaults,
 * so values set earlier through setWhitelist()/setBlacklist() are replaced.
 *
 * @param {String} html
 * @returns {Promise}
 */
Proofreader.prototype.proofread = function (html) {
  // Configure the proofreader (a bit of a hack from the original source)
  this.setWhitelist(DEFAULT_WHITELIST);
  this.setBlacklist(DEFAULT_BLACKLIST);

  // Add the bundled dictionary only once per instance; adding it on every call
  // re-read the file from disk and re-added the same words each time.
  if (!this._defaultDictionaryLoaded) {
    this.addDictionary(DEFAULT_DIC_PATH, DEFAULT_AFF_PATH);
    this._defaultDictionaryLoaded = true;
  }

  const $ = cheerio.load(html);
  const dictionary = this._dictionary;
  const whitelist = this._whitelist;
  const blacklist = this._blacklist;
  const writeGoodSettings = this._writeGoodSettings;

  // The outer promise turns any synchronous throw during traversal into a rejection.
  return new Promise(function (resolve) {
    const pending = [];

    //Blacklisted elements are removed before text is processed
    if (blacklist) {
      $(blacklist).remove();
    }

    //Only whitelisted elements are processed
    $(whitelist).each(function () {
      const text = normalizeText($(this).text());

      if (!text.length) {
        return;
      }

      const writeGoodSuggestions = writeGood(text, writeGoodSettings);

      // Each element resolves to its own entry so Promise.all keeps the entries
      // in traversal order regardless of when the spellcheck callbacks fire.
      pending.push(new Promise(function (done) {
        spellcheck(dictionary, text, function (error, spellcheckerSuggestions) {
          // Spelling is best-effort: when the spellchecker reports an error and
          // passes no suggestions, the entry gets an empty spelling list.
          done({
            text: text,
            suggestions: {
              writeGood: writeGoodSuggestions,
              spelling: spellcheckerSuggestions || []
            }
          });
        });
      }));
    });

    resolve(Promise.all(pending));
  });
};