UNPKG

oxford-text-checker

Version:

OxfordTextChecker wrapper - analyzes vocabulary in your text

669 lines 29 kB
"use strict"; /** * This code taken from https://www.oxfordlearnersdictionaries.com/text-checker/ */ var __spreadArray = (this && this.__spreadArray) || function (to, from) { for (var i = 0, il = from.length, j = to.length; i < il; i++, j++) to[j] = from[i]; return to; }; Object.defineProperty(exports, "__esModule", { value: true }); var _filter = require('lodash.filter'); var _forEach = require('lodash.foreach'); var _isEqual = require('lodash.isequal'); var _map = require('lodash.map'); var _pullAllWith = require('lodash.pullallwith'); var _sumBy = require('lodash.sumby'); var OUP3k = require('./OUP3k.json'); var Opal = require('./Opal.json'); var oShortForm = require('./short_form.json'); var resultData = require('./text').resultData; var Main = /** @class */ (function () { function Main(props) { var _this = this; this.includeCommasInNumbers = function (p_arr) { var aFinalArr = [], i, nLen = p_arr.length; for (i = 0; i < nLen; i++) { if (p_arr[i] === ',') { var nInd = aFinalArr.length, sPrev = nInd ? aFinalArr[nInd - 1] : undefined; if (sPrev && p_arr[i + 1] && !isNaN(sPrev.split(',').join('') + p_arr[i + 1])) { var sInteger = sPrev + ',' + p_arr[i + 1]; aFinalArr.pop(); aFinalArr.push(sInteger); i++; } } else { aFinalArr.push(p_arr[i]); } } return aFinalArr; }; this.getShortFormIncludedArr = function (p_arr) { var nLen = p_arr.length, i, aFinalArr = []; for (i = 0; i < nLen; i++) { var sAps = p_arr[i]; if (sAps === "'" || sAps === '’') { if (p_arr[i - 1] && p_arr[i + 1]) { var sTemp = p_arr[i - 1] + p_arr[i + 1], bApostrophe = p_arr[i + 1].toLowerCase() === 's'; if (_this.ignoreCase) { sTemp = sTemp.toLowerCase(); } if (oShortForm[sTemp] || bApostrophe) { aFinalArr.pop(); aFinalArr.push(p_arr[i - 1] + "'" + p_arr[i + 1]); ++i; } else { aFinalArr.push(p_arr[i]); } } else { aFinalArr.push(p_arr[i]); } } else { aFinalArr.push(p_arr[i]); } } return aFinalArr; }; this.getEmailnWebAddrIncluded = function (p_arr) { var sTemp = '', i = 0, nStart = 0, regExp = /^[a-zA-Z0-9.\:!#$%&'*+\/=?^_`{|}~-]+[\.@][a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9\/])?)*$/, bSpace = false; while (i < p_arr.length) { sTemp += p_arr[i]; bSpace = p_arr[i] === ' '; if (bSpace || i === p_arr.length - 1) { sTemp = sTemp.trim(); var bFlag = regExp.test(sTemp); if (bFlag) { if (bSpace) { p_arr.splice(nStart, i - nStart); } else { p_arr.splice(nStart, i - nStart + 1); } p_arr.splice(nStart, 0, sTemp); i = nStart + 2; } else { i++; } sTemp = ''; nStart = i; } else { i++; } } return p_arr; }; this.listOfwordsInogForm = function (p_wordarr) { p_wordarr = p_wordarr.slice(); var aFinalArr = [], i, nLen = p_wordarr.length, aHyphen = ['‐', '‑', '‒', '–', '—', '―', '-']; for (i = 0; i < nLen; i++) { var sWord = p_wordarr[i]; if (sWord.indexOf("'") === 0 || sWord.indexOf('.') === 0) { sWord = sWord.substr(1); } var nWordLen = sWord.length - 1; if (sWord.indexOf("'") === nWordLen || sWord.indexOf('.') === nWordLen) { sWord = sWord.substr(0, nWordLen); } if (aHyphen.indexOf(sWord) === -1) { aFinalArr.push(sWord); } } return aFinalArr; }; this.wordsOfText = function (text) { var words; /* if(this.ignoreNumbers){ words = text.replace(/[0-9;!:—&\/\[\]]/g, ' ').replace(/\.\s+/g, ' ').replace(/[^a-zA-Z-\d\s&':_@#,]/g, '').replace(/,([^0-9])/g, ' $1').replace(/\s-/gim, ' ').replace(/\s-\s/gim, ' ').replace(/-\s/gim, ' ').match(/\S+/g); }else{ words = text.replace(/[;!:—&\/\[\]]/g, ' ').replace(/\.\s+/g, ' ').replace(/[^a-zA-Z-\d\s&':_@#,$£₹%]/g, '').replace(/,([^0-9])/g, ' $1').replace(/\s-/gim, ' ').replace(/\s-\s/gim, ' ').replace(/-\s/gim, ' ').match(/\S+/g); } */ /* */ // let aText = text.split(/[\n\s,]+/), i, nLen = aText.length, aFinal = [], /* commented for comma separated numbers */ var aText, i, nLen, aFinal = [], emailWebRegExp = /^[a-zA-Z0-9.\:!#$%&'*+\/=?^_`{|}~-]+[\.@][a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9\/])?)*$/; /* */ if (_this.ignoreNumbers) { aText = text.split(/[\n\s,]+/); } else { aText = text.split(/[\n\s]+/); var aFinalCom_1 = []; aText.forEach(function (ele, ind) { if (!isNaN(ele.split(',').join(''))) { aFinalCom_1.push(ele); } else { aFinalCom_1 = __spreadArray(__spreadArray([], aFinalCom_1), ele.split(',')); } }); aText = aFinalCom_1; } nLen = aText.length; /* */ for (i = 0; i < nLen; i++) { var aMatch = aText[i].match(emailWebRegExp), aWords = void 0; if (aMatch) { aFinal.push(aText[i]); } else { if (_this.ignoreNumbers) { var bIsNum = /\d/.test(aText[i]); if (!bIsNum) { aWords = aText[i] .replace(/[0-9;!:—&\/\[\]]/g, ' ') .replace(/\.\s+/g, ' ') .replace(/[^a-zA-Z-\d\s&':_@#,]/g, '') .replace(/,([^0-9])/g, ' $1') .replace(/\s-/gim, ' ') .replace(/\s-\s/gim, ' ') .replace(/-\s/gim, ' ') .match(/\S+/g); } } else { aWords = aText[i] .replace(/[;!:—&\/\[\]]/g, ' ') .replace(/\.\s+/g, ' ') .replace(/[^a-zA-Z-\d\s&':_@#,$£₹%]/g, '') .replace(/,([^0-9])/g, ' $1') .replace(/\s-/gim, ' ') .replace(/\s-\s/gim, ' ') .replace(/-\s/gim, ' ') .match(/\S+/g); } if (aWords) { aFinal = __spreadArray(__spreadArray([], aFinal), aWords); } } } words = aFinal; /* */ words = words ? _this.listOfwordsInogForm(words) : []; if (_this.aWordsToIgnore.length) { var aTemp_1 = _this.aWordsToIgnore; var _loop_1 = function (i_1) { /* if (aTemp[i] && words.indexOf(aTemp[i]) > -1) { let nInd = words.indexOf(aTemp[i]); words.splice(nInd, 1); } */ words = words.filter(function (ele) { return aTemp_1[i_1] !== ele; }); }; for (var i_1 in aTemp_1) { _loop_1(i_1); } } if (_this.ignoreNumbers) { words = words.filter(function (ele) { return isNaN(parseFloat(ele)); }); } return words; }; this.findTextWithDelimiters = function (arr) { _this.delimiterArr = []; _this.wordDArr = _map(OUP3k['Words'], 'Word'); var i = 0; var result; while (i < arr.length) { /* if (arr[i].indexOf("'") !== -1) { let _tempWord = arr[i].split("'")[0]; result = this.searchStringInArray(this.wordDArr, _tempWord); } else { result = this.searchStringInArray(this.wordDArr, arr[i]); } */ /* result = this.searchStringInArray(this.wordDArr, arr[i]); */ /* Added to accomodate apostrophe form */ var aWord = arr[i].split("'"), bApostropheForm = aWord[1] ? aWord[1].toLowerCase() === 's' : false, sShortForm = aWord[0] + aWord[1]; if (bApostropheForm && !oShortForm[sShortForm]) { result = _this.searchStringInArray(_this.wordDArr, aWord[0]); } else { result = _this.searchStringInArray(_this.wordDArr, arr[i]); } /* */ if (OUP3k['Words'][result] && OUP3k['Words'][result].title) { OUP3k['Words'][result].title = _this.changeTitleOfToolTip(OUP3k['Words'][result]); } _this.delimiterArr.push({ position: OUP3k['Words'][result], word: arr[i], }); i++; } return _this.delimiterArr; }; this.changeTitleOfToolTip = function (arr) { if (!arr.usedOnce) { arr.usedOnce = true; } else { return arr.title; } var newArr = arr.title.split(/([=;\s])/); var indexText = arr.title.indexOf('='); if (indexText === -1) { return arr.title.toUpperCase(); } // Polyfill for fill method if (!Array.prototype.fill) { Object.defineProperty(Array.prototype, 'fill', { value: function (value) { // Steps 1-2. if (this == null) { throw new TypeError('this is null or not defined'); } var O = Object(this); // Steps 3-5. var len = O.length >>> 0; // Steps 6-7. var start = arguments[1]; var relativeStart = start >> 0; // Step 8. var k = relativeStart < 0 ? Math.max(len + relativeStart, 0) : Math.min(relativeStart, len); // Steps 9-10. var end = arguments[2]; var relativeEnd = end === undefined ? len : end >> 0; // Step 11. var final = relativeEnd < 0 ? Math.max(len + relativeEnd, 0) : Math.min(relativeEnd, len); // Step 12. while (k < final) { O[k] = value; k++; } // Step 13. return O; }, }); } var firstMatrix = new Array(indexText + 1).fill(''); var finalString; var swap; var final = false; var secondMatrix = _map(newArr, equal); function equal(n, i) { if (n === '=') { swap = true; final = false; finalString = ''; _forEach(newArr, function (word, index) { if (index > i && swap && word === ';') { swap = false; final = true; finalString += newArr[i] + newArr[i - 1].toUpperCase() + word + ' '; firstMatrix[indexText] += finalString; } if (index > i && swap) { finalString += newArr[index]; } if (!final && index === newArr.length - 1) { finalString += newArr[i] + newArr[i - 1].toUpperCase(); firstMatrix[indexText] += finalString; } }); } return finalString; } return firstMatrix[firstMatrix.length - 1]; }; this.findTextWithDelimitersOpal = function (arr) { _this.delimiterOpalArr = []; _this.wordDArr = _map(Opal, 'Word'); var i = 0; var result; var position; while (i < arr.length) { /* if (arr[i].indexOf("'") !== -1) { let _tempWord = arr[i].split("'")[0]; result = this.searchStringInArray(this.wordDArr, _tempWord); } else { result = this.searchStringInArray(this.wordDArr, arr[i]); } */ result = _this.searchStringInArray(_this.wordDArr, arr[i]); position = result !== -1 ? { Style: 'opal' } : undefined; _this.delimiterOpalArr.push({ position: position, word: arr[i] }); i++; } return _this.delimiterOpalArr; }; /* need to change */ this.findText = function () { _this.positionArr = []; _this.wordArr = _map(OUP3k['Words'], 'Word'); var i = 0; var result; while (i < _this.words.length) { /* if (this.words[i].indexOf("'") !== -1) { let _tempWord = this.words[i].split("'")[0]; result = this.searchStringInArray(this.wordArr, _tempWord); } else { result = this.searchStringInArray(this.wordArr, this.words[i]); } */ /* result = this.searchStringInArray(this.wordArr, this.words[i]); */ /* Added to accomodate apostrophe form */ var aWord = _this.words[i].split("'"), bApostropheForm = aWord[1] ? aWord[1].toLowerCase() === 's' : false, sShortForm = aWord[0] + aWord[1]; if (bApostropheForm && !oShortForm[sShortForm]) { result = _this.searchStringInArray(_this.wordArr, aWord[0]); } else { result = _this.searchStringInArray(_this.wordArr, _this.words[i]); } /* */ _this.positionArr.push({ position: OUP3k['Words'][result], word: _this.words[i], }); i++; } return _this.positionArr; }; this.searchStringInArray = function (strArray, str) { if (_this.aWordsToIgnore.indexOf(str) === -1) { for (var j = 0; j < strArray.length; j++) { if ((_this.ignoreCase && strArray[j].toLowerCase() === str.toLowerCase()) || strArray[j] === str) return j; } } return -1; }; this.findWordInDictionary = function (string) { var oxfordWords = []; _forEach(_this.positionArr, function (value) { if (value.position !== undefined && value.position['Ox3K/5K'].search(string) > -1 && string !== 'ox5k_only') { oxfordWords.push(value); } if (value.position !== undefined && string === 'ox5k_only' && _this.checkIfOnly5k(value.position['Ox3K/5K'])) { oxfordWords.push(value); } }); return oxfordWords; }; this.findCERFDictionary = function (type, string) { var oxfordWords = []; _forEach(_this.findWordInDictionary(type), function (value) { if (value.position !== undefined && value.position['Style'].search(string) > -1) { oxfordWords.push(value); } }); return oxfordWords; }; this.text = props.text; this.ignoreCase = props.ignoreCase; this.ignoreNumbers = props.ignoreNumbers; this.aWordsToIgnore = props.texttobeIgnored; this.words = this.wordsOfText(this.text); this.sentences = this.text.replace(/(\.+|\:|\!|\?)(\"*|\'*|\)*|}*|]*)(\s|\n|\r|\r\n)/gm, '|').split('|'); this.positionArr = []; /* const tempArr = this.text.split(/([0-9"':,.!;_“”‘’`\\|)}{(~*&^%$?/><+=\][\s])/); */ /* */ var aTempArr; if (this.ignoreNumbers) { aTempArr = this.text.split(/([0-9"':,.!;_“”‘’`\\|)}{(~*&^%$?/><+=\][\s])/); } else { aTempArr = this.text.split(/(["':,.!;_“”‘’`\\|)}{(~*&^?/><+=\][\s])/); aTempArr = this.includeCommasInNumbers(aTempArr); } var tempArr = this.getShortFormIncludedArr(aTempArr); tempArr = tempArr.filter(function (ele, ind) { return ele !== ''; }); tempArr = this.getEmailnWebAddrIncluded(tempArr); this.spaceWords = []; this.wordsWithDelimiters = this.findTextWithDelimiters(tempArr); this.wordsWithDelimitersOpal = this.findTextWithDelimitersOpal(tempArr); _forEach(tempArr, function (key) { _this.spaceWords.push({ word: key, fontColor: 'black', title: '', toLowerCaseWords: key.toLowerCase(), isToLowerCase: key.charCodeAt(0) < 97 ? true : false, }); }); } Main.prototype.checkIfOnly5k = function (oPos) { if ((oPos.search('ox5k_only') > -1 || oPos.search('ox5k') > -1) && oPos.search('ox3k') < 0) { return true; } return false; }; Object.defineProperty(Main.prototype, "countOfCerfWords", { get: function () { var _this = this; var tempArr = []; var finalArr = []; _forEach(Object.keys(resultData.blocks), function (key, index) { _forEach(resultData.blocks[key], function (value) { tempArr.push(_this.findCERFDictionary(key, value)); }); }); // let length = _sumBy(tempArr, 'length'); // length += this.OPALWordsCount.length; // length += this.UncategorizedCount.length; var length = this.words.length; tempArr.push(this.OPALWordsCount, this.UncategorizedCount); _forEach(tempArr, function (key, index) { finalArr.push({ value: key, percent: ((tempArr[index].length * 100) / length).toFixed(0), }); }); return { wordList: finalArr, count: length, }; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "countOfNonOpalWords", { get: function () { var _this = this; var tempArr = []; var finalArr = []; var countVal = 0; _forEach(Object.keys(resultData.blocks), function (key, index) { _forEach(resultData.blocks[key], function (value) { tempArr.push(_this.findCERFDictionary(key, value)); }); }); // let length = _sumBy(tempArr, 'length'); // length += this.OPALWordsCount.length; // length += this.UncategorizedCount.length; var length = this.words.length; _forEach(tempArr, function (key, index) { countVal += tempArr[index].length; }); tempArr.push([]); for (var i = 0; i < length - countVal; i += 1) { tempArr[tempArr.length - 1].push([]); } _forEach(tempArr, function (key, index) { finalArr.push({ value: key, percent: ((tempArr[index].length * 100) / length).toFixed(0), }); }); return { wordList: finalArr, count: length, }; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "countOfOpalWords", { get: function () { var tempArr = []; var finalArr = []; // _forEach(Object.keys(resultData.blocks), (key, index) => { // _forEach(resultData.blocks[key], (value) => { // tempArr.push(this.findCERFDictionary(key, value)); // }); // }); // let length = _sumBy(tempArr, 'length'); // length += this.OPALWordsCount.length; // length += this.UncategorizedCount.length; var length = this.words.length; tempArr.push(this.OPALWordsCount, this.UncategorizedCountOpal); _forEach(tempArr, function (key, index) { finalArr.push({ value: key, percent: ((tempArr[index].length * 100) / length).toFixed(0), }); }); return { wordList: finalArr, count: length, }; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "wordCount", { get: function () { return this.words.length; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "sentenceCount", { get: function () { return this.sentences.length; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "avgSentenceLength", { get: function () { return this.words.length / this.sentences.length; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "oxford3000Words", { get: function () { return Math.round((this.findWordInDictionary('ox3k').length / this.wordCount) * 100); }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "oxford5000Words", { get: function () { return Math.round((this.findWordInDictionary('ox5k').length / this.wordCount) * 100); }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "oxford3000WordsCount", { get: function () { return this.findWordInDictionary('ox3k').length; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "oxford5000WordsCount", { get: function () { return this.findWordInDictionary('ox5k').length; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "oxford5000OnlyWordsCount", { get: function () { return this.findWordInDictionary('ox5k_only').length; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "OPALWords", { get: function () { this.OPALWordsList = []; this.wordArr = _map(Opal, 'Word'); var i = 0; var result; var position; while (i < this.words.length) { result = this.searchStringInArray(this.wordArr, this.words[i]); position = result !== -1 ? { Style: 'opal' } : undefined; this.OPALWordsList.push({ position: position, word: this.words[i] }); i++; } return this.OPALWordsList; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "OPALWordsCount", { get: function () { var count = _filter(this.OPALWords, function (o) { return o.position !== undefined; }); return count; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "OPALWordsPercent", { get: function () { return Math.round((this.OPALWordsCount.length / this.wordCount) * 100); }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "UncategorizedCount", { get: function () { this.UncategorizedList = []; this.UncategorizedList = _map(_filter(this.positionArr, ['position', undefined]), 'word'); var tempArr = _map(this.OPALWordsCount, 'word'); _pullAllWith(this.UncategorizedList, tempArr, _isEqual); var newArr = []; _forEach(this.UncategorizedList, function (value) { if (['‐', '‑', '‒', '–', '—', '―', '-'].indexOf(value) === -1) newArr.push({ word: value }); }); this.UncategorizedList = newArr; return this.UncategorizedList; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "UncategorizedCountWOOpal", { get: function () { this.UncategorizedListWOOpal = []; this.UncategorizedListWOOpal = _map(_filter(this.positionArr, ['position', undefined]), 'word'); // const tempArr = _map(this.OPALWordsCount, 'word'); // _pullAllWith(this.UncategorizedList, tempArr, _isEqual); var newArr = []; _forEach(this.UncategorizedListWOOpal, function (value) { newArr.push({ word: value }); }); this.UncategorizedListWOOpal = newArr; return this.UncategorizedListWOOpal; }, enumerable: false, configurable: true }); Object.defineProperty(Main.prototype, "UncategorizedCountOpal", { get: function () { this.UncategorizedListOpal = []; this.UncategorizedListOpal = _map(this.positionArr, 'word'); var tempArr = _map(this.OPALWordsCount, 'word'); _pullAllWith(this.UncategorizedListOpal, tempArr, _isEqual); var newArr = []; _forEach(this.UncategorizedListOpal, function (value) { newArr.push({ word: value }); }); this.UncategorizedListOpal = newArr; return this.UncategorizedListOpal; }, enumerable: false, configurable: true }); return Main; }()); exports.default = Main; //# sourceMappingURL=Main.js.map