// oxford-text-checker
// Version:
// OxfordTextChecker wrapper - analyzes vocabulary in your text
// 669 lines • 29 kB
// JavaScript
;
/**
* This code was taken from https://www.oxfordlearnersdictionaries.com/text-checker/
*/
/**
 * Appends every element of `from` to the end of `to` (mutating `to`) and
 * returns `to`. An already-defined `this.__spreadArray` is reused if present.
 *
 * @param {Array} to - Destination array; extended in place.
 * @param {ArrayLike} from - Indexable source whose elements are copied in order.
 * @returns {Array} The same `to` array that was passed in.
 */
var __spreadArray = (this && this.__spreadArray) || function (to, from) {
    // Both lengths are captured once, before any element is written.
    var count = from.length;
    var offset = to.length;
    var k = 0;
    while (k < count) {
        to[offset + k] = from[k];
        k += 1;
    }
    return to;
};
Object.defineProperty(exports, "__esModule", { value: true });
var _filter = require('lodash.filter');
var _forEach = require('lodash.foreach');
var _isEqual = require('lodash.isequal');
var _map = require('lodash.map');
var _pullAllWith = require('lodash.pullallwith');
var _sumBy = require('lodash.sumby');
var OUP3k = require('./OUP3k.json');
var Opal = require('./Opal.json');
var oShortForm = require('./short_form.json');
var resultData = require('./text').resultData;
var Main = /** @class */ (function () {
function Main(props) {
var _this = this;
/**
 * Re-joins thousands-separated numbers that the tokenizer split apart.
 *
 * A "," token is merged with its neighbours when the previously emitted
 * token (with its own commas removed) concatenated with the following token
 * is numeric, e.g. ["1", ",", "000"] -> ["1,000"].
 *
 * Fix: a comma that is NOT part of a number used to be dropped from the
 * output, while the `ignoreNumbers` tokenizer path keeps commas as tokens.
 * Such commas are now kept as their own token.
 *
 * @param {string[]} p_arr - Token list (words and delimiter tokens).
 * @returns {string[]} New token list; `p_arr` is not modified.
 */
this.includeCommasInNumbers = function (p_arr) {
    var aFinalArr = [], i, nLen = p_arr.length;
    for (i = 0; i < nLen; i++) {
        var sToken = p_arr[i];
        if (sToken !== ',') {
            aFinalArr.push(sToken);
            continue;
        }
        var nLast = aFinalArr.length - 1;
        var sPrev = nLast >= 0 ? aFinalArr[nLast] : undefined;
        var sNext = p_arr[i + 1];
        // Global isNaN is intentional: the operand is a string that must be coerced.
        if (sPrev && sNext && !isNaN(sPrev.split(',').join('') + sNext)) {
            // Fold "<prev>,<next>" into the previously emitted token.
            aFinalArr[nLast] = sPrev + ',' + sNext;
            i++; // the following token has been consumed
        }
        else {
            // Ordinary punctuation comma: keep it so no token is lost.
            aFinalArr.push(sToken);
        }
    }
    return aFinalArr;
};
/**
 * Glues apostrophe-separated tokens back together when they form a known
 * short form (looked up in `oShortForm`) or when the part after the
 * apostrophe is "s"/"S".
 *
 * Both "'" and "’" are recognised; a merged token is always written with a
 * plain "'". The lookup key is lower-cased when `ignoreCase` is set.
 *
 * @param {string[]} p_arr - Token list (words and delimiter tokens).
 * @returns {string[]} New token list; `p_arr` is not modified.
 */
this.getShortFormIncludedArr = function (p_arr) {
    var aMerged = [];
    var nTotal = p_arr.length;
    for (var idx = 0; idx < nTotal; idx++) {
        var sToken = p_arr[idx];
        var bIsApostrophe = sToken === "'" || sToken === '’';
        var sBefore = p_arr[idx - 1];
        var sAfter = p_arr[idx + 1];
        // Anything that is not an apostrophe with text on both sides passes through.
        if (!bIsApostrophe || !sBefore || !sAfter) {
            aMerged.push(sToken);
            continue;
        }
        var bEndsWithS = sAfter.toLowerCase() === 's';
        var sKey = sBefore + sAfter;
        if (_this.ignoreCase) {
            sKey = sKey.toLowerCase();
        }
        if (oShortForm[sKey] || bEndsWithS) {
            // Replace the previously emitted token with the joined form.
            aMerged.pop();
            aMerged.push(sBefore + "'" + sAfter);
            ++idx; // skip the token after the apostrophe
        }
        else {
            aMerged.push(sToken);
        }
    }
    return aMerged;
};
/**
 * Collapses runs of tokens that together spell an e-mail or web address
 * into a single token.
 *
 * Tokens are accumulated until a single-space token or the end of the list.
 * If the trimmed accumulated text matches the address pattern, the run is
 * replaced in place by that text.
 *
 * @param {string[]} p_arr - Token list; MUTATED in place.
 * @returns {string[]} The same `p_arr` instance.
 */
this.getEmailnWebAddrIncluded = function (p_arr) {
    var reAddress = /^[a-zA-Z0-9.\:!#$%&'*+\/=?^_`{|}~-]+[\.@][a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9\/])?)*$/;
    var sRun = '';
    var nRunStart = 0;
    var nPos = 0;
    while (nPos < p_arr.length) {
        var bAtSpace = p_arr[nPos] === ' ';
        var bAtEnd = nPos === p_arr.length - 1;
        sRun += p_arr[nPos];
        if (!bAtSpace && !bAtEnd) {
            nPos++;
            continue;
        }
        sRun = sRun.trim();
        if (reAddress.test(sRun)) {
            // On a space, the space token itself is kept; at the end of the
            // list, the final token is part of the run being replaced.
            var nRemove = bAtSpace ? nPos - nRunStart : nPos - nRunStart + 1;
            p_arr.splice(nRunStart, nRemove, sRun);
            nPos = nRunStart + 2;
        }
        else {
            nPos++;
        }
        sRun = '';
        nRunStart = nPos;
    }
    return p_arr;
};
/**
 * Normalises extracted words: strips one leading and one trailing
 * apostrophe or period, and discards tokens that consist solely of a
 * hyphen/dash character.
 *
 * Fix: the trailing check used `indexOf`, which finds the FIRST occurrence,
 * so a word with an inner apostrophe (e.g. "don't'") kept its closing
 * quote. The last character is now inspected directly. The deprecated
 * `substr` is replaced by `slice`.
 *
 * @param {string[]} p_wordarr - Words to normalise; not modified.
 * @returns {string[]} New array of normalised words.
 */
this.listOfwordsInogForm = function (p_wordarr) {
    var aHyphen = ['‐', '‑', '‒', '–', '—', '―', '-'];
    var aFinalArr = [];
    var nLen = p_wordarr.length;
    for (var i = 0; i < nLen; i++) {
        var sWord = p_wordarr[i];
        var sFirst = sWord.charAt(0);
        if (sFirst === "'" || sFirst === '.') {
            sWord = sWord.slice(1);
        }
        // charAt on an empty string yields '', so empty words pass through unchanged.
        var sLast = sWord.charAt(sWord.length - 1);
        if (sLast === "'" || sLast === '.') {
            sWord = sWord.slice(0, -1);
        }
        if (aHyphen.indexOf(sWord) === -1) {
            aFinalArr.push(sWord);
        }
    }
    return aFinalArr;
};
/**
 * Extracts the list of words from `text`.
 *
 * Steps:
 *  1. Split on whitespace (and on commas when `ignoreNumbers` is set;
 *     otherwise commas are only split out of chunks that are not numeric
 *     once their commas are removed).
 *  2. Chunks matching the e-mail/web-address pattern are kept verbatim.
 *  3. Other chunks are stripped of punctuation and split into words; with
 *     `ignoreNumbers`, any chunk containing a digit is skipped entirely.
 *  4. Words are normalised through `listOfwordsInogForm`.
 *  5. Words listed in `aWordsToIgnore` are removed; with `ignoreNumbers`,
 *     words for which `parseFloat` yields a number are removed as well.
 *
 * Changes: the ignore list is applied in one pass instead of a `for...in`
 * loop that re-filtered the whole list per entry, intermediate arrays are
 * appended to rather than re-copied per chunk, a missing ignore list no
 * longer throws, and dead commented-out code is removed.
 *
 * @param {string} text - Raw input text.
 * @returns {string[]} Extracted words, in order of appearance.
 */
this.wordsOfText = function (text) {
    var emailWebRegExp = /^[a-zA-Z0-9.\:!#$%&'*+\/=?^_`{|}~-]+[\.@][a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9\/])?)*$/;
    var aText;
    if (_this.ignoreNumbers) {
        aText = text.split(/[\n\s,]+/);
    }
    else {
        aText = [];
        text.split(/[\n\s]+/).forEach(function (ele) {
            // Keep comma-grouped numbers ("1,000") intact; split everything else on commas.
            if (!isNaN(ele.split(',').join(''))) {
                aText.push(ele);
            }
            else {
                Array.prototype.push.apply(aText, ele.split(','));
            }
        });
    }
    var aFinal = [];
    for (var i = 0; i < aText.length; i++) {
        var sChunk = aText[i];
        if (emailWebRegExp.test(sChunk)) {
            aFinal.push(sChunk);
            continue;
        }
        var aWords = null;
        if (!_this.ignoreNumbers) {
            aWords = sChunk
                .replace(/[;!:—&\/\[\]]/g, ' ')
                .replace(/\.\s+/g, ' ')
                .replace(/[^a-zA-Z-\d\s&':_@#,$£₹%]/g, '')
                .replace(/,([^0-9])/g, ' $1')
                .replace(/\s-/gim, ' ')
                .replace(/\s-\s/gim, ' ')
                .replace(/-\s/gim, ' ')
                .match(/\S+/g);
        }
        else if (!/\d/.test(sChunk)) {
            aWords = sChunk
                .replace(/[0-9;!:—&\/\[\]]/g, ' ')
                .replace(/\.\s+/g, ' ')
                .replace(/[^a-zA-Z-\d\s&':_@#,]/g, '')
                .replace(/,([^0-9])/g, ' $1')
                .replace(/\s-/gim, ' ')
                .replace(/\s-\s/gim, ' ')
                .replace(/-\s/gim, ' ')
                .match(/\S+/g);
        }
        // match() returns null when nothing but whitespace is left.
        if (aWords) {
            Array.prototype.push.apply(aFinal, aWords);
        }
    }
    var words = _this.listOfwordsInogForm(aFinal);
    var aIgnore = _this.aWordsToIgnore;
    if (aIgnore && aIgnore.length) {
        words = words.filter(function (ele) { return aIgnore.indexOf(ele) === -1; });
    }
    if (_this.ignoreNumbers) {
        words = words.filter(function (ele) { return isNaN(parseFloat(ele)); });
    }
    return words;
};
/**
 * Looks every token (words AND delimiter tokens) up in the OUP3k word list
 * and records the result in `delimiterArr`.
 *
 * A token of the form "<word>'s" that is not a known short form is looked
 * up by "<word>" alone. When the matched entry has a `title`, it is
 * rewritten via `changeTitleOfToolTip`, which mutates the shared OUP3k entry.
 *
 * Side effects: resets `delimiterArr` and `wordDArr`.
 *
 * @param {string[]} arr - Token list.
 * @returns {Array<{position: (Object|undefined), word: string}>} One entry
 *   per token; `position` is the OUP3k entry or undefined when not found.
 */
this.findTextWithDelimiters = function (arr) {
    var aDictionary = OUP3k['Words'];
    _this.delimiterArr = [];
    _this.wordDArr = _map(aDictionary, 'Word');
    for (var idx = 0; idx < arr.length; idx++) {
        var sToken = arr[idx];
        var aParts = sToken.split("'");
        var bPossessive = aParts[1] ? aParts[1].toLowerCase() === 's' : false;
        // Possessives that are not registered short forms are searched by their stem.
        var bUseStem = bPossessive && !oShortForm[aParts[0] + aParts[1]];
        var sLookup = bUseStem ? aParts[0] : sToken;
        var nFound = _this.searchStringInArray(_this.wordDArr, sLookup);
        var oEntry = aDictionary[nFound]; // undefined when nFound is -1
        if (oEntry && oEntry.title) {
            oEntry.title = _this.changeTitleOfToolTip(oEntry);
        }
        _this.delimiterArr.push({
            position: oEntry,
            word: sToken,
        });
    }
    return _this.delimiterArr;
};
/**
 * Produces the tooltip title for a dictionary entry, once per entry.
 *
 * The first call marks the entry with `usedOnce = true` and returns the
 * transformed title; later calls return `arr.title` unchanged (the caller
 * is expected to have stored the transformed value back).
 *
 * Transformation:
 *  - no "=" in the title: the title upper-cased;
 *  - otherwise, for every "=" the text that follows it (up to the next ";"
 *    or the end of the title) is emitted first, followed by "=" and the
 *    upper-cased token immediately before the "=", e.g.
 *    "a1=noun; b2=verb" -> "noun=A1; verb=B2".
 *
 * Changes: no longer installs an `Array.prototype.fill` polyfill on the
 * global prototype (the array was only used as a one-slot string
 * accumulator), and drops the unused mapped array and shared mutable flags.
 * The returned string is unchanged.
 *
 * @param {{title: string, usedOnce: (boolean|undefined)}} arr - Dictionary
 *   entry; `usedOnce` is set on it.
 * @returns {string} Tooltip title.
 */
this.changeTitleOfToolTip = function (arr) {
    if (arr.usedOnce) {
        return arr.title;
    }
    arr.usedOnce = true;
    if (arr.title.indexOf('=') === -1) {
        return arr.title.toUpperCase();
    }
    // The capture group keeps the delimiters as tokens, so a "=" token always has a predecessor.
    var aTokens = arr.title.split(/([=;\s])/);
    var sResult = '';
    for (var nEq = 0; nEq < aTokens.length; nEq++) {
        if (aTokens[nEq] !== '=') {
            continue;
        }
        var sLabel = '';
        var bTerminated = false;
        for (var k = nEq + 1; k < aTokens.length; k++) {
            if (aTokens[k] === ';') {
                bTerminated = true;
                break;
            }
            sLabel += aTokens[k];
        }
        sResult += sLabel + '=' + aTokens[nEq - 1].toUpperCase();
        if (bTerminated) {
            sResult += '; ';
        }
    }
    return sResult;
};
/**
 * Tags every token (words AND delimiter tokens) that appears in the Opal
 * word list and records the result in `delimiterOpalArr`.
 *
 * Side effects: resets `delimiterOpalArr` and `wordDArr`.
 *
 * @param {string[]} arr - Token list.
 * @returns {Array<{position: ({Style: string}|undefined), word: string}>}
 *   One entry per token; `position` is `{ Style: 'opal' }` for a match and
 *   undefined otherwise.
 */
this.findTextWithDelimitersOpal = function (arr) {
    _this.delimiterOpalArr = [];
    _this.wordDArr = _map(Opal, 'Word');
    for (var idx = 0; idx < arr.length; idx++) {
        var sToken = arr[idx];
        var nFound = _this.searchStringInArray(_this.wordDArr, sToken);
        var oPosition;
        if (nFound === -1) {
            oPosition = undefined;
        }
        else {
            oPosition = { Style: 'opal' };
        }
        _this.delimiterOpalArr.push({ position: oPosition, word: sToken });
    }
    return _this.delimiterOpalArr;
};
/* need to change */
/**
 * Looks every word of `this.words` up in the OUP3k word list and stores
 * the result in `positionArr`, which the dictionary/CEFR getters read.
 *
 * A word of the form "<word>'s" that is not a known short form is looked
 * up by "<word>" alone.
 *
 * Side effects: resets `positionArr` and `wordArr`.
 *
 * @returns {Array<{position: (Object|undefined), word: string}>} One entry
 *   per word; `position` is the OUP3k entry or undefined when not found.
 */
this.findText = function () {
    var aDictionary = OUP3k['Words'];
    _this.positionArr = [];
    _this.wordArr = _map(aDictionary, 'Word');
    for (var idx = 0; idx < _this.words.length; idx++) {
        var sWord = _this.words[idx];
        var aParts = sWord.split("'");
        var bPossessive = aParts[1] ? aParts[1].toLowerCase() === 's' : false;
        // Possessives that are not registered short forms are searched by their stem.
        var bUseStem = bPossessive && !oShortForm[aParts[0] + aParts[1]];
        var sLookup = bUseStem ? aParts[0] : sWord;
        var nFound = _this.searchStringInArray(_this.wordArr, sLookup);
        _this.positionArr.push({
            position: aDictionary[nFound], // undefined when nFound is -1
            word: sWord,
        });
    }
    return _this.positionArr;
};
/**
 * Returns the index of the first element of `strArray` equal to `str`
 * (case-insensitively when `ignoreCase` is set), or -1.
 *
 * Words present in `aWordsToIgnore` are never searched and always yield -1.
 *
 * @param {string[]} strArray - Candidate strings.
 * @param {string} str - String to look for.
 * @returns {number} Index of the first match, or -1.
 */
this.searchStringInArray = function (strArray, str) {
    if (_this.aWordsToIgnore.indexOf(str) !== -1) {
        return -1;
    }
    for (var idx = 0; idx < strArray.length; idx++) {
        var sCandidate = strArray[idx];
        var bLooseMatch = _this.ignoreCase && sCandidate.toLowerCase() === str.toLowerCase();
        if (bLooseMatch || sCandidate === str) {
            return idx;
        }
    }
    return -1;
};
/**
 * Selects the entries of `positionArr` whose dictionary entry belongs to
 * the requested word list.
 *
 * For 'ox5k_only' membership is decided by `checkIfOnly5k`; for any other
 * value, `string` is passed to `String.prototype.search` against the
 * entry's 'Ox3K/5K' field.
 *
 * @param {string} string - List tag, e.g. 'ox3k', 'ox5k' or 'ox5k_only'.
 * @returns {Array<{position: Object, word: string}>} Matching entries, in
 *   `positionArr` order.
 */
this.findWordInDictionary = function (string) {
    var bOnly5k = string === 'ox5k_only';
    return _filter(_this.positionArr, function (oItem) {
        if (oItem.position === undefined) {
            return false;
        }
        var sLists = oItem.position['Ox3K/5K'];
        if (bOnly5k) {
            return _this.checkIfOnly5k(sLists);
        }
        return sLists.search(string) > -1;
    });
};
/**
 * Narrows the result of `findWordInDictionary(type)` to entries whose
 * 'Style' field matches `string` (via `String.prototype.search`).
 *
 * @param {string} type - List tag forwarded to `findWordInDictionary`.
 * @param {string} string - Pattern searched for in the entry's 'Style'.
 * @returns {Array<{position: Object, word: string}>} Matching entries.
 */
this.findCERFDictionary = function (type, string) {
    var aCandidates = _this.findWordInDictionary(type);
    return _filter(aCandidates, function (oItem) {
        return oItem.position !== undefined && oItem.position['Style'].search(string) > -1;
    });
};
// ---- Instance initialisation -------------------------------------------
// Reads `text`, `ignoreCase`, `ignoreNumbers` and `texttobeIgnored` from
// `props`, then derives the word list, the sentence list and the token
// lists (with delimiters) used by the getters.
this.text = props.text;
this.ignoreCase = props.ignoreCase;
this.ignoreNumbers = props.ignoreNumbers;
// Fall back to an empty list: `wordsOfText` and `searchStringInArray`
// read `.length` / call `.indexOf` on it and would throw if it were omitted.
this.aWordsToIgnore = props.texttobeIgnored || [];
this.words = this.wordsOfText(this.text);
// Sentence boundary: terminal punctuation, optional closing quote/bracket, then whitespace.
this.sentences = this.text.replace(/(\.+|\:|\!|\?)(\"*|\'*|\)*|}*|]*)(\s|\n|\r|\r\n)/gm, '|').split('|');
this.positionArr = [];
// Tokenise while keeping delimiters (capture group). Digits count as
// delimiters only when numbers are ignored; otherwise comma-grouped
// numbers are re-joined afterwards.
var aTempArr;
if (this.ignoreNumbers) {
    aTempArr = this.text.split(/([0-9"':,.!;_“”‘’`\\|)}{(~*&^%$?/><+=\][\s])/);
}
else {
    aTempArr = this.text.split(/(["':,.!;_“”‘’`\\|)}{(~*&^?/><+=\][\s])/);
    aTempArr = this.includeCommasInNumbers(aTempArr);
}
var tempArr = this.getShortFormIncludedArr(aTempArr);
// split() yields '' between adjacent delimiters; drop those.
tempArr = tempArr.filter(function (ele) {
    return ele !== '';
});
tempArr = this.getEmailnWebAddrIncluded(tempArr);
this.spaceWords = [];
this.wordsWithDelimiters = this.findTextWithDelimiters(tempArr);
this.wordsWithDelimitersOpal = this.findTextWithDelimitersOpal(tempArr);
_forEach(tempArr, function (key) {
    _this.spaceWords.push({
        word: key,
        fontColor: 'black',
        title: '',
        toLowerCaseWords: key.toLowerCase(),
        // True when the first UTF-16 code unit is below 97 (the code of 'a').
        isToLowerCase: key.charCodeAt(0) < 97,
    });
});
}
/**
 * Tells whether a list-membership string mentions 'ox5k' (or 'ox5k_only')
 * but not 'ox3k'.
 *
 * @param {string} oPos - Value of an entry's 'Ox3K/5K' field.
 * @returns {boolean} True when the entry is in the 5000 list only.
 */
Main.prototype.checkIfOnly5k = function (oPos) {
    var bIn5k = oPos.search('ox5k_only') > -1 || oPos.search('ox5k') > -1;
    return bIn5k && oPos.search('ox3k') < 0;
};
/**
 * Getter `countOfCerfWords`: word groups for every (block, value) pair of
 * `resultData.blocks` (via `findCERFDictionary`), followed by the OPAL
 * words and the uncategorized words, each with its share of the total
 * word count.
 *
 * Fix: for a text with no words the share was computed as 0/0 and
 * reported as the string "NaN"; it is now reported as "0".
 *
 * @returns {{wordList: Array<{value: Array, percent: string}>, count: number}}
 */
Object.defineProperty(Main.prototype, "countOfCerfWords", {
    get: function () {
        var _this = this;
        var aGroups = [];
        _forEach(Object.keys(resultData.blocks), function (key) {
            _forEach(resultData.blocks[key], function (value) {
                aGroups.push(_this.findCERFDictionary(key, value));
            });
        });
        var length = this.words.length;
        aGroups.push(this.OPALWordsCount, this.UncategorizedCount);
        var finalArr = _map(aGroups, function (aGroup) {
            return {
                value: aGroup,
                // Guard against division by zero on empty input.
                percent: length ? ((aGroup.length * 100) / length).toFixed(0) : '0',
            };
        });
        return {
            wordList: finalArr,
            count: length,
        };
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `countOfNonOpalWords`: word groups for every (block, value) pair
 * of `resultData.blocks` (via `findCERFDictionary`), followed by one
 * placeholder group sized to the words not covered by any of those groups
 * (total word count minus the summed group sizes; empty when that
 * difference is not positive). Each group carries its share of the total
 * word count.
 *
 * Fix: for a text with no words the share was computed as 0/0 and
 * reported as the string "NaN"; it is now reported as "0".
 *
 * @returns {{wordList: Array<{value: Array, percent: string}>, count: number}}
 */
Object.defineProperty(Main.prototype, "countOfNonOpalWords", {
    get: function () {
        var _this = this;
        var aGroups = [];
        _forEach(Object.keys(resultData.blocks), function (key) {
            _forEach(resultData.blocks[key], function (value) {
                aGroups.push(_this.findCERFDictionary(key, value));
            });
        });
        var length = this.words.length;
        var nCategorized = _sumBy(aGroups, 'length');
        // Placeholder entries stand in for the words outside every group.
        var aRemainder = [];
        for (var i = 0; i < length - nCategorized; i += 1) {
            aRemainder.push([]);
        }
        aGroups.push(aRemainder);
        var finalArr = _map(aGroups, function (aGroup) {
            return {
                value: aGroup,
                // Guard against division by zero on empty input.
                percent: length ? ((aGroup.length * 100) / length).toFixed(0) : '0',
            };
        });
        return {
            wordList: finalArr,
            count: length,
        };
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `countOfOpalWords`: two groups — the OPAL words and the words
 * left after removing OPAL words (`UncategorizedCountOpal`) — each with
 * its share of the total word count.
 *
 * Fix: for a text with no words the share was computed as 0/0 and
 * reported as the string "NaN"; it is now reported as "0". Dead
 * commented-out code is removed.
 *
 * @returns {{wordList: Array<{value: Array, percent: string}>, count: number}}
 */
Object.defineProperty(Main.prototype, "countOfOpalWords", {
    get: function () {
        var length = this.words.length;
        var aGroups = [this.OPALWordsCount, this.UncategorizedCountOpal];
        var finalArr = _map(aGroups, function (aGroup) {
            return {
                value: aGroup,
                // Guard against division by zero on empty input.
                percent: length ? ((aGroup.length * 100) / length).toFixed(0) : '0',
            };
        });
        return {
            wordList: finalArr,
            count: length,
        };
    },
    enumerable: false,
    configurable: true
});
/** Getter `wordCount`: number of entries in `this.words`. */
Object.defineProperty(Main.prototype, "wordCount", {
    get: function () {
        var aWords = this.words;
        return aWords.length;
    },
    enumerable: false,
    configurable: true
});
/** Getter `sentenceCount`: number of entries in `this.sentences`. */
Object.defineProperty(Main.prototype, "sentenceCount", {
    get: function () {
        var aSentences = this.sentences;
        return aSentences.length;
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `avgSentenceLength`: number of words divided by number of
 * sentences (not rounded).
 */
Object.defineProperty(Main.prototype, "avgSentenceLength", {
    get: function () {
        var nWords = this.words.length;
        var nSentences = this.sentences.length;
        return nWords / nSentences;
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `oxford3000Words`: rounded percentage of words matched by
 * `findWordInDictionary('ox3k')`.
 *
 * Fix: returns 0 instead of NaN when the text contains no words.
 *
 * @returns {number} Integer percentage.
 */
Object.defineProperty(Main.prototype, "oxford3000Words", {
    get: function () {
        var nTotal = this.wordCount;
        if (!nTotal) {
            return 0; // avoid 0/0
        }
        return Math.round((this.findWordInDictionary('ox3k').length / nTotal) * 100);
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `oxford5000Words`: rounded percentage of words matched by
 * `findWordInDictionary('ox5k')`.
 *
 * Fix: returns 0 instead of NaN when the text contains no words.
 *
 * @returns {number} Integer percentage.
 */
Object.defineProperty(Main.prototype, "oxford5000Words", {
    get: function () {
        var nTotal = this.wordCount;
        if (!nTotal) {
            return 0; // avoid 0/0
        }
        return Math.round((this.findWordInDictionary('ox5k').length / nTotal) * 100);
    },
    enumerable: false,
    configurable: true
});
/** Getter `oxford3000WordsCount`: number of entries returned by `findWordInDictionary('ox3k')`. */
Object.defineProperty(Main.prototype, "oxford3000WordsCount", {
    get: function () {
        var aMatches = this.findWordInDictionary('ox3k');
        return aMatches.length;
    },
    enumerable: false,
    configurable: true
});
/** Getter `oxford5000WordsCount`: number of entries returned by `findWordInDictionary('ox5k')`. */
Object.defineProperty(Main.prototype, "oxford5000WordsCount", {
    get: function () {
        var aMatches = this.findWordInDictionary('ox5k');
        return aMatches.length;
    },
    enumerable: false,
    configurable: true
});
/** Getter `oxford5000OnlyWordsCount`: number of entries returned by `findWordInDictionary('ox5k_only')`. */
Object.defineProperty(Main.prototype, "oxford5000OnlyWordsCount", {
    get: function () {
        var aMatches = this.findWordInDictionary('ox5k_only');
        return aMatches.length;
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `OPALWords`: one `{position, word}` entry per word of
 * `this.words`, where `position` is `{ Style: 'opal' }` when the word is
 * found in the Opal list and undefined otherwise.
 *
 * Side effects: overwrites `OPALWordsList` and `wordArr` on every access.
 */
Object.defineProperty(Main.prototype, "OPALWords", {
    get: function () {
        this.OPALWordsList = [];
        this.wordArr = _map(Opal, 'Word');
        for (var idx = 0; idx < this.words.length; idx++) {
            var sWord = this.words[idx];
            var bFound = this.searchStringInArray(this.wordArr, sWord) !== -1;
            this.OPALWordsList.push({
                position: bFound ? { Style: 'opal' } : undefined,
                word: sWord,
            });
        }
        return this.OPALWordsList;
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `OPALWordsCount`: the entries of `OPALWords` that were found in
 * the Opal list. Despite the name, it returns the array, not a number.
 */
Object.defineProperty(Main.prototype, "OPALWordsCount", {
    get: function () {
        var aTagged = this.OPALWords;
        return _filter(aTagged, function (oItem) {
            return !(oItem.position === undefined);
        });
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `OPALWordsPercent`: rounded percentage of words found in the
 * Opal list.
 *
 * Fix: returns 0 instead of NaN when the text contains no words.
 *
 * @returns {number} Integer percentage.
 */
Object.defineProperty(Main.prototype, "OPALWordsPercent", {
    get: function () {
        var nTotal = this.wordCount;
        if (!nTotal) {
            return 0; // avoid 0/0
        }
        return Math.round((this.OPALWordsCount.length / nTotal) * 100);
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `UncategorizedCount`: `{word}` entries for the words of
 * `positionArr` that have no dictionary position, excluding words that are
 * in the Opal list and tokens that are a lone hyphen/dash character.
 * Despite the name, it returns the array, not a number.
 *
 * Side effect: stores the result in `UncategorizedList`.
 */
Object.defineProperty(Main.prototype, "UncategorizedCount", {
    get: function () {
        var aDashes = ['‐', '‑', '‒', '–', '—', '―', '-'];
        this.UncategorizedList = _map(_filter(this.positionArr, ['position', undefined]), 'word');
        var aOpalWords = _map(this.OPALWordsCount, 'word');
        // Removes, in place, every unmatched word that equals an OPAL word.
        _pullAllWith(this.UncategorizedList, aOpalWords, _isEqual);
        var aKept = _filter(this.UncategorizedList, function (sWord) {
            return aDashes.indexOf(sWord) === -1;
        });
        this.UncategorizedList = _map(aKept, function (sWord) {
            return { word: sWord };
        });
        return this.UncategorizedList;
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `UncategorizedCountWOOpal`: `{word}` entries for every word of
 * `positionArr` that has no dictionary position; OPAL words are NOT
 * removed.
 *
 * Side effect: stores the result in `UncategorizedListWOOpal`.
 */
Object.defineProperty(Main.prototype, "UncategorizedCountWOOpal", {
    get: function () {
        var aUnmatched = _map(_filter(this.positionArr, ['position', undefined]), 'word');
        this.UncategorizedListWOOpal = _map(aUnmatched, function (sWord) {
            return { word: sWord };
        });
        return this.UncategorizedListWOOpal;
    },
    enumerable: false,
    configurable: true
});
/**
 * Getter `UncategorizedCountOpal`: `{word}` entries for every word of
 * `positionArr` that is not in the Opal list, whether or not it has a
 * dictionary position.
 *
 * Side effect: stores the result in `UncategorizedListOpal`.
 */
Object.defineProperty(Main.prototype, "UncategorizedCountOpal", {
    get: function () {
        this.UncategorizedListOpal = _map(this.positionArr, 'word');
        var aOpalWords = _map(this.OPALWordsCount, 'word');
        // Removes, in place, every word that equals an OPAL word.
        _pullAllWith(this.UncategorizedListOpal, aOpalWords, _isEqual);
        this.UncategorizedListOpal = _map(this.UncategorizedListOpal, function (sWord) {
            return { word: sWord };
        });
        return this.UncategorizedListOpal;
    },
    enumerable: false,
    configurable: true
});
return Main;
}());
exports.default = Main;
//# sourceMappingURL=Main.js.map