UNPKG

@cloudcannon/suite

Version:

A suite of gulp tools to manage static sites on CloudCannon

41 lines (35 loc) 1.4 kB
const unicodeData = require("unicharadata"); module.exports = function Chunk(word, POS /* part of speech */, label, dependency /* syntax info */) { this.word = word; this.POS = POS; this.label = label; this.dependency = dependency; this.isSpace = function() { return this.POS === "SPACE" || this.word === " "; } this.isPunctuation = function () { return this.word.length === 1 && unicodeData.category(this.word)[0] === 'P'; } this.isOpenPunctuation = function () { // check if this is an opening bracket or quote mark return this.isPunctuation() && ["Ps", "Pi"].includes(unicodeData.category(this.word)); } this.hasCJK = function() { /* Check if the word has CJK characters. */ let cjkCodepointRanges = [ [4352, 4607], [11904, 42191], [43072, 43135], [44032, 55215], [63744, 64255], [65072, 65103], [65381, 65500], [131072, 196607] ]; for (let i = 0; i < [...this.word].length; i++) { let character = [...this.word][i]; for (let j = 0; j < cjkCodepointRanges.length; j++) { let range = cjkCodepointRanges[j]; if (range[0] <= character.charCodeAt(0) && character.charCodeAt(0) <= range[1]) { return true; } } } return false; } }