@sutton-signwriting/core
Version:
a javascript package for node and browsers that supports general processing of the Sutton SignWriting script
1,276 lines (1,224 loc) • 132 kB
JavaScript
/**
* Sutton SignWriting Core Module v2.0.0 (https://github.com/sutton-signwriting/core)
* Author: Steve Slevinski (https://SteveSlevinski.me)
* core.js is released under the MIT License.
*/
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory((global.ssw = global.ssw || {}, global.ssw.core = {})));
})(this, (function (exports) { 'use strict';
/**
 * Regular expression source fragments for FSW strings.
 * Each value is a pattern string (not a RegExp) so fragments can be
 * embedded in larger patterns.
 *
 * @alias fsw.re
 * @property {string} null - the null symbol
 * @property {string} symbol - a symbol
 * @property {string} nullorsymbol - null or a symbol
 * @property {string} sort - the sorting marker
 * @property {string} prefix - a sorting marker followed by one or more symbols with nulls
 * @property {string} box - a signbox marker
 * @property {string} coord - a coordinate
 * @property {string} spatial - a symbol followed by a coordinate
 * @property {string} signbox - a signbox marker, max coordinate and zero or more spatial symbols
 * @property {string} sign - an optional prefix followed by a signbox
 * @property {string} sortable - a mandatory prefix followed by a signbox
 */
let re$4 = (() => {
  // Atomic tokens.
  const nullSymbol = 'S00000';
  const symbol = 'S[123][0-9a-f]{2}[0-5][0-9a-f]';
  const coord = '[0-9]{3}x[0-9]{3}';
  const sort = 'A';
  const box = '[BLMR]';

  // Composite patterns, each built from the tokens above.
  const nullorsymbol = `(?:${nullSymbol}|${symbol})`;
  const prefix = `(?:${sort}${nullorsymbol}+)`;
  const spatial = `${symbol}${coord}`;
  const signbox = `${box}${coord}(?:${spatial})*`;

  // Keys are listed in the order they were historically added to the object.
  return {
    'null': nullSymbol,
    symbol,
    coord,
    sort,
    box,
    nullorsymbol,
    prefix,
    spatial,
    signbox,
    sign: `${prefix}?${signbox}`,
    sortable: `${prefix}${signbox}`
  };
})();
/**
 * Regular expression source fragments for style strings.
 * Each value is a pattern string (not a RegExp).
 *
 * @alias style.re
 * @type {object}
 * @property {string} colorize - regular expression for colorize section
 * @property {string} colorhex - regular expression for color hex values with 3 or 6 characters
 * @property {string} colorname - regular expression for css color name
 * @property {string} padding - regular expression for padding section
 * @property {string} zoom - regular expression for zoom section
 * @property {string} classbase - regular expression for class name definition
 * @property {string} id - regular expression for id definition
 * @property {string} colorbase - regular expression for color hex or color name
 * @property {string} color - regular expression for single color entry
 * @property {string} colors - regular expression for double color entry
 * @property {string} background - regular expression for background section
 * @property {string} detail - regular expression for color details for line and optional fill
 * @property {string} detailsym - regular expression for color details for individual symbols
 * @property {string} classes - regular expression for one or more class names
 * @property {string} full - full regular expression for style string
 */
let re$3 = (() => {
  // Atomic tokens.
  const colorize = 'C';
  const colorhex = '(?:[0-9a-fA-F]{3}){1,2}';
  const colorname = '[a-zA-Z]+';
  const padding = 'P[0-9]{2}';
  const zoom = 'Z(?:[0-9]+(?:\\.[0-9]+)?|x)';
  const classbase = '-?[_a-zA-Z][_a-zA-Z0-9-]{0,100}';
  const id = '[a-zA-Z][_a-zA-Z0-9-]{0,100}';

  // Composite patterns.
  const colorbase = `(?:${colorhex}|${colorname})`;
  const color = `_${colorbase}_`;
  const colors = `_${colorbase}(?:,${colorbase})?_`;
  const background = `G${color}`;
  const detail = `D${colors}`;
  const detailsym = `D[0-9]{2}${colors}`;
  const classes = `${classbase}(?: ${classbase})*`;

  // Capture groups of `full`, in order: colorize, padding, background,
  // detail, zoom, detailsym list, classes, id.
  const full = `-(${colorize})?(${padding})?(${background})?(${detail})?(${zoom})?(?:-((?:${detailsym})*))?(?:-(${classes})?!(?:(${id})!)?)?`;

  // Keys are listed in the order they were historically added to the object.
  return {
    colorize,
    colorhex,
    colorname,
    padding,
    zoom,
    classbase,
    id,
    colorbase,
    color,
    colors,
    background,
    detail,
    detailsym,
    classes,
    full
  };
})();
/**
 * Prepend '#' to a value that is entirely a 3 or 6 character hex color;
 * any other value is returned unchanged (converted to a string).
 * @param {string} color - hex digits or a color name
 * @returns {string} '#'-prefixed hex color, or the input as a string
 */
const prefixColor = color => {
  const isBareHex = new RegExp(`^${re$3.colorhex}$`).test(color);
  if (isBareHex) {
    return '#' + color;
  }
  return '' + color;
};
/**
 * Copy an object's own enumerable properties, leaving out every property
 * whose value is exactly `undefined` (null, 0 and '' are kept).
 * @param {object} obj - source object
 * @returns {object} new object without undefined-valued properties
 */
const definedProps = obj => {
  const keptEntries = Object.entries(obj).filter(entry => entry[1] !== undefined);
  return Object.fromEntries(keptEntries);
};
/**
 * Function to parse style string to object.
 * Only sections present in the string appear in the result; a non-string
 * or non-matching input yields an empty object.
 * @function style.parse
 * @param {string} styleString - a style string
 * @returns {StyleObject} elements of style string
 * @example
 * style.parse('-CP10G_blue_D_red,Cyan_')
 *
 * return {
 *  'colorize': true,
 *  'padding': 10,
 *  'background': 'blue',
 *  'detail': ['red', 'Cyan']
 * }
 */
const parse$4 = styleString => {
  let groups = [];
  if (typeof styleString === 'string') {
    groups = styleString.match(new RegExp(`^${re$3.full}`)) || [];
  }
  // Index 0 is the whole match; the rest follow the capture groups of re.full.
  const [, colorize, padding, background, detail, zoom, detailsym, classes, id] = groups;

  // 'D01_red,blue_' -> { index: 1, detail: ['red', 'blue'] }
  const parseDetailSym = entry => {
    const pieces = entry.split('_');
    return {
      'index': parseInt(pieces[0].slice(1)),
      'detail': pieces[1].split(',').map(prefixColor)
    };
  };

  let parsedZoom;
  if (zoom) {
    parsedZoom = zoom === 'Zx' ? 'x' : parseFloat(zoom.slice(1));
  }

  return definedProps({
    'colorize': colorize ? true : undefined,
    'padding': padding ? parseInt(padding.slice(1)) : undefined,
    // slice(2, -1) strips the section letter plus the surrounding underscores
    'background': background ? prefixColor(background.slice(2, -1)) : undefined,
    'detail': detail ? detail.slice(2, -1).split(',').map(prefixColor) : undefined,
    'zoom': parsedZoom,
    'detailsym': detailsym ? detailsym.match(new RegExp(re$3.detailsym, 'g')).map(parseDetailSym) : undefined,
    'classes': classes ? classes : undefined,
    'id': id ? id : undefined
  });
};
/**
* Function to compose style string from object
* @function style.compose
* @param {StyleObject} styleObject - an object of style options
* @returns {string} style string
* @example
* style.compose({
* 'colorize': true,
* 'padding': 10,
* 'background': 'blue',
* 'detail': ['red', 'Cyan'],
* 'zoom': 1.1,
* 'detailsym': [
* {
* 'index': 1,
* 'detail': ['#ff00ff']
* },
* {
* 'index': 2,
* 'detail': ['yellow', 'green']
* }
* ],
* 'classes': 'primary blinking',
* 'id': 'cursor'
* })
*
* return '-CP10G_blue_D_red,Cyan_Z1.1-D01_ff00ff_D02_yellow,green_-primary blinking!cursor!'
*/
const compose$4 = styleObject => {
if (typeof styleObject !== 'object' || styleObject === null) return undefined;
// three sections
let style1 = '-';
style1 += !styleObject.colorize ? '' : 'C';
const padding = parseInt(styleObject.padding);
style1 += !padding || padding <= 0 || padding > 99 ? '' : 'P' + (padding > 9 ? padding : '0' + padding);
const background = !styleObject.background || !(typeof styleObject.background === 'string') ? undefined : styleObject.background.match(re$3.colorbase)[0];
style1 += !background ? '' : 'G_' + background + '_';
const detail1 = !styleObject.detail || !styleObject.detail[0] || !(typeof styleObject.detail[0] === 'string') ? undefined : styleObject.detail[0].match(re$3.colorbase)[0];
const detail2 = !styleObject.detail || !styleObject.detail[1] || !(typeof styleObject.detail[1] === 'string') ? undefined : styleObject.detail[1].match(re$3.colorbase)[0];
if (detail1) {
style1 += 'D_' + detail1;
if (detail2) {
style1 += ',' + detail2;
}
style1 += '_';
}
const zoom = styleObject.zoom === 'x' ? 'x' : parseFloat(styleObject.zoom);
style1 += !zoom || zoom <= 0 ? '' : 'Z' + zoom;
let style2 = '';
const detailsym = !styleObject.detailsym || !Array.isArray(styleObject.detailsym) ? [] : styleObject.detailsym.map(styleObject => {
const index = parseInt(styleObject.index);
if (!index || index <= 0 || index > 99) return '';
let style = 'D' + (index > 9 ? index : '0' + index);
const detail1 = !styleObject.detail || !styleObject.detail[0] ? undefined : styleObject.detail[0].match(re$3.colorbase)[0];
const detail2 = !styleObject.detail || !styleObject.detail[1] ? undefined : styleObject.detail[1].match(re$3.colorbase)[0];
if (detail1) {
style += '_' + detail1;
if (detail2) {
style += ',' + detail2;
}
style += '_';
}
return style;
});
style2 += detailsym.join('');
let style3 = '';
const classes = !styleObject.classes || !(typeof styleObject.classes === 'string') ? undefined : styleObject.classes.match(re$3.classes)[0];
style3 += !classes ? '' : classes;
const id = !styleObject.id || !(typeof styleObject.id === 'string') ? undefined : styleObject.id.match(re$3.id)[0];
style3 += classes || id ? '!' : '';
style3 += !id ? '' : id + '!';
return style1 + (style2 || style3 ? '-' + style2 : '') + (style3 ? '-' + style3 : '');
};
/**
 * Function to merge style objects.
 * Properties of style2 override those of style1, except `zoom`, which is
 * the product of both zoom values (a missing zoom counts as 1, so the
 * result always carries a `zoom` property). Arguments that are not objects,
 * including null, are treated as empty style objects.
 * NOTE(review): a non-numeric zoom such as 'x' multiplies to NaN.
 * @function style.merge
 * @param {StyleObject} style1 - a style object
 * @param {StyleObject} style2 - a style object
 * @returns {StyleObject} a style object
 * @example
 * style.merge({'colorize': true},{zoom:2})
 *
 * return {
 *  'colorize': true,
 *  'zoom': 2
 * }
 */
const merge = (style1, style2) => {
  // typeof null is 'object', so null needs its own check before using `in`.
  const asStyle = value => (typeof value === 'object' && value !== null ? value : {});
  const first = asStyle(style1);
  const second = asStyle(style2);
  const zoom1 = 'zoom' in first ? first['zoom'] : 1;
  const zoom2 = 'zoom' in second ? second['zoom'] : 1;
  return {
    ...first,
    ...second,
    zoom: zoom1 * zoom2
  };
};
/**
 * Split an 'rgb(r,g,b)' or 'rgba(r,g,b,a)' string into an array of numbers.
 * The 'rgb(' / 'rgba(' wrapper is removed (case-insensitively) and the rest
 * is split on commas; a non-string input yields [0, 0, 0].
 * @param {string} rgb - an rgb or rgba color string
 * @returns {number[]} numeric channel values
 */
const rgb2arr = rgb => {
  if (typeof rgb !== 'string') {
    return [0, 0, 0];
  }
  const channelList = rgb.replace(/rgba?\((.+?)\)/ig, '$1');
  return channelList.split(',').map(part => Number(part));
};
/**
 * Render the first three numbers of an array as concatenated hex pairs,
 * each left-padded with '0' to two characters.
 * @param {number[]} arr - channel values; elements beyond the third are ignored
 * @returns {string} hex string such as 'ff0010'
 */
const arr2hex = arr => {
  const toHexPair = num => num.toString(16).padStart(2, '0');
  const channels = arr.slice(0, 3);
  return channels.map(toHexPair).join('');
};
/**
 * Function to convert rgb color to hex or "transparent" if below tolerance.
 * The alpha value is only considered when the color has exactly four parts.
 * @function style.rgb2hex
 * @param {string} rgb - an rgb color
 * @param {number} [tolerance=0] - max alpha for full transparency
 * @returns {string} a hex color or "transparent"
 * @example
 * style.rgb2hex("rgb(255,255,255)")
 * return "ffffff"
 *
 * style.rgb2hex("rgba(255,255,255,0.5)",0.5)
 * return "transparent"
 */
const rgb2hex = (rgb, tolerance = 0) => {
  const channels = rgb2arr(rgb);
  const hasAlpha = channels.length === 4;
  return hasAlpha && channels[3] <= tolerance ? 'transparent' : arr2hex(channels);
};
/**
 * Function to merge color with background based on alpha transparency.
 * Each channel is blended as (1 - alpha) * background + alpha * color and
 * truncated toward zero; a color without an alpha part is treated as opaque.
 * @function style.rgba2hex
 * @param {string} color - an rgba color
 * @param {string} background - an rgba background color
 * @returns {string} a hex color or "transparent"
 * @example
 * style.rgba2hex("rgba(255,255,255,0.5)","rgb(0,0,0)")
 *
 * return "7f7f7f"
 */
const rgba2hex = (color, background) => {
  const bArr = rgb2arr(background);
  const cArr = rgb2arr(color);
  const alpha = cArr.length === 4 ? cArr[3] : 1;
  if (alpha === 0) {
    return 'transparent';
  }
  // Math.trunc rather than parseInt: parseInt stringifies its argument first,
  // so a tiny blend such as 2.55e-7 would be read as 2 instead of 0.
  const blended = cArr.slice(0, 3).map((v, i) => Math.trunc((1 - alpha) * bArr[i] + alpha * v));
  return arr2hex(blended);
};
/**
 * The style module contains regular expressions and functions for parsing and composing style strings.
 * [Style string definition](https://tools.ietf.org/id/draft-slevinski-formal-signwriting-09.html#name-styling-string)
 * @module style
 */
// Frozen, prototype-less namespace object gathering the style module members.
var index$5 = /*#__PURE__*/Object.freeze({
  __proto__: null,
  re: re$3,
  parse: parse$4,
  compose: compose$4,
  merge,
  rgb2hex,
  rgba2hex
});
/**
 * Regular expression source fragments for SWU strings in UTF-16.
 * Each value is a pattern string (not a RegExp) written in terms of
 * UTF-16 surrogate code units.
 *
 * @alias swu.re
 * @property {string} null - the null symbol
 * @property {string} symbol - a symbol
 * @property {string} nullorsymbol - null or a symbol
 * @property {string} sort - the sorting marker
 * @property {string} prefix - a sorting marker followed by one or more symbols with nulls
 * @property {string} box - a signbox marker
 * @property {string} coord - a coordinate
 * @property {string} spatial - a symbol followed by a coordinate
 * @property {string} signbox - a signbox marker, max coordinate and zero or more spatial symbols
 * @property {string} sign - an optional prefix followed by a signbox
 * @property {string} sortable - a mandatory prefix followed by a signbox
 */
let re$2 = (() => {
  // Atomic tokens.
  const nullSymbol = '\uD8C0\uDC00';
  const symbol = '(?:(?:\uD8C0[\uDC01-\uDFFF])|(?:[\uD8C1-\uD8FC][\uDC00-\uDFFF])|(?:\uD8FD[\uDC00-\uDC80]))';
  const coord = '(?:\uD836[\uDC0C-\uDDFF]){2}';
  const sort = '\uD836\uDC00';
  const box = '\uD836[\uDC01-\uDC04]';

  // Composite patterns, each built from the tokens above.
  const nullorsymbol = `(?:${nullSymbol}|${symbol})`;
  const prefix = `(?:${sort}(?:${nullorsymbol})+)`;
  const spatial = `${symbol}${coord}`;
  const signbox = `${box}${coord}(?:${spatial})*`;

  // Keys are listed in the order they were historically added to the object.
  return {
    'null': nullSymbol,
    symbol,
    coord,
    sort,
    box,
    nullorsymbol,
    prefix,
    spatial,
    signbox,
    sign: `${prefix}?${signbox}`,
    sortable: `${prefix}${signbox}`
  };
})();
/**
* An array of symbol IDs in minimized format such as "101011"
*
* @alias convert.symidArr
* @type {string[]}
*/
const symidArr = ["101011", "101021", "101031", "101041", "101051", "101061", "101071", "101081", "101091", "101101", "101111", "101121", "101131", "101141", "102011", "102021", "102031", "102041", "102051", "102061", "102071", "102081", "102091", "102101", "102111", "102121", "102131", "102141", "102151", "102161", "103011", "103021", "103031", "103041", "103051", "103061", "103071", "103081", "103091", "103101", "103111", "103121", "103131", "103141", "103151", "103161", "103171", "103181", "103191", "103201", "103211", "103221", "103231", "103241", "103251", "103261", "103271", "103281", "103291", "103301", "103311", "103321", "103331", "103341", "103351", "103361", "103371", "103381", "104011", "104021", "104031", "104041", "104051", "104061", "104071", "104081", "105011", "105021", "105031", "105041", "105051", "105061", "105071", "105081", "105091", "105101", "105111", "105121", "105131", "105141", "105151", "105161", "105171", "105181", "105191", "105201", "105211", "105221", "105231", "105241", "105251", "105261", "105271", "105281", "105291", "105301", "105311", "105321", "105331", "105341", "105351", "105361", "105371", "105381", "105391", "105401", "105411", "105421", "105431", "105441", "105451", "105461", "105471", "105481", "105491", "105501", "105511", "105521", "105531", "105541", "105551", "105561", "105571", "105581", "106011", "106021", "106031", "106041", "106051", "106061", "106071", "106081", "106091", "106101", "106111", "106121", "106131", "106141", "106151", "106161", "106171", "106181", "106191", "106201", "106211", "106221", "106231", "106241", "106251", "106261", "106271", "106281", "106291", "106301", "107011", "107021", "107031", "107041", "107051", "107061", "107071", "107081", "107091", "107101", "107111", "107121", "107131", "107141", "107151", "107161", "107171", "107181", "107191", "107201", "107211", "107221", "108011", "108021", "108031", "108041", "108051", "108061", "108071", "108081", "108091", "108101", "108111", "108121", 
"108131", "108141", "108151", "108161", "108171", "108181", "108191", "109011", "109021", "109031", "109041", "109051", "109061", "109071", "109081", "109091", "109101", "109111", "109121", "109131", "109141", "109151", "109161", "109171", "109181", "109191", "109201", "109211", "109221", "109231", "109241", "109251", "109261", "109271", "109281", "109291", "109301", "109311", "109321", "109331", "109341", "109351", "109361", "109371", "109381", "109391", "109401", "110011", "110021", "110031", "110041", "110051", "110061", "110071", "110081", "110091", "110101", "110111", "110121", "110131", "110141", "110151", "110161", "201011", "201021", "201031", "201041", "201051", "201061", "201071", "201081", "201091", "201101", "201111", "201121", "201131", "201141", "201151", "201161", "201171", "202011", "202012", "202021", "202022", "202031", "202041", "202042", "202051", "202052", "202061", "202071", "202081", "202082", "202091", "202092", "202101", "202102", "202111", "202121", "202131", "203011", "203012", "203013", "203014", "203015", "203021", "203022", "203031", "203032", "203041", "203051", "203052", "203061", "203062", "203071", "203072", "203073", "203081", "203082", "203083", "203084", "203091", "203092", "203093", "203101", "203102", "203103", "203111", "203112", "203113", "203121", "203122", "203123", "203131", "203141", "203151", "203161", "203171", "203181", "203191", "203201", "203202", "203203", "204011", "204012", "204013", "204014", "204021", "204022", "204023", "204024", "204031", "204032", "204033", "204034", "204041", "204042", "204043", "204044", "205011", "205012", "205013", "205014", "205015", "205021", "205022", "205031", "205032", "205041", "205051", "205052", "205061", "205062", "205071", "205081", "205082", "205083", "205091", "205101", "205102", "205103", "205111", "205112", "205113", "205121", "205122", "205123", "205131", "205141", "205151", "205161", "205171", "205181", "205191", "206011", "206012", "206013", "206014", "206021", "206022", 
"206023", "206024", "206031", "206032", "206041", "206042", "206043", "206051", "206052", "206053", "206054", "206061", "206062", "206063", "206064", "206065", "206066", "206071", "206072", "206073", "206081", "206091", "206101", "206111", "207011", "207021", "207031", "207041", "207051", "207061", "207071", "207091", "207101", "207111", "207121", "207131", "207141", "207151", "207161", "207162", "207163", "208011", "208012", "208021", "208022", "208023", "208024", "208031", "208032", "208033", "208034", "208041", "208042", "208051", "208061", "208071", "208081", "208082", "208091", "208092", "208093", "208094", "208101", "208102", "208103", "208104", "208111", "208112", "208121", "208131", "208141", "209011", "209012", "209013", "209014", "209015", "209021", "209031", "209041", "209042", "209043", "209051", "209061", "209071", "209081", "210011", "210012", "210021", "210022", "210031", "210032", "210033", "210041", "210042", "210043", "210051", "210052", "210061", "210062", "210071", "210072", "210073", "210074", "210081", "210082", "301011", "301021", "301031", "301032", "301041", "301042", "301043", "301044", "401011", "401021", "401031", "401041", "401051", "401061", "401071", "401081", "401091", "401101", "401102", "402011", "402012", "402013", "402021", "402022", "402023", "402024", "402031", "402032", "402033", "402041", "402042", "402043", "402044", "402045", "402051", "402052", "402053", "402054", "402055", "402061", "402062", "402063", "402071", "402072", "402073", "402081", "402082", "402083", "402091", "402101", "402111", "403011", "403012", "403013", "403021", "403022", "403023", "403031", "403041", "403042", "403043", "403044", "403051", "403052", "403061", "403062", "403071", "403072", "404011", "404012", "404013", "404021", "404022", "404023", "404031", "404032", "404033", "404041", "404042", "404043", "404051", "404052", "404053", "404061", "404062", "404063", "404071", "404072", "404073", "404081", "404082", "404083", "404091", "404092", "404093", 
"404101", "404111", "404112", "405011", "405012", "405013", "405014", "405015", "405021", "405031", "405041", "405051", "405052", "405061", "405062", "405071", "405072", "405081", "405091", "405101", "405111", "405121", "405131", "501011", "501021", "501031", "501041", "501051", "501061", "501071", "501081", "501091", "502011", "502021", "502022", "502023", "502024", "502031", "502032", "502033", "502041", "601011", "601012", "601021", "601031", "601041", "601051", "601061", "601071", "701011", "701012", "701021", "701022", "701031"];
/** The convert module contains functions to convert between Formal SignWriting in ASCII (FSW) and SignWriting in Unicode (SWU) characters, along with other types of data.
* [Characters set definitions](https://tools.ietf.org/id/draft-slevinski-formal-signwriting-09.html#name-characters)
* @module convert
*/
/**
 * Function to convert an SWU structural marker to FSW equivalent.
 * The markers are the five code points U+1D800..U+1D804, written below as
 * escaped surrogate pairs so the source stays valid in any file encoding.
 * @function convert.swu2mark
 * @param {string} swuMark - character for SWU structural marker
 * @returns {string} FSW structural marker, or undefined for any other input
 * @example
 * convert.swu2mark('\u{1D800}')
 *
 * return 'A'
 */
const swu2mark = swuMark => {
  return {
    '\uD836\uDC00': 'A',
    '\uD836\uDC01': 'B',
    '\uD836\uDC02': 'L',
    '\uD836\uDC03': 'M',
    '\uD836\uDC04': 'R'
  }[swuMark];
};
/**
 * Function to convert an FSW structural marker to SWU equivalent.
 * The SWU markers are the five code points U+1D800..U+1D804, written below
 * as escaped surrogate pairs so the source stays valid in any file encoding.
 * @function convert.mark2swu
 * @param {string} fswMark - character for FSW structural marker
 * @returns {string} SWU structural marker, or undefined for any other input
 * @example
 * convert.mark2swu('A')
 *
 * return '\u{1D800}'
 */
const mark2swu = fswMark => {
  return {
    'A': '\uD836\uDC00',
    'B': '\uD836\uDC01',
    'L': '\uD836\uDC02',
    'M': '\uD836\uDC03',
    'R': '\uD836\uDC04'
  }[fswMark];
};
/**
 * Function to convert an SWU number character to an integer.
 * Code point U+1D80C corresponds to 250; each following code point adds one.
 * @function convert.swu2num
 * @param {string} swuNum - SWU number character
 * @returns {number} Integer value for number
 * @example
 * convert.swu2num('\u{1D906}')
 *
 * return 500
 */
const swu2num = swuNum => {
  const codePoint = Number(swuNum.codePointAt(0));
  return codePoint - 0x1D80C + 250;
};
/**
 * Function to convert a number to an SWU number character.
 * The value 250 maps to code point U+1D80C; each increment moves one code
 * point further.
 * @function convert.num2swu
 * @param {number} num - Integer value for number
 * @returns {string} SWU number character
 * @example
 * convert.num2swu(500)
 *
 * return '\u{1D906}'
 */
const num2swu = num => {
  const value = parseInt(num);
  return String.fromCodePoint(0x1D80C + value - 250);
};
/**
 * Function to convert two SWU number characters to an array of x,y integers.
 * Each number character occupies two UTF-16 code units, hence the slices
 * at offsets 0-2 and 2-4.
 * @function convert.swu2coord
 * @param {string} swuCoord - Two SWU number character
 * @returns {number[]} Array of x,y integers
 * @example
 * convert.swu2coord('\u{1D906}\u{1D906}')
 *
 * return [500, 500]
 */
const swu2coord = swuCoord => {
  const xChar = swuCoord.slice(0, 2);
  const yChar = swuCoord.slice(2, 4);
  return [swu2num(xChar), swu2num(yChar)];
};
/**
 * Function to convert an array of x,y integers to two SWU number characters.
 * Every element of the array is converted and the results are concatenated.
 * @function convert.coord2swu
 * @param {number[]} coord - Array of x,y integers
 * @returns {string} Two SWU number character
 * @example
 * convert.coord2swu([500, 500])
 *
 * return '\u{1D906}\u{1D906}'
 */
const coord2swu = coord => {
  const numberChars = coord.map(value => num2swu(value));
  return numberChars.join('');
};
/**
 * Function to convert an FSW coordinate string to an array of x,y integers.
 * The string is split on 'x' and each part is parsed as an integer.
 * @function convert.fsw2coord
 * @param {string} fswCoord - An FSW coordinate string
 * @returns {number[]} Array of x,y integers
 * @example
 * convert.fsw2coord('500x500')
 *
 * return [500, 500]
 */
const fsw2coord = fswCoord => {
  const parts = fswCoord.split('x');
  return parts.map(part => parseInt(part));
};
/**
 * Function to convert an array of x,y integers to an FSW coordinate string.
 * The elements are joined with 'x' without any padding or validation.
 * @function convert.coord2fsw
 * @param {number[]} coord - Array of x,y integers
 * @returns {string} An FSW coordinate string
 * @example
 * convert.coord2fsw([500, 500])
 *
 * return '500x500'
 */
const coord2fsw = coord => {
  const separator = 'x';
  return coord.join(separator);
};
/**
 * Function to convert an SWU symbol character to a code point on plane 4.
 * Only the first code point of the string is read; an empty string yields NaN.
 * @function convert.swu2code
 * @param {string} swuSym - SWU symbol character
 * @returns {number} Code point on plane 4
 * @example
 * convert.swu2code('\u{40001}')
 *
 * return 0x40001
 */
const swu2code = swuSym => {
  const codePoint = swuSym.codePointAt(0);
  return Number(codePoint);
};
/**
 * Function to convert a code point on plane 4 to an SWU symbol character.
 * @function convert.code2swu
 * @param {number} code - Code point on plane 4
 * @returns {string} SWU symbol character
 * @example
 * convert.code2swu(0x40001)
 *
 * return '\u{40001}'
 */
const code2swu = code => {
  const swuSym = String.fromCodePoint(code);
  return swuSym;
};
/**
 * Function to convert an SWU symbol character to a 16-bit ID.
 * The ID is the symbol's code point minus 0x40000.
 * @function convert.swu2id
 * @param {string} swuSym - SWU symbol character
 * @returns {number} 16-bit ID
 * @example
 * convert.swu2id('\u{40001}')
 *
 * return 1
 */
const swu2id = swuSym => {
  const code = swu2code(swuSym);
  return code - 0x40000;
};
/**
 * Function to convert a 16-bit ID to an SWU symbol character.
 * The character's code point is the ID plus 0x40000.
 * @function convert.id2swu
 * @param {number} id - 16-bit ID
 * @returns {string} SWU symbol character
 * @example
 * convert.id2swu(1)
 *
 * return '\u{40001}'
 */
const id2swu = id => {
  const code = id + 0x40000;
  return code2swu(code);
};
/**
 * Function to convert an FSW symbol key to a 16-bit ID.
 * The null key 'S00000' maps to 0. Otherwise the three hex digits after 'S'
 * (minus 0x100) select a group of 96 IDs, the next hex digit selects a run
 * of 16 within it, and the last hex digit the position in that run.
 * @function convert.key2id
 * @param {string} key - FSW symbol key
 * @returns {number} 16-bit ID
 * @example
 * convert.key2id('S10000')
 *
 * return 1
 */
const key2id = key => {
  if (key === "S00000") {
    return 0;
  }
  const hexAt = (from, to) => parseInt(key.slice(from, to), 16);
  const base = hexAt(1, 4) - 256;
  const fill = hexAt(4, 5);
  const rotation = hexAt(5, 6);
  return 1 + base * 96 + fill * 16 + rotation;
};
/**
 * Function to convert a 16-bit ID to an FSW symbol key.
 * ID 0 is the null key 'S00000'. For other IDs, (id - 1) is decomposed into
 * a base (groups of 96, offset by 0x100), a fill (runs of 16) and a
 * rotation, each written in lowercase hex.
 * @function convert.id2key
 * @param {number} id - 16-bit ID
 * @returns {string} FSW symbol key
 * @example
 * convert.id2key(1)
 *
 * return 'S10000'
 */
const id2key = id => {
  if (id === 0) {
    return "S00000";
  }
  const symcode = id - 1;
  const base = parseInt(symcode / 96);
  const withinBase = symcode - base * 96;
  const fill = parseInt(withinBase / 16);
  const rotation = parseInt(withinBase - fill * 16);
  const parts = ['S', (base + 0x100).toString(16), fill.toString(16), rotation.toString(16)];
  return parts.join('');
};
/**
 * Function to convert an SWU symbol character to an FSW symbol key.
 * The null symbol U+40000 maps to 'S00000'. For other symbols, the offset
 * from U+40001 is decomposed into a base (groups of 96, offset by 0x100),
 * a fill (runs of 16) and a rotation, each written in lowercase hex.
 * @function convert.swu2key
 * @param {string} swuSym - SWU symbol character
 * @returns {string} FSW symbol key
 * @example
 * convert.swu2key('\u{40001}')
 *
 * return 'S10000'
 */
const swu2key = swuSym => {
  // U+40000 written as an escaped surrogate pair so the comparison does not
  // depend on the encoding of this source file.
  if (swuSym === '\uD8C0\uDC00') {
    return "S00000";
  }
  const symcode = swu2code(swuSym) - 0x40001;
  const base = Math.trunc(symcode / 96);
  const withinBase = symcode - base * 96;
  const fill = Math.trunc(withinBase / 16);
  const rotation = withinBase - fill * 16;
  return 'S' + (base + 0x100).toString(16) + fill.toString(16) + rotation.toString(16);
};
/**
 * Function to convert an FSW symbol key to an SWU symbol character.
 * The null key 'S00000' maps to code point 0x40000. Any other key maps to
 * 0x40001 plus an offset computed from its hex digits: (base - 0x100) * 96
 * + fill * 16 + rotation.
 * @function convert.key2swu
 * @param {string} key - FSW symbol key
 * @returns {string} SWU symbol character
 * @example
 * convert.key2swu('S10000')
 *
 * return '\u{40001}'
 */
const key2swu = key => {
  if (key === "S00000") {
    return code2swu(0x40000);
  }
  const hexAt = (from, to) => parseInt(key.slice(from, to), 16);
  const base = hexAt(1, 4) - 256;
  const fill = hexAt(4, 5);
  const rotation = hexAt(5, 6);
  return code2swu(0x40001 + base * 96 + fill * 16 + rotation);
};
/**
 * Function to convert SWU text to FSW text.
 * Conversion runs in three passes: structural markers (U+1D800..U+1D804)
 * become 'A', 'B', 'L', 'M', 'R'; symbol characters become keys; pairs of
 * number characters become 'NNNxNNN' coordinates. Text that matches none of
 * these is passed through unchanged.
 * @function convert.swu2fsw
 * @param {string} swuText - SWU text
 * @returns {string} FSW text ('' for empty or missing input)
 * @example
 * convert.swu2fsw('\u{1D800}\u{40012}\u{4001A}\u{4B6A5}\u{4B6E9}\u{1D803}\u{1D91F}\u{1D929}\u{4B6E9}\u{1D8F5}\u{1D910}\u{40012}\u{1D907}\u{1D8E4}\u{4B6A5}\u{1D910}\u{1D906}\u{4001A}\u{1D8EE}\u{1D8ED}')
 *
 * return 'AS10011S10019S2e704S2e748M525x535S2e748483x510S10011501x466S2e704510x500S10019476x475'
 */
const swu2fsw = swuText => {
  if (!swuText) return '';
  // FSW markers in the same order as the SWU marker low surrogates DC00..DC04.
  const marks = 'ABLMR';
  return swuText
    // Markers are matched as surrogate pairs written with escapes, so the
    // pattern does not depend on the encoding of this source file.
    .replace(/\uD836[\uDC00-\uDC04]/g, mark => marks[mark.charCodeAt(1) - 0xDC00])
    .replace(new RegExp(re$2.nullorsymbol, 'g'), sym => swu2key(sym))
    .replace(new RegExp(re$2.coord, 'g'), coord => swu2coord(coord).join('x'));
};
/**
 * Function to convert FSW text to SWU text.
 * Conversion runs in three passes: sorting prefixes ('A' plus symbol keys),
 * then signbox markers with their max coordinate, then spatial symbols
 * (key plus coordinate). Text that matches none of these is passed through
 * unchanged.
 * @function convert.fsw2swu
 * @param {string} fswText - FSW text
 * @returns {string} SWU text ('' for empty or missing input)
 * @example
 * convert.fsw2swu('AS10011S10019S2e704S2e748M525x535S2e748483x510S10011501x466S2e704510x500S10019476x475')
 *
 * return '\u{1D800}\u{40012}\u{4001A}\u{4B6A5}\u{4B6E9}\u{1D803}\u{1D91F}\u{1D929}\u{4B6E9}\u{1D8F5}\u{1D910}\u{40012}\u{1D907}\u{1D8E4}\u{4B6A5}\u{1D910}\u{1D906}\u{4001A}\u{1D8EE}\u{1D8ED}'
 */
const fsw2swu = fswText => {
  if (!fswText) return '';
  // SWU sorting marker U+1D800 as an escaped surrogate pair, so the literal
  // does not depend on the encoding of this source file.
  const swuSort = '\uD836\uDC00';
  const coordToSwu = fswCoord => coord2swu(fsw2coord(fswCoord));
  return fswText
    // 'A' followed by 6-character keys
    .replace(new RegExp(re$4.prefix, 'g'), prefix => swuSort + prefix.slice(1).match(/.{6}/g).map(key => key2swu(key)).join(''))
    // 1-character box marker followed by a 7-character coordinate
    .replace(new RegExp(re$4.box + re$4.coord, 'g'), box => mark2swu(box.slice(0, 1)) + coordToSwu(box.slice(1, 8)))
    // 6-character key followed by a 7-character coordinate
    .replace(new RegExp(re$4.spatial, 'g'), spatial => key2swu(spatial.slice(0, 6)) + coordToSwu(spatial.slice(6, 13)));
};
/**
 * Function to convert base or full symid Min to symid Max.
 * Input must be exactly 6 digits (base) or 9 digits (full); anything else
 * yields ''.
 * @function convert.symidMax
 * @param {string} symidMin - Symbol ID minimized
 * @returns {string} Symbol ID maximized
 * @example
 * convert.symidMax('101011')
 *
 * return '01-01-001-01'
 * @example
 * convert.symidMax('101011616')
 *
 * return '01-01-001-01-06-16'
 */
const symidMax = symidMin => {
  const isValid = /^\d{6}(?:\d{3})?$/.test(symidMin);
  if (!isValid) {
    return '';
  }
  const digit = position => symidMin.charAt(position);
  // Digit layout 1-2-2-1 becomes the zero-padded groups 2-2-3-2.
  const groups = [
    '0' + digit(0),
    digit(1) + digit(2),
    '0' + digit(3) + digit(4),
    '0' + digit(5)
  ];
  if (symidMin.length > 6) {
    // Trailing 1-2 digits become the zero-padded groups 2-2.
    groups.push('0' + digit(6), digit(7) + digit(8));
  }
  return groups.join('-');
};
/**
 * Function to convert base or full symid Max to symid Min.
 * Input that does not have the maximized layout yields ''.
 * @function convert.symidMin
 * @param {string} symidMax - Symbol ID maximized
 * @returns {string} Symbol ID minimized
 * @example
 * convert.symidMin('01-01-001-01')
 *
 * return '101011'
 * @example
 * convert.symidMin('01-01-001-01-06-16')
 *
 * return '101011616'
 */
const symidMin = symidMax => {
  const matches = symidMax.match(/^0(\d)-(\d{2})-0(\d{2})-0(\d)(?:-0(\d)-(\d{2}))?$/);
  if (!matches) {
    return '';
  }
  // Groups 1-4 form the base id; groups 5-6 exist only for a full id.
  const [, part1, part2, part3, part4, fill, rotation] = matches;
  const baseId = part1 + part2 + part3 + part4;
  return fill ? baseId + fill + rotation : baseId;
};
/**
 * Function to convert base or full symid to key.
 * The base part of the symid is looked up in `symidArr`; its position plus
 * 0x100 gives the three hex digits of the key. For a full symid, the fill
 * and rotation are each reduced by one and appended. An unparsable symid or
 * one whose base is not listed yields ''.
 * @function convert.symid2key
 * @param {string} symid - Symbol ID
 * @returns {string} Symbol key
 * @example
 * convert.symid2key('01-01-001-01')
 *
 * return 'S100'
 * @example
 * convert.symid2key('01-01-001-01-06-16')
 *
 * return 'S1005f'
 */
const symid2key = symid => {
  const matches = symid.match(/^0(\d)-(\d{2})-0(\d{2})-0(\d)(?:-0(\d)-(\d{2}))?$/);
  if (!matches) {
    return '';
  }
  const [, part1, part2, part3, part4, fill, rotation] = matches;
  const position = symidArr.indexOf(part1 + part2 + part3 + part4);
  if (position === -1) {
    return '';
  }
  const baseKey = 'S' + (256 + position).toString(16);
  if (!fill) {
    return baseKey;
  }
  // fill is appended in decimal, rotation in hex
  return baseKey + (parseInt(fill, 10) - 1) + (parseInt(rotation, 10) - 1).toString(16);
};
/**
 * Function to convert base or full key to symid.
 * The three hex digits after 'S' (minus 0x100) index into `symidArr`; the
 * entry is expanded with `symidMax`. For a full key, the fill and rotation
 * are each increased by one and appended as '-0F-RR'. A malformed key or an
 * index past the end of `symidArr` yields ''.
 * @function convert.key2symid
 * @param {string} key - Symbol key
 * @returns {string} Symbol ID
 * @example
 * convert.key2symid('S100')
 *
 * return '01-01-001-01'
 * @example
 * convert.key2symid('S1005f')
 *
 * return '01-01-001-01-06-16'
 */
const key2symid = key => {
  const matches = key.match(/^S([1-3][0-9a-f]{2})(?:([0-5])([0-9a-f]))?$/);
  if (!matches) {
    return '';
  }
  const [, baseHex, fillChar, rotationChar] = matches;
  const position = parseInt(baseHex, 16) - 256;
  if (position >= symidArr.length) {
    return '';
  }
  const baseSymid = symidMax(symidArr[position]);
  if (!rotationChar) {
    return baseSymid;
  }
  const fill = 1 + parseInt(fillChar);
  const rotation = (parseInt(rotationChar, 16) + 1).toString().padStart(2, '0');
  return baseSymid + '-0' + fill + '-' + rotation;
};
// Frozen, prototype-less namespace object gathering the convert module members.
var index$4 = /*#__PURE__*/Object.freeze({
  __proto__: null,
  swu2mark,
  mark2swu,
  swu2num,
  num2swu,
  swu2coord,
  coord2swu,
  fsw2coord,
  coord2fsw,
  swu2code,
  code2swu,
  swu2id,
  id2swu,
  key2id,
  id2key,
  swu2key,
  key2swu,
  swu2fsw,
  fsw2swu,
  symidArr,
  symidMax,
  symidMin,
  symid2key,
  key2symid
});
const parse$3 = {
  /**
   * Function to parse an fsw symbol with optional coordinate and style string.
   * The result always has the three keys; each is undefined when the
   * corresponding part is absent or the input does not start with a symbol.
   * @function fsw.parse.symbol
   * @param {string} fswSym - an fsw symbol
   * @returns {SymbolObject} elements of fsw symbol
   * @example
   * fsw.parse.symbol('S10000500x500-C')
   *
   * return {
   *  'symbol': 'S10000',
   *  'coord': [500, 500],
   *  'style': '-C'
   * }
   */
  symbol: fswSym => {
    let found = null;
    if (typeof fswSym === 'string') {
      found = fswSym.match(new RegExp(`^(${re$4.symbol})(${re$4.coord})?(${re$3.full})?`));
    }
    // Groups 1-3 are symbol, coordinate and whole style string.
    const [, key, coord, style] = found || [];
    return {
      'symbol': key,
      'coord': coord ? fsw2coord(coord) : undefined,
      'style': style
    };
  },
  /**
   * Function to parse an fsw sign with style string.
   * Returns an empty object when the input is not a string or does not start
   * with a sign.
   * @function fsw.parse.sign
   * @param {string} fswSign - an fsw sign
   * @returns { SignObject } elements of fsw sign
   * @example
   * fsw.parse.sign('AS10011S10019S2e704S2e748M525x535S2e748483x510S10011501x466S2e704510x500S10019476x475-C')
   *
   * return {
   *  sequence: ['S10011', 'S10019', 'S2e704', 'S2e748'],
   *  box: 'M',
   *  max: [525, 535],
   *  spatials: [
   *    {
   *      symbol: 'S2e748',
   *      coord: [483, 510]
   *    },
   *    {
   *      symbol: 'S10011',
   *      coord: [501, 466]
   *    },
   *    {
   *      symbol: 'S2e704',
   *      coord: [510, 500]
   *    },
   *    {
   *      symbol: 'S10019',
   *      coord: [476, 475]
   *    }
   *  ],
   *  style: '-C'
   * }
   */
  sign: fswSign => {
    if (typeof fswSign !== 'string') {
      return {};
    }
    const found = fswSign.match(new RegExp(`^(${re$4.prefix})?(${re$4.signbox})(${re$3.full})?`));
    if (!found) {
      return {};
    }
    // Groups 1-3 are prefix, signbox and whole style string.
    const [, prefix, signbox, style] = found;

    // The signbox is 1 marker char + 7 coordinate chars, then 13 chars per
    // spatial (6-char key + 'NNNxNNN').
    let spatials;
    if (signbox.length >= 9) {
      spatials = signbox.slice(8).match(/(.{13})/g).map(chunk => ({
        symbol: chunk.slice(0, 6),
        coord: [parseInt(chunk.slice(6, 9)), parseInt(chunk.slice(10, 13))]
      }));
    }

    return {
      'sequence': prefix ? prefix.slice(1).match(/.{6}/g) : undefined,
      'box': signbox[0],
      'max': fsw2coord(signbox.slice(1, 8)),
      'spatials': spatials,
      'style': style
    };
  },
  /**
   * Function to parse an fsw text.
   * Collects every sign (with optional prefix) and every stand-alone spatial
   * symbol, each with its optional style string; other text is skipped.
   * @function fsw.parse.text
   * @param {string} fswText - an fsw text
   * @returns {string[]} fsw signs and punctuations
   * @example
   * fsw.parse.text('AS14c20S27106M518x529S14c20481x471S27106503x489 AS18701S1870aS2e734S20500M518x533S1870a489x515S18701482x490S20500508x496S2e734500x468 S38800464x496')
   *
   * return [
   *  'AS14c20S27106M518x529S14c20481x471S27106503x489',
   *  'AS18701S1870aS2e734S20500M518x533S1870a489x515S18701482x490S20500508x496S2e734500x468',
   *  'S38800464x496'
   * ]
   */
  text: fswText => {
    if (typeof fswText !== 'string') {
      return [];
    }
    const pattern = new RegExp(`(${re$4.sign}(${re$3.full})?|${re$4.spatial}(${re$3.full})?)`, 'g');
    return Array.from(fswText.match(pattern) || []);
  }
};
/**
 * Return the first match of `pattern` inside `text`, or an empty string when
 * `text` is not a string or nothing matches.
 * @private
 * @param {*} text - candidate text
 * @param {string|RegExp} pattern - pattern handed to String.prototype.match
 * @returns {string} the matched text or ''
 */
const firstMatch$3 = (text, pattern) => typeof text === 'string' ? (text.match(pattern) || [''])[0] : '';
/**
 * Render an [x, y] pair as an fsw coordinate string such as '480x480'.
 * The rendered text must be a coordinate in its entirety, so values such as
 * 1500 or -480 are rejected instead of being cut down to a different coordinate.
 * @private
 * @param {Array} pair - x and y values
 * @returns {string} the coordinate string, or '' when the pair is missing or invalid
 */
const composeCoord$3 = pair => {
  const x = (pair && pair[0] || '').toString();
  const y = (pair && pair[1] || '').toString();
  const text = `${x}x${y}`;
  return new RegExp(`^${re$4.coord}$`).test(text) ? text : '';
};
const compose$3 = {
  /**
   * Function to compose an fsw symbol with optional coordinate and style string
   * @function fsw.compose.symbol
   * @param {SymbolObject} fswSymObject - an fsw symbol object
   * @returns {string} an fsw symbol string, or undefined when no valid symbol key is supplied
   * @example
   * fsw.compose.symbol({
   *  'symbol': 'S10000',
   *  'coord': [480, 480],
   *  'style': '-C'
   * })
   *
   * return 'S10000480x480-C'
   */
  symbol: fswSymObject => {
    // A missing object is treated like a missing symbol key.
    if (!fswSymObject) {
      return undefined;
    }
    const symbol = firstMatch$3(fswSymObject.symbol, re$4.symbol);
    if (!symbol) {
      return undefined;
    }
    // Coordinate and style are optional; invalid values are left out.
    const coord = composeCoord$3(fswSymObject.coord);
    const styleStr = firstMatch$3(fswSymObject.style, re$3.full);
    return symbol + coord + styleStr;
  },
  /**
   * Function to compose an fsw sign with style string
   * @function fsw.compose.sign
   * @param {SignObject} fswSignObject - an fsw sign object
   * @returns {string} an fsw sign string, or undefined when the max coordinate is missing or invalid
   * @example
   * fsw.compose.sign({
   *  sequence: ['S10011', 'S10019', 'S2e704', 'S2e748'],
   *  box: 'M',
   *  max: [525, 535],
   *  spatials: [
   *    {
   *      symbol: 'S2e748',
   *      coord: [483, 510]
   *    },
   *    {
   *      symbol: 'S10011',
   *      coord: [501, 466]
   *    },
   *    {
   *      symbol: 'S2e704',
   *      coord: [510, 500]
   *    },
   *    {
   *      symbol: 'S10019',
   *      coord: [476, 475]
   *    }
   *  ],
   *  style: '-C'
   * })
   *
   * return 'AS10011S10019S2e704S2e748M525x535S2e748483x510S10011501x466S2e704510x500S10019476x475-C'
   */
  sign: fswSignObject => {
    if (!fswSignObject) {
      return undefined;
    }
    // The max coordinate is the only mandatory part of a signbox.
    const max = composeCoord$3(fswSignObject.max);
    if (!max) {
      return undefined;
    }
    // Appending 'M' guarantees a match, so an unrecognised box falls back to 'M'.
    const box = typeof fswSignObject.box === 'string' ? firstMatch$3(`${fswSignObject.box}M`, re$4.box) : 'M';
    let prefix = '';
    if (Array.isArray(fswSignObject.sequence)) {
      // Entries that are not valid symbol keys (including non-strings) are dropped.
      const keys = fswSignObject.sequence.map(key => firstMatch$3(key, re$4.nullorsymbol)).join('');
      prefix = keys ? `A${keys}` : '';
    }
    let signbox = '';
    if (Array.isArray(fswSignObject.spatials)) {
      // A spatial is kept only when both its symbol key and its coordinate are valid.
      signbox = fswSignObject.spatials.map(spatial => {
        if (!spatial) {
          return '';
        }
        const symbol = firstMatch$3(spatial.symbol, re$4.symbol);
        const coord = symbol ? composeCoord$3(spatial.coord) : '';
        return symbol && coord ? symbol + coord : '';
      }).join('');
    }
    const styleStr = firstMatch$3(fswSignObject.style, re$3.full);
    return prefix + box + max + signbox + styleStr;
  }
};
/**
 * Function to gather sizing information about an fsw sign or symbol
 *
 * The input is first read as a sign; when that yields no spatial symbols it is
 * read as a single symbol instead, and when that yields no coordinate either a
 * fixed 20 by 20 area starting at 490,490 is reported with segment 'none'.
 * @function fsw.info
 * @param {string} fsw - an fsw sign or symbol
 * @returns {SegmentInfo} information about the fsw string
 * @example
 * fsw.info('AS14c20S27106L518x529S14c20481x471S27106503x489-P10Z2')
 *
 * return {
 *   minX: 481,
 *   minY: 471,
 *   width: 37,
 *   height: 58,
 *   lane: -1,
 *   padding: 10,
 *   segment: 'sign',
 *   zoom: 2
 * }
 */
const info$1 = fsw => {
  // Lane number reported for each signbox marker.
  const laneByBox = {
    "B": 0,
    "L": -1,
    "M": 0,
    "R": 1
  };
  let parsed = parse$3.sign(fsw);
  let minX;
  let minY;
  let width;
  let height;
  let segment;
  let box;
  if (parsed.spatials) {
    // Sign: spans from the smallest spatial coordinate to the max coordinate.
    const xs = parsed.spatials.map(({ coord }) => coord[0]);
    const ys = parsed.spatials.map(({ coord }) => coord[1]);
    minX = Math.min(...xs);
    minY = Math.min(...ys);
    width = parsed.max[0] - minX;
    height = parsed.max[1] - minY;
    segment = 'sign';
    box = parsed.box;
  } else {
    parsed = parse$3.symbol(fsw);
    box = "M";
    if (parsed.coord) {
      // Symbol: sized as twice the distance from its coordinate to 500.
      minX = parsed.coord[0];
      minY = parsed.coord[1];
      width = (500 - minX) * 2;
      height = (500 - minY) * 2;
      segment = 'symbol';
    } else {
      minX = 490;
      minY = 490;
      width = 20;
      height = 20;
      segment = 'none';
    }
  }
  // Zoom defaults to 1 and padding to 0 when the style string does not set them.
  const style = parse$4(parsed.style);
  return {
    minX,
    minY,
    width,
    height,
    segment,
    lane: laneByBox[box],
    padding: style.padding || 0,
    zoom: style.zoom || 1
  };
};
/**
 * Special tokens used when the caller does not supply its own list.
 * Each entry carries the token's numeric index, its short name and the
 * literal value that appears in a token stream.
 * ```
 * DEFAULT_SPECIAL_TOKENS = [
 *   { index: 0, name: 'UNK', value: '[UNK]' },
 *   { index: 1, name: 'PAD', value: '[PAD]' },
 *   { index: 2, name: 'CLS', value: '[CLS]' },
 *   { index: 3, name: 'SEP', value: '[SEP]' }
 * ];
 * ```
 */
const DEFAULT_SPECIAL_TOKENS = [
  { index: 0, name: 'UNK', value: '[UNK]' },
  { index: 1, name: 'PAD', value: '[PAD]' },
  { index: 2, name: 'CLS', value: '[CLS]' },
  { index: 3, name: 'SEP', value: '[SEP]' }
];
/**
 * Builds the full list of regular (non-special) tokens for the FSW tokenizer.
 *
 * The list is ordered as: the sequence marker, the four signbox markers, the
 * null symbol base, every symbol base from S100 to S38b, the row tokens r0-rf,
 * the column tokens c0-c5 and the position tokens p250-p749.
 * @private
 * @function generateTokens
 * @returns {string[]} Array of all possible tokens
 */
const generateTokens = () => {
  // Every integer from `first` up to and including `last`.
  const inclusive = (first, last) => {
    const values = [];
    for (let value = first; value <= last; value++) {
      values.push(value);
    }
    return values;
  };
  const hex = value => value.toString(16);
  const sequenceMarker = ["A"];
  const boxMarkers = ["B", "L", "M", "R"];
  const nullBase = ["S000"];
  const symbolBases = inclusive(0x100, 0x38b).map(value => `S${hex(value)}`);
  const rowTokens = inclusive(0, 15).map(value => `r${hex(value)}`);
  const colTokens = inclusive(0, 5).map(value => `c${hex(value)}`);
  const positionTokens = inclusive(250, 749).map(value => `p${value}`);
  return sequenceMarker.concat(boxMarkers, nullBase, symbolBases, rowTokens, colTokens, positionTokens);
};
/**
 * Creates lookup tables and accessors for a list of special tokens.
 *
 * The accessors fall back to the 'UNK' token when a key is unknown. Lookups
 * only consider tokens that were actually registered, so keys such as
 * 'toString' or 'constructor' are treated as unknown. When no 'UNK' token is
 * configured the fallback is undefined.
 * @private
 * @function createSpecialTokenMappings
 * @param {Array} specialTokens - Array of special token objects ({ index, name, value })
 * @returns {Object} Special token mappings: byIndex, byName, byValue tables plus
 * getByIndex, getByName, getByValue, getAllValues and getAllIndices accessors
 * @throws {Error} when two tokens share the same index
 */
const createSpecialTokenMappings = specialTokens => {
  const byIndex = {};
  const byName = {};
  const byValue = {};
  const indices = new Set();
  specialTokens.forEach(token => {
    if (indices.has(token.index)) {
      throw new Error(`Duplicate token index: ${token.index}`);
    }
    indices.add(token.index);
    byIndex[token.index] = token;
    byName[token.name] = token;
    byValue[token.value] = token;
  });
  // Read only own entries so members inherited from Object.prototype are never returned.
  const lookup = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
  // Fallback for index lookups: the first token named 'UNK', if there is one.
  const unknownByIndex = () => {
    const unk = specialTokens.find(t => t.name === 'UNK');
    return unk ? lookup(byIndex, unk.index) : undefined;
  };
  return {
    byIndex,
    byName,
    byValue,
    getByIndex: index => lookup(byIndex, index) || unknownByIndex(),
    getByName: name => lookup(byName, name) || lookup(byName, 'UNK'),
    getByValue: value => lookup(byValue, value) || lookup(byName, 'UNK'),
    getAllValues: () => specialTokens.map(t => t.value),
    getAllIndices: () => specialTokens.map(t => t.index)
  };
};
/**
 * Builds the two lookup tables used to convert between tokens and indices.
 *
 * Special tokens are registered first under their own indices; regular tokens
 * follow, numbered consecutively from `startingIndex` in array order.
 * @private
 * @function createTokenMappings
 * @param {string[]} tokens - Array of tokens to map
 * @param {Object} specialTokenMappings - Special tokens mapping object (its `byIndex` table is read)
 * @param {number} startingIndex - Starting index for regular tokens
 * @returns {Object} Object containing i2s (index to string) and s2i (string to index) mappings
 */
const createTokenMappings = (tokens, specialTokenMappings, startingIndex) => {
  const i2s = {};
  const s2i = {};
  // Record one token in both directions.
  const register = (index, text) => {
    i2s[index] = text;
    s2i[text] = index;
  };
  for (const special of Object.values(specialTokenMappings.byIndex)) {
    register(special.index, special.value);
  }
  tokens.forEach((text, offset) => {
    register(startingIndex + offset, text);
  });
  return { i2s, s2i };
};
/**
 * Tokenizes an FSW string into an array of tokens
 *
 * Every sign or lone symbol found in the text is tokenized in turn. A symbol
 * key becomes three tokens (base, column, row) and a coordinate becomes two
 * position tokens. A lone symbol is emitted as an 'M' signbox whose max
 * coordinate is the symbol coordinate subtracted from 1000, or as a one symbol
 * sequence when signbox tokens are switched off.
 * @function fsw.tokenize
 * @param {string} fsw - FSW string to tokenize
 * @param {Object} [options] - Tokenization options
 * @param {boolean} [options.sequence=true] - Whether to include sequence tokens
 * @param {boolean} [options.signbox=true] - Whether to include signbox tokens
 * @param {string} [options.sep="[SEP]"] - Separator token appended after each segment; a falsy value disables it
 * @returns {string[]} Array of tokens
 * @example
 * fsw.tokenize("AS10e00M507x515S10e00492x485",{sequence:false,sep:null})
 *
 * return [
 *   'M', 'p507', 'p515','S10e', 'c0', 'r0', 'p492', 'p485'
 * ]
 */
const tokenize = (fsw, {
  sequence = true,
  signbox = true,
  sep = "[SEP]"
} = {}) => {
  const tokenizeSymbol = symbol => [symbol.slice(0, 4), `c${symbol.charAt(4)}`, `r${symbol.charAt(5)}`];
  const tokenizeCoord = coord => coord.map(p => `p${p}`);
  return parse$3.text(fsw).flatMap(fswSegment => {
    // Classify the segment by what the sign parser matched rather than by
    // searching the whole segment for a box letter, which could also occur
    // after the signbox (NOTE(review): the style grammar is defined outside
    // this section - confirm whether it can contain B, L, M or R).
    const sign = parse$3.sign(fswSegment);
    if (sign.box) {
      const tokens = [];
      if (sequence && sign.sequence) {
        tokens.push("A", ...sign.sequence.flatMap(seqItem => tokenizeSymbol(seqItem)));
      }
      if (signbox) {
        // A signbox without symbols is reported with undefined spatials.
        const spatials = sign.spatials || [];
        tokens.push(sign.box, ...tokenizeCoord(sign.max), ...spatials.flatMap(spatial => [...tokenizeSymbol(spatial.symbol), ...tokenizeCoord(spatial.coord)]));
      }
      return sep ? [...tokens, sep] : tokens;
    }
    // Lone symbol: nothing to emit when both token groups are switched off.
    if (!signbox && !sequence) {
      return [];
    }
    const parsed = parse$3.symbol(fswSegment);
    const symbolTokens = tokenizeSymbol(parsed.symbol);
    const tokens = signbox
      ? ["M", ...tokenizeCoord(parsed.coord.map(c => 1000 - c)), ...symbolTokens, ...tokenizeCoord(parsed.coord)]
      : ["A", ...symbolTokens];
    return sep ? [...tokens, sep] : tokens;
  });
};
/**
 * Converts an array of tokens back into an FSW string
 *
 * Special tokens are removed, the remaining tokens are joined with spaces and
 * a fixed series of text rewrites is applied in order to rebuild coordinates,
 * symbol keys and the boundaries between signs.
 * @function fsw.detokenize
 * @param {string[]} tokens - Array of tokens to convert
 * @param {Array} specialTokens - Array of special token objects to filter out
 * @returns {string} FSW string
 * @example
 * fsw.detokenize(['M', 'p507', 'p515','S10e', 'c0', 'r0', 'p492', 'p485'])
 *
 * return "M507x515S10e00492x485"
 */
const detokenize = (tokens, specialTokens = DEFAULT_SPECIAL_TOKENS) => {
  const ignored = new Set(specialTokens.map(special => special.value));
  // Ordered [pattern, replacement] pairs; each one runs on the output of the previous.
  const rewrites = [
    // two position tokens -> 'XXXxYYY'
    [/\bp(\d{3})\s+p(\d{3})/g, '$1x$2'],
    // column + row tokens -> the two trailing characters of a symbol key
    [/ c(\d)\d? r(.)/g, '$1$2'],
    // column without a row: with a single capture group '$10' is group 1 followed by a literal 0
    [/ c(\d)\d?/g, '$10'],
    // row without a column
    [/ r(.)/g, '0$1'],
    // drop every remaining space
    [/ /g, ''],
    // re-insert a space before a signbox marker that follows a digit
    [/(\d)([BLMR])/g, '$1 $2'],
    // re-insert a space before a sequence that follows a digit
    [/(\d)(AS)/g, '$1 $2'],
    // a sequence stays attached to the signbox that follows it
    [/(A(?:S00000|S[123][0-9a-f]{2}[0-5][0-9a-f])+)( )([BLMR])/g, '$1$3']
  ];
  const spaced = tokens.filter(token => !ignored.has(token)).join(' ');
  return rewrites.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), spaced);
};
/**
 * Splits tokens into chunks of specified size while preserving sign boundaries
 *
 * Every chunk starts with the CLS token, holds as many whole signs as fit (a
 * sign runs up to and including its SEP token) and is filled up to `chunkSize`
 * with PAD tokens. One slot per chunk is kept free, so a chunk always ends with
 * at least one PAD token. A final sign that is not terminated by SEP is copied
 * as it is, without adding a separator.
 * @function fsw.chunkTokens
 * @param {string[]} tokens - Array of tokens to chunk
 * @param {number} chunkSize - Maximum size of each chunk
 * @param {Object} [options] - Chunking options
 * @param {string} [options.cls="[CLS]"] - CLS token
 * @param {string} [options.sep="[SEP]"] - SEP token
 * @param {string} [options.pad="[PAD]"] - PAD token
 * @returns {string[][]} Array of token chunks
 * @throws {Error} when chunkSize is below 60, or when a single sign is too
 * large to fit in an otherwise empty chunk
 */
const chunkTokens = (tokens, chunkSize, {
  cls = "[CLS]",
  sep = "[SEP]",
  pad = "[PAD]"
} = {}) => {
  if (chunkSize < 60) {
    throw new Error('Chunk size must be at least 60 tokens to accommodate a typical sign');
  }
  const chunks = [];
  let tokenIndex = 0;
  while (tokenIndex < tokens.length) {
    const currentChunk = [cls];
    while (tokenIndex < tokens.length) {
      // The next sign ends just after its separator, or at the end of the
      // input when the last sign has no separator.
      const sepIndex = tokens.indexOf(sep, tokenIndex);
      const signEnd = sepIndex === -1 ? tokens.length : sepIndex + 1;
      const signSize = signEnd - tokenIndex;
      if (currentChunk.length + signSize > chunkSize - 1) {
        // A sign that does not even fit next to a lone CLS token can never be
        // placed; starting another chunk would repeat forever.
        if (currentChunk.length === 1) {
          throw new Error(`Sign of ${signSize} tokens does not fit in a chunk of ${chunkSize} tokens`);
        }
        break;
      }
      currentChunk.push(...tokens.slice(tokenIndex, signEnd));
      tokenIndex = signEnd;
    }
    while (currentChunk.length < chunkSize) {
      currentChunk.push(pad);
    }
    chunks.push(currentChunk);
  }
  return chunks;
};
/**
* Creates a tokenizer object with encoding and decoding capabilities
* @function fsw.createTokenizer
* @param {Array} [specialTokens] - Array of special token objects (defaults to DEFAULT_SPECIAL_TOKENS)
* @param {number} [startingIndex] - Starting index for regular tokens
* @returns {TokenizerObject} Tokenizer object
* @example
* const t = fsw.createTokenizer()
*
* t.encode('M507x515S10e00492x485')
*
* return [7, 941, 949, 24, 678, 662, 926, 919, 3]
*/
const createTokenizer = (specialTokens = DEFAULT_SPECIAL_TOKENS, startingIndex = null) => {
const specialTokenMappings = createSpecialTokenMappings(specialTokens);
const calculatedStartingIndex = startingIndex ?? (specialTokenMappings.getAllIndices().length > 0 ? Math.max(...specialTokenMappings.getAllIndices())