UNPKG

nehan

Version:

HTML layout engine for paged media, written in TypeScript

158 lines 5.01 kB
import { Utils, Lexer, Config, Char, Word, RefChar, SpaceChar, HalfChar, SmpUniChar, MixChar, DualChar, DualCharTable, Tcy, } from "./public-api";

/**
 * Lexer that splits text into character-level tokens: Word, SpaceChar,
 * RefChar, HalfChar, SmpUniChar, MixChar, DualChar and Char.
 *
 * Relies on members that are not defined in this file (`tokens`, `buff`,
 * `src`, `stepBuff`, `hasNextBuff`, `peekChar`, `getChar`) — presumably
 * inherited from `Lexer`; confirm against the base class.
 */
export class TextLexer extends Lexer {
  /**
   * @returns {boolean} true when at least one token in `this.tokens` is a Word.
   */
  hasWord() {
    return this.tokens.some((token) => token instanceof Word);
  }

  /**
   * Decodes numeric character references and, unless `args.isPre` is set,
   * trims leading/trailing newlines and collapses whitespace via
   * `Utils.String.multiSpaceToSingle2`.
   *
   * @param {string} src - raw source text.
   * @param {{isPre: boolean}} args - `isPre` truthy skips the whitespace cleanup.
   * @returns {string} the normalized text.
   */
  normalize(src, args) {
    // Hex and decimal references are decoded in ONE pass. Decoding them in two
    // chained passes would decode twice: "&#x26;#65;" -> "&#65;" -> "A".
    let normSrc = src.replace(/&#(?:x([0-9A-F]+)|([0-9]+));/gi, (match, hex, dec) => {
      const codePoint = hex !== undefined ? Utils.atoi(hex, 16) : Utils.atoi(dec, 10);
      // String.fromCodePoint throws RangeError outside [0, 0x10FFFF]; emit the
      // replacement character instead of aborting the whole lexing run.
      if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
        return "\uFFFD";
      }
      return String.fromCodePoint(codePoint);
    });
    if (!args.isPre) {
      normSrc = normSrc.replace(/^\n+/, "");
      normSrc = normSrc.replace(/\n+$/, "");
      normSrc = Utils.String.multiSpaceToSingle2(normSrc);
    }
    return normSrc;
  }

  /**
   * Consumes `RefChar.softHyphen` when the buffer starts with it.
   *
   * @returns {string|null} the soft hyphen string, or null if the buffer does
   *   not start with it (nothing is consumed in that case).
   */
  getShy() {
    // startsWith only inspects the head of the buffer; indexOf(...) !== 0 gives
    // the same answer but scans the entire remaining buffer on a miss.
    if (!this.buff.startsWith(RefChar.softHyphen)) {
      return null;
    }
    this.stepBuff(RefChar.softHyphen.length);
    return RefChar.softHyphen;
  }

  /**
   * Accumulates consecutive `Config.rexWord` matches and soft hyphens into a
   * single Word.
   *
   * @returns {Word|null} the word token, or null if nothing was accumulated.
   */
  getWord() {
    let word = "";
    while (this.hasNextBuff()) {
      const wordMatch = Config.rexWord.exec(this.buff);
      if (wordMatch) {
        word += wordMatch[0];
        this.stepBuff(wordMatch[0].length);
        continue;
      }
      // A soft hyphen does not end the word; it is kept inside the word text.
      const shy = this.getShy();
      if (shy) {
        word += shy;
        continue;
      }
      break;
    }
    if (word === "") {
      return null;
    }
    return new Word(word);
  }

  /**
   * Reads a space token, trying `Config.rexSpaceCharRef` first (converted with
   * `SpaceChar.charRefToStr`) and then `Config.rexSpace`.
   *
   * @returns {SpaceChar|null} the space token, or null if neither pattern matched.
   */
  getSpaceChar() {
    const spaceCharRefMatch = Config.rexSpaceCharRef.exec(this.buff);
    if (spaceCharRefMatch) {
      const spaceCharRef = spaceCharRefMatch[0];
      this.stepBuff(spaceCharRef.length);
      return new SpaceChar(SpaceChar.charRefToStr(spaceCharRef));
    }
    const spaceCharMatch = Config.rexSpace.exec(this.buff);
    if (spaceCharMatch) {
      const spaceChar = spaceCharMatch[0];
      this.stepBuff(spaceChar.length);
      return new SpaceChar(spaceChar);
    }
    return null;
  }

  /**
   * Reads a token matched by `Config.rexRefChar`.
   *
   * @returns {RefChar|null} the token, or null if the pattern did not match.
   */
  getRefChar() {
    const refCharMatch = Config.rexRefChar.exec(this.buff);
    if (!refCharMatch) {
      return null;
    }
    const refChar = refCharMatch[0];
    this.stepBuff(refChar.length);
    return new RefChar(refChar);
  }

  /**
   * Reads a token matched by `Config.rexHalfChar`.
   *
   * @returns {HalfChar|null} the token, or null if the pattern did not match.
   */
  getHalfChar() {
    const halfCharMatch = Config.rexHalfChar.exec(this.buff);
    if (!halfCharMatch) {
      return null;
    }
    const halfChar = halfCharMatch[0];
    this.stepBuff(halfChar.length);
    return new HalfChar(halfChar);
  }

  /**
   * Reads a UTF-16 surrogate pair (high surrogate followed by low surrogate)
   * from the head of the buffer.
   *
   * @returns {SmpUniChar|null} the two-code-unit token, or null if the buffer
   *   does not start with a complete surrogate pair.
   */
  getSmpUniChar() {
    // charCodeAt yields NaN past the end of the buffer; every range comparison
    // against NaN is false, so short or empty buffers fall through to null.
    const lead = this.buff.charCodeAt(0);
    const isHighSurrogate = 0xd800 <= lead && lead <= 0xdbff;
    if (!isHighSurrogate) {
      return null;
    }
    const trail = this.buff.charCodeAt(1);
    const isLowSurrogate = 0xdc00 <= trail && trail <= 0xdfff;
    if (!isLowSurrogate) {
      return null;
    }
    const pair = this.buff.substring(0, 2);
    this.stepBuff(2);
    return new SmpUniChar(pair);
  }

  /**
   * Combines `c1` with a following `Config.rexVoicedMark` match.
   *
   * @param {string} c1 - character that has already been taken from the buffer.
   * @returns {MixChar|null} the combined token, or null if no voiced mark matched.
   */
  getMixChar(c1) {
    const voicedMarkMatch = Config.rexVoicedMark.exec(this.buff);
    if (!voicedMarkMatch) {
      return null;
    }
    const voicedMark = voicedMarkMatch[0];
    this.stepBuff(voicedMark.length);
    return new MixChar(c1 + voicedMark);
  }

  /**
   * Looks `c1` up in `DualCharTable`. Does not consume anything from the buffer.
   *
   * @param {string} c1 - character to look up.
   * @returns {DualChar|null} the token, or null if the table has no entry.
   */
  getDualChar(c1) {
    const info = DualCharTable.load(c1);
    if (!info) {
      return null;
    }
    return new DualChar(c1, info);
  }

  /**
   * Produces the next token. Recognizers are tried in a fixed order and the
   * first one that yields a token wins; a plain Char is the fallback.
   *
   * @returns {DualChar|Word|SpaceChar|RefChar|HalfChar|SmpUniChar|MixChar|Char}
   */
  createToken() {
    // Dual-char lookup runs on the peeked character before the word matcher;
    // getDualChar consumes nothing, so the buffer is advanced here on a hit.
    const p1 = this.peekChar();
    const halfDualChar = this.getDualChar(p1);
    if (halfDualChar !== null) {
      this.stepBuff(1);
      return halfDualChar;
    }
    const word = this.getWord();
    if (word !== null) {
      return word;
    }
    const spaceChar = this.getSpaceChar();
    if (spaceChar !== null) {
      return spaceChar;
    }
    const refChar = this.getRefChar();
    if (refChar !== null) {
      return refChar;
    }
    const halfChar = this.getHalfChar();
    if (halfChar !== null) {
      return halfChar;
    }
    const smpUniChar = this.getSmpUniChar();
    if (smpUniChar !== null) {
      return smpUniChar;
    }
    const c1 = this.getChar();
    const mixChar = this.getMixChar(c1);
    if (mixChar !== null) {
      return mixChar;
    }
    // NOTE(review): this lookup appears redundant if getChar() returns the same
    // character peekChar() returned above — confirm against Lexer before removing.
    const dualChar = this.getDualChar(c1);
    if (dualChar !== null) {
      return dualChar;
    }
    return new Char(c1);
  }
}

/**
 * Lexer that emits its entire source as a single Tcy token.
 */
export class TcyLexer extends TextLexer {
  /**
   * Wraps the whole of `this.src` in one Tcy token and advances the buffer by
   * the source length.
   *
   * @returns {Tcy} the token covering the full source.
   */
  createToken() {
    const tcy = new Tcy(this.src);
    this.stepBuff(this.src.length);
    return tcy;
  }
}
//# sourceMappingURL=text-lexer.js.map