/*
 * nehan
 * Version: (not specified)
 * HTML layout engine for paged media, written in TypeScript.
 * Listing metadata: 158 lines, 5.01 kB, JavaScript.
 */
import { Utils, Lexer, Config, Char, Word, RefChar, SpaceChar, HalfChar, SmpUniChar, MixChar, DualChar, DualCharTable, Tcy, } from "./public-api";
/**
 * Lexer that splits a text source into layout tokens: Word, SpaceChar,
 * RefChar, HalfChar, SmpUniChar, MixChar, DualChar and Char.
 *
 * It relies on members inherited from Lexer (`tokens`, `buff`, `stepBuff`,
 * `hasNextBuff`, `peekChar`, `getChar`), which are defined outside this file.
 */
export class TextLexer extends Lexer {
  /**
   * @returns {boolean} true when at least one entry of `this.tokens` is a Word.
   */
  hasWord() {
    return this.tokens.some((token) => token instanceof Word);
  }

  /**
   * Prepares the raw source text before tokenizing.
   *
   * Numeric character references (`&#xHH;` and `&#DD;`) are replaced by the
   * characters they denote. Unless `args.isPre` is truthy, leading and
   * trailing newlines are removed and the result is passed through
   * `Utils.String.multiSpaceToSingle2`.
   *
   * @param {string} src - raw source text.
   * @param {{isPre: boolean}} args - `isPre` disables newline trimming and
   *   space collapsing.
   * @returns {string} the normalized text.
   */
  normalize(src, args) {
    // Hex and decimal references are decoded in ONE pass so that text produced
    // by decoding is never decoded a second time: "&#x26;#65;" must yield the
    // literal text "&#65;", not "A".
    const decoded = src.replace(/&#(?:x([0-9A-F]+)|([0-9]+));/gi, (match, hex, dec) => {
      const codePoint = hex !== undefined ? Utils.atoi(hex, 16) : Utils.atoi(dec, 10);
      // String.fromCodePoint throws RangeError outside 0..0x10FFFF; keep such
      // references verbatim instead of aborting the whole lexing run.
      if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    });
    if (args.isPre) {
      return decoded;
    }
    const trimmed = decoded.replace(/^\n+/, "").replace(/\n+$/, "");
    return Utils.String.multiSpaceToSingle2(trimmed);
  }

  /**
   * Consumes `RefChar.softHyphen` when the buffer starts with it.
   *
   * @returns {?string} `RefChar.softHyphen`, or null when the buffer does not
   *   start with it (nothing is consumed in that case).
   */
  getShy() {
    // Only the buffer prefix matters, so avoid scanning the whole buffer.
    if (!this.buff.startsWith(RefChar.softHyphen)) {
      return null;
    }
    this.stepBuff(RefChar.softHyphen.length);
    return RefChar.softHyphen;
  }

  /**
   * Accumulates consecutive `Config.rexWord` matches and soft hyphens from the
   * buffer into a single Word.
   *
   * @returns {?Word} the Word, or null when nothing was accumulated.
   */
  getWord() {
    let text = "";
    while (this.hasNextBuff()) {
      const wordMatch = Config.rexWord.exec(this.buff);
      if (wordMatch) {
        text += wordMatch[0];
        this.stepBuff(wordMatch[0].length);
        continue;
      }
      const shy = this.getShy();
      if (!shy) {
        break;
      }
      text += shy;
    }
    return text === "" ? null : new Word(text);
  }

  /**
   * Reads a space token, trying `Config.rexSpaceCharRef` first (converted via
   * `SpaceChar.charRefToStr`) and then `Config.rexSpace`.
   *
   * @returns {?SpaceChar} the token, or null when neither pattern matches.
   */
  getSpaceChar() {
    const refMatch = Config.rexSpaceCharRef.exec(this.buff);
    if (refMatch) {
      const charRef = refMatch[0];
      this.stepBuff(charRef.length);
      return new SpaceChar(SpaceChar.charRefToStr(charRef));
    }
    const spaceMatch = Config.rexSpace.exec(this.buff);
    if (spaceMatch) {
      const space = spaceMatch[0];
      this.stepBuff(space.length);
      return new SpaceChar(space);
    }
    return null;
  }

  /**
   * Reads a token matched by `Config.rexRefChar`.
   *
   * @returns {?RefChar} the token, or null when the pattern does not match.
   */
  getRefChar() {
    const refMatch = Config.rexRefChar.exec(this.buff);
    if (!refMatch) {
      return null;
    }
    const ref = refMatch[0];
    this.stepBuff(ref.length);
    return new RefChar(ref);
  }

  /**
   * Reads a token matched by `Config.rexHalfChar`.
   *
   * @returns {?HalfChar} the token, or null when the pattern does not match.
   */
  getHalfChar() {
    const halfMatch = Config.rexHalfChar.exec(this.buff);
    if (!halfMatch) {
      return null;
    }
    const half = halfMatch[0];
    this.stepBuff(half.length);
    return new HalfChar(half);
  }

  /**
   * Reads a UTF-16 surrogate pair (high surrogate followed by low surrogate)
   * from the start of the buffer as one token.
   *
   * @returns {?SmpUniChar} the token, or null when the buffer does not start
   *   with a complete surrogate pair.
   */
  getSmpUniChar() {
    const lead = this.buff.charCodeAt(0);
    if (lead < 0xd800 || lead > 0xdbff) {
      return null;
    }
    // charCodeAt yields NaN past the end of the buffer, which fails both
    // comparisons below, so a lone high surrogate is rejected.
    const trail = this.buff.charCodeAt(1);
    if (trail >= 0xdc00 && trail <= 0xdfff) {
      const pair = this.buff.substring(0, 2);
      this.stepBuff(2);
      return new SmpUniChar(pair);
    }
    return null;
  }

  /**
   * Combines `c1` with a following `Config.rexVoicedMark` match into a MixChar.
   *
   * @param {string} c1 - the character already taken from the buffer.
   * @returns {?MixChar} the combined token, or null when no mark follows.
   */
  getMixChar(c1) {
    const markMatch = Config.rexVoicedMark.exec(this.buff);
    if (!markMatch) {
      return null;
    }
    const mark = markMatch[0];
    this.stepBuff(mark.length);
    return new MixChar(c1 + mark);
  }

  /**
   * Builds a DualChar when `DualCharTable.load` has an entry for `c1`.
   * Does not consume anything from the buffer.
   *
   * @param {string} c1 - the character to look up.
   * @returns {?DualChar} the token, or null when the table has no entry.
   */
  getDualChar(c1) {
    const info = DualCharTable.load(c1);
    return info ? new DualChar(c1, info) : null;
  }

  /**
   * Produces the next token from the buffer. The order of attempts is
   * significant: dual char (peeked), word, space, character reference, half
   * char, surrogate pair, then a single char optionally combined with a
   * voiced mark.
   *
   * @returns {DualChar|Word|SpaceChar|RefChar|HalfChar|SmpUniChar|MixChar|Char}
   *   the next token.
   */
  createToken() {
    const peeked = this.peekChar();
    const peekedDual = this.getDualChar(peeked);
    if (peekedDual !== null) {
      // getDualChar does not consume, so advance past the peeked char here.
      this.stepBuff(1);
      return peekedDual;
    }
    const word = this.getWord();
    if (word !== null) {
      return word;
    }
    const spaceChar = this.getSpaceChar();
    if (spaceChar !== null) {
      return spaceChar;
    }
    const refChar = this.getRefChar();
    if (refChar !== null) {
      return refChar;
    }
    const halfChar = this.getHalfChar();
    if (halfChar !== null) {
      return halfChar;
    }
    const smpUniChar = this.getSmpUniChar();
    if (smpUniChar !== null) {
      return smpUniChar;
    }
    const c1 = this.getChar();
    const mixChar = this.getMixChar(c1);
    if (mixChar !== null) {
      return mixChar;
    }
    // NOTE(review): if getChar() returns the same char peekChar() returned
    // above, this lookup was already tried and cannot succeed — confirm
    // against Lexer before removing.
    const dualChar = this.getDualChar(c1);
    if (dualChar !== null) {
      return dualChar;
    }
    return new Char(c1);
  }
}
/**
 * Lexer variant that turns its entire source into a single Tcy token.
 */
export class TcyLexer extends TextLexer {
  /**
   * Wraps the whole of `this.src` in one Tcy token and advances the buffer
   * by the full source length.
   *
   * @returns {Tcy} the token built from `this.src`.
   */
  createToken() {
    const { src } = this;
    const token = new Tcy(src);
    this.stepBuff(src.length);
    return token;
  }
}
//# sourceMappingURL=text-lexer.js.map