@readium/shared
Version:
Shared models to be used across other Readium projects and implementations in Typescript
146 lines (136 loc) • 4.03 kB
text/typescript
import { LocatorText } from "../../../Locator";
// Upper-cased tag names that isInlineTag() reports as inline.
// List taken from jsoup:
// https://github.com/jhy/jsoup/blob/0b10d516ed8f907f8fb4acb9a0806137a8988d45/src/main/java/org/jsoup/parser/Tag.java#L243
const inlineTags = [
  "OBJECT", "BASE", "FONT", "TT", "I", "B", "U", "BIG",
  "SMALL", "EM", "STRONG", "DFN", "CODE", "SAMP", "KBD", "VAR",
  "CITE", "ABBR", "TIME", "ACRONYM", "MARK", "RUBY", "RT", "RP",
  "RTC", "A", "IMG", "BR", "WBR", "MAP", "Q", "SUB",
  "SUP", "BDO", "IFRAME", "EMBED", "SPAN", "INPUT", "SELECT", "TEXTAREA",
  "LABEL", "BUTTON", "OPTGROUP", "OPTION", "LEGEND", "DATALIST", "KEYGEN", "OUTPUT",
  "PROGRESS", "METER", "AREA", "PARAM", "SOURCE", "TRACK", "SUMMARY", "COMMAND",
  "DEVICE", "BASEFONT", "BGSOUND", "MENUITEM", "DATA", "BDI", "S", "STRIKE",
  "NOBR", "RB",
];
/**
 * Tells whether a node's name is one of the entries in `inlineTags`.
 *
 * @param n Node to inspect; its `nodeName` is upper-cased before the lookup,
 *          so the comparison is case-insensitive.
 * @returns `true` when the upper-cased node name is listed in `inlineTags`.
 */
export function isInlineTag(n: Node) {
  const upperCasedName = n.nodeName.toUpperCase();
  return inlineTags.includes(upperCasedName);
}
/**
 * Resolves an element's `src` attribute against an optional base URL.
 *
 * @param e Element whose `src` attribute is read.
 * @param base Optional base passed as the second argument to the `URL` constructor.
 * @returns `null` when the attribute is missing or empty; the serialized
 *          resolved URL when `new URL(src, base)` succeeds; otherwise the raw
 *          attribute value unchanged.
 */
export function srcRelativeToHref(e: Element, base?: string | URL): string | null {
  const rawSrc = e.getAttribute("src");
  if (!rawSrc) return null;

  let resolved: string;
  try {
    // TODO use readium util Href class
    resolved = new URL(rawSrc, base).toString();
  } catch (error) {
    // Temporary fallback until the Href utility is implemented:
    // hand the attribute value back untouched when it cannot be parsed.
    resolved = rawSrc;
  }
  return resolved;
}
// Whitespace character class, inspired by golang's unicode.IsSpace:
// everything `\s` matches, plus U+0085, U+00A0, U+2000 to U+200A, U+2028,
// U+2029, U+202F, U+205F and U+3000 listed explicitly.
const unicodeSpaceRange = `[\\s\\u0085\\u00A0\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]`;
// The patterns used with .test()/.exec() deliberately have NO "g" flag: a global
// RegExp remembers `lastIndex` between calls, which made consecutive calls on
// these shared instances give alternating results.
// Matches a string made up exclusively of whitespace characters.
const allUnicodeSpace = new RegExp(`^${unicodeSpaceRange}+$`);
// Matches the leading and the trailing whitespace run. "g" is required so that
// String.prototype.replace removes both runs; replace() resets `lastIndex`
// itself, so the flag is safe for that use.
const unicodeSpaceTrim = new RegExp(`^${unicodeSpaceRange}+|${unicodeSpaceRange}+$`, "g");
// Matches the leading whitespace run only (at most one match per string).
const unicodeSpaceStart = new RegExp(`^${unicodeSpaceRange}+`);
// Matches the trailing whitespace run only (at most one match per string).
const unicodeSpaceEnd = new RegExp(`${unicodeSpaceRange}+$`);
/**
 * Blank check for a possibly-null string.
 *
 * @param s String to examine, or `null`.
 * @returns `true` for `null` or the empty string; otherwise the result of
 *          testing `s` against `allUnicodeSpace`.
 */
export const isBlank = (s: string | null) => {
  if (!s) return true;
  // Intended whitespace set: normal spaces, \t, \n, \v, \f, \r, U+0085, U+00A0,
  // U+2000 to U+200A, U+2028, U+2029, U+202F, U+205F, U+3000
  return allUnicodeSpace.test(s);
};
/**
 * Modelled on Go's strings.TrimSpace: returns `s` with every match of
 * `unicodeSpaceTrim` removed.
 */
export const trimUnicodeSpace = (s: string) => {
  return s.replace(unicodeSpaceTrim, "");
};
/** Returns `s` with the match(es) of `unicodeSpaceStart` removed. */
export const trimUnicodeSpaceStart = (s: string) => {
  return s.replace(unicodeSpaceStart, "");
};
/** Returns `s` with the match(es) of `unicodeSpaceEnd` removed. */
export const trimUnicodeSpaceEnd = (s: string) => {
  return s.replace(unicodeSpaceEnd, "");
};
/**
 * Builds a LocatorText from `text`, moving its surrounding whitespace out of
 * the highlight.
 *
 * @param text Text to split.
 * @param before Prefix placed in front of the leading whitespace in the
 *               `before` field; defaults to the empty string.
 * @returns A LocatorText whose `before` is `before` plus whatever
 *          `unicodeSpaceStart` matches in `text`, whose `highlight` is
 *          `trimUnicodeSpace(text)`, and whose `after` is whatever
 *          `unicodeSpaceEnd` matches in `text` (empty string when no match).
 */
export const trimmingTextLocator = (text: string, before: string = ""): LocatorText => {
  const leadingMatch = unicodeSpaceStart.exec(text);
  const leadingSpace = leadingMatch?.[0] ?? "";
  const highlight = trimUnicodeSpace(text);
  const trailingMatch = unicodeSpaceEnd.exec(text);
  const trailingSpace = trailingMatch?.[0] ?? "";
  return new LocatorText({
    before: before + leadingSpace,
    highlight,
    after: trailingSpace,
  });
};
/**
 * Finds the language declared on an element or on its closest ancestor.
 *
 * For each element, starting at `e` and walking up through `parentElement`,
 * the following are tried in order and the first non-empty value wins:
 * the plain `lang` attribute, `lang` in the XHTML namespace, then `lang` in
 * the XML namespace (`xml:lang`).
 *
 * @param e Starting element, or `null`.
 * @returns The first non-empty language value found, or `null` when the chain
 *          of ancestors is exhausted without finding one.
 */
export function elementLanguage(e: HTMLElement | null) {
  for (let current: HTMLElement | null = e; current; current = current.parentElement) {
    const plainLang = current.getAttribute("lang");
    if (plainLang) return plainLang;

    const xhtmlLang = current.getAttributeNS("http://www.w3.org/1999/xhtml", "lang");
    if (xhtmlLang) return xhtmlLang;

    const xmlLang = current.getAttributeNS("http://www.w3.org/XML/1998/namespace", "lang");
    if (xmlLang) return xmlLang;
  }
  return null;
}
// Inspired by JSoup: https://github.com/jhy/jsoup/blob/1762412a28fa7b08ccf71d93fc4c98dc73086e03/src/main/java/org/jsoup/internal/StringUtil.java#L233
// Slightly differing definition of what a whitespace character is
/**
 * Appends `text` to `accum` while normalizing whitespace.
 *
 * Each character is classified with `isBlank`. A run of blank characters
 * contributes a single " "; blank characters at the start of `text` are
 * skipped entirely when `accum` already ends with a space. Zero-width spaces
 * (U+200B) and soft hyphens (U+00AD) are dropped without affecting the
 * run tracking.
 *
 * @param accum Text accumulated so far.
 * @param text Text to append.
 * @returns `accum` followed by the normalized form of `text`.
 */
export function appendNormalizedWhitespace(accum: string, text: string): string {
  const ZERO_WIDTH_SPACE = 8203;
  const SOFT_HYPHEN = 173;

  // When the accumulator already ends in a space, leading blanks of `text`
  // would only duplicate it.
  const skipLeadingBlanks = accum.endsWith(" ");
  let result = accum;
  let previousWasBlank = false;
  let sawVisibleChar = false;

  for (let idx = 0; idx < text.length; idx += 1) {
    const ch = text.charAt(idx);

    if (!isBlank(ch)) {
      const code = text.charCodeAt(idx);
      if (code === ZERO_WIDTH_SPACE || code === SOFT_HYPHEN) {
        continue;
      }
      result += ch;
      previousWasBlank = false;
      sawVisibleChar = true;
      continue;
    }

    const suppressed = previousWasBlank || (skipLeadingBlanks && !sawVisibleChar);
    if (!suppressed) {
      result += " ";
      previousWasBlank = true;
    }
  }

  return result;
}