UNPKG

foliate-js

Version:
1,079 lines (1,015 loc) 43.3 kB
import * as CFI from './epubcfi.js' const NS = { CONTAINER: 'urn:oasis:names:tc:opendocument:xmlns:container', XHTML: 'http://www.w3.org/1999/xhtml', OPF: 'http://www.idpf.org/2007/opf', EPUB: 'http://www.idpf.org/2007/ops', DC: 'http://purl.org/dc/elements/1.1/', DCTERMS: 'http://purl.org/dc/terms/', ENC: 'http://www.w3.org/2001/04/xmlenc#', NCX: 'http://www.daisy.org/z3986/2005/ncx/', XLINK: 'http://www.w3.org/1999/xlink', SMIL: 'http://www.w3.org/ns/SMIL', } const MIME = { XML: 'application/xml', NCX: 'application/x-dtbncx+xml', XHTML: 'application/xhtml+xml', HTML: 'text/html', CSS: 'text/css', SVG: 'image/svg+xml', JS: /\/(x-)?(javascript|ecmascript)/, } // https://www.w3.org/TR/epub-33/#sec-reserved-prefixes const PREFIX = { a11y: 'http://www.idpf.org/epub/vocab/package/a11y/#', dcterms: 'http://purl.org/dc/terms/', marc: 'http://id.loc.gov/vocabulary/', media: 'http://www.idpf.org/epub/vocab/overlays/#', onix: 'http://www.editeur.org/ONIX/book/codelists/current.html#', rendition: 'http://www.idpf.org/vocab/rendition/#', schema: 'http://schema.org/', xsd: 'http://www.w3.org/2001/XMLSchema#', msv: 'http://www.idpf.org/epub/vocab/structure/magazine/#', prism: 'http://www.prismstandard.org/specifications/3.0/PRISM_CV_Spec_3.0.htm#', } const RELATORS = { art: 'artist', aut: 'author', clr: 'colorist', edt: 'editor', ill: 'illustrator', nrt: 'narrator', trl: 'translator', pbl: 'publisher', } const ONIX5 = { '02': 'isbn', '06': 'doi', '15': 'isbn', '26': 'doi', '34': 'issn', } // convert to camel case const camel = x => x.toLowerCase().replace(/[-:](.)/g, (_, g) => g.toUpperCase()) // strip and collapse ASCII whitespace // https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace const normalizeWhitespace = str => str ? str .replace(/[\t\n\f\r ]+/g, ' ') .replace(/^[\t\n\f\r ]+/, '') .replace(/[\t\n\f\r ]+$/, '') : '' const filterAttribute = (attr, value, isList) => isList ? el => el.getAttribute(attr)?.split(/\s/)?.includes(value) : typeof value === 'function' ? el => value(el.getAttribute(attr)) : el => el.getAttribute(attr) === value const getAttributes = (...xs) => el => el ? Object.fromEntries(xs.map(x => [camel(x), el.getAttribute(x)])) : null const getElementText = el => normalizeWhitespace(el?.textContent) const childGetter = (doc, ns) => { // ignore the namespace if it doesn't appear in document at all const useNS = doc.lookupNamespaceURI(null) === ns || doc.lookupPrefix(ns) const f = useNS ? (el, name) => el => el.namespaceURI === ns && el.localName === name : (el, name) => el => el.localName === name return { $: (el, name) => [...el.children].find(f(el, name)), $$: (el, name) => [...el.children].filter(f(el, name)), $$$: useNS ? (el, name) => [...el.getElementsByTagNameNS(ns, name)] : (el, name) => [...el.getElementsByTagName(name)], } } const resolveURL = (url, relativeTo) => { try { // replace %2c in the url with a comma, this might be introduced by calibre url = url.replace(/%2c/, ',') if (relativeTo.includes(':') && !relativeTo.startsWith('OEBPS')) return new URL(url, relativeTo) // the base needs to be a valid URL, so set a base URL and then remove it const root = 'https://invalid.invalid/' const obj = new URL(url, root + relativeTo) obj.search = '' return decodeURI(obj.href.replace(root, '')) } catch(e) { console.warn(e) return url } } const isExternal = uri => /^(?!blob)\w+:/i.test(uri) // like `path.relative()` in Node.js const pathRelative = (from, to) => { if (!from) return to const as = from.replace(/\/$/, '').split('/') const bs = to.replace(/\/$/, '').split('/') const i = (as.length > bs.length ? as : bs).findIndex((_, i) => as[i] !== bs[i]) return i < 0 ? '' : Array(as.length - i).fill('..').concat(bs.slice(i)).join('/') } const pathDirname = str => str.slice(0, str.lastIndexOf('/') + 1) // replace asynchronously and sequentially // same technique as https://stackoverflow.com/a/48032528 const replaceSeries = async (str, regex, f) => { const matches = [] str.replace(regex, (...args) => (matches.push(args), null)) const results = [] for (const args of matches) results.push(await f(...args)) return str.replace(regex, () => results.shift()) } const regexEscape = str => str.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&') const tidy = obj => { for (const [key, val] of Object.entries(obj)) if (val == null) delete obj[key] else if (Array.isArray(val)) { obj[key] = val.filter(x => x).map(x => typeof x === 'object' && !Array.isArray(x) ? tidy(x) : x) if (!obj[key].length) delete obj[key] else if (obj[key].length === 1) obj[key] = obj[key][0] } else if (typeof val === 'object') { obj[key] = tidy(val) if (!Object.keys(val).length) delete obj[key] } const keys = Object.keys(obj) if (keys.length === 1 && keys[0] === 'name') return obj[keys[0]] return obj } // https://www.w3.org/TR/epub/#sec-prefix-attr const getPrefixes = doc => { const map = new Map(Object.entries(PREFIX)) const value = doc.documentElement.getAttributeNS(NS.EPUB, 'prefix') || doc.documentElement.getAttribute('prefix') if (value) for (const [, prefix, url] of value .matchAll(/(.+): +(.+)[ \t\r\n]*/g)) map.set(prefix, url) return map } // https://www.w3.org/TR/epub-rs/#sec-property-values // but ignoring the case where the prefix is omitted const getPropertyURL = (value, prefixes) => { if (!value) return null const [a, b] = value.split(':') const prefix = b ? a : null const reference = b ? b : a const baseURL = prefixes.get(prefix) return baseURL ? baseURL + reference : null } const getMetadata = opf => { const { $ } = childGetter(opf, NS.OPF) const $metadata = $(opf.documentElement, 'metadata') // first pass: convert to JS objects const els = Object.groupBy($metadata.children, el => el.namespaceURI === NS.DC ? 'dc' : el.namespaceURI === NS.OPF && el.localName === 'meta' ? (el.hasAttribute('name') ? 'legacyMeta' : 'meta') : '') const baseLang = $metadata.getAttribute('xml:lang') ?? opf.documentElement.getAttribute('xml:lang') ?? 'und' const prefixes = getPrefixes(opf) const parse = el => { const property = el.getAttribute('property') const scheme = el.getAttribute('scheme') return { property: getPropertyURL(property, prefixes) ?? property, scheme: getPropertyURL(scheme, prefixes) ?? scheme, lang: el.getAttribute('xml:lang'), value: getElementText(el), props: getProperties(el), // `opf:` attributes from EPUB 2 & EPUB 3.1 (removed in EPUB 3.2) attrs: Object.fromEntries(Array.from(el.attributes) .filter(attr => attr.namespaceURI === NS.OPF) .map(attr => [attr.localName, attr.value])), } } const refines = Map.groupBy(els.meta ?? [], el => el.getAttribute('refines')) const getProperties = el => { const els = refines.get(el ? '#' + el.getAttribute('id') : null) if (!els) return null return Object.groupBy(els.map(parse), x => x.property) } const dc = Object.fromEntries(Object.entries(Object.groupBy(els.dc || [], el => el.localName)) .map(([name, els]) => [name, els.map(parse)])) const properties = getProperties() ?? {} const legacyMeta = Object.fromEntries(els.legacyMeta?.map(el => [el.getAttribute('name'), el.getAttribute('content')]) ?? []) // second pass: map to webpub const one = x => x?.[0]?.value const prop = (x, p) => one(x?.props?.[p]) const makeLanguageMap = x => { if (!x) return null const alts = x.props?.['alternate-script'] ?? [] const altRep = x.attrs['alt-rep'] if (!alts.length && (!x.lang || x.lang === baseLang) && !altRep) return x.value const map = { [x.lang ?? baseLang]: x.value } if (altRep) map[x.attrs['alt-rep-lang']] = altRep for (const y of alts) map[y.lang] ??= y.value return map } const makeContributor = x => x ? ({ name: makeLanguageMap(x), sortAs: makeLanguageMap(x.props?.['file-as']?.[0]) ?? x.attrs['file-as'], role: x.props?.role?.filter(x => x.scheme === PREFIX.marc + 'relators') ?.map(x => x.value) ?? [x.attrs.role], code: prop(x, 'term') ?? x.attrs.term, scheme: prop(x, 'authority') ?? x.attrs.authority, }) : null const makeCollection = x => ({ name: makeLanguageMap(x), // NOTE: webpub requires number but EPUB allows values like "2.2.1" position: one(x.props?.['group-position']), }) const makeAltIdentifier = x => { const { value } = x if (/^urn:/i.test(value)) return value if (/^doi:/i.test(value)) return `urn:${value}` const type = x.props?.['identifier-type'] if (!type) { const scheme = x.attrs.scheme if (!scheme) return value // https://idpf.github.io/epub-registries/identifiers/ // but no "jdcn", which isn't a registered URN namespace if (/^(doi|isbn|uuid)$/i.test(scheme)) return `urn:${scheme}:${value}` // NOTE: webpub requires scheme to be a URI; EPUB allows anything return { scheme, value } } if (type.scheme === PREFIX.onix + 'codelist5') { const nid = ONIX5[type.value] if (nid) return `urn:${nid}:${value}` } return value } const belongsTo = Object.groupBy(properties['belongs-to-collection'] ?? [], x => prop(x, 'collection-type') === 'series' ? 'series' : 'collection') const mainTitle = dc.title?.find(x => prop(x, 'title-type') === 'main') ?? dc.title?.[0] const metadata = { identifier: getIdentifier(opf), title: makeLanguageMap(mainTitle), sortAs: makeLanguageMap(mainTitle?.props?.['file-as']?.[0]) ?? mainTitle?.attrs?.['file-as'] ?? legacyMeta?.['calibre:title_sort'], subtitle: dc.title?.find(x => prop(x, 'title-type') === 'subtitle')?.value, language: dc.language?.map(x => x.value), description: one(dc.description), publisher: makeContributor(dc.publisher?.[0]), published: dc.date?.find(x => x.attrs.event === 'publication')?.value ?? one(dc.date), modified: one(properties[PREFIX.dcterms + 'modified']) ?? dc.date?.find(x => x.attrs.event === 'modification')?.value, subject: dc.subject?.map(makeContributor), belongsTo: { collection: belongsTo.collection?.map(makeCollection), series: belongsTo.series?.map(makeCollection) ?? legacyMeta?.['calibre:series'] ? { name: legacyMeta?.['calibre:series'], position: parseFloat(legacyMeta?.['calibre:series_index']), } : null, }, altIdentifier: dc.identifier?.map(makeAltIdentifier), source: dc.source?.map(makeAltIdentifier), // NOTE: not in webpub schema rights: one(dc.rights), // NOTE: not in webpub schema } const remapContributor = defaultKey => x => { const keys = new Set(x.role?.map(role => RELATORS[role] ?? defaultKey)) return [keys.size ? keys : [defaultKey], x] } for (const [keys, val] of [].concat( dc.creator?.map(makeContributor)?.map(remapContributor('author')) ?? [], dc.contributor?.map(makeContributor)?.map(remapContributor('contributor')) ?? [])) for (const key of keys) if (metadata[key]) metadata[key].push(val) else metadata[key] = [val] tidy(metadata) if (metadata.altIdentifier === metadata.identifier) delete metadata.altIdentifier const rendition = {} const media = {} for (const [key, val] of Object.entries(properties)) { if (key.startsWith(PREFIX.rendition)) rendition[camel(key.replace(PREFIX.rendition, ''))] = one(val) else if (key.startsWith(PREFIX.media)) media[camel(key.replace(PREFIX.media, ''))] = one(val) } if (media.duration) media.duration = parseClock(media.duration) return { metadata, rendition, media } } const parseNav = (doc, resolve = f => f) => { const { $, $$, $$$ } = childGetter(doc, NS.XHTML) const resolveHref = href => href ? decodeURI(resolve(href)) : null const parseLI = getType => $li => { const $a = $($li, 'a') ?? $($li, 'span') const $ol = $($li, 'ol') const href = resolveHref($a?.getAttribute('href')) const label = getElementText($a) || $a?.getAttribute('title') // TODO: get and concat alt/title texts in content const result = { label, href, subitems: parseOL($ol) } if (getType) result.type = $a?.getAttributeNS(NS.EPUB, 'type')?.split(/\s/) return result } const parseOL = ($ol, getType) => $ol ? $$($ol, 'li').map(parseLI(getType)) : null const parseNav = ($nav, getType) => parseOL($($nav, 'ol'), getType) const $$nav = $$$(doc, 'nav') let toc = null, pageList = null, landmarks = null, others = [] for (const $nav of $$nav) { const type = $nav.getAttributeNS(NS.EPUB, 'type')?.split(/\s/) ?? [] if (type.includes('toc')) toc ??= parseNav($nav) else if (type.includes('page-list')) pageList ??= parseNav($nav) else if (type.includes('landmarks')) landmarks ??= parseNav($nav, true) else others.push({ label: getElementText($nav.firstElementChild), type, list: parseNav($nav), }) } return { toc, pageList, landmarks, others } } const parseNCX = (doc, resolve = f => f) => { const { $, $$ } = childGetter(doc, NS.NCX) const resolveHref = href => href ? decodeURI(resolve(href)) : null const parseItem = el => { const $label = $(el, 'navLabel') const $content = $(el, 'content') const label = getElementText($label) const href = resolveHref($content.getAttribute('src')) if (el.localName === 'navPoint') { const els = $$(el, 'navPoint') return { label, href, subitems: els.length ? els.map(parseItem) : null } } return { label, href } } const parseList = (el, itemName) => $$(el, itemName).map(parseItem) const getSingle = (container, itemName) => { const $container = $(doc.documentElement, container) return $container ? parseList($container, itemName) : null } return { toc: getSingle('navMap', 'navPoint'), pageList: getSingle('pageList', 'pageTarget'), others: $$(doc.documentElement, 'navList').map(el => ({ label: getElementText($(el, 'navLabel')), list: parseList(el, 'navTarget'), })), } } const parseClock = str => { if (!str) return const parts = str.split(':').map(x => parseFloat(x)) if (parts.length === 3) { const [h, m, s] = parts return h * 60 * 60 + m * 60 + s } if (parts.length === 2) { const [m, s] = parts return m * 60 + s } const [x, unit] = str.split(/(?=[^\d.])/) const n = parseFloat(x) const f = unit === 'h' ? 60 * 60 : unit === 'min' ? 60 : unit === 'ms' ? .001 : 1 return n * f } class MediaOverlay extends EventTarget { #entries #lastMediaOverlayItem #sectionIndex #audioIndex #itemIndex #audio #volume = 1 #rate = 1 #state constructor(book, loadXML) { super() this.book = book this.loadXML = loadXML } async #loadSMIL(item) { if (this.#lastMediaOverlayItem === item) return const doc = await this.loadXML(item.href) const resolve = href => href ? resolveURL(href, item.href) : null const { $, $$$ } = childGetter(doc, NS.SMIL) this.#audioIndex = -1 this.#itemIndex = -1 this.#entries = $$$(doc, 'par').reduce((arr, $par) => { const text = resolve($($par, 'text')?.getAttribute('src')) const $audio = $($par, 'audio') if (!text || !$audio) return arr const src = resolve($audio.getAttribute('src')) const begin = parseClock($audio.getAttribute('clipBegin')) const end = parseClock($audio.getAttribute('clipEnd')) const last = arr.at(-1) if (last?.src === src) last.items.push({ text, begin, end }) else arr.push({ src, items: [{ text, begin, end }] }) return arr }, []) this.#lastMediaOverlayItem = item } get #activeAudio() { return this.#entries[this.#audioIndex] } get #activeItem() { return this.#activeAudio?.items?.[this.#itemIndex] } #error(e) { console.error(e) this.dispatchEvent(new CustomEvent('error', { detail: e })) } #highlight() { this.dispatchEvent(new CustomEvent('highlight', { detail: this.#activeItem })) } #unhighlight() { this.dispatchEvent(new CustomEvent('unhighlight', { detail: this.#activeItem })) } async #play(audioIndex, itemIndex) { this.#stop() this.#audioIndex = audioIndex this.#itemIndex = itemIndex const src = this.#activeAudio?.src if (!src || !this.#activeItem) return this.start(this.#sectionIndex + 1) const url = URL.createObjectURL(await this.book.loadBlob(src)) const audio = new Audio(url) this.#audio = audio audio.volume = this.#volume audio.playbackRate = this.#rate audio.addEventListener('timeupdate', () => { if (audio.paused) return const t = audio.currentTime const { items } = this.#activeAudio if (t > this.#activeItem?.end) { this.#unhighlight() if (this.#itemIndex === items.length - 1) { this.#play(this.#audioIndex + 1, 0).catch(e => this.#error(e)) return } } const oldIndex = this.#itemIndex while (items[this.#itemIndex + 1]?.begin <= t) this.#itemIndex++ if (this.#itemIndex !== oldIndex) this.#highlight() }) audio.addEventListener('error', () => this.#error(new Error(`Failed to load ${src}`))) audio.addEventListener('playing', () => this.#highlight()) audio.addEventListener('ended', () => { this.#unhighlight() URL.revokeObjectURL(url) this.#audio = null this.#play(audioIndex + 1, 0).catch(e => this.#error(e)) }) if (this.#state === 'paused') { this.#highlight() audio.currentTime = this.#activeItem.begin ?? 0 } else audio.addEventListener('canplaythrough', () => { // for some reason need to seek in `canplaythrough` // or it won't play when skipping in WebKit audio.currentTime = this.#activeItem.begin ?? 0 this.#state = 'playing' audio.play().catch(e => this.#error(e)) }, { once: true }) } async start(sectionIndex, filter = () => true) { this.#audio?.pause() const section = this.book.sections[sectionIndex] const href = section?.id if (!href) return const { mediaOverlay } = section if (!mediaOverlay) return this.start(sectionIndex + 1) this.#sectionIndex = sectionIndex await this.#loadSMIL(mediaOverlay) for (let i = 0; i < this.#entries.length; i++) { const { items } = this.#entries[i] for (let j = 0; j < items.length; j++) { if (items[j].text.split('#')[0] === href && filter(items[j], j, items)) return this.#play(i, j).catch(e => this.#error(e)) } } } pause() { this.#state = 'paused' this.#audio?.pause() } resume() { this.#state = 'playing' this.#audio?.play().catch(e => this.#error(e)) } #stop() { if (this.#audio) { this.#audio.pause() URL.revokeObjectURL(this.#audio.src) this.#audio = null this.#unhighlight() } } stop() { this.#state = 'stopped' this.#stop() } prev() { if (this.#itemIndex > 0) this.#play(this.#audioIndex, this.#itemIndex - 1) else if (this.#audioIndex > 0) this.#play(this.#audioIndex - 1, this.#entries[this.#audioIndex - 1].items.length - 1) else if (this.#sectionIndex > 0) this.start(this.#sectionIndex - 1, (_, i, items) => i === items.length - 1) } next() { this.#play(this.#audioIndex, this.#itemIndex + 1) } setVolume(volume) { this.#volume = volume if (this.#audio) this.#audio.volume = volume } setRate(rate) { this.#rate = rate if (this.#audio) this.#audio.playbackRate = rate } } const isUUID = /([0-9a-f]{8})-([0-9a-f]{4})-([0-9a-f]{4})-([0-9a-f]{4})-([0-9a-f]{12})/ const getUUID = opf => { for (const el of opf.getElementsByTagNameNS(NS.DC, 'identifier')) { const [id] = getElementText(el).split(':').slice(-1) if (isUUID.test(id)) return id } return '' } const getIdentifier = opf => getElementText( opf.getElementById(opf.documentElement.getAttribute('unique-identifier')) ?? opf.getElementsByTagNameNS(NS.DC, 'identifier')[0]) // https://www.w3.org/publishing/epub32/epub-ocf.html#sec-resource-obfuscation const deobfuscate = async (key, length, blob) => { const array = new Uint8Array(await blob.slice(0, length).arrayBuffer()) length = Math.min(length, array.length) for (var i = 0; i < length; i++) array[i] = array[i] ^ key[i % key.length] return new Blob([array, blob.slice(length)], { type: blob.type }) } const WebCryptoSHA1 = async str => { const data = new TextEncoder().encode(str) const buffer = await globalThis.crypto.subtle.digest('SHA-1', data) return new Uint8Array(buffer) } const deobfuscators = (sha1 = WebCryptoSHA1) => ({ 'http://www.idpf.org/2008/embedding': { key: opf => sha1(getIdentifier(opf) // eslint-disable-next-line no-control-regex .replaceAll(/[\u0020\u0009\u000d\u000a]/g, '')), decode: (key, blob) => deobfuscate(key, 1040, blob), }, 'http://ns.adobe.com/pdf/enc#RC': { key: opf => { const uuid = getUUID(opf).replaceAll('-', '') return Uint8Array.from({ length: 16 }, (_, i) => parseInt(uuid.slice(i * 2, i * 2 + 2), 16)) }, decode: (key, blob) => deobfuscate(key, 1024, blob), }, }) class Encryption { #uris = new Map() #decoders = new Map() #algorithms constructor(algorithms) { this.#algorithms = algorithms } async init(encryption, opf) { if (!encryption) return const data = Array.from( encryption.getElementsByTagNameNS(NS.ENC, 'EncryptedData'), el => ({ algorithm: el.getElementsByTagNameNS(NS.ENC, 'EncryptionMethod')[0] ?.getAttribute('Algorithm'), uri: el.getElementsByTagNameNS(NS.ENC, 'CipherReference')[0] ?.getAttribute('URI'), })) for (const { algorithm, uri } of data) { if (!this.#decoders.has(algorithm)) { const algo = this.#algorithms[algorithm] if (!algo) { console.warn('Unknown encryption algorithm') continue } const key = await algo.key(opf) this.#decoders.set(algorithm, blob => algo.decode(key, blob)) } this.#uris.set(uri, algorithm) } } getDecoder(uri) { return this.#decoders.get(this.#uris.get(uri)) ?? (x => x) } } class Resources { constructor({ opf, resolveHref }) { this.opf = opf const { $, $$, $$$ } = childGetter(opf, NS.OPF) const $manifest = $(opf.documentElement, 'manifest') const $spine = $(opf.documentElement, 'spine') const $$itemref = $$($spine, 'itemref') this.manifest = $$($manifest, 'item') .map(getAttributes('href', 'id', 'media-type', 'properties', 'media-overlay')) .map(item => { item.href = resolveHref(item.href) item.properties = item.properties?.split(/\s/) return item }) this.spine = $$itemref .map(getAttributes('idref', 'id', 'linear', 'properties')) .map(item => (item.properties = item.properties?.split(/\s/), item)) this.pageProgressionDirection = $spine .getAttribute('page-progression-direction') this.navPath = this.getItemByProperty('nav')?.href this.ncxPath = (this.getItemByID($spine.getAttribute('toc')) ?? this.manifest.find(item => item.mediaType === MIME.NCX))?.href const $guide = $(opf.documentElement, 'guide') if ($guide) this.guide = $$($guide, 'reference') .map(getAttributes('type', 'title', 'href')) .map(({ type, title, href }) => ({ label: title, type: type.split(/\s/), href: resolveHref(href), })) this.cover = this.getItemByProperty('cover-image') // EPUB 2 compat ?? this.getItemByID($$$(opf, 'meta') .find(filterAttribute('name', 'cover')) ?.getAttribute('content')) ?? this.manifest.find(item => item.href.includes('cover') && item.mediaType.startsWith('image')) ?? this.getItemByHref(this.guide ?.find(ref => ref.type.includes('cover'))?.href) this.cfis = CFI.fromElements($$itemref) } getItemByID(id) { return this.manifest.find(item => item.id === id) } getItemByHref(href) { return this.manifest.find(item => item.href === href) } getItemByProperty(prop) { return this.manifest.find(item => item.properties?.includes(prop)) } resolveCFI(cfi) { const parts = CFI.parse(cfi) const top = (parts.parent ?? parts).shift() let $itemref = CFI.toElement(this.opf, top) // make sure it's an idref; if not, try again without the ID assertion // mainly because Epub.js used to generate wrong ID assertions // https://github.com/futurepress/epub.js/issues/1236 if ($itemref && $itemref.nodeName !== 'idref') { top.at(-1).id = null $itemref = CFI.toElement(this.opf, top) } const idref = $itemref?.getAttribute('idref') const index = this.spine.findIndex(item => item.idref === idref) const anchor = doc => CFI.toRange(doc, parts) return { index, anchor } } } class Loader { #cache = new Map() #children = new Map() #refCount = new Map() allowScript = false eventTarget = new EventTarget() constructor({ loadText, loadBlob, resources }) { this.loadText = loadText this.loadBlob = loadBlob this.manifest = resources.manifest this.assets = resources.manifest // needed only when replacing in (X)HTML w/o parsing (see below) //.filter(({ mediaType }) => ![MIME.XHTML, MIME.HTML].includes(mediaType)) } async createURL(href, data, type, parent) { if (!data) return '' const detail = { data, type } Object.defineProperty(detail, 'name', { value: href }) // readonly const event = new CustomEvent('data', { detail }) this.eventTarget.dispatchEvent(event) const newData = await event.detail.data const newType = await event.detail.type const url = URL.createObjectURL(new Blob([newData], { type: newType })) this.#cache.set(href, url) this.#refCount.set(href, 1) if (parent) { const childList = this.#children.get(parent) if (childList) childList.push(href) else this.#children.set(parent, [href]) } return url } ref(href, parent) { const childList = this.#children.get(parent) if (!childList?.includes(href)) { this.#refCount.set(href, this.#refCount.get(href) + 1) //console.log(`referencing ${href}, now ${this.#refCount.get(href)}`) if (childList) childList.push(href) else this.#children.set(parent, [href]) } return this.#cache.get(href) } unref(href) { if (!this.#refCount.has(href)) return const count = this.#refCount.get(href) - 1 //console.log(`unreferencing ${href}, now ${count}`) if (count < 1) { //console.log(`unloading ${href}`) URL.revokeObjectURL(this.#cache.get(href)) this.#cache.delete(href) this.#refCount.delete(href) // unref children const childList = this.#children.get(href) if (childList) while (childList.length) this.unref(childList.pop()) this.#children.delete(href) } else this.#refCount.set(href, count) } // load manifest item, recursively loading all resources as needed async loadItem(item, parents = []) { if (!item) return null const { href, mediaType } = item const isScript = MIME.JS.test(item.mediaType) if (isScript && !this.allowScript) return null const parent = parents.at(-1) if (this.#cache.has(href)) return this.ref(href, parent) const shouldReplace = (isScript || [MIME.XHTML, MIME.HTML, MIME.CSS, MIME.SVG].includes(mediaType)) // prevent circular references && parents.every(p => p !== href) if (shouldReplace) return this.loadReplaced(item, parents) // NOTE: this can be replaced with `Promise.try()` const tryLoadBlob = Promise.resolve().then(() => this.loadBlob(href)) return this.createURL(href, tryLoadBlob, mediaType, parent) } async loadHref(href, base, parents = []) { if (isExternal(href)) return href const path = resolveURL(href, base) const item = this.manifest.find(item => item.href === path) if (!item) return href return this.loadItem(item, parents.concat(base)) } async loadReplaced(item, parents = []) { const { href, mediaType } = item const parent = parents.at(-1) let str = '' try { str = await this.loadText(href) } catch (e) { return this.createURL(href, Promise.reject(e), mediaType, parent) } if (!str) return null // note that one can also just use `replaceString` for everything: // ``` // const replaced = await this.replaceString(str, href, parents) // return this.createURL(href, replaced, mediaType, parent) // ``` // which is basically what Epub.js does, which is simpler, but will // break things like iframes (because you don't want to replace links) // or text that just happen to be paths // parse and replace in HTML if ([MIME.XHTML, MIME.HTML, MIME.SVG].includes(mediaType)) { let doc = new DOMParser().parseFromString(str, mediaType) // change to HTML if it's not valid XHTML if (mediaType === MIME.XHTML && (doc.querySelector('parsererror') || !doc.documentElement?.namespaceURI)) { console.warn(doc.querySelector('parsererror')?.innerText ?? 'Invalid XHTML') item.mediaType = MIME.HTML doc = new DOMParser().parseFromString(str, item.mediaType) } // replace hrefs in XML processing instructions // this is mainly for SVGs that use xml-stylesheet if ([MIME.XHTML, MIME.SVG].includes(item.mediaType)) { let child = doc.firstChild while (child instanceof ProcessingInstruction) { if (child.data) { const replacedData = await replaceSeries(child.data, /(?:^|\s*)(href\s*=\s*['"])([^'"]*)(['"])/i, (_, p1, p2, p3) => this.loadHref(p2, href, parents) .then(p2 => `${p1}${p2}${p3}`)) child.replaceWith(doc.createProcessingInstruction( child.target, replacedData)) } child = child.nextSibling } } // replace hrefs (excluding anchors) // TODO: srcset? const replace = async (el, attr) => el.setAttribute(attr, await this.loadHref(el.getAttribute(attr), href, parents)) for (const el of doc.querySelectorAll('link[href]')) await replace(el, 'href') for (const el of doc.querySelectorAll('[src]')) await replace(el, 'src') for (const el of doc.querySelectorAll('[poster]')) await replace(el, 'poster') for (const el of doc.querySelectorAll('object[data]')) await replace(el, 'data') for (const el of doc.querySelectorAll('[*|href]:not([href])')) el.setAttributeNS(NS.XLINK, 'href', await this.loadHref( el.getAttributeNS(NS.XLINK, 'href'), href, parents)) // replace inline styles for (const el of doc.querySelectorAll('style')) if (el.textContent) el.textContent = await this.replaceCSS(el.textContent, href, parents) for (const el of doc.querySelectorAll('[style]')) el.setAttribute('style', await this.replaceCSS(el.getAttribute('style'), href, parents)) // TODO: replace inline scripts? probably not worth the trouble const result = new XMLSerializer().serializeToString(doc) return this.createURL(href, result, item.mediaType, parent) } const result = mediaType === MIME.CSS ? await this.replaceCSS(str, href, parents) : await this.replaceString(str, href, parents) return this.createURL(href, result, mediaType, parent) } async replaceCSS(str, href, parents = []) { const replacedUrls = await replaceSeries(str, /url\(\s*["']?([^'"\n]*?)\s*["']?\s*\)/gi, (_, url) => this.loadHref(url, href, parents) .then(url => `url("${url}")`)) // apart from `url()`, strings can be used for `@import` (but why?!) return replaceSeries(replacedUrls, /@import\s*["']([^"'\n]*?)["']/gi, (_, url) => this.loadHref(url, href, parents) .then(url => `@import "${url}"`)) } // find & replace all possible relative paths for all assets without parsing replaceString(str, href, parents = []) { const assetMap = new Map() const urls = this.assets.map(asset => { // do not replace references to the file itself if (asset.href === href) return // href was decoded and resolved when parsing the manifest const relative = pathRelative(pathDirname(href), asset.href) const relativeEnc = encodeURI(relative) const rootRelative = '/' + asset.href const rootRelativeEnc = encodeURI(rootRelative) const set = new Set([relative, relativeEnc, rootRelative, rootRelativeEnc]) for (const url of set) assetMap.set(url, asset) return Array.from(set) }).flat().filter(x => x) if (!urls.length) return str const regex = new RegExp(urls.map(regexEscape).join('|'), 'g') return replaceSeries(str, regex, async match => this.loadItem(assetMap.get(match.replace(/^\//, '')), parents.concat(href))) } unloadItem(item) { this.unref(item?.href) } destroy() { for (const url of this.#cache.values()) URL.revokeObjectURL(url) } } const getHTMLFragment = (doc, id) => doc.getElementById(id) ?? doc.querySelector(`[name="${CSS.escape(id)}"]`) const getPageSpread = properties => { for (const p of properties) { if (p === 'page-spread-left' || p === 'rendition:page-spread-left') return 'left' if (p === 'page-spread-right' || p === 'rendition:page-spread-right') return 'right' if (p === 'rendition:page-spread-center') return 'center' } } const getDisplayOptions = doc => { if (!doc) return null return { fixedLayout: getElementText(doc.querySelector('option[name="fixed-layout"]')), openToSpread: getElementText(doc.querySelector('option[name="open-to-spread"]')), } } export class EPUB { parser = new DOMParser() #loader #encryption constructor({ loadText, loadBlob, getSize, sha1 }) { this.loadText = loadText this.loadBlob = loadBlob this.getSize = getSize this.#encryption = new Encryption(deobfuscators(sha1)) } async #loadXML(uri) { const str = await this.loadText(uri) if (!str) return null const doc = this.parser.parseFromString(str, MIME.XML) if (doc.querySelector('parsererror')) throw new Error(`XML parsing error: ${uri} ${doc.querySelector('parsererror').innerText}`) return doc } async init() { const $container = await this.#loadXML('META-INF/container.xml') if (!$container) throw new Error('Failed to load container file') const opfs = Array.from( $container.getElementsByTagNameNS(NS.CONTAINER, 'rootfile'), getAttributes('full-path', 'media-type')) .filter(file => file.mediaType === 'application/oebps-package+xml') if (!opfs.length) throw new Error('No package document defined in container') const opfPath = opfs[0].fullPath const opf = await this.#loadXML(opfPath) if (!opf) throw new Error('Failed to load package document') const $encryption = await this.#loadXML('META-INF/encryption.xml') await this.#encryption.init($encryption, opf) this.resources = new Resources({ opf, resolveHref: url => resolveURL(url, opfPath), }) this.#loader = new Loader({ loadText: this.loadText, loadBlob: uri => Promise.resolve(this.loadBlob(uri)) .then(this.#encryption.getDecoder(uri)), resources: this.resources, }) this.transformTarget = this.#loader.eventTarget this.sections = this.resources.spine.map((spineItem, index) => { const { idref, linear, properties = [] } = spineItem const item = this.resources.getItemByID(idref) if (!item) { console.warn(`Could not find item with ID "${idref}" in manifest`) return null } return { id: item.href, load: () => this.#loader.loadItem(item), unload: () => this.#loader.unloadItem(item), createDocument: () => this.loadDocument(item), size: this.getSize(item.href), cfi: this.resources.cfis[index], linear, pageSpread: getPageSpread(properties), resolveHref: href => resolveURL(href, item.href), mediaOverlay: item.mediaOverlay ? this.resources.getItemByID(item.mediaOverlay) : null, } }).filter(s => s) const { navPath, ncxPath } = this.resources if (navPath) try { const resolve = url => resolveURL(url, navPath) const nav = parseNav(await this.#loadXML(navPath), resolve) this.toc = nav.toc this.pageList = nav.pageList this.landmarks = nav.landmarks } catch(e) { console.warn(e) } if (!this.toc && ncxPath) try { const resolve = url => resolveURL(url, ncxPath) const ncx = parseNCX(await this.#loadXML(ncxPath), resolve) this.toc = ncx.toc this.pageList = ncx.pageList } catch(e) { console.warn(e) } this.landmarks ??= this.resources.guide const { metadata, rendition, media } = getMetadata(opf) this.metadata = metadata this.rendition = rendition this.media = media this.dir = this.resources.pageProgressionDirection const displayOptions = getDisplayOptions( await this.#loadXML('META-INF/com.apple.ibooks.display-options.xml') ?? await this.#loadXML('META-INF/com.kobobooks.display-options.xml')) if (displayOptions) { if (displayOptions.fixedLayout === 'true') this.rendition.layout ??= 'pre-paginated' if (displayOptions.openToSpread === 'false') this.sections .find(section => section.linear !== 'no').pageSpread ??= this.dir === 'rtl' ? 'left' : 'right' } return this } async loadDocument(item) { const str = await this.loadText(item.href) return this.parser.parseFromString(str, item.mediaType) } getMediaOverlay() { return new MediaOverlay(this, this.#loadXML.bind(this)) } resolveCFI(cfi) { return this.resources.resolveCFI(cfi) } resolveHref(href) { const [path, hash] = href.split('#') const item = this.resources.getItemByHref(decodeURI(path)) if (!item) return null const index = this.resources.spine.findIndex(({ idref }) => idref === item.id) const anchor = hash ? doc => getHTMLFragment(doc, hash) : () => 0 return { index, anchor } } splitTOCHref(href) { return href?.split('#') ?? [] } getTOCFragment(doc, id) { return doc.getElementById(id) ?? doc.querySelector(`[name="${CSS.escape(id)}"]`) } isExternal(uri) { return isExternal(uri) } async getCover() { const cover = this.resources?.cover return cover?.href ? new Blob([await this.loadBlob(cover.href)], { type: cover.mediaType }) : null } async getCalibreBookmarks() { const txt = await this.loadText('META-INF/calibre_bookmarks.txt') const magic = 'encoding=json+base64:' if (txt?.startsWith(magic)) { const json = atob(txt.slice(magic.length)) return JSON.parse(json) } } destroy() { this.#loader?.destroy() } }