UNPKG

foliate-js

Version:
346 lines (316 loc) 13 kB
// Indices of the elements of `arr` for which `f(element, index, array)` is truthy.
const findIndices = (arr, f) => arr
    .map((x, i, a) => f(x, i, a) ? i : null)
    .filter(x => x != null)

// Split `arr` at the positions listed in `is`; the elements at those positions
// act as separators and are not included in the returned sub-arrays.
const splitAt = (arr, is) => [-1, ...is, arr.length]
    .reduce(({ xs, a }, b) => ({
        // on the first step `xs` is still undefined, so only the position is recorded
        xs: xs?.concat([arr.slice(a + 1, b)]) ?? [],
        a: b,
    }), {}).xs

// Concatenate two arrays of arrays, merging the last inner array of `a`
// with the first inner array of `b`.
const concatArrays = (a, b) =>
    a.slice(0, -1).concat([a[a.length - 1].concat(b[0])]).concat(b.slice(1))

const isNumber = /\d/
// matches a wrapped CFI; capture group 1 is the text inside `epubcfi(...)`
export const isCFI = /^epubcfi\((.*)\)$/
// prefix the characters `^ [ ] ( ) , ; =` with the escape character `^`
const escapeCFI = str => str.replace(/[\^[\](),;=]/g, '^$&')

// add the `epubcfi(...)` wrapper unless the string already has it
const wrap = x => isCFI.test(x) ? x : `epubcfi(${x})`
// strip the `epubcfi(...)` wrapper if present
const unwrap = x => x.match(isCFI)?.[1] ?? x
// turn a function on unwrapped CFI strings into one that accepts wrapped or
// unwrapped strings and returns a wrapped string
const lift = f => (...xs) => `epubcfi(${f(...xs.map(unwrap))})`
// join CFI strings with the step indirection character `!`
export const joinIndir = lift((...xs) => xs.join('!'))

// Convert an unwrapped CFI string into a list of tokens.
// Each token is an array: `['!']`, `[',']`, `['/', int]`, `[':', int]`,
// `['~', float]`, `['@', float]`, `['[', string]`, or `[';name', string]`.
const tokenizer = str => {
    const tokens = []
    let state
    let escape
    let value = ''
    // emit a token and reset the state machine
    const push = x => {
        tokens.push(x)
        state = null
        value = ''
    }
    // append a character to the pending value; this consumes any pending escape
    const cat = x => {
        value += x
        escape = false
    }
    // the trailing empty string flushes whatever token is still pending
    for (const char of Array.from(str.trim()).concat('')) {
        if (char === '^' && !escape) {
            escape = true
            continue
        }
        if (state === '!') push(['!'])
        else if (state === ',') push([','])
        else if (state === '/' || state === ':') {
            if (isNumber.test(char)) {
                cat(char)
                continue
            } else push([state, Number.parseInt(value, 10)])
        } else if (state === '~') {
            if (isNumber.test(char) || char === '.') {
                cat(char)
                continue
            } else push(['~', parseFloat(value)])
        } else if (state === '@') {
            if (char === ':') {
                // `@x:y`: emit the first number and stay in the same state
                push(['@', parseFloat(value)])
                state = '@'
                continue
            }
            if (isNumber.test(char) || char === '.') {
                cat(char)
                continue
            } else push(['@', parseFloat(value)])
        } else if (state === '[') {
            if (char === ';' && !escape) {
                push(['[', value])
                state = ';'
            } else if (char === ',' && !escape) {
                push(['[', value])
                state = '['
            } else if (char === ']' && !escape) push(['[', value])
            else cat(char)
            continue
        } else if (state?.startsWith(';')) {
            if (char === '=' && !escape) {
                // the parameter name becomes part of the state, e.g. `;s`
                state = `;${value}`
                value = ''
            } else if (char === ';' && !escape) {
                push([state, value])
                state = ';'
            } else if (char === ']' && !escape) push([state, value])
            else cat(char)
            continue
        }
        if (char === '/' || char === ':' || char === '~' || char === '@'
        || char === '[' || char === '!' || char === ',') state = char
    }
    return tokens
}

// indices of the tokens whose type is `x`
const findTokens = (tokens, x) => findIndices(tokens, ([t]) => t === x)

// Turn a token list (without `!` or `,` tokens) into an array of parts,
// `{ index, id, offset, temporal, spatial, text, side }`, one per `/` step.
const parser = tokens => {
    const parts = []
    let state
    for (const [type, val] of tokens) {
        if (type === '/') parts.push({ index: val })
        else {
            const last = parts[parts.length - 1]
            if (type === ':') last.offset = val
            else if (type === '~') last.temporal = val
            else if (type === '@') last.spatial = (last.spatial ?? []).concat(val)
            else if (type === ';s') last.side = val
            else if (type === '[') {
                // a bracket directly after a step is an ID assertion;
                // any other bracket is a text location assertion
                if (state === '/' && val) last.id = val
                else {
                    last.text = (last.text ?? []).concat(val)
                    continue
                }
            }
        }
        state = type
    }
    return parts
}

// split at step indirections, then parse each part
const parserIndir = tokens =>
    splitAt(tokens, findTokens(tokens, '!')).map(parser)

// Parse a CFI string (wrapped or not).
// Returns an array of arrays of parts (one inner array per indirection),
// or, for a range CFI, `{ parent, start, end }` where each is such an array.
export const parse = cfi => {
    const tokens = tokenizer(unwrap(cfi))
    const commas = findTokens(tokens, ',')
    if (!commas.length) return parserIndir(tokens)
    const [parent, start, end] = splitAt(tokens, commas).map(parserIndir)
    return { parent, start, end }
}

// serialize a single part back to CFI syntax
const partToString = ({ index, id, offset, temporal, spatial, text, side }) => {
    const param = side ? `;s=${side}` : ''
    return `/${index}`
        + (id ? `[${escapeCFI(id)}${param}]` : '')
        // "CFI expressions [..] SHOULD include an explicit character offset"
        + (offset != null && index % 2 ? `:${offset}` : '')
        // a temporal offset of 0 must survive a parse/serialize round trip
        + (temporal || temporal === 0 ? `~${temporal}` : '')
        + (spatial ? `@${spatial.join(':')}` : '')
        + (text || (!id && side)
            ? '[' + (text?.map(escapeCFI)?.join(',') ?? '') + param + ']'
            : '')
}

// serialize a parsed CFI (range or not) without the `epubcfi(...)` wrapper
const toInnerString = parsed => parsed.parent
    ? [parsed.parent, parsed.start, parsed.end].map(toInnerString).join(',')
    : parsed.map(parts => parts.map(partToString).join('')).join('!')

// serialize a parsed CFI to a wrapped CFI string
const toString = parsed => wrap(toInnerString(parsed))

// Reduce a range CFI to its start (or, if `toEnd` is truthy, its end).
// Strings are parsed and re-serialized; parsed values are returned as parsed
// values. A non-range parsed CFI is returned unchanged.
export const collapse = (x, toEnd) => typeof x === 'string'
    ? toString(collapse(parse(x), toEnd))
    : x.parent ? concatArrays(x.parent, x[toEnd ? 'end' : 'start']) : x

// create range CFI from two CFIs
const buildRange = (from, to) => {
    if (typeof from === 'string') from = parse(from)
    if (typeof to === 'string') to = parse(to)
    from = collapse(from)
    to = collapse(to, true)
    // ranges across multiple documents are not allowed; handle local paths only
    const localFrom = from[from.length - 1], localTo = to[to.length - 1]
    const localParent = [], localStart = [], localEnd = []
    let pushToParent = true
    const len = Math.max(localFrom.length, localTo.length)
    for (let i = 0; i < len; i++) {
        const a = localFrom[i], b = localTo[i]
        // steps are shared only while both sides agree and carry no offset
        pushToParent &&= a?.index === b?.index && !a?.offset && !b?.offset
        if (pushToParent) localParent.push(a)
        else {
            if (a) localStart.push(a)
            if (b) localEnd.push(b)
        }
    }
    // copy non-local paths from `from`
    const parent = from.slice(0, -1).concat([localParent])
    return toString({ parent, start: [localStart], end: [localEnd] })
}

// Compare two CFIs (strings or parsed values).
// Returns -1, 0, or 1. Ranges are compared by their start, then by their end.
export const compare = (a, b) => {
    if (typeof a === 'string') a = parse(a)
    if (typeof b === 'string') b = parse(b)
    if (a.start || b.start) return compare(collapse(a), collapse(b))
        || compare(collapse(a, true), collapse(b, true))

    for (let i = 0; i < Math.max(a.length, b.length); i++) {
        const p = a[i] ?? [], q = b[i] ?? []
        const maxIndex = Math.max(p.length, q.length) - 1
        for (let j = 0; j <= maxIndex; j++) {
            const x = p[j], y = q[j]
            if (!x) return -1
            if (!y) return 1
            if (x.index > y.index) return 1
            if (x.index < y.index) return -1
            if (j === maxIndex) {
                // TODO: compare temporal & spatial offsets
                if (x.offset > y.offset) return 1
                if (x.offset < y.offset) return -1
            }
        }
    }
    return 0
}

// null-safe so that they can be applied to missing entries (e.g. the first
// element of an empty child list) without throwing
const isTextNode = node => node?.nodeType === 3 || node?.nodeType === 4
const isElementNode = node => node?.nodeType === 1

// Element and text/CDATA children of `node`. With a `filter` (a function
// returning `NodeFilter` constants), rejected nodes are dropped and skipped
// nodes are replaced by their own filtered children.
const getChildNodes = (node, filter) => {
    const nodes = Array.from(node.childNodes)
        // "content other than element and character data is ignored"
        .filter(node => isTextNode(node) || isElementNode(node))
    return filter ? nodes.map(node => {
        const accept = filter(node)
        if (accept === NodeFilter.FILTER_REJECT) return null
        else if (accept === NodeFilter.FILTER_SKIP) return getChildNodes(node, filter)
        else return node
    }).flat().filter(x => x) : nodes
}

// child nodes are organized such that the result is always
// [element, text, element, text, ..., element],
// regardless of the actual structure in the document;
// so multiple text nodes need to be combined, and nonexistent ones counted;
// see "Step Reference to Child Element or Character Data (/)" in EPUB CFI spec
//
// entries are: an element, a text node, an array of adjacent text nodes,
// `null` for a nonexistent chunk between two elements, or one of the marker
// strings 'first', 'last', 'before', 'after'
const indexChildNodes = (node, filter) => {
    const nodes = getChildNodes(node, filter)
        .reduce((arr, node) => {
            const last = arr[arr.length - 1]
            if (!last) arr.push(node)
            // "there is one chunk between each pair of child elements"
            else if (isTextNode(node)) {
                if (Array.isArray(last)) last.push(node)
                else if (isTextNode(last)) arr[arr.length - 1] = [last, node]
                else arr.push(node)
            } else {
                if (isElementNode(last)) arr.push(null, node)
                else arr.push(node)
            }
            return arr
        }, [])
    // "the first chunk is located before the first child element"
    if (isElementNode(nodes[0])) nodes.unshift('first')
    // "the last chunk is located after the last child element"
    if (isElementNode(nodes[nodes.length - 1])) nodes.push('last')
    // "'virtual' elements"
    nodes.unshift('before') // "0 is a valid index"
    nodes.push('after') // "n+2 is a valid index"
    return nodes
}

// Resolve an array of parts, starting from `node`, to
// `{ node, offset }`, `{ node, before: true }`, or `{ node, after: true }`.
// If the last part has an ID that exists in the document, that element wins.
const partsToNode = (node, parts, filter) => {
    const { id } = parts[parts.length - 1]
    if (id) {
        const el = node.ownerDocument.getElementById(id)
        if (el) return { node: el, offset: 0 }
    }
    for (const { index } of parts) {
        const newNode = node ? indexChildNodes(node, filter)[index] : null
        // handle non-existent nodes
        if (newNode === 'first') return { node: node.firstChild ?? node }
        if (newNode === 'last') return { node: node.lastChild ?? node }
        if (newNode === 'before') return { node, before: true }
        if (newNode === 'after') return { node, after: true }
        node = newNode
    }
    const { offset } = parts[parts.length - 1]
    if (!Array.isArray(node)) return { node, offset }
    // get underlying text node and offset from the chunk;
    // a missing or non-numeric offset means the start of the chunk
    const target = Number.isFinite(offset) ? offset : 0
    let sum = 0
    for (const n of node) {
        const { length } = n.nodeValue
        if (sum + length >= target) return { node: n, offset: target - sum }
        sum += length
    }
    // the offset points past the end of the chunk: clamp to the end
    const lastNode = node[node.length - 1]
    return { node: lastNode, offset: lastNode.nodeValue.length }
}

// Build the array of parts leading from the document element's children
// down to `node`, with `offset` attached to the last part.
const nodeToParts = (node, offset, filter) => {
    const { parentNode, id } = node
    const indexed = indexChildNodes(parentNode, filter)
    const index = indexed.findIndex(x =>
        Array.isArray(x) ? x.some(x => x === node) : x === node)
    // adjust offset as if merging the text nodes in the chunk
    const chunk = indexed[index]
    if (Array.isArray(chunk)) {
        let sum = 0
        for (const x of chunk) {
            if (x === node) {
                sum += offset
                break
            } else sum += x.nodeValue.length
        }
        offset = sum
    }
    const part = { id, index, offset }
    const parts = parentNode !== node.ownerDocument.documentElement
        ? nodeToParts(parentNode, null, filter).concat(part)
        : [part]
    // remove ignored nodes
    return parts.filter(x => x.index !== -1)
}

// Get a CFI string for a DOM range; a collapsed range gives a non-range CFI.
export const fromRange = (range, filter) => {
    const { startContainer, startOffset, endContainer, endOffset } = range
    const start = nodeToParts(startContainer, startOffset, filter)
    if (range.collapsed) return toString([start])
    const end = nodeToParts(endContainer, endOffset, filter)
    return buildRange([start], [end])
}

// Create a DOM range in `doc` from a parsed CFI; only the first
// (local) path of the collapsed start and end is resolved.
export const toRange = (doc, parts, filter) => {
    const startParts = collapse(parts)
    const endParts = collapse(parts, true)

    const root = doc.documentElement
    const start = partsToNode(root, startParts[0], filter)
    const end = partsToNode(root, endParts[0], filter)

    const range = doc.createRange()

    if (start.before) range.setStartBefore(start.node)
    else if (start.after) range.setStartAfter(start.node)
    else range.setStart(start.node, start.offset)

    if (end.before) range.setEndBefore(end.node)
    else if (end.after) range.setEndAfter(end.node)
    else range.setEnd(end.node, end.offset)
    return range
}

// faster way of getting CFIs for sorted elements in a single parent
export const fromElements = elements => {
    const results = []
    const { parentNode } = elements[0]
    const parts = nodeToParts(parentNode)
    for (const [index, node] of indexChildNodes(parentNode).entries()) {
        // elements are expected in document order, so only the next one is checked
        const el = elements[results.length]
        if (node === el)
            results.push(toString([parts.concat({ id: el.id, index })]))
    }
    return results
}

// resolve parts to a node in `doc`
export const toElement = (doc, parts) =>
    partsToNode(doc.documentElement, collapse(parts)).node

// turn indices into standard CFIs when you don't have an actual package document
export const fake = {
    fromIndex: index => wrap(`/6/${(index + 1) * 2}`),
    // NaN (rather than a TypeError) when there is no last part to read
    toIndex: parts => parts?.at(-1)?.index / 2 - 1,
}

// get CFI from Calibre bookmarks
// see https://github.com/johnfactotum/foliate/issues/849
export const fromCalibrePos = pos => {
    const [parts] = parse(pos)
    const item = parts.shift()
    parts.shift()
    return toString([[{ index: 6 }, item], parts])
}
export const fromCalibreHighlight = ({ spine_index, start_cfi, end_cfi }) => {
    const pre = fake.fromIndex(spine_index) + '!'
    return buildRange(pre + start_cfi.slice(2), pre + end_cfi.slice(2))
}