// foliate-js
// Version:
// Render e-books in the browser
// 346 lines (316 loc) • 13 kB
// JavaScript
// collect the positions of all items for which the predicate `f` is truthy;
// `f` receives (item, position, array), like an `Array.prototype.map` callback
const findIndices = (arr, f) => {
    const hits = []
    arr.forEach((item, pos, all) => {
        if (f(item, pos, all)) hits.push(pos)
    })
    return hits
}
// cut `arr` into consecutive slices at the positions listed in `is`;
// the items at those positions act as separators and are dropped
const splitAt = (arr, is) => {
    const pieces = []
    let from = 0
    for (const cut of [...is, arr.length]) {
        pieces.push(arr.slice(from, cut))
        // resume right after the separator
        from = cut + 1
    }
    return pieces
}
// join two arrays of arrays so that the last inner array of `a` and the
// first inner array of `b` are fused into a single inner array
const concatArrays = (a, b) => {
    const head = a.slice(0, -1)
    const seam = a[a.length - 1].concat(b[0])
    const tail = b.slice(1)
    return [...head, seam, ...tail]
}
// matches one decimal digit
const isNumber = /\d/

// matches a whole `epubcfi(...)` string and captures what is inside the parentheses
export const isCFI = /^epubcfi\((.*)\)$/

// put a circumflex in front of every character that has a meaning in CFI syntax
const escapeCFI = str => str.replace(/[\^[\](),;=]/g, '^$&')

// add the `epubcfi(...)` envelope, unless the string already has one
const wrap = x => {
    if (isCFI.test(x)) return x
    return `epubcfi(${x})`
}

// remove the `epubcfi(...)` envelope, if there is one
const unwrap = x => {
    const inner = x.match(isCFI)?.[1]
    return inner ?? x
}

// turn a function working on bare CFI expressions into one that accepts
// wrapped or bare strings and always returns a wrapped string
const lift = f => (...xs) => {
    const inner = f(...xs.map(x => unwrap(x)))
    return `epubcfi(${inner})`
}

// join several CFIs with step indirections (`!`)
export const joinIndir = lift((...paths) => paths.join('!'))
// Split a bare CFI expression (without the `epubcfi(...)` envelope) into tokens.
//
// Each token is an array whose first item is the token type:
//   ['/', n]       step, with the integer index
//   [':', n]       character offset (integer)
//   ['~', x]       temporal offset (float)
//   ['@', x]       one spatial coordinate (float); `@a:b` yields two tokens
//   ['[', text]    bracket content (ID or text assertion); a `,` inside the
//                  brackets starts another '[' token
//   [';key', val]  parameter inside brackets, e.g. [';s', 'a']
//   ['!']          step indirection
//   [',']          range separator
//
// Characters that do not start a token and are met outside of any token are
// silently skipped.
const tokenizer = str => {
    const tokens = []
    let state, escape, value = ''
    // emit a token and go back to the neutral state
    const push = x => {
        tokens.push(x)
        state = null
        value = ''
    }
    // append to the pending value; this also consumes a pending `^` escape
    const cat = x => {
        value += x
        escape = false
    }
    // the extra empty string at the end flushes whatever token is pending
    for (const char of Array.from(str.trim()).concat('')) {
        if (char === '^' && !escape) {
            escape = true
            continue
        }
        if (state === '!') push(['!'])
        else if (state === ',') push([','])
        else if (state === '/' || state === ':') {
            if (isNumber.test(char)) {
                cat(char)
                continue
            // the value holds decimal digits only, so parse it as base 10
            } else push([state, parseInt(value, 10)])
        } else if (state === '~') {
            if (isNumber.test(char) || char === '.') {
                cat(char)
                continue
            } else push(['~', parseFloat(value)])
        } else if (state === '@') {
            // inside a spatial offset `:` separates the two coordinates
            if (char === ':') {
                push(['@', parseFloat(value)])
                state = '@'
                continue
            }
            if (isNumber.test(char) || char === '.') {
                cat(char)
                continue
            } else push(['@', parseFloat(value)])
        } else if (state === '[') {
            if (char === ';' && !escape) {
                push(['[', value])
                state = ';'
            } else if (char === ',' && !escape) {
                push(['[', value])
                state = '['
            } else if (char === ']' && !escape) push(['[', value])
            else cat(char)
            continue
        } else if (state?.startsWith(';')) {
            if (char === '=' && !escape) {
                // what was read so far is the parameter name; keep it in the state
                state = `;${value}`
                value = ''
            } else if (char === ';' && !escape) {
                push([state, value])
                state = ';'
            } else if (char === ']' && !escape) push([state, value])
            else cat(char)
            continue
        }
        // reached in the neutral state, or right after a token was closed by
        // this very character: see whether the character opens a new token
        if (char === '/' || char === ':' || char === '~' || char === '@'
        || char === '[' || char === '!' || char === ',') state = char
    }
    return tokens
}
// positions of all tokens whose type (first item) is `x`
const findTokens = (tokens, x) => {
    const hasType = ([type]) => type === x
    return findIndices(tokens, hasType)
}
// Turn the tokens of one path (no `!` or `,` inside) into a list of parts.
// Every '/' token opens a new part `{ index }`; the tokens after it add
// `offset`, `temporal`, `spatial`, `side`, `id` or `text` to that part.
const parser = tokens => {
    const parts = []
    // type of the previous token; text assertion tokens leave it untouched
    let prevType
    for (const [type, val] of tokens) {
        if (type === '/') {
            parts.push({ index: val })
            prevType = type
            continue
        }
        const current = parts.at(-1)
        switch (type) {
            case ':':
                current.offset = val
                break
            case '~':
                current.temporal = val
                break
            case '@':
                current.spatial = (current.spatial ?? []).concat(val)
                break
            case ';s':
                current.side = val
                break
            case '[':
                // non-empty brackets directly after a step are an ID assertion
                if (prevType === '/' && val) current.id = val
                else {
                    current.text = (current.text ?? []).concat(val)
                    continue
                }
                break
        }
        prevType = type
    }
    return parts
}
// cut the token list at every step indirection (`!`), then parse each
// of the resulting segments on its own
const parserIndir = tokens => {
    const indirections = findTokens(tokens, '!')
    const segments = splitAt(tokens, indirections)
    return segments.map(segment => parser(segment))
}
// Parse a CFI string, with or without the `epubcfi(...)` envelope.
// Without range separators the result is a list of paths (one per
// indirection segment), each a list of parts. With range separators the
// result is `{ parent, start, end }`, each of them such a list of paths.
export const parse = cfi => {
    const tokens = tokenizer(unwrap(cfi))
    const commas = findTokens(tokens, ',')
    if (commas.length === 0) return parserIndir(tokens)
    const segments = splitAt(tokens, commas)
    const [parent, start, end] = segments.map(segment => parserIndir(segment))
    return { parent, start, end }
}
// Serialize one parsed part back into CFI syntax.
//
// Output order: `/index`, `[id]`, `:offset`, `~temporal`, `@x:y`, `[text]`.
// The side parameter (`;s=`) is appended inside the ID brackets and inside
// the text brackets; without ID and text it gets brackets of its own.
// IDs and text assertions are escaped with `escapeCFI`.
const partToString = ({ index, id, offset, temporal, spatial, text, side }) => {
    const param = side ? `;s=${side}` : ''
    // a temporal offset of 0 is a real position and must be kept;
    // absent and NaN values are still left out
    const hasTemporal = temporal === 0 || Boolean(temporal)
    return `/${index}`
        + (id ? `[${escapeCFI(id)}${param}]` : '')
        // "CFI expressions [..] SHOULD include an explicit character offset"
        // (written for odd indices only)
        + (offset != null && index % 2 ? `:${offset}` : '')
        + (hasTemporal ? `~${temporal}` : '')
        + (spatial ? `@${spatial.join(':')}` : '')
        + (text || (!id && side) ? '['
            + (text?.map(escapeCFI)?.join(',') ?? '')
            + param + ']' : '')
}
// Serialize a parsed CFI without the `epubcfi(...)` envelope.
// A range (`{ parent, start, end }`) becomes its three components joined by
// commas; a list of paths becomes the paths joined by `!`.
const toInnerString = parsed => {
    if (parsed.parent) {
        const { parent, start, end } = parsed
        return [parent, start, end].map(component => toInnerString(component)).join(',')
    }
    const paths = parsed.map(path => path.map(step => partToString(step)).join(''))
    return paths.join('!')
}
// serialize a parsed CFI, including the `epubcfi(...)` envelope
const toString = parsed => {
    const inner = toInnerString(parsed)
    return wrap(inner)
}
// Reduce a range CFI to a single point: its start, or its end when `toEnd`
// is truthy. Strings are parsed, collapsed and serialized again; parsed
// input that is not a range is returned as is.
export const collapse = (x, toEnd) => {
    if (typeof x === 'string') return toString(collapse(parse(x), toEnd))
    if (!x.parent) return x
    const tail = toEnd ? x.end : x.start
    return concatArrays(x.parent, tail)
}
// create range CFI from two CFIs (strings or parsed); `from` is reduced to
// its start point and `to` to its end point before they are combined
const buildRange = (from, to) => {
    const parsedFrom = typeof from === 'string' ? parse(from) : from
    const parsedTo = typeof to === 'string' ? parse(to) : to
    const startPaths = collapse(parsedFrom)
    const endPaths = collapse(parsedTo, true)
    // ranges across multiple documents are not allowed; handle local paths only
    const localFrom = startPaths.at(-1)
    const localTo = endPaths.at(-1)
    const localParent = []
    const localStart = []
    const localEnd = []
    // stays true while both paths still agree and carry no offset
    let shared = true
    const len = Math.max(localFrom.length, localTo.length)
    for (let i = 0; i < len; i++) {
        const a = localFrom[i]
        const b = localTo[i]
        if (shared) shared = a?.index === b?.index && !a?.offset && !b?.offset
        if (shared) {
            localParent.push(a)
            continue
        }
        if (a) localStart.push(a)
        if (b) localEnd.push(b)
    }
    // copy non-local paths from `from`
    const parent = [...startPaths.slice(0, -1), localParent]
    return toString({ parent, start: [localStart], end: [localEnd] })
}
// Compare two CFIs (strings or parsed): returns -1, 0 or 1.
// If either one is a range, the start points are compared first and the end
// points break a tie. Paths are compared step by step on the step index; a
// path that runs out of steps first sorts first. Character offsets are only
// looked at on the last step of the longer path.
export const compare = (a, b) => {
    const left = typeof a === 'string' ? parse(a) : a
    const right = typeof b === 'string' ? parse(b) : b
    if (left.start || right.start) {
        const byStart = compare(collapse(left), collapse(right))
        if (byStart) return byStart
        return compare(collapse(left, true), collapse(right, true))
    }
    const pathCount = Math.max(left.length, right.length)
    for (let pathIndex = 0; pathIndex < pathCount; pathIndex++) {
        const p = left[pathIndex] ?? []
        const q = right[pathIndex] ?? []
        const lastStep = Math.max(p.length, q.length) - 1
        for (let step = 0; step <= lastStep; step++) {
            const x = p[step]
            const y = q[step]
            if (!x) return -1
            if (!y) return 1
            if (x.index > y.index) return 1
            if (x.index < y.index) return -1
            if (step === lastStep) {
                // TODO: compare temporal & spatial offsets
                if (x.offset > y.offset) return 1
                if (x.offset < y.offset) return -1
            }
        }
    }
    return 0
}
// character data: node type 3 (text) or 4 (CDATA section)
const isTextNode = node => {
    const { nodeType } = node
    return nodeType === 3 || nodeType === 4
}
// element: node type 1
const isElementNode = node => {
    const { nodeType } = node
    return nodeType === 1
}
// List the child nodes of `node` that count for CFI purposes.
// `filter`, if given, is called with each child and answers with a
// `NodeFilter` constant: rejected children are dropped, skipped children are
// replaced by their own (filtered) children, all others are kept.
const getChildNodes = (node, filter) => {
    // "content other than element and character data is ignored"
    const relevant = Array.from(node.childNodes)
        .filter(child => isTextNode(child) || isElementNode(child))
    if (!filter) return relevant
    return relevant.flatMap(child => {
        const verdict = filter(child)
        if (verdict === NodeFilter.FILTER_REJECT) return []
        if (verdict === NodeFilter.FILTER_SKIP) return getChildNodes(child, filter)
        return [child]
    })
}
// child nodes are organized such that the result is always
// [element, text, element, text, ..., element],
// regardless of the actual structure in the document;
// so multiple text nodes need to be combined, and nonexistent ones counted;
// see "Step Reference to Child Element or Character Data (/)" in EPUB CFI spec
//
// Entries of the returned array, by CFI step index:
//   - an element node
//   - a single text node, or an array of adjacent text nodes forming one chunk
//   - null for an empty chunk between two adjacent elements
//   - 'first' / 'last' for the empty chunk before the first / after the
//     last child element
//   - 'before' (index 0) and 'after' (last index) as virtual positions
// When there are no child nodes at all, the result is ['before', 'after'].
const indexChildNodes = (node, filter) => {
    const nodes = getChildNodes(node, filter)
        .reduce((arr, node) => {
            let last = arr[arr.length - 1]
            if (!last) arr.push(node)
            // "there is one chunk between each pair of child elements"
            else if (isTextNode(node)) {
                if (Array.isArray(last)) last.push(node)
                else if (isTextNode(last)) arr[arr.length - 1] = [last, node]
                else arr.push(node)
            } else {
                if (isElementNode(last)) arr.push(null, node)
                else arr.push(node)
            }
            return arr
        }, [])
    // with no children there is nothing to inspect; looking at nodes[0]
    // would hand `undefined` to isElementNode, which cannot handle it
    if (nodes.length > 0) {
        // "the first chunk is located before the first child element"
        if (isElementNode(nodes[0])) nodes.unshift('first')
        // "the last chunk is located after the last child element"
        if (isElementNode(nodes[nodes.length - 1])) nodes.push('last')
    }
    // "'virtual' elements"
    nodes.unshift('before') // "0 is a valid index"
    nodes.push('after') // "n+2 is a valid index"
    return nodes
}
// Resolve the parts of one path to a position in the document, starting
// at `node`.
//
// Returns one of:
//   { node, offset }       the node reached and the character offset of the
//                          last part (undefined when that part has none and
//                          the node is not a multi-node text chunk)
//   { node }               for the empty chunks 'first' / 'last'
//   { node, before: true } / { node, after: true } for the virtual positions
// If the last part carries an ID and the document has an element with that
// ID, that element is returned right away with offset 0.
// Returns undefined when the offset lies beyond the end of a text chunk made
// of several text nodes.
const partsToNode = (node, parts, filter) => {
    const { id } = parts[parts.length - 1]
    if (id) {
        const el = node.ownerDocument.getElementById(id)
        if (el) return { node: el, offset: 0 }
    }
    let current = node
    for (const { index } of parts) {
        const newNode = current ? indexChildNodes(current, filter)[index] : null
        // handle non-existent nodes
        if (newNode === 'first') return { node: current.firstChild ?? current }
        if (newNode === 'last') return { node: current.lastChild ?? current }
        if (newNode === 'before') return { node: current, before: true }
        if (newNode === 'after') return { node: current, after: true }
        current = newNode
    }
    const { offset } = parts[parts.length - 1]
    if (!Array.isArray(current)) return { node: current, offset }
    // get underlying text node and offset from the chunk;
    // without an explicit offset, point at the very start of the chunk
    // (comparing against undefined would never match any text node)
    const target = offset ?? 0
    let sum = 0
    for (const n of current) {
        const { length } = n.nodeValue
        if (sum + length >= target) return { node: n, offset: target - sum }
        sum += length
    }
}
// Build the list of parts leading to `node`, outermost ancestor first.
// Walks up through the parents until a child of the document element has been
// recorded. Each part is `{ id, index, offset }`; `offset` is the given
// `offset` for `node` itself and null for its ancestors.
const nodeToParts = (node, offset, filter) => {
    const parts = []
    let current = node
    let currentOffset = offset
    for (;;) {
        const { parentNode, id } = current
        const indexed = indexChildNodes(parentNode, filter)
        const target = current
        const index = indexed.findIndex(entry =>
            Array.isArray(entry) ? entry.some(n => n === target) : entry === target)
        // adjust offset as if merging the text nodes in the chunk
        const chunk = indexed[index]
        if (Array.isArray(chunk)) {
            let sum = 0
            for (const sibling of chunk) {
                if (sibling === current) {
                    sum += currentOffset
                    break
                }
                sum += sibling.nodeValue.length
            }
            currentOffset = sum
        }
        parts.unshift({ id, index, offset: currentOffset })
        if (parentNode === current.ownerDocument.documentElement) break
        current = parentNode
        currentOffset = null
    }
    // remove ignored nodes
    return parts.filter(part => part.index !== -1)
}
// Create a CFI string for a DOM range: a plain CFI if the range is
// collapsed, a range CFI otherwise. `filter` is handed on to `nodeToParts`.
export const fromRange = (range, filter) => {
    const start = nodeToParts(range.startContainer, range.startOffset, filter)
    if (range.collapsed) return toString([start])
    const end = nodeToParts(range.endContainer, range.endOffset, filter)
    return buildRange([start], [end])
}
// Create a DOM range in `doc` from a parsed CFI.
// The start and end points are obtained with `collapse`; only the first path
// of each is resolved, starting from the document element.
export const toRange = (doc, parts, filter) => {
    const [startPath] = collapse(parts)
    const [endPath] = collapse(parts, true)
    const root = doc.documentElement
    const from = partsToNode(root, startPath, filter)
    const to = partsToNode(root, endPath, filter)
    const range = doc.createRange()

    if (from.before) {
        range.setStartBefore(from.node)
    } else if (from.after) {
        range.setStartAfter(from.node)
    } else {
        range.setStart(from.node, from.offset)
    }

    if (to.before) {
        range.setEndBefore(to.node)
    } else if (to.after) {
        range.setEndAfter(to.node)
    } else {
        range.setEnd(to.node, to.offset)
    }
    return range
}
// faster way of getting CFIs for sorted elements in a single parent
//
// `elements` must all be children of the parent of the first element and be
// listed in document order; elements that are out of order are left out of
// the result. Returns one CFI string per matched element. An empty list
// yields an empty result.
export const fromElements = elements => {
    const results = []
    // nothing to look up; also avoids reading the parent of a missing element
    if (elements.length === 0) return results
    const { parentNode } = elements[0]
    // the path to the parent is shared by all elements, so compute it once
    const parts = nodeToParts(parentNode)
    for (const [index, node] of indexChildNodes(parentNode).entries()) {
        // the next element that is still waiting for its CFI
        const el = elements[results.length]
        if (node === el)
            results.push(toString([parts.concat({ id: el.id, index })]))
    }
    return results
}
// Resolve parsed parts to a node in `doc`, starting from the document element.
// The parts go through `collapse` and are then handed to `partsToNode` as a
// whole. NOTE(review): this looks like it expects a single path (a list of
// parts), not a list of paths; confirm against the callers.
export const toElement = (doc, parts) => {
    const target = partsToNode(doc.documentElement, collapse(parts))
    return target.node
}
// turn indices into standard CFIs when you don't have an actual package document
export const fake = {
    // zero-based index -> `epubcfi(/6/n)` with n = (index + 1) * 2
    fromIndex: index => {
        const step = (index + 1) * 2
        return wrap(`/6/${step}`)
    },
    // inverse of `fromIndex`, reading the step index of the last part
    toIndex: parts => {
        const step = parts?.at(-1).index
        return step / 2 - 1
    },
}
// get CFI from Calibre bookmarks
// see https://github.com/johnfactotum/foliate/issues/849
//
// The first part of the parsed position is placed after a `/6` step, the
// second part is discarded, and the remaining parts form the path after
// the indirection.
export const fromCalibrePos = pos => {
    const [parts] = parse(pos)
    // take the first two parts off the front; only the first one is used
    const [item] = parts.splice(0, 2)
    return toString([[{ index: 6 }, item], parts])
}
// Build a range CFI from a Calibre highlight.
// The spine index becomes a fake package-document CFI followed by `!`; both
// highlight CFIs lose their first two characters and are appended to it.
export const fromCalibreHighlight = ({ spine_index, start_cfi, end_cfi }) => {
    const prefix = fake.fromIndex(spine_index) + '!'
    const [from, to] = [start_cfi, end_cfi].map(cfi => prefix + cfi.slice(2))
    return buildRange(from, to)
}