UNPKG

alto-xml

Version:

Utilities for accessing ALTO XML documents

150 lines (116 loc) 2.81 kB
import { parse, serialize } from './dom.js'
import { intersects } from './util.js'

// Registry mapping each backing DOM node to its single wrapper instance.
// A WeakMap is used so wrappers do not keep detached nodes alive.
const WM = new WeakMap()

/**
 * Base wrapper around a DOM node of an ALTO XML document.
 *
 * Exactly one wrapper may exist per node; the constructor enforces this
 * and `AltoElement.for` is the way to obtain-or-create a wrapper.
 */
export class AltoElement {
  /**
   * Parses an XML string and wraps the result.
   * @param {string} string - serialized XML
   * @returns {AltoElement} a new instance of the class it was called on
   */
  static parse(string) {
    return new this(parse(string))
  }

  /**
   * Returns the wrapper already registered for `node`, or creates one
   * using the class this method was called on.
   * @param {Node|null|undefined} node
   * @returns {AltoElement|null} null when `node` is null or undefined
   */
  static for(node) {
    if (node == null) return null
    return WM.get(node) ?? new this(node)
  }

  /**
   * @param {Node} node - backing DOM node
   * @throws {Error} if `node` is missing or already has a wrapper
   */
  constructor(node) {
    if (node == null) {
      throw new Error('backing xml node missing')
    }
    if (WM.has(node)) {
      throw new Error('element for node already exists')
    }

    WM.set(node, this)
    this.node = node
  }

  /** Wrapper registered for the parent element, or undefined if none. */
  get parent() {
    return WM.get(this.node.parentElement)
  }

  /** Wrapper registered for the owner document, or undefined if none. */
  get document() {
    return WM.get(this.node.ownerDocument)
  }

  /** Forwards to `querySelector` on the backing node. */
  get(...args) {
    return this.node.querySelector(...args)
  }

  /** Forwards to `querySelectorAll` on the backing node. */
  query(...args) {
    return this.node.querySelectorAll(...args)
  }

  /** Returns the raw value of the attribute `name` on the backing node. */
  attr(name) {
    return this.node.getAttribute(name)
  }

  /** Serializes the backing node. */
  toString() {
    return serialize(this.node)
  }
}

/**
 * An element carrying the HPOS / VPOS / WIDTH / HEIGHT geometry attributes.
 */
export class LayoutElement extends AltoElement {
  /** Scale taken from the document wrapper, if one is registered. */
  get scale() {
    return this.document?.scale
  }

  get WIDTH() {
    return parseFloat(this.attr('WIDTH'))
  }

  get HEIGHT() {
    return parseFloat(this.attr('HEIGHT'))
  }

  get HPOS() {
    return parseFloat(this.attr('HPOS'))
  }

  get VPOS() {
    return parseFloat(this.attr('VPOS'))
  }

  /**
   * Computes the element's rectangle, multiplied by `scale`.
   * @param {{x?: number, y?: number}} [scale] - defaults to `this.scale`;
   *   a missing factor is treated as 1
   * @returns {{x: number, y: number, width: number, height: number}|null}
   *   null when any of the four geometry attributes is not a number
   */
  bounds(scale = this.scale) {
    const { HPOS, VPOS, WIDTH, HEIGHT } = this

    // A single NaN operand makes the sum NaN, so one check covers all four.
    if (Number.isNaN(HPOS + VPOS + WIDTH + HEIGHT)) return null

    const x = scale?.x ?? 1
    const y = scale?.y ?? 1

    return {
      x: HPOS * x,
      y: VPOS * y,
      width: WIDTH * x,
      height: HEIGHT * y
    }
  }

  /**
   * Tests whether this element's bounds intersect `rect`.
   * @param {object|boolean|null|undefined} rect - a falsy value never
   *   intersects; `true` always intersects
   * @param {...*} args - forwarded to `bounds()`
   * @returns {boolean|*} false when `rect` is falsy or the element has no
   *   usable bounds, otherwise the result of the imported `intersects`
   */
  intersects(rect, ...args) {
    if (!rect) return false
    if (rect === true) return true

    // bounds() yields null for elements without complete geometry; such an
    // element cannot intersect anything, so do not hand null to the helper.
    const bounds = this.bounds(...args)
    if (bounds == null) return false

    return intersects(rect, bounds)
  }
}

export class Page extends LayoutElement {
}

/** A TextBlock element; iterating it yields its TextLine children. */
export class TextBlock extends LayoutElement {
  * lines() {
    for (const node of this.query(':scope > TextLine')) {
      yield TextLine.for(node)
    }
  }

  [Symbol.iterator]() {
    return this.lines()
  }
}

/** A TextLine element; iterating it yields its String children. */
export class TextLine extends LayoutElement {
  /**
   * @returns {TextLine|null} the immediately preceding sibling if it is a
   *   TextLine element, otherwise null
   */
  previous() {
    const sibling = this.node.previousElementSibling
    return (sibling?.nodeName.toLowerCase() === 'textline')
      ? TextLine.for(sibling)
      : null
  }

  /**
   * @returns {TextLine|null} the immediately following sibling if it is a
   *   TextLine element, otherwise null
   */
  next() {
    const sibling = this.node.nextElementSibling
    return (sibling?.nodeName.toLowerCase() === 'textline')
      ? TextLine.for(sibling)
      : null
  }

  /**
   * @returns {String|null} the first String child of this line, or null if
   *   the line has no String children
   */
  first() {
    // querySelector returns the first match in document order. Unlike
    // `String:first-child`, this still finds the String when another
    // element (e.g. SP) precedes it.
    return String.for(this.get(':scope > String'))
  }

  /**
   * @returns {String|null} the last String child of this line, or null if
   *   the line has no String children
   */
  last() {
    // `String:last-child` would miss the String whenever the line ends with
    // a different element (e.g. a trailing HYP), so pick the last match.
    const nodes = [...this.query(':scope > String')]
    return String.for(nodes[nodes.length - 1])
  }

  * strings() {
    for (const node of this.query(':scope > String')) {
      yield String.for(node)
    }
  }

  [Symbol.iterator]() {
    return this.strings()
  }
}

/**
 * A String element. Note that within this module the name shadows the
 * built-in global `String`.
 */
export class String extends LayoutElement {
  get CONTENT() {
    return this.attr('CONTENT')
  }

  /** @returns {string|null} the value of the CONTENT attribute */
  toPlainText() {
    return this.CONTENT
  }
}