UNPKG

hyper-readings

Version:

a tool for making and sharing p2p distributed reading lists

149 lines (137 loc) 4.62 kB
import parse5 from 'parse5'
import { /* attr, */ textContent } from './parse5Helpers'
import arrayToTree from '../array-to-tree'

/**
 * Build a mapper that creates an hr node of the given type.
 *
 * @param {string} type - type name passed to `hr.createNode`.
 * @returns {function(object, object): Promise<{node: object}>} async mapper
 *   `(hr, node)` resolving to `{ node }`, where `node` is the created hr node.
 */
function createTypeDescriptionStatement (type) {
  return async (hr, node) => {
    const n = await hr.createNode(type)
    return { node: n }
  }
}

/**
 * Build a mapper that creates an hr node of the given type and stores the
 * text content of the html node under `c4o:hasContent`.
 *
 * The result is flagged `terminal`, so the walker does not descend into the
 * html node's children.
 *
 * @param {string} type - type name passed to `hr.createNode`.
 * @returns {function(object, object): Promise<{node: object, terminal: boolean}>}
 */
function createTypeDescriptionStatementWithTextContent (type) {
  return async (hr, node) => {
    const n = await hr.createNode(type, { 'c4o:hasContent': textContent(node) })
    return { node: n, terminal: true }
  }
}

const describeParagraph = createTypeDescriptionStatementWithTextContent('doco:Paragraph')
const describeTitle = createTypeDescriptionStatementWithTextContent('doco:Title')
const describeSection = createTypeDescriptionStatement('doco:Section')
// var describeInlineElement = createTypeDescriptionStatement('po:Inline')
const describeHTML = createTypeDescriptionStatement('hr:root')
const describeBody = createTypeDescriptionStatement('hr:body')
const describeHead = createTypeDescriptionStatement('hr:head')

// TODO: bring this back, but as annotations on text sections
// async function describeLink (hr, node) {
//   const href = attr(node, 'href')
//   if (!href) return
//   const identifer = await hr.createNode('datacite:AlternateResourceIdentifier')
//   await identifer.set('cito:usesIdentifierScheme', 'datacite:url')
//   await identifer.set('rdf:value', href.value)
//   const linkNode = await hr.createNode('po:Inline')
//   await linkNode.set('cito:hasIdentifier', identifer)
//   return linkNode
// }

// async function describeText (hr, node) {
//   const text = await hr.createNode('doco:TextChunk')
//   await text.set('rdf:value', node.value)
//   return text
// }

// async function describeSpan (hr, node) {
//   switch (attr(node, 'data-type')) {
//     case 'comment': {
//       const text = attr(node, 'data-comment')
//       const comment = await hr.createNode('fabio:Comment')
//       await comment.set('c4o:hasContent', text)
//       return comment
//     }
//     default: {
//       return hr.createNode('po:Inline')
//     }
//   }
// }

/**
 * Tell whether a parsed node is a childless node whose `value` is made up of
 * whitespace only.
 *
 * @param {object} node - parsed html node.
 * @returns {boolean} true when the node has no `childNodes`, has a non-empty
 *   `value`, and that value contains nothing but whitespace.
 */
function isEmptyNode (node) {
  return !node.childNodes && Boolean(node.value) && /^\s*$/.test(node.value)
}

// TYPES
const htmlNodeMappings = {
  'html': describeHTML,
  'head': describeHead,
  'body': describeBody,
  'section': describeSection,
  'p': describeParagraph,
  'h1': describeTitle,
  'h2': describeTitle,
  'h3': describeTitle,
  'h4': describeTitle,
  'h5': describeTitle,
  'h6': describeTitle
  // '#text': describeText,
  // TODO: implement inline elements as annotations on content
  // 'em': describeInlineElement,
  // 'i': describeInlineElement,
  // 'strong': describeInlineElement,
  // 'b': describeInlineElement,
  // 'a': describeLink,
  // 'span': describeSpan
}

// Fallback mapper for node names without an entry in htmlNodeMappings.
// It resolves to `{ node }` like every other mapper, because the walker
// destructures `node` / `terminal` from the mapper result.
// TODO: make this inherited from parent / inline or block
const defaultMapping = createTypeDescriptionStatement('po:Block')

/**
 * Look up the mapper for an html node name.
 *
 * Only own properties of htmlNodeMappings are considered, so a tag named
 * e.g. `constructor` or `toString` falls through to the default mapper
 * instead of resolving to an inherited Object.prototype member.
 *
 * @param {string} nodeName - `nodeName` of the parsed html node.
 * @returns {function} mapper `(hr, node) => Promise<{node, terminal?}>`.
 */
function mapperFor (nodeName) {
  return Object.prototype.hasOwnProperty.call(htmlNodeMappings, nodeName)
    ? htmlNodeMappings[nodeName]
    : defaultMapping
}

const headerRegex = /^h(\d+)$/

// Returns the digits of a heading node name ('h2' -> '2'), or null when the
// node name is not of the form h<digits>.
const headingSplitter = (value) => {
  const match = value.nodeName.match(headerRegex)
  return match && match[1]
}

// Wraps an array of nodes in a synthetic section node; `inferred: true` marks
// it so the walker does not run section inference on its children again.
const sectionTransform = (array) => ({
  nodeName: 'section',
  tagName: 'section',
  childNodes: array,
  inferred: true
})

/**
 * Regroup sibling nodes through `arrayToTree`, using heading node names as
 * the split key and synthetic section nodes as containers.
 * NOTE(review): presumably nests content under its preceding heading by
 * heading level - confirm against ../array-to-tree.
 *
 * @param {object[]} nodes - sibling html nodes.
 * @returns {object[]} whatever `arrayToTree` returns for these arguments.
 */
function addInferredSections (nodes) {
  return arrayToTree(nodes, headingSplitter, sectionTransform)
}

/**
 * Parse an html string and mirror its element structure into `hr` nodes.
 *
 * Nodes are processed from a FIFO queue: each processed node appends its
 * children to the end of the queue, so the document is visited level by level.
 *
 * @param {object} hr - target object; must provide an async `createNode`.
 * @param {string} html - html source to import.
 * @returns {Promise<object>} resolves to the same `hr` once the queue is empty.
 */
export default async function htmlImporter (hr, html) {
  const doc = parse5.parse(html)
  const queue = []

  /**
   * Pair every non-empty child with the hr node that will become its parent.
   */
  function nextNodes (parent, nodes) {
    return nodes
      .filter(node => !isEmptyNode(node))
      .map(node => ({ node, context: { parent } }))
  }

  /**
   * Create the hr node for one html node, attach it to its parent (if any)
   * and enqueue the html node's children.
   */
  async function walk (node, context) {
    if (isEmptyNode(node)) return

    // process individual node
    const mapper = mapperFor(node.nodeName)
    const { node: hrNode, terminal } = await mapper(hr, node)

    if (context && context.parent && context.parent.insertNode) {
      await context.parent.insertNode(hrNode)
    }
    if (terminal) return

    // Leaf nodes (no childNodes property, or an empty list) have nothing to
    // enqueue. This must be `||`: with `&&` a missing childNodes threw a
    // TypeError and an empty list was never short-circuited.
    if (!node.childNodes || node.childNodes.length === 0) return

    if (!node.inferred) node.childNodes = addInferredSections(node.childNodes)

    // add all children to the queue to be processed next
    queue.push(...nextNodes(hrNode, node.childNodes))
  }

  // Start from the <html> element. The first child of the parsed document can
  // be a doctype node instead, so look the element up by name and only fall
  // back to the first child when no html element is found.
  const root = doc.childNodes.find(child => child.nodeName === 'html') || doc.childNodes[0]
  await walk(root)

  while (queue.length) {
    const data = queue.shift()
    await walk(data.node, data.context)
  }

  return hr
}