UNPKG

pomljs

Version:

Prompt Orchestration Markup Language

1 lines 112 kB
{"version":3,"file":"writer.cjs","sources":["../.build/writer.js"],"sourcesContent":["import * as cheerio from 'cheerio';\nimport * as xmlbuilder from 'xmlbuilder2';\nimport { ErrorCollection, SystemError, ValidSpeakers, WriteError, richContentFromSourceMap } from './base';\nimport yaml from 'js-yaml';\n// Use the special character to indicate a placeholder for multimedia.\nconst SPECIAL_CHARACTER = 'À';\nclass Writer {\n ir = '';\n options;\n constructor(ir, options) {\n if (ir) {\n this.reset(ir);\n }\n this.options = this.initializeOptions(options);\n }\n initializeOptions(options) {\n return options || {};\n }\n reset(ir) {\n this.ir = ir;\n }\n createMappingNode(element, outputLength) {\n const parseAttrAsInt = (attrName) => {\n const attrValue = element.attr(attrName);\n return attrValue !== undefined && !isNaN(parseInt(attrValue, 10))\n ? parseInt(attrValue, 10)\n : undefined;\n };\n return {\n originalStart: parseAttrAsInt('original-start-index'),\n originalEnd: parseAttrAsInt('original-end-index'),\n inputStart: element[0].startIndex,\n inputEnd: element[0].endIndex,\n outputStart: 0,\n outputEnd: outputLength - 1\n };\n }\n /**\n * Add an offset to mapping nodes.\n *\n * @param mappings - Original mappings.\n * @param indent - The offset amount.\n * @param ignoreBefore - Ignore the mappings before this index.\n * @returns - The new mappings.\n */\n indentMappings(mappings, indent, ignoreBefore) {\n return mappings.map(mapping => {\n return {\n ...mapping,\n outputStart: mapping.outputStart >= ignoreBefore ? mapping.outputStart + indent : mapping.outputStart,\n outputEnd: mapping.outputStart >= ignoreBefore ? mapping.outputEnd + indent : mapping.outputEnd\n };\n });\n }\n indentMultiMedia(multimedia, indent, ignoreBefore) {\n return multimedia.map(media => {\n return {\n ...media,\n index: media.index >= ignoreBefore ? media.index + indent : media.index\n };\n });\n }\n raiseError(message, element) {\n const parseAttrAsInt = (attrName) => {\n const attrValue = element.attr(attrName);\n return attrValue !== undefined && !isNaN(parseInt(attrValue, 10))\n ? parseInt(attrValue, 10)\n : undefined;\n };\n const emptyOutput = {\n output: '',\n multimedia: [],\n mappings: []\n };\n if (element.length === 0) {\n // Ignore the error if the element is not even ready\n return emptyOutput;\n }\n ErrorCollection.add(new WriteError(message, parseAttrAsInt('original-start-index'), parseAttrAsInt('original-end-index'), element[0].sourcePath, element[0].startIndex, element[0].endIndex, this.ir));\n return emptyOutput;\n }\n writeElementTree(element, $) {\n throw new SystemError('Method not implemented.');\n }\n /**\n * Convert an IR string into {@link RichContent} without exposing mapping information.\n *\n * The method delegates to {@link writeWithSourceMap} and then collapses the\n * returned segments back into a single rich content value.\n */\n write(ir) {\n const segments = this.writeWithSourceMap(ir);\n return richContentFromSourceMap(segments);\n }\n /**\n * Convert an IR string into an array of speaker messages.\n *\n * It internally uses {@link writeMessagesWithSourceMap} and removes the\n * mapping information from each message.\n */\n writeMessages(ir) {\n const messages = this.writeMessagesWithSourceMap(ir);\n return messages.map(m => ({\n speaker: m.speaker,\n content: richContentFromSourceMap(m.content)\n }));\n }\n assignSpeakers(result, $) {\n const speakers = [];\n let defaultSpeaker = 'system';\n let systemSpeakerSpecified = false;\n const segments = [];\n const querySegmentFromMapping = (startIndex, endIndex) => {\n return result.mappings.find(segment => segment.inputStart === startIndex && segment.inputEnd === endIndex);\n };\n const getSpecifiedSpeaker = (element) => {\n const speaker = element.attr('speaker');\n if (speaker && !ValidSpeakers.includes(speaker)) {\n this.raiseError(`\"${speaker}\" is not a valid speaker.`, element);\n return undefined;\n }\n return speaker;\n };\n const assignSpeakerForElement = (element, inheritedSpeaker) => {\n let specifiedSpeaker = getSpecifiedSpeaker(element);\n if (specifiedSpeaker === 'system') {\n systemSpeakerSpecified = true;\n }\n // When human has appeared, the default speaker becomes human.\n if (specifiedSpeaker == 'human' && defaultSpeaker == 'system') {\n defaultSpeaker = 'human';\n }\n if (element.length === 0) {\n return;\n }\n const segment = querySegmentFromMapping(element[0].startIndex, element[0].endIndex);\n if (specifiedSpeaker && !segment) {\n console.warn(`Speaker is specified but no exact corresponding output can be found in ${element.html()}`);\n }\n const speaker = specifiedSpeaker || inheritedSpeaker || defaultSpeaker;\n if (segment) {\n segments.push({ start: segment.outputStart, end: segment.outputEnd, speaker });\n }\n if (specifiedSpeaker) {\n inheritedSpeaker = specifiedSpeaker;\n }\n element.children().each((_, child) => {\n const speaker = getSpecifiedSpeaker($(child));\n if (speaker) {\n inheritedSpeaker = speaker;\n }\n assignSpeakerForElement($(child), inheritedSpeaker);\n });\n };\n assignSpeakerForElement(this.getRoot($), undefined);\n const allIndicesSet = new Set();\n segments.forEach(segment => {\n allIndicesSet.add(segment.start);\n allIndicesSet.add(segment.end);\n });\n const essentialIndices = Array.from(allIndicesSet).sort((a, b) => a - b);\n const colorSpeakers = new Array(essentialIndices.length).fill('system');\n segments.forEach(segment => {\n const startIndex = essentialIndices.findIndex(index => index == segment.start);\n const endIndex = essentialIndices.findIndex(index => index == segment.end);\n for (let i = startIndex; i <= endIndex; i++) {\n colorSpeakers[i] = segment.speaker;\n }\n });\n let currentStart = undefined;\n for (let i = 0; i < essentialIndices.length; i++) {\n const speaker = colorSpeakers[i];\n if (i === 0 || (i > 0 && speaker !== colorSpeakers[i - 1])) {\n currentStart = essentialIndices[i];\n }\n if (i === essentialIndices.length - 1 ||\n (i < essentialIndices.length - 1 && speaker !== colorSpeakers[i + 1])) {\n // time to end this segment\n if (currentStart === undefined) {\n throw new SystemError('currentStart is not expected to be undefined');\n }\n speakers.push({ start: currentStart, end: essentialIndices[i], speaker: speaker });\n }\n }\n // If there's only one speaker and it's system, change it to human.\n if (speakers.length == 1 && speakers[0].speaker == 'system' && !systemSpeakerSpecified) {\n speakers[0].speaker = 'human';\n }\n return speakers;\n }\n /**\n * Render the IR string and return detailed mapping for each produced content\n * segment.\n *\n * Each returned {@link SourceMapRichContent} describes the slice of the input\n * IR that generated the piece of output.\n */\n writeWithSourceMap(ir) {\n const result = this.generateWriterResult(ir);\n const segments = this.buildSourceMap(result);\n return segments.map(s => ({ startIndex: s.inputStart, endIndex: s.inputEnd, irStartIndex: s.irStart, irEndIndex: s.irEnd, content: s.content }));\n }\n /**\n * Similar to {@link writeWithSourceMap} but groups the segments into speaker\n * messages.\n */\n writeMessagesWithSourceMap(ir) {\n const result = this.generateWriterResult(ir);\n const segments = this.buildSourceMap(result);\n return result.speakers.map(sp => {\n const msgSegs = segments.filter(seg => seg.outStart >= sp.start && seg.outEnd <= sp.end);\n const nonWs = msgSegs.filter(seg => !(typeof seg.content === 'string' && seg.content.trim() === ''));\n // Use only non-whitespace segments when computing the overall source range\n // for this message so that trailing or leading padding does not expand the\n // reported span. If the message contains nothing but whitespace we fall\n // back to considering all segments.\n const relevant = nonWs.length ? nonWs : msgSegs;\n if (!relevant.length) {\n // If there are no relevant segments, we cannot produce an empty message.\n return {\n startIndex: 0, // in this case, we cannot determine the start index\n endIndex: 0,\n irStartIndex: 0,\n irEndIndex: 0,\n speaker: sp.speaker,\n content: []\n };\n }\n return {\n startIndex: Math.min(...relevant.map(seg => seg.inputStart)),\n endIndex: Math.max(...relevant.map(seg => seg.inputEnd)),\n irStartIndex: Math.min(...relevant.map(seg => seg.irStart)),\n irEndIndex: Math.max(...relevant.map(seg => seg.irEnd)),\n speaker: sp.speaker,\n content: msgSegs.map(seg => ({\n startIndex: seg.inputStart, endIndex: seg.inputEnd, irStartIndex: seg.irStart, irEndIndex: seg.irEnd, content: seg.content\n }))\n };\n }).filter(msg => msg !== undefined);\n }\n /**\n * Transform a {@link WriterResult} into discrete source map segments.\n *\n * The segments are ordered so that rich content can be reconstructed in\n * the correct visual order while preserving multimedia positioning.\n */\n buildSourceMap(result) {\n // Collect every boundary within the output that could signify a change in\n // source location. These come from the input/output mappings as well as\n // multimedia positions. Splitting the output on these boundaries ensures\n // each segment corresponds to a single source range.\n const boundaries = new Set();\n result.mappings.forEach(m => {\n boundaries.add(m.outputStart);\n boundaries.add(m.outputEnd + 1);\n });\n result.multimedia.forEach(m => {\n boundaries.add(m.index);\n boundaries.add(m.index + 1);\n });\n boundaries.add(0);\n boundaries.add(result.output.length);\n const points = Array.from(boundaries).sort((a, b) => a - b);\n // `top` multimedia should appear before all textual content while `bottom`\n // multimedia should come last. We therefore keep three buckets and merge\n // them at the end.\n const topSegments = [];\n const middleSegments = [];\n const bottomSegments = [];\n const originalStartIndices = result.mappings.map(m => m.originalStart).filter(m => m !== undefined);\n const sourceStartIndex = originalStartIndices.length > 0 ? Math.min(...originalStartIndices) : 0;\n const originalEndIndices = result.mappings.map(m => m.originalEnd).filter(m => m !== undefined);\n const sourceEndIndex = originalEndIndices.length > 0 ? Math.max(...originalEndIndices) : 0;\n for (let i = 0; i < points.length - 1; i++) {\n const start = points[i];\n const end = points[i + 1];\n if (start >= end) {\n continue;\n }\n const slice = result.output.slice(start, end);\n // Find the most specific mapping that covers this slice. This allows the\n // resulting segment to map back to the tightest IR range responsible for\n // the output.\n let chosen;\n // The chosen IR might not have a precise original start or end index, so we\n // choose a fallback based on the original mappings.\n let chosenOriginal;\n for (const m of result.mappings) {\n if (start >= m.outputStart && end - 1 <= m.outputEnd) {\n if (!chosen || m.outputEnd - m.outputStart < chosen.outputEnd - chosen.outputStart) {\n chosen = m;\n }\n if ((m.originalStart !== undefined && m.originalEnd !== undefined) && (!chosenOriginal || m.originalEnd - m.originalStart < chosenOriginal.originalEnd - chosenOriginal.originalStart)) {\n chosenOriginal = m;\n }\n }\n }\n if (!chosen) {\n // Mappings must be non-empty here because the points are derived from the\n // mappings. If we cannot find a mapping, use the first one as a fallback.\n chosen = result.mappings[0];\n }\n // If a multimedia item starts at this boundary, emit it instead of text.\n const media = result.multimedia.find(m => m.index === start);\n if (media) {\n const { position, index, ...rest } = media;\n const segment = {\n outStart: start,\n outEnd: end - 1,\n irStart: chosen.inputStart,\n irEnd: chosen.inputEnd,\n inputStart: chosenOriginal?.originalStart ?? sourceStartIndex,\n inputEnd: chosenOriginal?.originalEnd ?? sourceEndIndex,\n content: [rest]\n };\n if (position === 'top') {\n topSegments.push(segment);\n }\n else if (position === 'bottom') {\n bottomSegments.push(segment);\n }\n else {\n middleSegments.push(segment);\n }\n }\n else if (slice !== SPECIAL_CHARACTER && slice.length > 0) {\n // Normal textual slice.\n middleSegments.push({\n outStart: start,\n outEnd: end - 1,\n irStart: chosen.inputStart,\n irEnd: chosen.inputEnd,\n inputStart: chosenOriginal?.originalStart ?? sourceStartIndex,\n inputEnd: chosenOriginal?.originalEnd ?? sourceEndIndex,\n content: slice\n });\n }\n }\n middleSegments.sort((a, b) => a.outStart - b.outStart);\n // Order the buckets so that `top` items are emitted before any textual\n // content and `bottom` items are emitted last. When filtering these\n // segments by speaker boundaries, each top or bottom item still appears\n // within the correct message.\n return [...topSegments, ...middleSegments, ...bottomSegments];\n }\n /**\n * Execute the main writing logic and gather mapping, multimedia and speaker\n * information before it is broken down into smaller segments.\n */\n generateWriterResult(ir) {\n this.reset(ir);\n const $ = cheerio.load(ir, {\n scriptingEnabled: false,\n xml: { xmlMode: true, withStartIndices: true, withEndIndices: true }\n }, false);\n const partialResult = this.writeElementTree(this.getRoot($), $);\n return {\n input: ir,\n output: partialResult.output,\n mappings: partialResult.mappings,\n multimedia: partialResult.multimedia,\n speakers: this.assignSpeakers(partialResult, $)\n };\n }\n getRoot($) {\n return $($.root().children()[0]);\n }\n}\nexport class EnvironmentDispatcher extends Writer {\n writeElementTree(element, $) {\n if (element.is('env')) {\n let options = undefined;\n try {\n const optionsString = element.attr('writer-options');\n if (optionsString) {\n options = JSON.parse(optionsString);\n }\n }\n catch (e) {\n this.raiseError(`Invalid JSON for writer-options: ${element.attr('writer-options')}`, element);\n }\n if (element.attr('presentation') === 'markup') {\n const markupLanguage = element.attr('markup-lang') || 'markdown';\n if (markupLanguage === 'markdown') {\n return new MarkdownWriter(this.ir, options).writeElementTree(element, $);\n }\n else if (markupLanguage === 'html') {\n return new HtmlWriter(this.ir, options).writeElementTree(element, $);\n }\n else if (markupLanguage === 'csv') {\n return new CsvWriter(this.ir, options).writeElementTree(element, $);\n }\n else if (markupLanguage === 'tsv') {\n return new TsvWriter(this.ir, options).writeElementTree(element, $);\n }\n else {\n return this.raiseError(`Invalid markup language: ${markupLanguage}`, element);\n }\n }\n else if (element.attr('presentation') === 'serialize') {\n const serializer = element.attr('serializer') || 'json';\n if (serializer === 'json') {\n return new JsonWriter(this.ir, options).writeElementTree(element, $);\n }\n else if (serializer === 'yaml') {\n return new YamlWriter(this.ir, options).writeElementTree(element, $);\n }\n else if (serializer === 'xml') {\n return new XmlWriter(this.ir, options).writeElementTree(element, $);\n }\n else {\n return this.raiseError(`Invalid serializer: ${serializer}`, element);\n }\n }\n else if (element.attr('presentation') === 'free') {\n return new FreeWriter(this.ir, options).writeElementTree(element, $);\n }\n else if (element.attr('presentation') === 'multimedia') {\n return new MultiMediaWriter(this.ir, options).writeElementTree(element, $);\n }\n else {\n return this.raiseError(`Invalid presentation: ${element}`, element);\n }\n }\n else {\n // Not even an environment, consider writing it as a markdown\n return new MarkdownWriter(this.ir).writeElementTree(element, $);\n }\n }\n}\nexport class MarkdownWriter extends Writer {\n initializeOptions(options) {\n options = options || {};\n return {\n markdownBaseHeaderLevel: options.markdownBaseHeaderLevel ?? 1,\n markdownTableCollapse: options.markdownTableCollapse ?? false,\n csvSeparator: options.csvSeparator ?? ',',\n csvHeader: options.csvHeader ?? true\n };\n }\n raiseErrorAndReturnEmpty(message, element) {\n this.raiseError(message, element);\n return { text: '', before: '', after: '', mappings: [], multimedia: [] };\n }\n makeBox(text, layout, element) {\n const newBeforeAfter = layout === 'block' ? '\\n\\n' : layout === 'newline' ? '\\n' : '';\n if (typeof text === 'string') {\n return {\n text: text,\n before: newBeforeAfter,\n after: newBeforeAfter,\n mappings: [this.createMappingNode(element, text.length)],\n multimedia: []\n };\n }\n else {\n return {\n text: text.text,\n before: this.consolidateSpace(newBeforeAfter, text.before),\n after: this.consolidateSpace(text.after, newBeforeAfter),\n mappings: [...text.mappings, this.createMappingNode(element, text.text.length)],\n multimedia: text.multimedia\n };\n }\n }\n wrapBox(box, wrapBefore, wrapAfter, element) {\n const text = wrapBefore + box.text + wrapAfter;\n const mappings = this.indentMappings(box.mappings, wrapBefore.length, 0);\n if (element) {\n mappings.push(this.createMappingNode(element, text.length));\n }\n return {\n text: text,\n before: box.before,\n after: box.after,\n mappings: mappings,\n multimedia: this.indentMultiMedia(box.multimedia, wrapBefore.length, 0)\n };\n }\n wrapBoxEveryLine(box, wrapBefore, wrapAfter) {\n const lines = box.text.split('\\n');\n let accumulatedLength = 0;\n let mappings = box.mappings;\n let multimedia = box.multimedia;\n const text = lines\n .map(line => {\n const result = wrapBefore + line + wrapAfter;\n mappings = this.indentMappings(mappings, wrapBefore.length, accumulatedLength);\n multimedia = this.indentMultiMedia(multimedia, wrapBefore.length, accumulatedLength);\n accumulatedLength += result.length + 1; // length of '\\n'\n return result;\n })\n .join('\\n');\n return {\n text: text,\n before: box.before,\n after: box.after,\n mappings: mappings,\n multimedia: multimedia\n };\n }\n consolidateSpace(space1, space2) {\n let result = space1 + space2;\n for (let i = 1; i <= Math.min(space1.length, space2.length); i++) {\n if (space1.slice(-i) === space2.slice(0, i)) {\n result = space1 + space2.slice(i);\n }\n }\n return result;\n }\n concatMarkdownBoxes(boxes) {\n const multimedia = [];\n // Remove all spaces children before and after block elements\n // or between two multimedia-only nodes so images do not create\n // stray blank lines when placed consecutively.\n let removedSpace = boxes;\n while (true) {\n let afterRemoveSpace = removedSpace.filter((child, i) => {\n const afterBlock = i > 0 && (removedSpace[i - 1].after.includes('\\n') || /^\\n+$/.test(removedSpace[i - 1].text));\n const beforeBlock = i < removedSpace.length - 1 && (removedSpace[i + 1].before.includes('\\n') || /^\\n+$/.test(removedSpace[i + 1].text));\n // When a whitespace-only box is sandwiched between two multimedia\n // boxes (e.g., two consecutive images), we treat it like the spaces\n // around a block element so it doesn't generate a blank line.\n const afterMedia = i > 0 &&\n removedSpace[i - 1].multimedia.length > 0 &&\n removedSpace[i - 1].multimedia.length === removedSpace[i - 1].text.length;\n const beforeMedia = i < removedSpace.length - 1 &&\n removedSpace[i + 1].multimedia.length > 0 &&\n removedSpace[i + 1].multimedia.length === removedSpace[i + 1].text.length;\n return !((afterBlock || beforeBlock || afterMedia || beforeMedia) && /^[ \\t]*$/.test(child.text));\n });\n if (afterRemoveSpace.length === removedSpace.length) {\n break;\n }\n // Repeat until no more space can be removed\n removedSpace = afterRemoveSpace;\n }\n // When concatenating, we handle 3 cases.\n // 1. If both ends are text, the same space characters will be overlapped and consolidated.\n // 2. If one end is text and the other end is multimedia (floated), the multimedia will be as if it doesn't exist.\n // This case is only handled when it only contains multimedia. If there's text in between, we assume it's already handled.\n // 3. If one end is text and the other end is multimedia (adhered), the multimedia will eat up the space characters.\n const enumerate = (boxes) => {\n return boxes.map((box, i) => {\n return { box, index: i };\n });\n };\n // See the comment above for the explanation.\n const asIfNotExist = (box) => {\n return (box.multimedia.length > 0 &&\n box.multimedia.length === box.text.length &&\n box.multimedia.every(media => media.position !== 'here'));\n };\n const textBoxQueue = enumerate(removedSpace).filter(({ box }) => !asIfNotExist(box));\n const multimediaQueue = enumerate(removedSpace).filter(({ box }) => asIfNotExist(box));\n const mappings = [];\n // When concatenating, make sure all multimedia boxes are skipped.\n // Multimedia boxes are instead directly adhered to the previous box.\n // Kinda like a merge sort.\n let text = '';\n let before = '';\n let after = '';\n let i = 0, j = 0;\n while (i < textBoxQueue.length || j < multimediaQueue.length) {\n if (i === textBoxQueue.length ||\n (j < multimediaQueue.length && multimediaQueue[j].index < textBoxQueue[i].index)) {\n const multimediaBox = multimediaQueue[j].box;\n mappings.push(...this.indentMappings(multimediaBox.mappings, text.length, 0));\n multimedia.push(...this.indentMultiMedia(multimediaBox.multimedia, text.length, 0));\n text += multimediaBox.text;\n j++;\n }\n else {\n const box = textBoxQueue[i].box;\n if (i === 0) {\n before = box.before;\n }\n mappings.push(...this.indentMappings(box.mappings, text.length, 0));\n // It still could contain inner multimedia\n multimedia.push(...this.indentMultiMedia(box.multimedia, text.length, 0));\n text += box.text;\n if (i === textBoxQueue.length - 1) {\n after = box.after;\n }\n else {\n let thisAfter;\n if (box.multimedia.filter(media => media.position === 'here' && media.index + 1 === box.text.length).length > 0) {\n // Has an adhered multimedia at the end\n thisAfter = '';\n }\n else if (textBoxQueue[i + 1].box.multimedia.filter(media => media.position === 'here' && media.index === 0).length > 0) {\n thisAfter = '';\n }\n else {\n thisAfter = this.consolidateSpace(box.after, textBoxQueue[i + 1].box.before);\n }\n text += thisAfter;\n }\n i++;\n }\n }\n return { text, before, after, mappings, multimedia };\n }\n indentText(text, indent, firstLineIndent) {\n const lines = text.split('\\n');\n return lines\n .map((line, i) => {\n if (!line) {\n return line;\n }\n else if (i === 0) {\n return ' '.repeat(firstLineIndent) + line;\n }\n else {\n return ' '.repeat(indent) + line;\n }\n })\n .join('\\n');\n }\n handleParagraph = (innerParagraphs, element, indent, firstLineIndent, blankLine) => {\n innerParagraphs.text = this.indentText(innerParagraphs.text, indent ?? 0, Math.max(0, (firstLineIndent ?? 0) + (indent ?? 0)));\n if (element.attr('blank-line') === 'true') {\n blankLine = true;\n }\n else if (element.attr('blank-line') === 'false') {\n blankLine = false;\n }\n if (blankLine || blankLine === undefined) {\n return this.makeBox(innerParagraphs, 'block', element);\n }\n else {\n return this.makeBox(innerParagraphs, 'newline', element);\n }\n };\n writeElementTrees(elements, $) {\n const children = elements\n .toArray()\n .filter(element => element.type !== 'comment')\n .map(element => {\n if (element.type === 'text') {\n return { text: element.data, before: '', after: '', mappings: [], multimedia: [] };\n }\n else {\n return this.writeElementTreeImpl($(element), $);\n }\n });\n return this.concatMarkdownBoxes(children);\n }\n handleList(listStyle, listSelf, $) {\n let indexIncrement = 0;\n const renderListItem = (item) => {\n const selectedItem = $(item);\n if (item.type === 'text') {\n return this.makeBox(item.data, 'inline', selectedItem);\n }\n if (!selectedItem.is('item')) {\n return this.writeElementTreeImpl(selectedItem, $);\n }\n let bullet;\n ++indexIncrement;\n switch (listStyle) {\n case 'star':\n bullet = '* ';\n break;\n case 'dash':\n bullet = '- ';\n break;\n case 'plus':\n bullet = '+ ';\n break;\n case 'decimal':\n bullet = `${indexIncrement}. `;\n break;\n case 'latin':\n bullet = String.fromCharCode(0x61 + indexIncrement - 1) + '. ';\n break;\n default:\n this.raiseError(`Invalid list style: ${listStyle}`, selectedItem);\n return this.makeBox('', 'block', selectedItem);\n }\n const paragraph = this.writeElementTrees(selectedItem.contents(), $);\n const paragraphWithBullet = this.wrapBox(paragraph, bullet, '', selectedItem);\n const doubleNewLine = paragraphWithBullet.text.includes('\\n\\n');\n return this.handleParagraph(paragraphWithBullet, selectedItem, bullet.length, -bullet.length, doubleNewLine);\n };\n const items = listSelf.contents().toArray().map((item) => renderListItem(item));\n return this.handleParagraph(this.concatMarkdownBoxes(items), listSelf);\n }\n processMultipleTableRows(elements, $) {\n const escapeInTable = (text) => {\n return text.replace(/\\|/g, '\\\\|');\n };\n return elements\n .contents()\n .toArray()\n .map(element => {\n if (!$(element).is('trow')) {\n this.raiseError(`Invalid table head, expect trow: ${element}`, $(element));\n return [];\n }\n return $(element)\n .contents()\n .toArray()\n .map(cell => {\n if (!$(cell).is('tcell')) {\n this.raiseError(`Invalid table cell, expect tcell: ${cell}`, $(element));\n return '';\n }\n return escapeInTable(this.writeElementTrees($(cell).contents(), $).text);\n });\n });\n }\n handleTable(tableHeadElements, tableBodyElements, tableElement, $) {\n const tableHead = this.processMultipleTableRows(tableHeadElements, $);\n const tableBody = this.processMultipleTableRows(tableBodyElements, $);\n const numberOfColumns = Math.max(...tableHead.map(row => row.length), ...tableBody.map(row => row.length));\n const columnWidths = [...Array(numberOfColumns).keys()].map(i => {\n return Math.max(...tableHead.map(row => (row[i] ? row[i].length : 0)), ...tableBody.map(row => (row[i] ? row[i].length : 0)));\n });\n // TODO: alignment and collapse config\n // Currently follows the format here: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-tables\n const makeRow = (row, isHeader) => {\n if (isHeader && row.length !== numberOfColumns) {\n row = [...row, ...[...Array(numberOfColumns - row.length).keys()].map(() => '')];\n }\n return ('| ' +\n row\n .map((cell, i) => {\n if (this.options.markdownTableCollapse) {\n return cell + ' |';\n }\n else {\n return cell.padEnd(columnWidths[i]) + ' |';\n }\n })\n .join(' '));\n };\n const makeSeparator = () => {\n return ('| ' +\n columnWidths\n .map(width => '-'.repeat(this.options.markdownTableCollapse && width >= 3 ? 3 : width))\n .join(' | ') +\n ' |');\n };\n const renderedTable = [\n ...tableHead.map(row => makeRow(row, true)),\n makeSeparator(),\n ...tableBody.map(row => makeRow(row, false))\n ];\n return this.makeBox(renderedTable.join('\\n'), 'block', tableElement);\n }\n writeElementTreeImpl(element, $) {\n if (element.is('p')) {\n let paragraphs = this.writeElementTrees(element.contents(), $);\n return this.handleParagraph(paragraphs, element);\n }\n else if (element.is('span')) {\n return this.makeBox(this.writeElementTrees(element.contents(), $), 'inline', element);\n }\n else if (element.is('nl')) {\n const nlText = '\\n'.repeat(parseInt(element.attr('count') || '1'));\n return {\n text: nlText,\n before: '',\n after: '',\n mappings: [this.createMappingNode(element, nlText.length)],\n multimedia: []\n };\n }\n else if (element.is('h')) {\n let paragraphs = this.writeElementTrees(element.contents(), $);\n const level = parseInt(element.attr('level') || '1') + this.options.markdownBaseHeaderLevel - 1;\n return this.handleParagraph(this.wrapBoxEveryLine(paragraphs, '#'.repeat(level) + ' ', ''), element);\n }\n else if (element.is('b')) {\n return this.wrapBox(this.writeElementTrees(element.contents(), $), '**', '**', element);\n }\n else if (element.is('i')) {\n return this.wrapBox(this.writeElementTrees(element.contents(), $), '*', '*', element);\n }\n else if (element.is('s')) {\n return this.wrapBox(this.writeElementTrees(element.contents(), $), '~~', '~~', element);\n }\n else if (element.is('u')) {\n return this.wrapBox(this.writeElementTrees(element.contents(), $), '__', '__', element);\n }\n else if (element.is('code')) {\n let paragraphs;\n if (element.attr('inline') === 'false') {\n const lang = element.attr('lang') || '';\n paragraphs = this.wrapBox(this.writeElementTrees(element.contents(), $), '```' + lang + '\\n', '\\n```');\n return this.handleParagraph(paragraphs, element);\n }\n else {\n // inline = true or undefined\n return this.wrapBox(this.writeElementTrees(element.contents(), $), '`', '`', element);\n }\n }\n else if (element.is('table')) {\n const contents = element.contents();\n if (contents.length !== 2 ||\n (!contents.first().is('thead') && !contents.first().is('tbody'))) {\n return this.raiseErrorAndReturnEmpty(`Invalid table, expect two children thead and tbody: ${element}`, element);\n }\n const [tableHeadElements, tableBodyElements] = contents.toArray();\n return this.handleParagraph(this.handleTable($(tableHeadElements), $(tableBodyElements), $(element), $), element);\n }\n else if (element.is('thead') ||\n element.is('tbody') ||\n element.is('trow') ||\n element.is('tcell')) {\n return this.raiseErrorAndReturnEmpty('thead, tbody, trow, tcell do not appear alone without a table context', element);\n }\n else if (element.is('list')) {\n const listStyle = element.attr('list-style');\n return this.handleList(listStyle || 'dash', element, $);\n }\n else if (element.is('item')) {\n return this.raiseErrorAndReturnEmpty('item does not appear alone without a list context', element);\n }\n else if (element.is('env')) {\n if (element.attr('presentation') === 'markup' &&\n element.attr('markup-lang') === this.markupLanguage()) {\n return this.makeBox(this.writeElementTrees(element.contents(), $), 'inline', element);\n }\n else {\n const content = new EnvironmentDispatcher(this.ir).writeElementTree(element, $);\n const { output, mappings, multimedia } = content;\n return this.makeBox({ text: output, before: '', after: '', mappings, multimedia }, 'inline', $(element));\n }\n }\n else {\n return this.raiseErrorAndReturnEmpty(`Not implemented element type ${element}`, element);\n }\n }\n writeElementTree(element, $) {\n const markdownBox = this.writeElementTreeImpl(element, $);\n return {\n output: markdownBox.text,\n mappings: markdownBox.mappings,\n multimedia: markdownBox.multimedia\n };\n }\n markupLanguage() {\n return 'markdown';\n }\n}\nexport class HtmlWriter extends Writer {\n inTableHead = false;\n initializeOptions(options) {\n return {\n htmlPrettyPrint: options?.htmlPrettyPrint ?? true,\n htmlIndent: options?.htmlIndent ?? ' '\n };\n }\n handleTableHeadBody(document, element, $) {\n if (!(element.is('thead') || element.is('tbody') || element.is('tcell') || element.is('trow'))) {\n this.raiseError(`Only thead, tbody and tcell should be handled, not ${element}`, element);\n return;\n }\n const originalTableHead = this.inTableHead;\n if (element.is('thead')) {\n this.inTableHead = true;\n }\n if (element.is('tcell')) {\n if (this.inTableHead) {\n this.fillNodeContents(document.ele('th'), element, $);\n }\n else {\n this.fillNodeContents(document.ele('td'), element, $);\n }\n }\n else if (element.is('trow')) {\n this.fillNodeContents(document.ele('tr'), element, $);\n }\n else {\n const tagName = element.is('thead') ? 'thead' : 'tbody';\n this.fillNodeContents(document.ele(tagName), element, $);\n }\n this.inTableHead = originalTableHead;\n }\n fillNodeContents(document, element, $) {\n element\n .contents()\n .toArray()\n .forEach(child => {\n if (child.type === 'text') {\n document.txt(child.data);\n }\n else {\n this.addNode(document, $(child), $);\n }\n });\n }\n addNode(document, element, $) {\n if (element.is('h')) {\n const level = element.attr('level') || '1';\n const tagName = `h${level}`;\n this.fillNodeContents(document.ele(tagName), element, $);\n }\n else if (element.is('code')) {\n this.fillNodeContents(document.ele('pre').ele('code'), element, $);\n }\n else if (element.is('nl')) {\n const count = parseInt(element.attr('count') || '1');\n for (let i = 0; i < count; i++) {\n document.ele('br');\n }\n }\n else if (element.is('thead') || element.is('tbody') || element.is('trow') || element.is('tcell')) {\n this.handleTableHeadBody(document, element, $);\n }\n else if (element.is('env')) {\n if (element.attr('presentation') === 'markup' && element.attr('markup-lang') === 'html') {\n this.fillNodeContents(document, element, $);\n }\n else {\n const inner = new EnvironmentDispatcher(this.ir).writeElementTree(element, $);\n if (inner.multimedia.length > 0) {\n this.raiseError('Multimedia cannot be nested in HTML.', element);\n }\n document.txt(inner.output);\n }\n }\n else {\n const tagName = element.prop('tagName')?.toLowerCase() || 'div';\n this.fillNodeContents(document.ele(tagName), element, $);\n }\n }\n writeElementTree(element, $) {\n const document = xmlbuilder.create();\n this.addNode(document, element, $);\n const html = document.end({\n prettyPrint: this.options.htmlPrettyPrint,\n indent: this.options.htmlIndent,\n headless: true\n });\n return {\n output: html,\n mappings: [this.createMappingNode(element, html.length)],\n multimedia: []\n };\n }\n}\nexport class CsvWriter extends MarkdownWriter {\n handleTable(tableHeadElements, tableBodyElements, tableElement, $) {\n const tableHead = this.processMultipleTableRows(tableHeadElements, $);\n const tableBody = this.processMultipleTableRows(tableBodyElements, $);\n const makeCell = (cell) => {\n if (cell.includes(this.options.csvSeparator)) {\n if (cell.includes('\"')) {\n cell = cell.replace(/\"/g, '\"\"');\n }\n cell = '\"' + cell + '\"';\n }\n return cell;\n };\n const makeRow = (row) => {\n return row.map(makeCell).join(this.options.csvSeparator);\n };\n let renderedTable;\n if (this.options.csvHeader) {\n renderedTable = [...tableHead.map(makeRow), ...tableBody.map(makeRow)];\n }\n else {\n renderedTable = [...tableBody.map(makeRow)];\n }\n return this.makeBox(renderedTable.join('\\n'), 'block', tableElement);\n }\n writeElementTreeImpl(element, $) {\n if (element.is('table') ||\n element.is('thead') ||\n element.is('tbody') ||\n element.is('trow') ||\n element.is('tcell') ||\n element.is('env')) {\n return super.writeElementTreeImpl(element, $);\n }\n else {\n return this.raiseErrorAndReturnEmpty(`Not implemented element type in csv ${element}`, element);\n }\n }\n markupLanguage() {\n return 'csv';\n }\n}\nexport class TsvWriter extends CsvWriter {\n initializeOptions(options) {\n return super.initializeOptions({ csvSeparator: '\\t', ...options });\n }\n markupLanguage() {\n return 'tsv';\n }\n}\nclass SerializeWriter extends Writer {\n get serializeLanguage() {\n throw new SystemError('Method serializeLanguage not implemented.');\n }\n parseText(element, text, type) {\n let value = null;\n switch (type) {\n case 'string':\n value = text;\n break;\n case 'integer':\n value = parseInt(text);\n break;\n case 'float':\n value = parseFloat(text);\n break;\n case 'boolean':\n if (text === 'true') {\n value = true;\n }\n else if (text === 'false') {\n value = false;\n }\n else {\n this.raiseError(`Invalid boolean value: ${text}`, element);\n }\n break;\n case 'null':\n value = null;\n break;\n case 'array':\n value = [text];\n break;\n case undefined:\n value = text;\n default:\n this.raiseError(`Invalid type: ${type}`, element);\n }\n return value;\n }\n parseAny(element, $, singleAsObject) {\n if (element.is('any') || element.is('env')) {\n const contents = element.contents().toArray();\n if (contents.length === 1 && contents[0].type === 'text') {\n return this.parseText(element, contents[0].data, element.attr('type') || 'string');\n }\n else if (contents.length === 0) {\n return null;\n }\n else {\n // > 1 or non-text\n const namedValues = contents\n .filter(child => child.type === 'text' || child.type === 'tag')\n .map(child => {\n if (child.type === 'text') {\n return { value: child.data };\n }\n else if ($(child).is('any')) {\n const name = $(child).attr('name');\n const value = this.parseAny($(child), $);\n if (name !== undefined) {\n return { name, value };\n }\n else {\n return { value };\n }\n }\n else {\n return { value: this.parseGeneralElement($(child), $) };\n }\n });\n const enforceArray = element.attr('type') === 'array';\n singleAsObject = singleAsObject ?? enforceArray;\n if (singleAsObject === false &&\n namedValues.length === 1 &&\n namedValues[0].name === undefined) {\n // This happens in env.\n return namedValues[0].value;\n }\n // Without all white space elements, can it be an object?\n const namedValuesWithoutWhiteSpace = namedValues.filter(val => typeof val.value !== 'string' || val.value.trim() !== '');\n // If all values have names, return an object\n if (namedValuesWithoutWhiteSpace.every(val => val.name !== undefined) &&\n element.attr('type') !== 'array') {\n return namedValuesWithoutWhiteSpace.reduce((acc, val) => {\n if (val.name === undefined) {\n this.raiseError(`Value must have a name in object context: ${element}`, element);\n return acc;\n }\n acc[val.name] = val.value;\n return acc;\n }, {});\n }\n else if (namedValuesWithoutWhiteSpace.every(val => typeof val.value === 'string') &&\n element.attr('type') !== 'array') {\n // All sub items are strings, concatenate them directly.\n // We need the white spaces here.\n return namedValuesWithoutWhiteSpace.map(val => val.value).join(' ');\n }\n else {\n // Otherwise, return an array\n return namedValuesWithoutWhiteSpace.map(value => value.value);\n }\n }\n }\n }\n parseObject(element, $) {\n if (!element.is('obj')) {\n this.raiseError(`Not an obj: ${element}`, element);\n return null;\n }\n const jsonData = element.attr('data');\n if (jsonData === undefined) {\n this.raiseError(`No data attribute in obj: ${element}`, element);\n return null;\n }\n const data = JSON.parse(jsonData);\n return data;\n }\n parseEnv(element, $) {\n if (!element.is('env')) {\n this.raiseError(`Not an env: ${element}`, element);\n return null;\n }\n if (element.attr('presentation') === 'serialize') {\n const serializer = element.attr('serializer');\n if (serializer !== undefined && serializer !== this.serializeLanguage) {\n const inner = new EnvironmentDispatcher(this.ir).writeElementTree(element, $);\n if (inner.multimedia.length > 0) {\n