UNPKG

@mieweb/wikigdrive

Version:

Google Drive to MarkDown synchronization

717 lines (625 loc) 23.9 kB
import { MathMLToLaTeX } from 'mathml-to-latex';

import {
  DocumentContent, DocumentStyles, DrawCustomShape, DrawEnhancedGeometry, DrawFrame, DrawG, DrawRect,
  GraphicProperty, ListStyle, OfficeText, ParagraphProperty, Style, TableCell, TableOfContent,
  TableRow, TableTable, TextBookmark, TextLink, TextList, TextParagraph, TextProperty, TextSpace, TextSpan
} from './LibreOffice.ts';
import {getUrlHash, urlToFolderId} from '../utils/idParsers.ts';
import {MarkdownNodes, MarkdownTagNode} from './MarkdownNodes.ts';
import {inchesToPixels, inchesToSpaces, spaces} from './utils.ts';
import {extractPath} from './extractPath.ts';
import {mergeDeep} from './mergeDeep.ts';
import {RewriteRule} from './applyRewriteRule.ts';
import {isMarkdownMacro} from './macroUtils.ts';
import {postProcess} from './postprocess/postProcess.ts';

/** Returns the part of `fileName` after its last '/' (the whole string when there is no '/'). */
function getBaseFileName(fileName: string): string {
  return fileName.replace(/.*\//, '');
}

/** Font names that mark a span or paragraph as code. */
const COURIER_FONTS = ['Courier New', 'Courier'];

/** Paragraph style names checked by `hasStyle`, paired with the node tag created for them. */
const HEADING_STYLE_TAGS = [
  ['Heading_20_1', 'H1'],
  ['Heading_20_2', 'H2'],
  ['Heading_20_3', 'H3'],
  ['Heading_20_4', 'H4']
] as const;

/** Text shown in the output and recorded as an error whenever an embedded drawing group is converted. */
const EMBEDDED_DIAGRAM_WARNING = 'INSTEAD OF EMBEDDED DIAGRAM ABOVE USE EMBEDDED DIAGRAM FROM DRIVE AND PUT LINK TO IT IN THE DESCRIPTION. See: https://github.com/mieweb/wikiGDrive/issues/353';

/** True when `fontName` is one of COURIER_FONTS; a missing name is treated as the empty string. */
function isCourierFont(fontName?: string): boolean {
  return COURIER_FONTS.includes(fontName || '');
}

interface StringToStringMap {
  [name: string]: string
}

/**
 * Flattens the direct children of a span into plain text: strings are copied,
 * 'line_break' becomes '\n', 'tab' becomes '\t' and 'space' becomes `chars`
 * spaces (one when `chars` is falsy). Other child types contribute nothing.
 */
function getInnerText(span: TextSpan) {
  let retVal = '';
  for (const child of span.list) {
    if (typeof child === 'string') {
      retVal += child;
      continue;
    }
    switch (child.type) {
      case 'line_break':
        retVal += '\n';
        break;
      case 'tab':
        retVal += '\t';
        break;
      case 'space':
        retVal += spaces((<TextSpace>child).chars || 1);
        break;
    }
  }
  return retVal;
}

/**
 * Walks a parsed document (`DocumentContent` plus `DocumentStyles`) and builds a
 * `MarkdownNodes` tree from it; `convert()` post-processes that tree and returns
 * its string form.
 */
export class OdtToMarkdown {

  /** Messages recorded through `pushError`. */
  public errors: string[] = [];
  /** Automatic styles of the document keyed by style name; filled by `convert()`. */
  private readonly styles: { [p: string]: Style } = {};
  /** Link targets collected by `addLink`. */
  public readonly links: Set<string> = new Set<string>();
  private readonly chunks: MarkdownNodes = new MarkdownNodes();
  /** Prefix put in front of image file names in `drawFrameToText`. */
  private picturesDir = '';
  private picturesDirAbsolute = '';
  private rewriteRules: RewriteRule[] = [];
  private headersMap: { [p: string]: string } = {};
  private invisibleBookmarks: { [p: string]: number } = {};

  /**
   * @param document parsed document content to convert
   * @param documentStyles named (non-automatic) styles, used as fallback by `getStyle` and `getListStyle`
   * @param fileNameMap maps an image base file name to the file name used in the generated link
   * @param xmlMap maps an embedded object file name (`<href>.xml`) to its XML text
   */
  constructor(private document: DocumentContent,
              private documentStyles: DocumentStyles,
              private fileNameMap: StringToStringMap = {},
              private xmlMap: StringToStringMap = {}) {
  }

  /**
   * Resolves a style by name.
   *
   * An automatic style is merged over its (recursively resolved) parent. A name
   * that is not an automatic style is looked up in the document styles, and when
   * that fails too a style named 'default' with empty properties is returned.
   * The result is always a fresh copy.
   */
  getStyle(styleName: string): Style {
    const automaticStyle = this.styles[styleName];
    if (!automaticStyle) {
      const docStyle = this.documentStyles?.styles?.styles.find(a => a.name === styleName);
      if (docStyle) {
        return structuredClone(docStyle);
      }

      return {
        name: 'default',
        listStyleName: '',
        parentStyleName: '',
        paragraphProperties: new ParagraphProperty(),
        textProperties: new TextProperty(),
        graphicProperties: new GraphicProperty()
      };
    }

    const parentStyle = this.getStyle(automaticStyle.parentStyleName);
    return structuredClone(mergeDeep(parentStyle, automaticStyle));
  }

  /** Finds a list style by name in the document styles; `null` when there are none or no name matches. */
  getListStyle(listStyleName: string): ListStyle | null {
    if (!this.documentStyles?.styles?.listStyles) {
      return null;
    }
    return this.documentStyles.styles.listStyles.find(ls => ls.name === listStyleName) || null;
  }

  /**
   * Runs the conversion: registers the automatic styles, converts every
   * top-level 'toc' element and then the whole body into `this.chunks`, runs
   * `postProcess` (storing the headers map and invisible bookmarks it returns)
   * and returns the serialized tree after `trimBreaks`.
   */
  async convert(): Promise<string> {
    if (this.document.automaticStyles) {
      for (const namedStyle of this.document.automaticStyles.styles) {
        this.styles[namedStyle.name] = namedStyle;
      }
    }

    for (const tableOfContent of this.document.body.text.list) {
      if (tableOfContent.type === 'toc') {
        await this.tocToText(this.chunks.body, <TableOfContent>tableOfContent);
      }
    }

    await this.officeTextToText(this.chunks.body, this.document.body.text);

    // text = this.processMacros(text);
    // text = this.fixBlockMacros(text);

    const { headersMap, invisibleBookmarks } = await postProcess(this.chunks, this.rewriteRules);
    this.headersMap = headersMap;
    this.invisibleBookmarks = invisibleBookmarks;

    const markdown = this.chunks.toString();
    return this.trimBreaks(markdown);
  }

  /**
   * Removes a trailing two-space marker (preceded by a non-space character)
   * from rows inside a ``` fence and from rows followed by a blank row.
   * The last row is never examined because each step may look at `rows[i + 1]`.
   */
  trimBreaks(markdown: string): string {
    const rows = markdown.split('\n');

    let inSidePre = false;
    for (let i = 0; i < rows.length - 1; i++) {
      // A row starting with ``` toggles the fenced state (the fence row itself included).
      if (rows[i].substring(0, 3) === '```') {
        inSidePre = !inSidePre;
      }

      if (inSidePre && (rows[i].match(/[^ ] {2}$/))) {
        rows[i] = rows[i].replace(/ {2}$/, '');
        continue;
      }

      if ((rows[i].match(/[^ ] {2}$/)) && rows[i + 1].trim().length === 0) {
        rows[i] = rows[i].replace(/ {2}$/, '');
        continue;
      }

      // NOTE(review): this file was recovered from a whitespace-collapsed paste;
      // the literal may originally have been two spaces - confirm against upstream.
      if (rows[i] === ' ') {
        rows[i] = '';
      }
    }

    return rows.join('\n');
  }

  /** Returns the messages recorded so far. */
  getErrors() {
    return this.errors;
  }

  /** Appends a 'TOC' node to `currentTagNode` and converts every paragraph of the index body into it. */
  async tocToText(currentTagNode: MarkdownTagNode, tableOfContent: TableOfContent): Promise<void> {
    const tocNode = this.chunks.createNode('TOC', {});
    this.chunks.append(currentTagNode, tocNode);

    for (const paragraph of tableOfContent.indexBody.list) {
      await this.paragraphToText(tocNode, paragraph);
    }
  }

  /**
   * Converts a span. The content is wrapped in 'CODE' when the span's font is a
   * Courier font, and then in 'BI', 'I' or 'B' according to its font style and
   * weight; children are emitted into the innermost wrapper.
   */
  async spanToText(currentTagNode: MarkdownTagNode, span: TextSpan): Promise<void> {
    const style = this.getStyle(span.styleName);
    const textProperties = style.textProperties;

    if (isCourierFont(textProperties?.fontName)) {
      const block = this.chunks.createNode('CODE');
      this.chunks.append(currentTagNode, block);
      currentTagNode = block;
    }

    const italic = textProperties?.fontStyle === 'italic';
    const bold = textProperties?.fontWeight === 'bold';

    if (italic && bold) {
      const block = this.chunks.createNode('BI');
      this.chunks.append(currentTagNode, block);
      currentTagNode = block;
    } else if (italic) {
      const block = this.chunks.createNode('I');
      this.chunks.append(currentTagNode, block);
      currentTagNode = block;
    } else if (bold) {
      const block = this.chunks.createNode('B');
      this.chunks.append(currentTagNode, block);
      currentTagNode = block;
    }

    for (const child of span.list) {
      if (typeof child === 'string') {
        this.chunks.appendText(currentTagNode, child);
        continue;
      }
      switch (child.type) {
        case 'line_break':
          this.chunks.append(currentTagNode, this.chunks.createNode('BR/'));
          break;
        case 'tab':
          this.chunks.appendText(currentTagNode, '\t');
          break;
        case 'space':
          this.chunks.appendText(currentTagNode, spaces((<TextSpace>child).chars || 1));
          break;
      }
    }
  }

  /**
   * Records `href` in `links` with any '#...' suffix removed. Empty hrefs,
   * hrefs starting with '#' and hrefs without ':' are ignored.
   */
  addLink(href: string) {
    if (href && !href.startsWith('#') && href.indexOf(':') > -1) {
      this.links.add(href.replace(/#.*$/, ''));
    }
  }

  /**
   * Converts a link into an 'A' node. When `urlToFolderId` yields an id for the
   * href, the href is rewritten to 'gdoc:' + id + hash. Only string and 'span'
   * children are emitted.
   */
  async linkToText(currentTagNode: MarkdownTagNode, link: TextLink): Promise<void> {
    let href = link.href;
    const id = urlToFolderId(href);
    const hash = getUrlHash(link.href);
    if (id) {
      href = 'gdoc:' + id + hash;
    }

    this.addLink(href);

    const block = this.chunks.createNode('A', { href: href });
    this.chunks.append(currentTagNode, block);
    currentTagNode = block;

    for (const child of link.list) {
      if (typeof child === 'string') {
        this.chunks.appendText(currentTagNode, child);
        continue;
      }
      switch (child.type) {
        case 'span':
          await this.spanToText(currentTagNode, <TextSpan>child);
          break;
      }
    }
  }

  /**
   * Converts a custom shape into an 'EMB_SVG' node sized from the shape's
   * width and height. Each 'draw_enhanced_geometry' child becomes an
   * 'EMB_SVG_P/' node and each non-empty 'paragraph' child becomes an
   * 'EMB_SVG_TEXT' node holding one 'EMB_SVG_TSPAN' per span.
   */
  async drawCustomShape(currentTagNode: MarkdownTagNode, drawCustomShape: DrawCustomShape) {
    // https://documentation.libreoffice.org/assets/Uploads/Documentation/en/Tutorials/CustomShapes7/Custom-Shape-Tutorial.odt
    // https://code.woboq.org/libreoffice/libreoffice/svx/source/customshapes/EnhancedCustomShape2d.cxx.html#1808
    // https://code.woboq.org/libreoffice/libreoffice/xmloff/source/draw/ximpcustomshape.cxx.html

    const style = this.getStyle(drawCustomShape.styleName);

    const logwidth = inchesToPixels(drawCustomShape.width);
    const logheight = inchesToPixels(drawCustomShape.height);

    const blockSvg = this.chunks.createNode('EMB_SVG', {
      width: logwidth,
      height: logheight
    });
    this.chunks.append(currentTagNode, blockSvg);

    for (const item of drawCustomShape.list) {
      if (item.type === 'draw_enhanced_geometry') {
        const enhancedGeometry = <DrawEnhancedGeometry>item;

        const blockSvgP = this.chunks.createNode('EMB_SVG_P/', {
          pathD: extractPath(enhancedGeometry, logwidth, logheight),
          style
        });
        this.chunks.append(blockSvg, blockSvgP);
      }
    }

    for (const item of drawCustomShape.list) {
      if (item.type !== 'paragraph') {
        continue;
      }
      const paragraph = <TextParagraph>item;
      if (paragraph.list.length === 0) {
        continue;
      }

      const blockSvgText = this.chunks.createNode('EMB_SVG_TEXT');
      this.chunks.append(blockSvg, blockSvgText);

      for (const child of paragraph.list) {
        if (typeof child === 'string') {
          // Bare paragraph text goes into this paragraph's text node, next to
          // its spans, rather than into the node the caller passed in.
          this.chunks.appendText(blockSvgText, child);
          continue;
        }
        switch (child.type) {
          case 'span': {
            const span = <TextSpan>child;
            const spanStyle = this.getStyle(span.styleName);

            const blockSvgTextSpan = this.chunks.createNode('EMB_SVG_TSPAN', {
              style: spanStyle
            });
            this.chunks.append(blockSvgText, blockSvgTextSpan);

            for (const spanChild of span.list) {
              if (typeof spanChild === 'string') {
                this.chunks.appendText(blockSvgTextSpan, spanChild);
                continue;
              }
              switch (spanChild.type) {
                case 'line_break':
                  this.chunks.append(blockSvgTextSpan, this.chunks.createNode('BR/'));
                  break;
                case 'tab':
                  this.chunks.appendText(blockSvgTextSpan, '\t');
                  break;
                case 'space':
                  this.chunks.appendText(blockSvgTextSpan, spaces((<TextSpace>spanChild).chars || 1));
                  break;
              }
            }
          }
            break;
        }
      }
    }
  }

  /**
   * Converts a drawing group: an 'HTML_MODE/' node holding one 'EMB_SVG' sized
   * to the furthest right/bottom edge of the group's shapes, with one
   * 'EMB_SVG_G' per shape. A bold warning is then appended after it and the
   * same text is recorded as an error.
   */
  async drawGToText(currentTagNode: MarkdownTagNode, drawG: DrawG) {
    const blockHtml = this.chunks.createNode('HTML_MODE/');
    this.chunks.append(currentTagNode, blockHtml);

    let maxx = 0;
    let maxy = 0;
    for (const drawCustomShape of drawG.list) {
      const x2 = inchesToPixels(drawCustomShape.x) + inchesToPixels(drawCustomShape.width);
      const y2 = inchesToPixels(drawCustomShape.y) + inchesToPixels(drawCustomShape.height);
      if (maxx < x2) {
        maxx = x2;
      }
      if (maxy < y2) {
        maxy = y2;
      }
    }

    const blockSvg = this.chunks.createNode('EMB_SVG', {
      width: maxx,
      height: maxy,
      styleTxt: `width: ${maxx / 100}mm; height: ${maxy / 100}mm;`
    });
    this.chunks.append(blockHtml, blockSvg);
    // currentTagNode = blockSvg;

    for (const drawCustomShape of drawG.list) {
      const blockSvgGroup = this.chunks.createNode('EMB_SVG_G', {
        x: inchesToPixels(drawCustomShape.x),
        y: inchesToPixels(drawCustomShape.y)
      });
      this.chunks.append(blockSvg, blockSvgGroup);

      await this.drawCustomShape(blockSvgGroup, drawCustomShape);
    }

    const emptyLine = this.chunks.createNode('EMPTY_LINE/');
    emptyLine.comment = 'drawGToText: warning';
    this.chunks.append(currentTagNode, emptyLine);

    const blockWarning = this.chunks.createNode('B');
    this.chunks.append(currentTagNode, blockWarning);
    this.chunks.appendText(blockWarning, EMBEDDED_DIAGRAM_WARNING);
    this.pushError(EMBEDDED_DIAGRAM_WARNING);
  }

  /**
   * Converts a frame.
   *
   * A frame with an embedded object yields a 'MATHML' node containing LaTeX
   * when `xmlMap` holds XML with a '<math ' element for it, and nothing
   * otherwise. A frame with an image yields 'SVG/' (when the description
   * resolves to an id via `urlToFolderId`, or the link ends with '.svg') or
   * 'IMG/'.
   */
  async drawFrameToText(currentTagNode: MarkdownTagNode, drawFrame: DrawFrame) {
    if (drawFrame.object) {
      if (drawFrame.object.href) {
        const fileName = drawFrame.object.href.replace(/\s/g, '_').replace(/^\.\//, '') + '.xml';
        try {
          const mathMl = this.xmlMap[fileName];
          if (mathMl && mathMl.indexOf('<math ') > -1) {
            const node = this.chunks.createNode('MATHML');
            const latex = MathMLToLaTeX.convert(mathMl);
            this.chunks.appendText(node, latex);
            this.chunks.append(currentTagNode, node);
          }
        } catch (err) {
          // Best effort: a formula that fails to convert is logged and left out.
          console.warn(err);
        }
      }
      return;
    }

    if (drawFrame.image) {
      const baseFileName = getBaseFileName(drawFrame.image.href);
      const fileName = this.fileNameMap[baseFileName] || baseFileName;
      const imageLink = this.picturesDir + fileName;
      const altText = drawFrame.description?.value || '';
      const svgId = urlToFolderId(altText);

      if (svgId) {
        const node = this.chunks.createNode('SVG/', { href: 'gdoc:' + svgId });
        this.chunks.append(currentTagNode, node);
      } else if (imageLink.endsWith('.svg')) {
        const node = this.chunks.createNode('SVG/', { href: imageLink, alt: altText });
        this.chunks.append(currentTagNode, node);
      } else {
        const node = this.chunks.createNode('IMG/', { href: imageLink, alt: altText });
        this.chunks.append(currentTagNode, node);
      }
    }
  }

  /** True when the paragraph's style name is `name` or its resolved style has `name` as parent style. */
  hasStyle(paragraph: TextParagraph, name: string) {
    if (paragraph.styleName === name) {
      return true;
    }
    const style = this.getStyle(paragraph.styleName);
    return style.parentStyleName === name;
  }

  /*
  isBold(styleName: string) {
    const style = this.getStyle(styleName);
    if (style.textProperties?.fontWeight === 'bold') {
      return true;
    }
    if (style.parentStyleName) {
      return this.isBold(style.parentStyleName);
    }
    return false;
  }
  */

  /** True when the named style, or any style up its parent chain, uses a Courier font. */
  isCourier(styleName: string): boolean {
    const style = this.getStyle(styleName);
    if (isCourierFont(style.textProperties?.fontName)) {
      return true;
    }
    if (style.parentStyleName) {
      return this.isCourier(style.parentStyleName);
    }
    return false;
  }

  /**
   * Picks the node tag for a paragraph: the first matching heading style wins,
   * then 'PRE' for a Courier paragraph style, otherwise 'P'.
   */
  private paragraphTag(paragraph: TextParagraph): 'H1' | 'H2' | 'H3' | 'H4' | 'PRE' | 'P' {
    for (const [headingStyleName, tag] of HEADING_STYLE_TAGS) {
      if (this.hasStyle(paragraph, headingStyleName)) {
        return tag;
      }
    }
    return this.isCourier(paragraph.styleName) ? 'PRE' : 'P';
  }

  /**
   * Converts a paragraph into a heading, 'PRE' or 'P' node and emits its
   * children into it.
   *
   * The node is retagged 'PRE' when the paragraph holds at least one Courier
   * span and consists solely of strings and Courier spans. Spans whose text is
   * a markdown macro are not counted as Courier spans, so a paragraph
   * containing one is never retagged. Content of a bold, non-Courier paragraph
   * style is wrapped in 'B'.
   */
  async paragraphToText(currentTagNode: MarkdownTagNode, paragraph: TextParagraph): Promise<void> {
    const style = this.getStyle(paragraph.styleName);
    const listStyle = this.getListStyle(style.listStyleName);

    const paragraphNode = this.chunks.createNode(this.paragraphTag(paragraph), {
      marginLeft: inchesToSpaces(style.paragraphProperties?.marginLeft),
      style,
      listStyle
    });
    this.chunks.append(currentTagNode, paragraphNode);
    currentTagNode = paragraphNode;

    let codeElementsCount = 0;
    let textElementsCount = 0;
    for (const paraChild of paragraph.list) {
      if (typeof paraChild === 'string') {
        textElementsCount++;
        continue;
      }
      if (paraChild.type === 'span') {
        const paraSpan = <TextSpan>paraChild;
        const spanStyle = this.getStyle(paraSpan.styleName);
        const innerTxt = getInnerText(paraSpan);
        if (isMarkdownMacro(innerTxt)) {
          continue;
        }
        if (isCourierFont(spanStyle.textProperties?.fontName)) {
          codeElementsCount++;
        }
      }
    }

    const onlyCodeChildren = codeElementsCount > 0 && codeElementsCount + textElementsCount === paragraph.list.length;
    if (onlyCodeChildren) {
      currentTagNode.tag = 'PRE';
    }

    if (!this.isCourier(paragraph.styleName) && style.textProperties?.fontWeight === 'bold') {
      const block = this.chunks.createNode('B', {});
      this.chunks.append(currentTagNode, block);
      currentTagNode = block;
    }

    for (const child of paragraph.list) {
      if (typeof child === 'string') {
        this.chunks.appendText(currentTagNode, child);
        continue;
      }
      switch (child.type) {
        case 'line_break':
          this.chunks.append(currentTagNode, this.chunks.createNode('BR/', {}));
          break;
        case 'tab':
          this.chunks.appendText(currentTagNode, '\t');
          break;
        case 'space':
          this.chunks.appendText(currentTagNode, spaces((<TextSpace>child).chars || 1));
          break;
        case 'span': {
          const span = <TextSpan>child;
          const spanStyle = this.getStyle(span.styleName);
          const isCodeSpan = isCourierFont(spanStyle.textProperties?.fontName);

          if (isCodeSpan && onlyCodeChildren) {
            // The paragraph itself is already 'PRE': emit the span with its style name cleared.
            const span2 = Object.assign({}, span);
            span2.styleName = '';
            await this.spanToText(currentTagNode, span2);
          } else if (isCodeSpan) {
            const codeBlock = this.chunks.createNode('CODE');
            this.chunks.append(currentTagNode, codeBlock);

            // 'CODE' is created here, so the style name is cleared before delegating.
            const span2 = Object.assign({}, span);
            span2.styleName = '';
            await this.spanToText(codeBlock, span2);
          } else {
            await this.spanToText(currentTagNode, span);
          }
        }
          break;
        case 'link': {
          const link = <TextLink>child;
          await this.linkToText(currentTagNode, link);
        }
          break;
        case 'rect': {
          const rect = <DrawRect>child;
          if (rect.width === '100%') {
            const node = this.chunks.createNode('HR/');
            this.chunks.append(currentTagNode, node);
          }
        }
          break;
        case 'draw_frame':
          await this.drawFrameToText(currentTagNode, <DrawFrame>child);
          break;
        case 'draw_custom_shape': {
          const htmlBlock = this.chunks.createNode('HTML_MODE/');
          this.chunks.append(currentTagNode, htmlBlock);
          await this.drawCustomShape(htmlBlock, <DrawCustomShape>child);
        }
          break;
        case 'draw_g':
          await this.drawGToText(currentTagNode, <DrawG>child);
          break;
        case 'change_start':
          this.chunks.append(currentTagNode, this.chunks.createNode('CHANGE_START'));
          break;
        case 'change_end':
          this.chunks.append(currentTagNode, this.chunks.createNode('CHANGE_END'));
          break;
        case 'bookmark': {
          const bookmark = <TextBookmark>child;
          this.chunks.append(currentTagNode, this.chunks.createNode('BOOKMARK/', { id: bookmark.name }));
        }
          break;
      }
    }
  }

  /** Converts a table cell into a 'TD' node containing its paragraphs, lists and nested tables. */
  async tableCellToText(currentTagNode: MarkdownTagNode, tableCell: TableCell): Promise<void> {
    const block = this.chunks.createNode('TD');
    this.chunks.append(currentTagNode, block);
    currentTagNode = block;

    for (const child of tableCell.list) {
      switch (child.type) {
        case 'paragraph':
          await this.paragraphToText(currentTagNode, <TextParagraph>child);
          break;
        case 'list':
          await this.listToText(currentTagNode, <TextList>child);
          break;
        case 'table':
          await this.tableToText(currentTagNode, <TableTable>child);
          break;
      }
    }
  }

  /** Converts a table row into a 'TR' node with one 'TD' per cell. */
  async tableRowToText(currentTagNode: MarkdownTagNode, tableRow: TableRow): Promise<void> {
    const block = this.chunks.createNode('TR');
    this.chunks.append(currentTagNode, block);
    currentTagNode = block;

    for (const tableCell of tableRow.cells) {
      await this.tableCellToText(currentTagNode, tableCell);
    }
  }

  /** Converts a table into an 'HTML_MODE/' node wrapping a 'TABLE' node with one 'TR' per row. */
  async tableToText(currentTagNode: MarkdownTagNode, table: TableTable): Promise<void> {
    const blockHtml = this.chunks.createNode('HTML_MODE/');
    this.chunks.append(currentTagNode, blockHtml);

    const block = this.chunks.createNode('TABLE');
    this.chunks.append(blockHtml, block);
    currentTagNode = block;

    for (const tableRow of table.rows) {
      await this.tableRowToText(currentTagNode, tableRow);
    }
  }

  /**
   * Converts a list into a 'UL' node with one 'LI' per item; paragraphs and
   * nested lists inside an item are converted into that 'LI'.
   */
  async listToText(currentTagNode: MarkdownTagNode, list: TextList): Promise<void> {
    const listStyle = this.getListStyle(list.styleName);
    const continueNumbering = list.continueNumbering === 'true';

    const ulBlock = this.chunks.createNode('UL', {
      listId: list.id,
      continueList: list.continueList,
      listStyle,
      continueNumbering
    });
    this.chunks.append(currentTagNode, ulBlock);

    for (const listItem of list.list) {
      const liBlock = this.chunks.createNode('LI', { listId: list.id });
      this.chunks.append(ulBlock, liBlock);

      for (const item of listItem.list) {
        if (item.type === 'paragraph') {
          await this.paragraphToText(liBlock, <TextParagraph>item);
        }
        if (item.type === 'list') {
          await this.listToText(liBlock, <TextList>item);
        }
      }
    }
  }

  /** Converts each top-level paragraph, table, list and table of contents of `content` into `currentTagNode`. */
  async officeTextToText(currentTagNode: MarkdownTagNode, content: OfficeText): Promise<void> {
    for (const child of content.list) {
      switch (child.type) {
        case 'paragraph':
          await this.paragraphToText(currentTagNode, <TextParagraph>child);
          break;
        case 'table':
          await this.tableToText(currentTagNode, <TableTable>child);
          break;
        case 'list':
          await this.listToText(currentTagNode, <TextList>child);
          break;
        case 'toc':
          await this.tocToText(currentTagNode, <TableOfContent>child);
          break;
      }
    }
  }

  /** Sets the image link prefix; the absolute directory defaults to `picturesDir` when omitted. */
  setPicturesDir(picturesDir: string, picturesDirAbsolute?: string) {
    this.picturesDir = picturesDir;
    this.picturesDirAbsolute = picturesDirAbsolute || picturesDir;
  }

  /** Sets the rewrite rules handed to `postProcess` by `convert()`. */
  setRewriteRules(rewriteRules: RewriteRule[]) {
    this.rewriteRules = rewriteRules;
  }

  /** Records an error message. */
  pushError(error: string) {
    this.errors.push(error);
  }

  /** Returns the headers map produced by the last `convert()` call. */
  getHeadersMap() {
    return this.headersMap;
  }

  /** Returns the invisible bookmarks produced by the last `convert()` call. */
  getInvisibleBookmarks() {
    return this.invisibleBookmarks;
  }
}