UNPKG

notion-to-md

Version:

Convert Notion pages, blocks, and lists of blocks to Markdown (supports nesting)

496 lines 22.1 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.NotionToMarkdown = void 0; const md = __importStar(require("./utils/md")); const notion_1 = require("./utils/notion"); /** * Converts a Notion page to Markdown. 
*/ class NotionToMarkdown { constructor(options) { this.notionClient = options.notionClient; const defaultConfig = { separateChildPage: false, convertImagesToBase64: false, parseChildPages: true, }; this.config = { ...defaultConfig, ...options.config }; this.customTransformers = {}; } setCustomTransformer(type, transformer) { this.customTransformers[type] = transformer; return this; } /** * Converts Markdown Blocks to string * @param {MdBlock[]} mdBlocks - Array of markdown blocks * @param {number} nestingLevel - Defines max depth of nesting * @returns {MdStringObject} - Returns markdown string with child pages separated */ toMarkdownString(mdBlocks = [], pageIdentifier = "parent", nestingLevel = 0) { let mdOutput = {}; mdBlocks.forEach((mdBlocks) => { // NOTE: toggle in the child blocks logic // adding a toggle check prevents duplicate // rendering of toggle title // process parent blocks if (mdBlocks.parent && mdBlocks.type !== "toggle" && mdBlocks.type !== "child_page") { if (mdBlocks.type !== "to_do" && mdBlocks.type !== "bulleted_list_item" && mdBlocks.type !== "numbered_list_item" && mdBlocks.type !== "quote") { // initialize if key doesn't exist mdOutput[pageIdentifier] = mdOutput[pageIdentifier] || ""; // add extra line breaks non list blocks mdOutput[pageIdentifier] += `\n${md.addTabSpace(mdBlocks.parent, nestingLevel)}\n\n`; } else { // initialize if key doesn't exist mdOutput[pageIdentifier] = mdOutput[pageIdentifier] || ""; mdOutput[pageIdentifier] += `${md.addTabSpace(mdBlocks.parent, nestingLevel)}\n`; } } // process child blocks if (mdBlocks.children && mdBlocks.children.length > 0) { if (mdBlocks.type === "synced_block" || mdBlocks.type === "column_list" || mdBlocks.type === "column") { let mdstr = this.toMarkdownString(mdBlocks.children, pageIdentifier); mdOutput[pageIdentifier] = mdOutput[pageIdentifier] || ""; Object.keys(mdstr).forEach((key) => { if (mdOutput[key]) { mdOutput[key] += mdstr[key]; } else { mdOutput[key] = mdstr[key]; } }); } else 
if (mdBlocks.type === "child_page") { const childPageTitle = mdBlocks.parent; let mdstr = this.toMarkdownString(mdBlocks.children, childPageTitle); if (this.config.separateChildPage) { mdOutput = { ...mdOutput, ...mdstr }; } else { mdOutput[pageIdentifier] = mdOutput[pageIdentifier] || ""; if (mdstr[childPageTitle]) { // child page heading followed by child page content mdOutput[pageIdentifier] += `\n${childPageTitle}\n${mdstr[childPageTitle]}`; } } } else if (mdBlocks.type === "toggle") { // convert children md object to md string const toggle_children_md_string = this.toMarkdownString(mdBlocks.children); mdOutput[pageIdentifier] = mdOutput[pageIdentifier] || ""; mdOutput[pageIdentifier] += md.toggle(mdBlocks.parent, toggle_children_md_string["parent"]); } else if (mdBlocks.type === "quote") { let mdstr = this.toMarkdownString(mdBlocks.children, pageIdentifier, nestingLevel); const formattedContent = mdstr.parent .split("\n") .map((line) => (line.trim() ? `> ${line}` : ">")) .join("\n") .trim(); mdOutput[pageIdentifier] = mdOutput[pageIdentifier] || ""; if (pageIdentifier !== "parent" && mdstr["parent"]) { mdOutput[pageIdentifier] += formattedContent; } else if (mdstr[pageIdentifier]) { mdOutput[pageIdentifier] += formattedContent; } mdOutput[pageIdentifier] += "\n"; } else if (mdBlocks.type === "callout") { // do nothing the callout block is already processed } else { let mdstr = this.toMarkdownString(mdBlocks.children, pageIdentifier, nestingLevel + 1); mdOutput[pageIdentifier] = mdOutput[pageIdentifier] || ""; if (pageIdentifier !== "parent" && mdstr["parent"]) { mdOutput[pageIdentifier] += mdstr["parent"]; } else if (mdstr[pageIdentifier]) { mdOutput[pageIdentifier] += mdstr[pageIdentifier]; } } } }); return mdOutput; } /** * Retrieves Notion Blocks based on ID and converts them to Markdown Blocks * @param {string} id - notion page id (not database id) * @param {number} totalPage - Retrieve block children request number, page_size Maximum = totalPage * 100 
(Default=null) * @returns {Promise<MdBlock[]>} - List of markdown blocks */ async pageToMarkdown(id, totalPage = null) { if (!this.notionClient) { throw new Error("notion client is not provided, for more details check out https://github.com/souvikinator/notion-to-md"); } const blocks = await (0, notion_1.getBlockChildren)(this.notionClient, id, totalPage); const parsedData = await this.blocksToMarkdown(blocks); return parsedData; } /** * Converts list of Notion Blocks to Markdown Blocks * @param {ListBlockChildrenResponseResults | undefined} blocks - List of notion blocks * @param {number} totalPage - Retrieve block children request number, page_size Maximum = totalPage * 100 * @param {MdBlock[]} mdBlocks - Array of markdown blocks * @returns {Promise<MdBlock[]>} - Array of markdown blocks with their children */ async blocksToMarkdown(blocks, totalPage = null, mdBlocks = []) { var _a, _b; if (!this.notionClient) { throw new Error("notion client is not provided, for more details check out https://github.com/souvikinator/notion-to-md"); } if (!blocks) return mdBlocks; for (let i = 0; i < blocks.length; i++) { let block = blocks[i]; if ( // @ts-ignore block.type === "unsupported" || // @ts-ignore (block.type === "child_page" && !this.config.parseChildPages)) { continue; } if ("has_children" in block && block.has_children) { const block_id = block.type == "synced_block" && ((_b = (_a = block.synced_block) === null || _a === void 0 ? void 0 : _a.synced_from) === null || _b === void 0 ? void 0 : _b.block_id) ? block.synced_block.synced_from.block_id : block.id; // Get children of this block. let child_blocks = await (0, notion_1.getBlockChildren)(this.notionClient, block_id, totalPage); // Push this block to mdBlocks. mdBlocks.push({ type: block.type, blockId: block.id, parent: await this.blockToMarkdown(block), children: [], }); // Recursively call blocksToMarkdown to get children of this block. 
// check for custom transformer before parsing child if (!(block.type in this.customTransformers) && !this.customTransformers[block.type]) { let l = mdBlocks.length; await this.blocksToMarkdown(child_blocks, totalPage, mdBlocks[l - 1].children); } continue; } let tmp = await this.blockToMarkdown(block); mdBlocks.push({ // @ts-ignore type: block.type, blockId: block.id, parent: tmp, children: [], }); } return mdBlocks; } /** * Converts a Notion Block to a Markdown Block * @param {ListBlockChildrenResponseResult} block - single notion block * @returns {string} corresponding markdown string of the passed block */ async blockToMarkdown(block) { if (typeof block !== "object" || !("type" in block)) return ""; let parsedData = ""; const { type } = block; if (type in this.customTransformers && !!this.customTransformers[type]) { const customTransformerValue = await this.customTransformers[type](block); if (typeof customTransformerValue === "string") return customTransformerValue; } switch (type) { case "image": { let blockContent = block.image; let image_title = "image"; const image_caption_plain = blockContent.caption .map((item) => item.plain_text) .join(""); const image_type = blockContent.type; let link = ""; if (image_type === "external") { link = blockContent.external.url; } if (image_type === "file") { link = blockContent.file.url; } // image caption with high priority if (image_caption_plain.trim().length > 0) { image_title = image_caption_plain; } else if (image_type === "file" || image_type === "external") { const matches = link.match(/[^\/\\&\?]+\.\w{3,4}(?=([\?&].*$|$))/); image_title = matches ? 
matches[0] : image_title; } return await md.image(image_title, link, this.config.convertImagesToBase64); } break; case "divider": { return md.divider(); } case "equation": { return md.equation(block.equation.expression); } case "video": case "file": case "pdf": { let blockContent; let title = type; if (type === "video") blockContent = block.video; if (type === "file") blockContent = block.file; if (type === "pdf") blockContent = block.pdf; const caption = blockContent === null || blockContent === void 0 ? void 0 : blockContent.caption.map((item) => item.plain_text).join(""); if (blockContent) { const file_type = blockContent.type; let link = ""; if (file_type === "external") link = blockContent.external.url; if (file_type === "file") link = blockContent.file.url; if (caption && caption.trim().length > 0) { title = caption; } else if (link) { const matches = link.match(/[^\/\\&\?]+\.\w{3,4}(?=([\?&].*$|$))/); title = matches ? matches[0] : type; } return md.link(title, link); } } break; case "bookmark": case "embed": case "link_preview": case "link_to_page": { let blockContent; let title = type; if (type === "bookmark") blockContent = block.bookmark; if (type === "embed") blockContent = block.embed; if (type === "link_preview") blockContent = block.link_preview; if (type === "link_to_page" && block.link_to_page.type === "page_id") { blockContent = { url: `https://www.notion.so/${block.link_to_page.page_id}`, }; } if (blockContent) return md.link(title, blockContent.url); } break; case "child_page": { if (!this.config.parseChildPages) return ""; let pageTitle = block.child_page.title; if (this.config.separateChildPage) { return pageTitle; } return md.heading2(pageTitle); } break; case "child_database": { let pageTitle = block.child_database.title || `child_database`; return pageTitle; } break; case "table": { const { id, has_children } = block; let tableArr = []; if (has_children) { const tableRows = await (0, notion_1.getBlockChildren)(this.notionClient, id, 100); 
let rowsPromise = tableRows === null || tableRows === void 0 ? void 0 : tableRows.map(async (row) => { const { type } = row; if (type !== 'table_row') return; const cells = row.table_row["cells"]; /** * this is more like a hack since matching the type text was * difficult. So converting each cell to paragraph type to * reuse the blockToMarkdown function */ let cellStringPromise = cells.map(async (cell) => await this.blockToMarkdown({ type: "paragraph", paragraph: { rich_text: cell }, })); const cellStringArr = await Promise.all(cellStringPromise); tableArr.push(cellStringArr); }); await Promise.all(rowsPromise || []); } return md.table(tableArr); } // Rest of the types // "paragraph" // "heading_1" // "heading_2" // "heading_3" // "bulleted_list_item" // "numbered_list_item" // "quote" // "to_do" // "template" // "synced_block" // "child_page" // "child_database" // "code" // "callout" // "breadcrumb" // "table_of_contents" // "link_to_page" // "audio" // "unsupported" default: { // In this case typescript is not able to index the types properly, hence ignoring the error // @ts-ignore let blockContent = block[type].text || block[type].rich_text || []; blockContent.map((content) => { if (content.type === "equation") { parsedData += md.inlineEquation(content.equation.expression); return; } const annotations = content.annotations; let plain_text = content.plain_text; plain_text = this.annotatePlainText(plain_text, annotations); if (content["href"]) plain_text = md.link(plain_text, content["href"]); parsedData += plain_text; }); } } switch (type) { case "code": { const codeContent = block.code.rich_text.map((t) => t.plain_text).join("\n"); const language = block.code.language || "plaintext"; parsedData = md.codeBlock(codeContent, language); } break; case "heading_1": { parsedData = md.heading1(parsedData); } break; case "heading_2": { parsedData = md.heading2(parsedData); } break; case "heading_3": { parsedData = md.heading3(parsedData); } break; case "quote": { 
parsedData = md.quote(parsedData); } break; case "callout": { const { id, has_children } = block; let callout_string = ""; if (!has_children) { return md.callout(parsedData, block[type].icon); } const callout_children_object = await (0, notion_1.getBlockChildren)(this.notionClient, id, 100); // // parse children blocks to md object const callout_children = await this.blocksToMarkdown(callout_children_object); callout_string += `${parsedData}\n`; callout_children.map((child) => { callout_string += `${child.parent}\n\n`; }); parsedData = md.callout(callout_string.trim(), block[type].icon); } break; case "bulleted_list_item": { parsedData = md.bullet(parsedData); } break; case "numbered_list_item": { parsedData = md.bullet(parsedData, block.numbered_list_item.number); } break; case "to_do": { parsedData = md.todo(parsedData, block.to_do.checked); } break; } return parsedData; } /** * Annoate text using provided annotations * @param {string} text - String to be annotated * @param {Annotations} annotations - Annotation object of a notion block * @returns {string} - Annotated text */ annotatePlainText(text, annotations) { // if text is all spaces, don't annotate if (text.match(/^\s*$/)) return text; const leadingSpaceMatch = text.match(/^(\s*)/); const trailingSpaceMatch = text.match(/(\s*)$/); const leading_space = leadingSpaceMatch ? leadingSpaceMatch[0] : ""; const trailing_space = trailingSpaceMatch ? trailingSpaceMatch[0] : ""; text = text.trim(); if (text !== "") { if (annotations.code) text = md.inlineCode(text); if (annotations.bold) text = md.bold(text); if (annotations.italic) text = md.italic(text); if (annotations.strikethrough) text = md.strikethrough(text); if (annotations.underline) text = md.underline(text); } return leading_space + text + trailing_space; } } exports.NotionToMarkdown = NotionToMarkdown; //# sourceMappingURL=notion-to-md.js.map