// @jupyterlab/toc
// Version:
// JupyterLab - Table of Contents widget
// 298 lines (270 loc) • 6.98 kB
// text/typescript
// Copyright (c) Jupyter Development Team.
// Distributed under the terms of the Modified BSD License.
import { Sanitizer } from '@jupyterlab/apputils';
import { IMarkdownParser, renderMarkdown } from '@jupyterlab/rendermime';
import { IRenderMime } from '@jupyterlab/rendermime-interfaces';
import { TableOfContents } from '../tokens';
/**
* Heading extracted from Markdown source text.
*
* Adds the position and the raw source of the heading to
* `TableOfContents.IHeading`.
*/
export interface IMarkdownHeading extends TableOfContents.IHeading {
/**
* Heading line.
*
* As assigned by `getHeadings`, this is the zero-based index of the line on
* which the heading starts, counted over the input split on `\n`.
*/
line: number;
/**
* Raw string containing the heading.
*
* As produced by `getHeadings`, this is the heading line itself; for
* alternative style (underlined) headings it is the title line and the
* underline line joined by `\n`.
*/
raw: string;
}
/**
 * Build the heading html id.
 *
 * The raw heading is rendered as untrusted Markdown (no typesetting, no
 * resolver, no link handler) into a `div` that is never attached to the
 * document, and the `id` of the first `h<level>` element found in the
 * rendered output is returned.
 *
 * @param markdownParser Markdown parser used for the rendering
 * @param raw Raw markdown heading
 * @param level Heading level
 * @param sanitizer HTML sanitizer; a new `Sanitizer` is created when omitted
 * @returns The id of the rendered heading element, or `null` if no matching
 *   element was rendered or if rendering failed (the failure is logged with
 *   `console.error`)
 */
export async function getHeadingId(
  markdownParser: IMarkdownParser,
  raw: string,
  level: number,
  sanitizer?: IRenderMime.ISanitizer
): Promise<string | null> {
  try {
    const container = document.createElement('div');

    await renderMarkdown({
      markdownParser,
      host: container,
      source: raw,
      trusted: false,
      sanitizer: sanitizer ?? new Sanitizer(),
      shouldTypeset: false,
      resolver: null,
      linkHandler: null,
      latexTypesetter: null
    });

    // Only an element of the requested level counts as the heading.
    const renderedHeading = container.querySelector(`h${level}`);
    return renderedHeading ? renderedHeading.id : null;
  } catch (reason) {
    console.error('Failed to parse a heading.', reason);
    return null;
  }
}
/**
 * Parses the provided string and returns a list of headings.
 *
 * The text is split on `\n` and scanned line by line:
 *
 * - If the very first line is `---` and a later line is also `---`, everything
 *   up to and including that later line is treated as YAML frontmatter and
 *   skipped.
 * - Lines inside fenced code blocks (delimited by three or more backticks or
 *   tildes at the start of a line) are ignored, and so are the opening and
 *   closing fence lines themselves.
 * - Every remaining non-empty line is passed to `parseHeading` together with
 *   the line that follows it, so that underlined (alternative style) headings
 *   can be detected.
 *
 * @param text - Input text
 * @returns List of headings; `line` is the zero-based index of the line on
 *   which each heading starts
 */
export function getHeadings(text: string): IMarkdownHeading[] {
  // Split the text into lines:
  const lines = text.split('\n');

  // Iterate over the lines to get the header level and text for each line:
  const headings = new Array<IMarkdownHeading>();
  let isCodeBlock = false;
  // Length of the fence that opened the current code block (0 = not in a block).
  let openingFence = 0;
  // Character (backtick or tilde) of the fence that opened the current block.
  let fenceType = '';
  let lineIdx = 0;

  // Don't check for Markdown headings if in a YAML frontmatter block.
  // We can only start a frontmatter block on the first line of the file.
  // At other positions in a markdown file, '---' represents a horizontal rule.
  if (lines[lineIdx] === '---') {
    // Search for another '---' and treat that as the end of the frontmatter.
    // If we don't find one, treat the file as containing no frontmatter.
    for (
      let frontmatterEndLineIdx = lineIdx + 1;
      frontmatterEndLineIdx < lines.length;
      frontmatterEndLineIdx++
    ) {
      if (lines[frontmatterEndLineIdx] === '---') {
        lineIdx = frontmatterEndLineIdx + 1;
        break;
      }
    }
  }

  for (; lineIdx < lines.length; lineIdx++) {
    const line = lines[lineIdx];

    if (line === '') {
      // Bail early
      continue;
    }

    // Don't check for Markdown headings if in a code block
    if (line.startsWith('```') || line.startsWith('~~~')) {
      const fenceLength = extractLeadingFences(line);
      if (fenceLength === 0) {
        continue;
      }
      if (openingFence === 0) {
        // Opening fence: remember its character and length so that only a
        // matching fence can close the block.
        fenceType = line.charAt(0);
        isCodeBlock = true;
        openingFence = fenceLength;
        continue;
      } else if (fenceType === line.charAt(0) && fenceLength >= openingFence) {
        // Closing fence: same character, at least as long as the opening one.
        isCodeBlock = false;
        openingFence = 0;
        fenceType = '';
        // The fence line itself is never a heading. Without skipping it here,
        // a closing fence followed by `---` or `===` would be reported as an
        // underlined heading whose title is the fence.
        continue;
      }
      // Otherwise this fence-like line does not close the open block and is
      // part of its content; it is skipped by the check below.
    }

    if (isCodeBlock) {
      continue;
    }

    // Pass the next line as well to capture alternative style Markdown headings
    const heading = parseHeading(line, lines[lineIdx + 1]);
    if (heading) {
      headings.push({
        ...heading,
        line: lineIdx
      });
    }
  }

  return headings;
}
/**
 * Returns the length of the run of backticks or tildes that starts the line.
 *
 * A run only counts as a fence when it is at least three characters long;
 * otherwise 0 is returned. A line starting with a backtick is only checked
 * for a backtick fence, any other line only for a tilde fence.
 */
function extractLeadingFences(line: string): number {
  const fencePattern = line.startsWith('`') ? /^(`{3,})/ : /^(~{3,})/;
  const found = fencePattern.exec(line);
  if (found === null) {
    return 0;
  }
  return found[0].length;
}
/**
 * MIME types that are recognized as Markdown flavors.
 */
const MARKDOWN_MIME_TYPE = [
  'text/x-ipythongfm',
  'text/x-markdown',
  'text/x-gfm',
  'text/markdown'
];

/**
 * Tells whether a MIME type denotes one of the known Markdown flavors.
 *
 * The comparison is an exact, case-sensitive string match.
 *
 * @param mime - MIME type string
 * @returns `true` if the MIME type is a known Markdown flavor, `false` otherwise
 *
 * @example
 * isMarkdown('text/markdown');
 * // returns true
 *
 * @example
 * isMarkdown('text/plain');
 * // returns false
 */
export function isMarkdown(mime: string): boolean {
  return MARKDOWN_MIME_TYPE.some(flavor => flavor === mime);
}
/**
* Interface describing a parsed heading result, as returned by `parseHeading`.
*
* @private
*/
interface IHeader {
/**
* Heading text.
*
* For Markdown headings, inline link syntax has been replaced by the link
* text; for HTML headings, this is the captured content between the tags.
*/
text: string;
/**
* Heading level.
*
* `parseHeading` produces values from 1 to 6.
*/
level: number;
/**
* Raw string containing the heading
*/
raw: string;
/**
* Whether the heading is marked to skip or not.
*
* Set from a test of the heading source against the `skipHeading` pattern.
*/
skip: boolean;
}
/**
 * Parses a heading, if one exists, from a line and the line that follows it.
 *
 * ## Notes
 *
 * - Three heading forms are recognized, tried in this order:
 *
 *   - Markdown heading (one to six `#`, a single space, then the title):
 *
 *     ```
 *     # Foo
 *     ```
 *
 *   - Markdown heading (alternative style), where the following line is made
 *     of two or more `=` (level 1) or two or more `-` (level 2), optionally
 *     indented by up to three spaces:
 *
 *     ```
 *     Foo
 *     ===
 *     ```
 *
 *     ```
 *     Foo
 *     ---
 *     ```
 *
 *   - HTML heading contained in a single line:
 *
 *     ```
 *     <h3>Foo</h3>
 *     ```
 *
 * - For both Markdown forms the title goes through `cleanTitle`; for the HTML
 *   form the captured content is returned as is.
 * - The HTML pattern is not particularly robust: HTML headings that span
 *   multiple lines are not detected, and because the part after `<hN` is
 *   matched greedily, a heading whose content contains tags only yields the
 *   text found after the last `>` that precedes the closing tag.
 *
 * @private
 * @param line - Line to parse
 * @param nextLine - The line after the one to parse
 * @returns heading info, or `null` if the line is not a heading
 *
 * @example
 * const out = parseHeading('### Foo');
 * // returns {text: 'Foo', level: 3, raw: '### Foo', skip: false}
 *
 * @example
 * const out = parseHeading('Foo', '===');
 * // returns {text: 'Foo', level: 1, raw: 'Foo\n===', skip: false}
 *
 * @example
 * const out = parseHeading('<h4>Foo</h4>');
 * // returns {text: 'Foo', level: 4, skip: false, raw: '<h4>Foo</h4>'}
 *
 * @example
 * const out = parseHeading('Foo');
 * // returns null
 */
function parseHeading(line: string, nextLine?: string): IHeader | null {
  // Case: Markdown heading
  const hashHeading = /^([#]{1,6}) (.*)/.exec(line);
  if (hashHeading !== null) {
    return {
      text: cleanTitle(hashHeading[2]),
      level: hashHeading[1].length,
      raw: line,
      skip: skipHeading.test(hashHeading[0])
    };
  }

  // Case: Markdown heading (alternative style)
  if (nextLine) {
    const underline = /^ {0,3}([=]{2,}|[-]{2,})\s*$/.exec(nextLine);
    if (underline !== null) {
      // `=` underlines mark a level 1 heading, `-` underlines a level 2 one.
      const underlinedLevel = underline[1].charAt(0) === '=' ? 1 : 2;
      return {
        text: cleanTitle(line),
        level: underlinedLevel,
        raw: `${line}\n${nextLine}`,
        skip: skipHeading.test(line)
      };
    }
  }

  // Case: HTML heading (WARNING: this is not particularly robust, as HTML headings can span multiple lines)
  const htmlHeading = /<h([1-6]).*>(.*)<\/h\1>/i.exec(line);
  if (htmlHeading === null) {
    return null;
  }
  return {
    text: htmlHeading[2],
    level: Number(htmlHeading[1]),
    skip: skipHeading.test(htmlHeading[0]),
    raw: line
  };
}
/**
 * Replaces inline Markdown links in a heading with their link text.
 *
 * Each `[text](destination)` occurrence is replaced by `text`; everything
 * else in the heading is left untouched.
 *
 * @param heading - Heading title, possibly containing inline links
 * @returns The title with every inline link reduced to its text
 *
 * @example
 * cleanTitle('[Foo](https://example.org) and [Bar](./bar.md)');
 * // returns 'Foo and Bar'
 */
function cleanTitle(heading: string): string {
  // take special care to parse Markdown links into raw text
  //
  // - `\[(.+?)\]`: the link text is matched lazily so that each link is
  //   handled on its own instead of the first `[` being paired with the last
  //   `](...)` of the heading.
  // - `\((?:[^()]|\([^()]*\))+\)`: the destination stops at its own closing
  //   parenthesis (one level of balanced parentheses is allowed inside it),
  //   so parenthesised text after a link is preserved.
  return heading.replace(/\[(.+?)\]\((?:[^()]|\([^()]*\))+\)/g, '$1');
}
/**
* Ignore title with html tag with a class name equal to `jp-toc-ignore` or `tocSkip`
*
* The pattern looks for an opening tag (tag name followed by whitespace) with a
* double-quoted `class="..."` attribute in which `jp-toc-ignore` or `tocSkip`
* appears either right next to a quote or separated from other text by
* whitespace. Matching is case-sensitive. This is a regular-expression
* heuristic, not an HTML parser.
*
* `parseHeading` tests heading sources against it to fill the `skip` flag.
*/
const skipHeading =
/<\w+\s(.*?\s)?class="(.*?\s)?(jp-toc-ignore|tocSkip)(\s.*?)?"(\s.*?)?>/;