UNPKG

schema-dts-gen

Version:

Generate TypeScript Definitions for Schema.org Schema

220 lines 7.13 kB
/** * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import ts from 'typescript'; const { setSyntheticLeadingComments, SyntaxKind } = ts; import { unified } from 'unified'; import markdown from 'remark-parse'; import html from 'rehype-parse'; import wikiLinkPlugin from 'remark-wiki-link'; import mdastToHast from 'remark-rehype'; import stripWhitespace from 'rehype-minify-whitespace'; import raw from 'rehype-raw'; export function withComments(comment, node) { if (!comment) return node; try { return setSyntheticLeadingComments(node, [ { text: parseComment(comment), kind: SyntaxKind.MultiLineCommentTrivia, hasTrailingNewLine: true, pos: -1, end: -1, }, ]); } catch (error) { throw new Error(`${error}\n... while processing comment:\n${comment}`); } } export function appendParagraph(first, next) { if (!first) return next; if (shouldParseAsHtml(first)) return `${first}<br/><br/>${next}`; return `${first}\n\n${next}`; } function parseComment(comment) { const parseAsHtml = shouldParseAsHtml(comment); const commentToParse = unescape(comment); const processor = parseAsHtml ? unified().use(html, { fragment: true }).use(stripWhitespace) : unified() .use(markdown) .use(wikiLinkPlugin, { hrefTemplate: s => `https://schema.org/${s}`, pageResolver: s => [s], aliasDivider: '|', }) .use(mdastToHast, { allowDangerousHtml: true }) .use(raw) .use(stripWhitespace); const ast = parseCommentInternal(processor, commentToParse); const context = { result: [], onTag: new Map([...universalHandlers, ...htmlHandlers]), }; one(ast, context, { isFirstChild: true, isLastChild: true, parent: undefined, }); const lines = context.result.join('').trim().split('\n'); // Hack to get JSDOCs working. Microsoft does not expose JSDOC-creation API. return lines.length === 1 ? `* ${lines[0]} ` : '*\n * ' + lines.join('\n * ') + '\n '; } function parseCommentInternal(processor, comment) { return processor.runSync(processor.parse(comment)); } // Older Schema.org comment strings are represented as HTML, where whitespace // and new lines are insignificant, and all meaning is expressed through HTML // tags, including <a> and <br/>. // // Starting Schema.org 11, comments are represented as markdown. v11 still mixes // some HTML with Markdown. // // To decide how we're handling them, we'll check for the presence of newlines // without line break tags as a strong indication to use Markdown (even if other // HTML exists). Othrewise, we simply test for the existence of any HTML tag. function shouldParseAsHtml(s) { const BR = /<br\s*\/?>/gi; const NL = /\n/gi; if (NL.test(s) && !BR.test(s)) return false; // Any part of the string that is _not_ escaped in (`) cannot contain HTML in // a Markdown comment. // // Similarly, strip all `<code>` blocks entirely. These are somehow still // included in Markdown comments. const stripped = s .replace(/<code[^<>]*>((?!<\/code>).)*<\/code\s*>/gi, '') .replace(/(?!^)(`+)((?!\1).)+\1/g, ''); return /<[A-Za-z][A-Za-z0-9-]*(\s[^<>]*)?>/g.test(stripped); } // Some handlers for behaviors that apply to multiple tags: const em = { open: () => '_', close: () => '_', }; const strong = { open: () => '__', close: () => '__', }; // Our top-level tag handler. const universalHandlers = [ ['root', {}], ['text', { value: v => v }], ]; const htmlHandlers = [ [ 'p', { open: ({ isFirstChild }) => (isFirstChild ? '' : '\n'), close: () => '\n', }, ], [ 'a', { open: ({ node }) => `{@link ${node.properties['href']} `, close: () => '}', }, ], ['em', em], ['i', em], ['strong', strong], ['b', strong], ['br', { open: () => '\n' /* Ignore closing of <BR> */ }], ['li', { open: () => '- ', close: () => '\n' }], [ 'ul', { /* Ignore <UL> entirely. */ }, ], [ 'code', { open({ isFirstChild, isLastChild, parent }) { if (parent && isFirstChild && isLastChild && getNodeType(parent) === 'pre') { return ''; } return '`'; }, close({ isFirstChild, isLastChild, parent }) { if (parent && isFirstChild && isLastChild && getNodeType(parent) === 'pre') { return ''; } return '`'; }, }, ], ['pre', { open: () => '```\n', close: () => '\n```\n' }], ]; function one(node, context, nodeContext) { const handler = context.onTag.get(getNodeType(node)); if (!handler) { throw new Error(`While parsing comment: unknown node type (${getNodeType(node)}) for: ${JSON.stringify(node, undefined, 2)}.`); } if (handler.open) { context.result.push(handler.open(Object.assign(Object.assign({}, nodeContext), { node: node }))); } if (handler.value && node.value) { context.result.push(handler.value(node.value)); } if (node.children) { const p = node; for (let i = 0; i < p.children.length; ++i) { const child = p.children[i]; one(child, context, { isFirstChild: i === 0, isLastChild: i === p.children.length - 1, parent: node, }); } } if (handler.close) { context.result.push(handler.close(Object.assign(Object.assign({}, nodeContext), { node: node }))); } } function getNodeType(node) { return node.type === 'element' ? node.tagName : node.type; } // String Replacement: make sure we replace unicode strings as needed, wherever // they show up as text. const replacer = [ [/\\?\\n/g, '\n'], [/\\?\\u2014/gi, '\u2014'], [/\\?\\u2019/gi, '\u2019'], [/\\?\\u00A3/gi, '\u00A3'], ]; function unescape(str) { for (const [regexp, replacement] of replacer) { str = str.replace(regexp, replacement); } return str; } //# sourceMappingURL=comments.js.map