UNPKG

@antv/t8

Version:

T8 is a text visualization solution for unstructured data within the AntV technology stack. It offers a declarative T8 Markdown syntax that can be used to describe the content of data interpretation reports.

487 lines (483 loc) 16.5 kB
'use strict';

var paragraph = require('../schema/paragraph.js');
var phrase = require('../schema/phrase.js');

/** Heading line: one to six '#' characters, whitespace, then the heading text. */
var HEADING_PATTERN = /^(#{1,6})\s+(.+)$/;
/** Unordered list item: '-' or '*', whitespace, then the item text. */
var UNORDERED_ITEM_PATTERN = /^[-*]\s+(.+)$/;
/** Ordered list item: digits, a dot, whitespace, then the item text. */
var ORDERED_ITEM_PATTERN = /^\d+\.\s+(.+)$/;
/** Matches every `[display](params)` construct (both links and entities). */
var BRACKET_PATTERN_SOURCE = '\\[([^\\]]+)\\]\\(([^)]+)\\)';
/**
 * Inline formatting markers in priority order. The two-character markers must
 * be tried before '*' so that '**' is not mistaken for two italic markers.
 */
var FORMAT_MARKERS = [
    { token: '**', flag: 'bold' },
    { token: '__', flag: 'underline' },
    { token: '*', flag: 'italic' },
];

/**
 * Parses a T8 Syntax string into a NarrativeTextSpec object.
 *
 * T8 Syntax supports:
 * - Markdown-style headings (# to ######)
 * - Paragraphs (text separated by blank lines)
 * - Bullet lists (- or * for unordered, 1. 2. 3. for ordered)
 * - Text formatting (**bold**, *italic*, __underline__)
 * - Links [text](url)
 * - Entity syntax: [displayText](entityType, key1=value1, key2="value2")
 *
 * Both LF and CRLF line endings are accepted. A switch between ordered and
 * unordered markers closes the current list and opens a new one, so no list
 * item is ever discarded.
 *
 * @param syntaxString - The T8 Syntax string to parse
 * @returns A NarrativeTextSpec object: `{ sections: [...] }`, holding a single
 *          section with all parsed paragraphs, or no section for empty input
 * @throws {Error} If an entity's parameter list contains only whitespace
 */
function parseSyntax(syntaxString) {
    // Split on CRLF as well as LF so that no '\r' leaks into paragraph text.
    var lines = syntaxString.split(/\r?\n/);
    var paragraphs = [];
    var pendingTextLines = [];
    var pendingListLines = [];
    var pendingListIsOrdered = false;

    // Turns the accumulated list lines (if any) into one bullets paragraph.
    var flushList = function () {
        if (pendingListLines.length === 0) {
            return;
        }
        var listParagraph = parseBulletList(pendingListLines, pendingListIsOrdered);
        if (listParagraph) {
            paragraphs.push(listParagraph);
        }
        pendingListLines = [];
    };

    // Turns the accumulated text lines (if any) into one normal paragraph.
    var flushText = function () {
        if (pendingTextLines.length === 0) {
            return;
        }
        var block = parseBlock(pendingTextLines.join('\n').trim());
        if (block) {
            paragraphs.push(block);
        }
        pendingTextLines = [];
    };

    // Appends a list item, starting a new list when the marker kind changes.
    var addListItem = function (line, isOrdered) {
        flushText();
        if (pendingListLines.length > 0 && pendingListIsOrdered !== isOrdered) {
            // Keeping the item in a list of the other kind would make
            // parseBulletList drop it, because its marker would not match.
            flushList();
        }
        pendingListIsOrdered = isOrdered;
        pendingListLines.push(line);
    };

    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        var trimmedLine = line.trim();

        var headingMatch = trimmedLine.match(HEADING_PATTERN);
        if (headingMatch) {
            flushList();
            flushText();
            paragraphs.push({
                type: 'heading'.concat(headingMatch[1].length),
                phrases: parseInlineContent(headingMatch[2]),
            });
            continue;
        }
        if (UNORDERED_ITEM_PATTERN.test(trimmedLine)) {
            addListItem(line, false);
            continue;
        }
        if (ORDERED_ITEM_PATTERN.test(trimmedLine)) {
            addListItem(line, true);
            continue;
        }

        // Anything that is not a list item (blank or plain text) ends the list.
        flushList();
        if (trimmedLine === '') {
            flushText();
            continue;
        }
        pendingTextLines.push(line);
    }

    // Flush whatever is still pending at end of input.
    flushList();
    flushText();

    return {
        sections: paragraphs.length > 0 ? [{ paragraphs: paragraphs }] : [],
    };
}

/**
 * Parses a block of text into a ParagraphSpec.
 * Handles multi-line paragraphs that are not headings.
 *
 * @param text - The paragraph text (may span several lines)
 * @returns A normal paragraph, or null when the text is blank
 */
function parseBlock(text) {
    if (!text.trim()) {
        return null;
    }
    return {
        type: paragraph.ParagraphType.NORMAL,
        phrases: parseInlineContent(text),
    };
}

/**
 * Parses bullet list lines into a BulletsParagraphSpec.
 *
 * @param lines - Raw list lines, each still carrying its bullet marker
 * @param isOrdered - True for "1." style markers, false for "-" / "*" markers
 * @returns A bullets paragraph, or null when no line yields an item
 */
function parseBulletList(lines, isOrdered) {
    var itemPattern = isOrdered ? ORDERED_ITEM_PATTERN : UNORDERED_ITEM_PATTERN;
    var bullets = [];
    for (var i = 0; i < lines.length; i++) {
        var itemMatch = lines[i].trim().match(itemPattern);
        if (itemMatch) {
            bullets.push({
                type: 'bullet-item',
                phrases: parseInlineContent(itemMatch[1]),
            });
        }
    }
    if (bullets.length === 0) {
        return null;
    }
    return {
        type: paragraph.ParagraphType.BULLETS,
        isOrder: isOrdered,
        bullets: bullets,
    };
}

/**
 * Parses inline content (text with entity markers, formatting, and links) into an array of PhraseSpec.
 * Handles:
 * - Entity syntax: [displayText](entityType, key1=value1, key2="value2")
 * - Links: [text](http://url), [text](https://url) or [text](/path)
 * - Bold: **text**
 * - Italic: *text*
 * - Underline: __text__
 *
 * @param text - The inline text to parse
 * @returns The phrases in source order
 * @throws {Error} If an entity's parameter list contains only whitespace
 */
function parseInlineContent(text) {
    var phrases = [];
    // A fresh global regex per call keeps `lastIndex` state private to this call.
    var bracketRegex = new RegExp(BRACKET_PATTERN_SOURCE, 'g');
    var consumed = 0;
    var match;
    while ((match = bracketRegex.exec(text)) !== null) {
        // Plain/formatted text between the previous construct and this one.
        if (match.index > consumed) {
            phrases.push.apply(phrases, parseTextWithFormatting(text.substring(consumed, match.index)));
        }
        var displayText = match[1];
        var paramString = match[2];
        var isLink = paramString.startsWith('http://') ||
            paramString.startsWith('https://') ||
            paramString.startsWith('/');
        if (isLink) {
            phrases.push({
                type: phrase.PhraseType.TEXT,
                value: displayText,
                url: paramString,
            });
        } else {
            phrases.push({
                type: phrase.PhraseType.ENTITY,
                value: displayText,
                metadata: parseEntityMetadata(paramString),
            });
        }
        consumed = bracketRegex.lastIndex;
    }
    // Trailing text after the last construct.
    if (consumed < text.length) {
        phrases.push.apply(phrases, parseTextWithFormatting(text.substring(consumed)));
    }
    return phrases;
}

/**
 * Returns the formatting marker that opens at `index`, or null.
 * A marker on the very last character can never open a span, so it is ignored.
 */
function findMarkerAt(text, index) {
    if (index + 1 >= text.length) {
        return null;
    }
    for (var i = 0; i < FORMAT_MARKERS.length; i++) {
        var token = FORMAT_MARKERS[i].token;
        if (text.substring(index, index + token.length) === token) {
            return FORMAT_MARKERS[i];
        }
    }
    return null;
}

/**
 * Returns the index of the first marker character sequence at or after `from`,
 * or `text.length` when there is none.
 */
function findNextMarkerIndex(text, from) {
    var nearest = text.length;
    for (var i = 0; i < FORMAT_MARKERS.length; i++) {
        var found = text.indexOf(FORMAT_MARKERS[i].token, from);
        if (found !== -1 && found < nearest) {
            nearest = found;
        }
    }
    return nearest;
}

/**
 * Parses text with inline formatting (bold, italic, underline) into text phrases.
 *
 * An opening marker without a matching closing marker is emitted as a plain
 * text phrase of its own; adjacent plain phrases are not merged.
 *
 * @param text - Text that contains no link/entity constructs
 * @returns The text phrases in source order
 */
function parseTextWithFormatting(text) {
    var phrases = [];
    var textLength = text.length;
    var cursor = 0;
    while (cursor < textLength) {
        var marker = findMarkerAt(text, cursor);
        if (marker) {
            var token = marker.token;
            var closeIndex = text.indexOf(token, cursor + token.length);
            if (closeIndex === -1) {
                // No closing marker: keep the opening marker as literal text.
                phrases.push({ type: phrase.PhraseType.TEXT, value: token });
                cursor += token.length;
            } else {
                var formatted = {
                    type: phrase.PhraseType.TEXT,
                    value: text.substring(cursor + token.length, closeIndex),
                };
                formatted[marker.flag] = true;
                phrases.push(formatted);
                cursor = closeIndex + token.length;
            }
            continue;
        }
        var nextIndex = findNextMarkerIndex(text, cursor);
        if (nextIndex === cursor) {
            // Only reachable for a lone '*' on the last character, which
            // findMarkerAt refuses to open; emit it literally and move on.
            phrases.push({ type: phrase.PhraseType.TEXT, value: text.charAt(cursor) });
            cursor += 1;
        } else {
            phrases.push({
                type: phrase.PhraseType.TEXT,
                value: text.substring(cursor, nextIndex),
            });
            cursor = nextIndex;
        }
    }
    return phrases;
}

/**
 * Parses the metadata string from an entity definition.
 * Format: entityType, key1=value1, key2="value2", key3=[1,2,3], key4={"a":1}
 *
 * Parts without '=' are skipped. The key `__proto__` is skipped as well,
 * because assigning it would replace the object's prototype instead of
 * creating a metadata field.
 *
 * @param metadataString - The metadata string to parse
 * @returns An EntityMetaData object
 * @throws {Error} If the string yields no parts at all (whitespace only)
 */
function parseEntityMetadata(metadataString) {
    // Split by commas that are not inside brackets or quotes
    var parts = smartSplit(metadataString);
    if (parts.length === 0) {
        throw new Error('Entity must have at least an entityType');
    }
    // First part is the entityType
    var metadata = {
        entityType: parts[0],
    };
    // Parse remaining key=value pairs
    for (var i = 1; i < parts.length; i++) {
        var pair = parts[i];
        var eqIndex = pair.indexOf('=');
        if (eqIndex === -1) {
            continue; // Skip invalid pairs
        }
        var key = pair.substring(0, eqIndex).trim();
        if (key === '__proto__') {
            continue; // Never let input text swap the metadata object's prototype
        }
        metadata[key] = parseMetadataValue(pair.substring(eqIndex + 1));
    }
    return metadata;
}

/**
 * Splits a string by commas, but ignores commas inside brackets/braces/quotes.
 *
 * Each part is trimmed. Empty parts between commas are kept; an empty
 * trailing part is dropped.
 *
 * @param str - The string to split
 * @returns The trimmed parts
 */
function smartSplit(str) {
    var parts = [];
    var current = '';
    var depth = 0; // Nesting depth of [] and {}
    var openQuote = ''; // The quote character we are inside of, or ''
    for (var i = 0; i < str.length; i++) {
        var char = str[i];
        var isEscaped = i > 0 && str[i - 1] === '\\';
        if ((char === '"' || char === "'") && !isEscaped) {
            if (openQuote === '') {
                openQuote = char;
            } else if (char === openQuote) {
                openQuote = '';
            }
            current += char;
            continue;
        }
        // Inside quotes every character is literal.
        if (openQuote !== '') {
            current += char;
            continue;
        }
        if (char === ',' && depth === 0) {
            parts.push(current.trim());
            current = '';
            continue;
        }
        if (char === '[' || char === '{') {
            depth++;
        } else if ((char === ']' || char === '}') && depth > 0) {
            // Clamp at zero: a stray closer must not disable later splitting.
            depth--;
        }
        current += char;
    }
    var lastPart = current.trim();
    if (lastPart) {
        parts.push(lastPart);
    }
    return parts;
}

/**
 * Parses a metadata value, handling strings, numbers, booleans, arrays, and objects.
 *
 * @param valueStr - The raw value text
 * @returns The unquoted string, boolean, parsed JSON array/object, number, or
 *          the trimmed text itself when nothing else applies
 */
function parseMetadataValue(valueStr) {
    valueStr = valueStr.trim();
    var first = valueStr.charAt(0);
    var last = valueStr.charAt(valueStr.length - 1);
    // Quoted string: needs two distinct quote characters, hence length >= 2.
    if (valueStr.length >= 2 && first === last && (first === '"' || first === "'")) {
        return valueStr.substring(1, valueStr.length - 1);
    }
    if (valueStr === 'true') {
        return true;
    }
    if (valueStr === 'false') {
        return false;
    }
    var looksLikeArray = first === '[' && last === ']';
    var looksLikeObject = first === '{' && last === '}';
    if (looksLikeArray || looksLikeObject) {
        try {
            return JSON.parse(valueStr);
        } catch (_a) {
            // Deliberate fallback: text that is not valid JSON stays a string.
            return valueStr;
        }
    }
    var numeric = Number(valueStr);
    if (valueStr !== '' && !isNaN(numeric)) {
        return numeric;
    }
    return valueStr;
}

exports.parseSyntax = parseSyntax;
//# sourceMappingURL=syntax-parser.js.map