@antv/t8
Version:
T8 is a text visualization solution for unstructured data within the AntV technology stack. It provides a declarative, Markdown-style T8 syntax that can be used to describe the content of data interpretation reports.
487 lines (483 loc) • 16.5 kB
JavaScript
'use strict';
var paragraph = require('../schema/paragraph.js');
var phrase = require('../schema/phrase.js');
/**
 * Parses a T8 Syntax string into a NarrativeTextSpec object.
 *
 * T8 Syntax supports:
 * - Markdown-style headings (# to ######)
 * - Paragraphs (text separated by blank lines)
 * - Bullet lists (- or * for unordered, 1. 2. 3. for ordered)
 * - Text formatting (**bold**, *italic*, __underline__)
 * - Links [text](url)
 * - Entity syntax: [displayText](entityType, key1=value1, key2="value2")
 *
 * All paragraphs are collected into a single section; when the input yields
 * no paragraphs, `sections` is an empty array. Both LF and CRLF line endings
 * are accepted. A change of list marker kind (unordered <-> ordered) between
 * adjacent list items closes the current list and starts a new one.
 *
 * @param syntaxString - The T8 Syntax string to parse
 * @returns A NarrativeTextSpec object
 */
function parseSyntax(syntaxString) {
    // Split on LF or CRLF so a trailing '\r' never leaks into paragraph text.
    var lines = syntaxString.split(/\r?\n/);
    var sections = [];
    var currentParagraphs = [];
    var currentParagraphLines = [];
    var currentBulletLines = [];
    var inBulletList = false;
    var bulletListIsOrdered = false;
    // Converts the pending list lines (if any) into one bullets paragraph.
    var flushBulletList = function () {
        if (currentBulletLines.length > 0) {
            var bulletParagraph = parseBulletList(currentBulletLines, bulletListIsOrdered);
            if (bulletParagraph) {
                currentParagraphs.push(bulletParagraph);
            }
            currentBulletLines = [];
            inBulletList = false;
        }
    };
    // Converts the pending text lines (if any) into one normal paragraph.
    var flushParagraph = function () {
        if (currentParagraphLines.length > 0) {
            var text = currentParagraphLines.join('\n').trim();
            if (text) {
                var block = parseBlock(text);
                if (block) {
                    currentParagraphs.push(block);
                }
            }
            currentParagraphLines = [];
        }
    };
    // Queues one list item line. The pending list is parsed with a single
    // marker pattern, so a line using the other marker kind must open a new
    // list; otherwise it would be dropped when the list is flushed.
    var addBulletLine = function (bulletLine, isOrdered) {
        flushParagraph();
        if (inBulletList && bulletListIsOrdered !== isOrdered) {
            flushBulletList();
        }
        if (!inBulletList) {
            inBulletList = true;
            bulletListIsOrdered = isOrdered;
        }
        currentBulletLines.push(bulletLine);
    };
    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        var trimmedLine = line.trim();
        // Heading: one to six '#' followed by whitespace and content
        var headingMatch = trimmedLine.match(/^(#{1,6})\s+(.+)$/);
        if (headingMatch) {
            // A heading terminates whatever was being accumulated
            flushBulletList();
            flushParagraph();
            var level = headingMatch[1].length;
            currentParagraphs.push({
                type: 'heading'.concat(level),
                phrases: parseInlineContent(headingMatch[2]),
            });
            continue;
        }
        // Unordered list item (- or *)
        if (/^[-*]\s+(.+)$/.test(trimmedLine)) {
            addBulletLine(line, false);
            continue;
        }
        // Ordered list item (1. 2. 3.)
        if (/^\d+\.\s+(.+)$/.test(trimmedLine)) {
            addBulletLine(line, true);
            continue;
        }
        // Blank line: separates paragraphs and ends lists
        if (trimmedLine === '') {
            flushBulletList();
            flushParagraph();
            continue;
        }
        // Regular text line: ends any open list, then joins the current paragraph
        flushBulletList();
        currentParagraphLines.push(line);
    }
    // Flush any remaining content
    flushBulletList();
    flushParagraph();
    // Create the single section from the collected paragraphs
    if (currentParagraphs.length > 0) {
        sections.push({ paragraphs: currentParagraphs });
    }
    return {
        sections: sections,
    };
}
/**
 * Builds a normal (non-heading) paragraph spec from a chunk of text, which
 * may span several lines.
 *
 * @param text - The paragraph text
 * @returns A paragraph spec of type NORMAL, or null when the text is empty
 *          or contains only whitespace
 */
function parseBlock(text) {
    var isBlank = text.trim() === '';
    if (isBlank) {
        return null;
    }
    // The untrimmed text is handed to the inline parser as-is.
    var inlinePhrases = parseInlineContent(text);
    var spec = {
        type: paragraph.ParagraphType.NORMAL,
        phrases: inlinePhrases,
    };
    return spec;
}
/**
 * Turns the raw lines of one list into a bullets paragraph spec.
 *
 * Each line is trimmed and matched against the marker pattern selected by
 * `isOrdered` ("1." style when truthy, "-" / "*" style otherwise). Lines that
 * do not match that pattern contribute no item.
 *
 * @param lines - The raw list lines
 * @param isOrdered - Whether the list uses numbered markers
 * @returns A bullets paragraph spec, or null when there are no lines or no
 *          line produced an item
 */
function parseBulletList(lines, isOrdered) {
    if (lines.length === 0) {
        return null;
    }
    var markerPattern = isOrdered ? /^\d+\.\s+(.+)$/ : /^[-*]\s+(.+)$/;
    var items = [];
    lines.forEach(function (rawLine) {
        var found = rawLine.trim().match(markerPattern);
        // found[1] is the text after the marker
        if (found && found[1]) {
            items.push({
                type: 'bullet-item',
                phrases: parseInlineContent(found[1]),
            });
        }
    });
    if (items.length === 0) {
        return null;
    }
    return {
        type: paragraph.ParagraphType.BULLETS,
        isOrder: isOrdered,
        bullets: items,
    };
}
/**
 * Converts inline content into an array of PhraseSpec.
 *
 * The text is scanned for `[display](params)` spans:
 * - when `params` starts with `http://`, `https://` or `/`, the span becomes
 *   a text phrase carrying a `url`;
 * - otherwise the span becomes an entity phrase whose metadata is parsed
 *   from `params` (entityType, key1=value1, key2="value2").
 * Everything between and around those spans is parsed for inline formatting
 * (**bold**, *italic*, __underline__).
 *
 * @param text - The inline content to parse
 * @returns The phrases in document order
 */
function parseInlineContent(text) {
    var LINK_PREFIXES = ['http://', 'https://', '/'];
    var spanPattern = /\[([^\]]+)\]\(([^)]+)\)/g;
    var result = [];
    var cursor = 0;
    // Parses text[from, to) for formatting and appends the resulting phrases.
    var appendFormatted = function (from, to) {
        if (to > from) {
            var chunk = text.substring(from, to);
            Array.prototype.push.apply(result, parseTextWithFormatting(chunk));
        }
    };
    for (var hit = spanPattern.exec(text); hit !== null; hit = spanPattern.exec(text)) {
        // Text that precedes this span
        appendFormatted(cursor, hit.index);
        var label = hit[1];
        var target = hit[2];
        var isLink = LINK_PREFIXES.some(function (prefix) {
            return target.startsWith(prefix);
        });
        if (isLink) {
            result.push({
                type: phrase.PhraseType.TEXT,
                value: label,
                url: target,
            });
        }
        else {
            // Not a link: the parenthesised part is entity metadata
            var entityMeta = parseEntityMetadata(target);
            result.push({
                type: phrase.PhraseType.ENTITY,
                value: label,
                metadata: entityMeta,
            });
        }
        cursor = spanPattern.lastIndex;
    }
    // Text that follows the last span (or the whole text if none matched)
    appendFormatted(cursor, text.length);
    return result;
}
/**
 * Splits text into text phrases, marking spans wrapped in formatting
 * delimiters: `**bold**`, `__underline__` and `*italic*`.
 *
 * Scanning is left to right. At each position the delimiters are tried in the
 * order `**`, `__`, `*`. When a delimiter opens but has no closing
 * counterpart later in the text, the delimiter itself is emitted as a plain
 * text phrase. Text between delimiters is emitted as plain text phrases.
 *
 * @param text - The text to split
 * @returns The resulting text phrases in order
 */
function parseTextWithFormatting(text) {
    // Two-character delimiters come first so '**' is never read as two '*'.
    var delimiters = [
        { token: '**', flag: 'bold' },
        { token: '__', flag: 'underline' },
        { token: '*', flag: 'italic' },
    ];
    var output = [];
    var total = text.length;
    var pos = 0;
    var emitPlain = function (value) {
        output.push({
            type: phrase.PhraseType.TEXT,
            value: value,
        });
    };
    var emitStyled = function (value, flag) {
        var styled = {
            type: phrase.PhraseType.TEXT,
            value: value,
        };
        styled[flag] = true;
        output.push(styled);
    };
    // Returns the delimiter starting at `at`, or null. A delimiter is only
    // recognised when at least one more character follows position `at`.
    var delimiterAt = function (at) {
        if (at + 1 >= total) {
            return null;
        }
        for (var d = 0; d < delimiters.length; d++) {
            var candidate = delimiters[d];
            if (text.substring(at, at + candidate.token.length) === candidate.token) {
                return candidate;
            }
        }
        return null;
    };
    while (pos < total) {
        var opener = delimiterAt(pos);
        if (opener !== null) {
            var width = opener.token.length;
            var closeAt = text.indexOf(opener.token, pos + width);
            if (closeAt === -1) {
                // Unclosed: keep the delimiter characters as ordinary text
                emitPlain(opener.token);
                pos += width;
            }
            else {
                emitStyled(text.substring(pos + width, closeAt), opener.flag);
                pos = closeAt + width;
            }
            continue;
        }
        // Plain run: extends up to the nearest delimiter occurrence, or to the end
        var stop = total;
        for (var k = 0; k < delimiters.length; k++) {
            var found = text.indexOf(delimiters[k].token, pos);
            if (found !== -1 && found < stop) {
                stop = found;
            }
        }
        if (stop > pos) {
            emitPlain(text.substring(pos, stop));
            pos = stop;
        }
        else {
            // A delimiter sits at `pos` but was not recognised above, which
            // only happens for a single '*' as the very last character.
            emitPlain(text.substring(pos, pos + 1));
            pos += 1;
        }
    }
    return output;
}
/**
 * Parses the metadata string from an entity definition.
 * Format: entityType, key1=value1, key2="value2", key3=[1,2,3], key4={"a":1}
 *
 * The first comma-separated part is taken as the entityType. Each following
 * part is split at its first '='; parts without '=', with an empty key, or
 * with the key `__proto__` are skipped as invalid.
 *
 * @param metadataString - The metadata string to parse
 * @returns An EntityMetaData object
 * @throws Error when the string contains no parts at all
 */
function parseEntityMetadata(metadataString) {
    // Split by commas that are not inside brackets or quotes
    var parts = smartSplit(metadataString);
    if (parts.length === 0) {
        throw new Error('Entity must have at least an entityType');
    }
    // First part is the entityType
    var metadata = {
        entityType: parts[0],
    };
    // Parse remaining key=value pairs
    for (var i = 1; i < parts.length; i++) {
        var pair = parts[i];
        var eqIndex = pair.indexOf('=');
        if (eqIndex === -1) {
            continue; // Skip invalid pairs
        }
        var key = pair.substring(0, eqIndex).trim();
        // An empty key is meaningless, and assigning to `__proto__` on a plain
        // object replaces its prototype instead of creating a property, so
        // text-supplied metadata could inject inherited fields.
        if (key === '' || key === '__proto__') {
            continue;
        }
        var valueStr = pair.substring(eqIndex + 1).trim();
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        metadata[key] = parseMetadataValue(valueStr);
    }
    return metadata;
}
/**
 * Splits a string by commas, but ignores commas inside brackets/braces/quotes.
 *
 * Each part is trimmed. A trailing part that is empty after trimming is
 * omitted; empty parts elsewhere are kept. A quote character preceded by a
 * backslash does not open or close a quoted region. Unmatched closing
 * brackets/braces are kept in the output but do not affect nesting depth.
 *
 * @param str - The string to split
 * @returns The trimmed parts in order
 */
function smartSplit(str) {
    var result = [];
    var current = '';
    var depth = 0; // Nesting depth of brackets/braces, never below 0
    var inQuotes = false;
    var quoteChar = '';
    for (var i = 0; i < str.length; i++) {
        var char = str[i];
        var prevChar = i > 0 ? str[i - 1] : '';
        // Handle quotes (an escaped quote is treated as an ordinary character)
        if ((char === '"' || char === "'") && prevChar !== '\\') {
            if (!inQuotes) {
                inQuotes = true;
                quoteChar = char;
            }
            else if (char === quoteChar) {
                inQuotes = false;
                quoteChar = '';
            }
            current += char;
            continue;
        }
        // Inside quotes every character is literal
        if (inQuotes) {
            current += char;
            continue;
        }
        // Only split on commas at depth 0 (not inside brackets/braces)
        if (char === ',' && depth === 0) {
            result.push(current.trim());
            current = '';
            continue;
        }
        if (char === '[' || char === '{') {
            depth++;
        }
        else if ((char === ']' || char === '}') && depth > 0) {
            // Clamp at 0: a stray closer must not push depth negative, which
            // would disable top-level splitting and later cause a split
            // inside a balanced array/object.
            depth--;
        }
        current += char;
    }
    // Add the last part
    if (current.trim()) {
        result.push(current.trim());
    }
    return result;
}
/**
 * Converts the textual form of a metadata value into a JavaScript value.
 *
 * After trimming, the checks run in this order:
 * 1. wrapped in matching double or single quotes -> the text between them;
 * 2. `true` / `false` -> the boolean;
 * 3. wrapped in `[...]` or `{...}` -> the JSON.parse result, or the trimmed
 *    text itself when it is not valid JSON;
 * 4. non-empty and convertible by Number() -> the number;
 * 5. anything else -> the trimmed text.
 *
 * @param valueStr - The raw value text
 * @returns The parsed value
 */
function parseMetadataValue(valueStr) {
    var raw = valueStr.trim();
    var isWrappedIn = function (open, close) {
        return raw.startsWith(open) && raw.endsWith(close);
    };
    // Quoted string: strip the surrounding quote characters
    if (isWrappedIn('"', '"') || isWrappedIn("'", "'")) {
        return raw.substring(1, raw.length - 1);
    }
    // Boolean literal
    if (raw === 'true' || raw === 'false') {
        return raw === 'true';
    }
    // Array or object literal, parsed as JSON when possible
    if (isWrappedIn('[', ']') || isWrappedIn('{', '}')) {
        try {
            return JSON.parse(raw);
        }
        catch (_err) {
            // Not valid JSON: keep the text unchanged
            return raw;
        }
    }
    // Number
    var asNumber = Number(raw);
    if (raw !== '' && !Number.isNaN(asNumber)) {
        return asNumber;
    }
    // Fallback: plain string
    return raw;
}
exports.parseSyntax = parseSyntax;
//# sourceMappingURL=syntax-parser.js.map