UNPKG

@mitre/nuxt-smartscript

Version:

Smart typography transformations for Nuxt - automatic superscript, subscript, and symbol formatting

151 lines (150 loc) 6.76 kB
import { logger } from "./logger.js";

// ---------------------------------------------------------------------------
// Default detection patterns (no flags here; the "g" flag is added when the
// working regexes are built in safeCreateRegex / createCombinedPattern).
// ---------------------------------------------------------------------------
const TRADEMARK_PATTERN = /™|\(TM\)/;
const REGISTERED_PATTERN = /®|\(R\)(?!\))/;
const COPYRIGHT_PATTERN = /©|\(C\)(?!\))/;
const ORDINALS_PATTERN = /\b(\d+)(st|nd|rd|th)\b/;
const CHEMICALS_PATTERN = /([A-Z][a-z]?)(\d+)|\)(\d+)/;
const MATH_SUPER_PATTERN = /(?<=^|[\s=+\-*/().,\da-z])([a-zA-Z])\^(\d+|[a-zA-Z]|\{[^}]+\})/;
const MATH_SUB_PATTERN = /(?<=^|[\s=+\-*/().,])([a-zA-Z])_(\d+|[a-zA-Z]|\{[^}]+\})/;

// ---------------------------------------------------------------------------
// Anchored validators: classify a piece of text that has ALREADY been matched.
// They must match the whole string, otherwise a longer match that merely
// contains a symbol (e.g. "x^{(C)}") would be classified as that symbol.
// ---------------------------------------------------------------------------
const TRADEMARK_VALIDATE = /^(?:™|\(TM\))$/;
const REGISTERED_VALIDATE = /^(?:®|\(R\))$/;
const COPYRIGHT_VALIDATE = /^(?:©|\(C\))$/;
const ORDINAL_VALIDATE = /^\d+(?:st|nd|rd|th)$/;
const CHEMICAL_ELEMENT_VALIDATE = /^[A-Z][a-z]?\d+$/;
const CHEMICAL_PARENS_VALIDATE = /^\)\d+$/;
const MATH_SUPER_VALIDATE = /^[a-z]\^/i;
const MATH_SUB_VALIDATE = /^[a-z]_/i;

// Anchored extractors: pull the individual parts out of a matched string.
const ORDINAL_EXTRACT = /^(\d+)(st|nd|rd|th)$/;
const CHEMICAL_ELEMENT_EXTRACT = /^([A-Z][a-z]?)(\d+)$/;
const CHEMICAL_PARENS_EXTRACT = /^\)(\d+)$/;
const MATH_VARIABLE_EXTRACT = /^([a-z])[\^_](.+)$/i;

// Source of the "never matches" sentinel regex (/\b\B/): a position cannot be
// both a word boundary and a non-boundary. Disabled transformations are
// represented by this regex and recognised again by comparing `.source`.
const NEVER_MATCH_SOURCE = "\\b\\B";

// Order in which individual patterns are joined into the combined regex.
// Alternation is ordered, so earlier entries win when two could match at the
// same position.
const COMBINED_PATTERN_ORDER = [
  "trademark",
  "registered",
  "copyright",
  "ordinals",
  "chemicals",
  "mathSuper",
  "mathSub",
];

/**
 * Build a global regex from a user-supplied pattern, falling back to the
 * default when the custom pattern is missing or cannot be compiled.
 *
 * @param {string|RegExp|undefined} pattern - Custom pattern; any falsy value selects the default.
 * @param {RegExp} defaultPattern - Built-in pattern whose `source` is used as the fallback.
 * @param {string} name - Transformation name, used only in log messages.
 * @returns {RegExp} A new regex carrying the "g" flag.
 */
function safeCreateRegex(pattern, defaultPattern, name) {
  if (!pattern) {
    return new RegExp(defaultPattern.source, "g");
  }
  try {
    const regex = new RegExp(pattern, "g");
    // Smoke-test the compiled regex once before handing it out.
    "test".match(regex);
    logger.debug("Custom pattern applied for:", name, "\u2192", pattern);
    return regex;
  } catch (error) {
    logger.warn("Invalid custom pattern for:", name, "\u2192", pattern);
    logger.debug("Pattern validation error:", error);
    logger.info("Using default pattern for:", name);
    return new RegExp(defaultPattern.source, "g");
  }
}

/**
 * Create the set of global regexes used to detect each transformation.
 *
 * A transformation is enabled unless `config.transformations.<name>` is
 * exactly `false`. Disabled transformations get a regex that never matches.
 * `config.customPatterns.<name>` overrides the default pattern when present
 * and valid.
 *
 * @param {object} config - Module configuration.
 * @param {object} [config.transformations] - Per-transformation enable flags.
 * @param {object} [config.customPatterns] - Per-transformation pattern overrides.
 * @returns {{trademark: RegExp, registered: RegExp, copyright: RegExp, ordinals: RegExp, chemicals: RegExp, mathSuper: RegExp, mathSub: RegExp}}
 */
export function createPatterns(config) {
  // One sentinel instance is shared by every disabled transformation.
  const neverMatch = new RegExp(NEVER_MATCH_SOURCE, "g");
  const enabled = config.transformations;
  const custom = config.customPatterns ?? {};

  const build = (key, defaultPattern) =>
    enabled?.[key] !== false
      ? safeCreateRegex(custom[key], defaultPattern, key)
      : neverMatch;

  return {
    // Matches ™ or (TM)
    trademark: build("trademark", TRADEMARK_PATTERN),

    // Matches ®, (R) but not (R))
    registered: build("registered", REGISTERED_PATTERN),

    // Matches ©, (C) but not (C))
    copyright: build("copyright", COPYRIGHT_PATTERN),

    // Matches ordinal numbers (1st, 2nd, 3rd, 4th, etc.)
    ordinals: build("ordinals", ORDINALS_PATTERN),

    // Matches element+count pairs (H2, the O4 in SO4) and a count after a
    // closing parenthesis: )3
    chemicals: build("chemicals", CHEMICALS_PATTERN),

    // Matches math superscript notation: x^2, x^n, x^{expr}
    // Pattern: /(?<=^|[\s=+\-*/().,\da-z])([a-zA-Z])\^(\d+|[a-zA-Z]|\{[^}]+\})/g
    //
    // Breakdown:
    // - (?<=...)            - Positive lookbehind to ensure proper context
    // - ^|[\s=+\-*/().,\d]  - After start of string, whitespace, operators, or digits
    // - a-z (in the class)  - OR after a lowercase letter (enables E=mc^2 to match c^2)
    // - ([a-zA-Z])          - Capture group 1: single letter variable
    // - \^                  - Literal caret symbol
    // - (...)               - Capture group 2: the exponent, which can be:
    //   - \d+               - One or more digits (x^2, x^10)
    //   - [a-zA-Z]          - Single letter (x^n, x^i)
    //   - \{[^}]+\}         - Expression in braces (x^{n+1}, x^{10})
    //
    // Examples that MATCH:
    // - "x^2"      → ["x^2"]
    // - "E=mc^2"   → ["c^2"] (after lowercase 'm')
    // - "2x^2"     → ["x^2"] (after digit)
    // - "f(x)=x^2" → ["x^2"] (after equals)
    //
    // Examples that DON'T MATCH:
    // - "MAX^2" - 'X' is after uppercase 'A' (blocked by lookbehind)
    //
    // Known limitation:
    // - "file^name" still matches "e^n", because 'e' follows the lowercase 'l'
    mathSuper: build("mathSuper", MATH_SUPER_PATTERN),

    // Matches math subscript notation: x_1, x_n, x_{expr}
    // Pattern uses lookbehind to prevent matching in identifiers
    // Examples: "x_1" → match, "file_name" → no match
    mathSub: build("mathSub", MATH_SUB_PATTERN),
  };
}

/**
 * Join every enabled pattern into a single global alternation regex.
 *
 * Patterns whose source equals the never-match sentinel are skipped. When
 * nothing is enabled, a never-matching regex is returned.
 *
 * @param {object} patterns - Object shaped like the result of `createPatterns`.
 * @param {object} [_config] - Unused; kept so existing callers keep working.
 * @returns {RegExp} Combined regex with the "g" flag.
 */
export function createCombinedPattern(patterns, _config) {
  const sources = COMBINED_PATTERN_ORDER
    .map((key) => patterns[key].source)
    .filter((source) => source !== NEVER_MATCH_SOURCE);

  if (sources.length === 0) {
    return new RegExp(NEVER_MATCH_SOURCE, "g");
  }
  return new RegExp(sources.join("|"), "g");
}

/**
 * Remove every `{` and `}` character from the text.
 *
 * @param {string} text
 * @returns {string}
 */
function stripBraces(text) {
  return text.replace(/[{}]/g, "");
}

/**
 * Predicates that classify an already-matched piece of text.
 * Symbol, ordinal and chemical checks require the whole string to match;
 * the math checks only inspect the leading "<letter>^" / "<letter>_".
 */
export const PatternMatchers = {
  isTrademark: (text) => TRADEMARK_VALIDATE.test(text),
  isRegistered: (text) => REGISTERED_VALIDATE.test(text),
  // Anchored like its siblings so text that merely contains "(C)" or "©"
  // (for example "x^{(C)}") is not reported as a copyright symbol.
  isCopyright: (text) => COPYRIGHT_VALIDATE.test(text),
  isOrdinal: (text) => ORDINAL_VALIDATE.test(text),
  isChemicalElement: (text) => CHEMICAL_ELEMENT_VALIDATE.test(text),
  isChemicalParentheses: (text) => CHEMICAL_PARENS_VALIDATE.test(text),
  isMathSuperscript: (text) => MATH_SUPER_VALIDATE.test(text),
  isMathSubscript: (text) => MATH_SUB_VALIDATE.test(text),
};

/**
 * Helpers that split an already-matched piece of text into its parts.
 * Each extractor returns `null` when the text does not have the expected
 * shape, except `extractMathScript`, which always returns a string.
 */
export const PatternExtractors = {
  /** "21st" → { number: "21", suffix: "st" } */
  extractOrdinal: (text) => {
    const match = text.match(ORDINAL_EXTRACT);
    return match && match[1] && match[2]
      ? { number: match[1], suffix: match[2] }
      : null;
  },

  /** "H2" → { element: "H", count: "2" } */
  extractChemicalElement: (text) => {
    const match = text.match(CHEMICAL_ELEMENT_EXTRACT);
    return match && match[1] && match[2]
      ? { element: match[1], count: match[2] }
      : null;
  },

  /** ")3" → "3" */
  extractChemicalParentheses: (text) => {
    const match = text.match(CHEMICAL_PARENS_EXTRACT);
    return match && match[1] ? match[1] : null;
  },

  /**
   * Drops the first character of `text` and strips braces from the rest.
   * NOTE(review): presumably `text` starts with the "^" / "_" marker
   * (e.g. "^{10}" → "10") — confirm against callers.
   */
  extractMathScript: (text) => {
    return stripBraces(text.substring(1));
  },

  /** "x^{n+1}" → { variable: "x", script: "n+1" } */
  extractMathWithVariable: (text) => {
    const match = text.match(MATH_VARIABLE_EXTRACT);
    if (match && match[1] && match[2]) {
      return { variable: match[1], script: stripBraces(match[2]) };
    }
    return null;
  },
};