// @mitre/nuxt-smartscript
// Version:
// Smart typography transformations for Nuxt - automatic superscript, subscript, and symbol formatting
// 151 lines (150 loc) • 6.76 kB
// JavaScript
import { logger } from "./logger.js";
// ---------------------------------------------------------------------------
// Detection patterns (unanchored). These are the defaults that
// createPatterns() recompiles with the "g" flag via safeCreateRegex().
// ---------------------------------------------------------------------------
// Trademark: the ™ symbol or the literal text "(TM)".
const TRADEMARK_PATTERN = /™|\(TM\)/;
// Registered: the ® symbol, or "(R)" when it is not immediately followed by another ")".
const REGISTERED_PATTERN = /®|\(R\)(?!\))/;
// Copyright: the © symbol, or "(C)" when it is not immediately followed by another ")".
const COPYRIGHT_PATTERN = /©|\(C\)(?!\))/;
// Ordinals: digits followed by st/nd/rd/th, delimited by word boundaries.
// Group 1 = the number, group 2 = the suffix.
const ORDINALS_PATTERN = /\b(\d+)(st|nd|rd|th)\b/;
// Chemicals: an element-like symbol (capital letter plus optional lowercase
// letter) followed by digits (groups 1 and 2), or ")" followed by digits (group 3).
const CHEMICALS_PATTERN = /([A-Z][a-z]?)(\d+)|\)(\d+)/;
// Math superscript: one letter, "^", then digits, a single letter, or a
// {braced} expression. The lookbehind requires the letter to sit at the start
// of the input or after whitespace, one of = + - * / ( ) . , a digit, or a
// lowercase letter.
const MATH_SUPER_PATTERN = /(?<=^|[\s=+\-*/().,\da-z])([a-zA-Z])\^(\d+|[a-zA-Z]|\{[^}]+\})/;
// Math subscript: same shape with "_". Its lookbehind is stricter than the
// superscript one: a preceding digit or letter is NOT accepted.
const MATH_SUB_PATTERN = /(?<=^|[\s=+\-*/().,])([a-zA-Z])_(\d+|[a-zA-Z]|\{[^}]+\})/;
// ---------------------------------------------------------------------------
// Validation patterns, used by PatternMatchers to classify a text fragment.
// The symbol, ordinal and chemical validators are anchored at both ends.
// ---------------------------------------------------------------------------
const TRADEMARK_VALIDATE = /^(?:™|\(TM\))$/;
const REGISTERED_VALIDATE = /^(?:®|\(R\))$/;
const ORDINAL_VALIDATE = /^\d+(?:st|nd|rd|th)$/;
const CHEMICAL_ELEMENT_VALIDATE = /^[A-Z][a-z]?\d+$/;
const CHEMICAL_PARENS_VALIDATE = /^\)\d+$/;
// The two math validators only check the prefix (a letter followed by "^" or
// "_", case-insensitive); whatever follows the marker is not inspected.
const MATH_SUPER_VALIDATE = /^[a-z]\^/i;
const MATH_SUB_VALIDATE = /^[a-z]_/i;
// ---------------------------------------------------------------------------
// Extraction patterns, used by PatternExtractors. Anchored at both ends, with
// capture groups for the parts each extractor returns.
// ---------------------------------------------------------------------------
// Group 1 = number, group 2 = suffix.
const ORDINAL_EXTRACT = /^(\d+)(st|nd|rd|th)$/;
// Group 1 = element symbol, group 2 = count.
const CHEMICAL_ELEMENT_EXTRACT = /^([A-Z][a-z]?)(\d+)$/;
// Group 1 = the count following ")".
const CHEMICAL_PARENS_EXTRACT = /^\)(\d+)$/;
// Group 1 = variable letter, group 2 = everything after the "^" or "_" marker.
const MATH_VARIABLE_EXTRACT = /^([a-z])[\^_](.+)$/i;
/**
 * Compile a global regex from a user-supplied pattern, falling back to the
 * built-in default when no pattern is supplied or the supplied one cannot be
 * compiled.
 *
 * @param {string|RegExp} [pattern] - Custom pattern handed to `new RegExp`;
 *   any falsy value selects the default.
 * @param {RegExp} defaultPattern - Built-in pattern whose `source` is reused
 *   for the fallback regex.
 * @param {string} name - Transformation name, used only in log messages.
 * @returns {RegExp} A newly created regex carrying only the "g" flag.
 */
function safeCreateRegex(pattern, defaultPattern, name) {
  if (pattern) {
    try {
      const compiled = new RegExp(pattern, "g");
      // Run the compiled regex once so a failure surfaces here, not in a caller.
      "test".match(compiled);
      logger.debug("Custom pattern applied for:", name, "\u2192", pattern);
      return compiled;
    } catch (error) {
      logger.warn("Invalid custom pattern for:", name, "\u2192", pattern);
      logger.debug("Pattern validation error:", error);
      logger.info("Using default pattern for:", name);
    }
  }
  // Reached when no custom pattern was given or the custom one was rejected.
  return new RegExp(defaultPattern.source, "g");
}
/**
 * Build the global regexes used to detect each supported transformation.
 *
 * A transformation is enabled unless `config.transformations.<name>` is
 * exactly `false`. Enabled transformations are compiled by safeCreateRegex()
 * from `config.customPatterns.<name>` when present, otherwise from the
 * built-in default. Disabled transformations all share one never-matching
 * regex.
 *
 * @param {object} [config] - Module configuration. A missing (`undefined` or
 *   `null`) config is treated like `{}`: everything enabled, no custom patterns.
 * @param {object} [config.transformations] - Per-transformation on/off flags.
 * @param {object} [config.customPatterns] - Per-transformation pattern overrides.
 * @returns {{trademark: RegExp, registered: RegExp, copyright: RegExp,
 *   ordinals: RegExp, chemicals: RegExp, mathSuper: RegExp, mathSub: RegExp}}
 */
export function createPatterns(config) {
  // Source "\b\B" can never match; createCombinedPattern() recognises disabled
  // transformations by this source.
  const NEVER_MATCH = /\b\B/g;
  // Optional chaining keeps a missing config from throwing.
  const enabled = config?.transformations;
  const custom = config?.customPatterns || {};
  const build = (name, defaultPattern) =>
    enabled?.[name] !== false
      ? safeCreateRegex(custom[name], defaultPattern, name)
      : NEVER_MATCH;

  return {
    // Matches ™ or (TM)
    trademark: build("trademark", TRADEMARK_PATTERN),
    // Matches ®, (R) but not (R))
    registered: build("registered", REGISTERED_PATTERN),
    // Matches ©, (C) but not (C))
    copyright: build("copyright", COPYRIGHT_PATTERN),
    // Matches ordinal numbers (1st, 2nd, 3rd, 4th, etc.)
    ordinals: build("ordinals", ORDINALS_PATTERN),
    // Matches chemical formulas: H2, SO4, )3
    chemicals: build("chemicals", CHEMICALS_PATTERN),
    // Matches math superscript notation: x^2, x^n, x^{expr}
    // Pattern: /(?<=^|[\s=+\-*/().,\da-z])([a-zA-Z])\^(\d+|[a-zA-Z]|\{[^}]+\})/g
    //
    // Breakdown:
    // - (?<=...)            - Positive lookbehind to ensure proper context
    // - ^|[\s=+\-*/().,\d]  - After start of string, whitespace, operators, or digits
    // - a-z                 - Or after a lowercase letter (enables E=mc^2 to match c^2)
    // - ([a-zA-Z])          - Capture group 1: single letter variable
    // - \^                  - Literal caret symbol
    // - (...)               - Capture group 2: the exponent, which can be:
    //   - \d+               - One or more digits (x^2, x^10)
    //   - [a-zA-Z]          - Single letter (x^n, x^i)
    //   - \{[^}]+\}         - Expression in braces (x^{n+1}, x^{10})
    //
    // Examples that MATCH:
    // - "x^2"      → ["x^2"]
    // - "E=mc^2"   → ["c^2"] (after lowercase 'm')
    // - "2x^2"     → ["x^2"] (after digit)
    // - "f(x)=x^2" → ["x^2"] (after equals)
    //
    // Examples that DON'T MATCH:
    // - "MAX^2" - 'X' is after uppercase 'A' (blocked by lookbehind)
    //
    // Known limitation:
    // - "file^name" still matches "e^n", because 'e' follows the lowercase 'l'
    mathSuper: build("mathSuper", MATH_SUPER_PATTERN),
    // Matches math subscript notation: x_1, x_n, x_{expr}
    // Pattern uses lookbehind to prevent matching in identifiers
    // Examples: "x_1" → match, "file_name" → no match
    mathSub: build("mathSub", MATH_SUB_PATTERN),
  };
}
/**
 * Merge the enabled per-transformation regexes into one global alternation.
 *
 * A pattern counts as disabled when its source is the never-matching "\b\B".
 * The remaining sources are joined with "|" in a fixed order: trademark,
 * registered, copyright, ordinals, chemicals, mathSuper, mathSub.
 *
 * @param {object} patterns - Object holding a RegExp under each of the seven
 *   keys listed above.
 * @param {object} [_config] - Unused by this function.
 * @returns {RegExp} Global regex matching any enabled pattern, or a
 *   never-matching global regex when all of them are disabled.
 */
export function createCombinedPattern(patterns, _config) {
  const DISABLED_SOURCE = "\\b\\B";
  const ordered = [
    patterns.trademark,
    patterns.registered,
    patterns.copyright,
    patterns.ordinals,
    patterns.chemicals,
    patterns.mathSuper,
    patterns.mathSub,
  ];
  const activeSources = ordered
    .map((regex) => regex.source)
    .filter((source) => source !== DISABLED_SOURCE);

  return activeSources.length > 0
    ? new RegExp(activeSources.join("|"), "g")
    : /\b\B/g;
}
/**
 * Remove every "{" and "}" character from a string.
 *
 * @param {string} text - Input text.
 * @returns {string} The text with all curly braces removed.
 */
function stripBraces(text) {
  const anyBrace = /[{}]/g;
  const withoutBraces = text.replace(anyBrace, "");
  return withoutBraces;
}
// Anchored, whole-string form of the copyright pattern, mirroring the
// validators used for the trademark and registered symbols. The unanchored
// COPYRIGHT_PATTERN would also accept longer fragments that merely contain
// "©" or "(C)", such as "x^{(C)}".
const COPYRIGHT_VALIDATE = /^(?:©|\(C\))$/;

/**
 * Predicates that classify a text fragment by transformation type.
 *
 * Each predicate takes a string and returns a boolean. The symbol, ordinal
 * and chemical predicates require the whole string to match. The two math
 * predicates only check that the string starts with a letter followed by
 * "^" or "_" (case-insensitive).
 */
export const PatternMatchers = {
  /** True when the text is exactly "™" or "(TM)". */
  isTrademark: (text) => TRADEMARK_VALIDATE.test(text),
  /** True when the text is exactly "®" or "(R)". */
  isRegistered: (text) => REGISTERED_VALIDATE.test(text),
  /** True when the text is exactly "©" or "(C)". */
  isCopyright: (text) => COPYRIGHT_VALIDATE.test(text),
  /** True when the text is digits followed by st, nd, rd or th. */
  isOrdinal: (text) => ORDINAL_VALIDATE.test(text),
  /** True when the text is a capital letter, optional lowercase letter, then digits. */
  isChemicalElement: (text) => CHEMICAL_ELEMENT_VALIDATE.test(text),
  /** True when the text is ")" followed by digits. */
  isChemicalParentheses: (text) => CHEMICAL_PARENS_VALIDATE.test(text),
  /** True when the text starts with a letter followed by "^". */
  isMathSuperscript: (text) => MATH_SUPER_VALIDATE.test(text),
  /** True when the text starts with a letter followed by "_". */
  isMathSubscript: (text) => MATH_SUB_VALIDATE.test(text),
};
/**
 * Helpers that pull the structured parts out of a matched text fragment.
 * Every regex-based extractor returns `null` when the text does not have the
 * expected shape.
 */
export const PatternExtractors = {
  /**
   * Split an ordinal such as "21st" into its number and suffix.
   * @param {string} text
   * @returns {{number: string, suffix: string}|null}
   */
  extractOrdinal(text) {
    const [, number, suffix] = text.match(ORDINAL_EXTRACT) ?? [];
    return number && suffix ? { number, suffix } : null;
  },

  /**
   * Split an element-with-count such as "H2" into element symbol and count.
   * @param {string} text
   * @returns {{element: string, count: string}|null}
   */
  extractChemicalElement(text) {
    const [, element, count] = text.match(CHEMICAL_ELEMENT_EXTRACT) ?? [];
    return element && count ? { element, count } : null;
  },

  /**
   * Return the digits that follow a closing parenthesis, e.g. ")3" gives "3".
   * @param {string} text
   * @returns {string|null}
   */
  extractChemicalParentheses(text) {
    const count = text.match(CHEMICAL_PARENS_EXTRACT)?.[1];
    return count || null;
  },

  /**
   * Drop the first character of the text and remove all curly braces from
   * the remainder. Presumably the first character is the "^" or "_" marker;
   * this is not checked here.
   * @param {string} text
   * @returns {string}
   */
  extractMathScript(text) {
    const afterFirstChar = text.substring(1);
    return stripBraces(afterFirstChar);
  },

  /**
   * Split "x^2" / "x_{n}" style text into the variable letter and the script
   * with curly braces removed.
   * @param {string} text
   * @returns {{variable: string, script: string}|null}
   */
  extractMathWithVariable(text) {
    const found = text.match(MATH_VARIABLE_EXTRACT);
    if (!found) {
      return null;
    }
    const [, variable, rawScript] = found;
    if (!variable || !rawScript) {
      return null;
    }
    return { variable, script: stripBraces(rawScript) };
  },
};