UNPKG

typopo

Version:

Fix frequent microtypography errors in multiple languages. Write neat texts without bothering about typography rules. Typopo works for English, German, Slovak, Czech and Rusyn languages.

github.com/surfinzap/typopo

surfinzap/typopo

195 lines (162 loc) • 6.8 kB

JavaScript

import { base } from "../../const.js";

/**
 * Runs a list of regex replacements over a text, one after another.
 *
 * Each rule is compiled right before it is applied, and every rule works on
 * the output of the previous one, so the order of `rules` matters.
 *
 * @param {string} text - text to process
 * @param {{pattern: string, replacement: string}[]} rules - regex sources
 *   paired with their replacement strings
 * @param {string} flags - RegExp flags used for every rule
 * @returns {string} text after all rules have been applied
 */
function applyReplacements(text, rules, flags) {
  return rules.reduce(
    (result, { pattern, replacement }) =>
      result.replace(new RegExp(pattern, flags), replacement),
    text
  );
}

/**
 * Builds the replacement string for one multiple-word abbreviation pattern.
 *
 * Every abbreviated word contributes three capture groups to the pattern
 * (word, dot, optional space), and the preceding boundary is group 1, so the
 * n-th word (0-based) is captured in group `n * 3 + 2`. The number of words
 * is derived from the number of opening parentheses in the pattern.
 *
 * @param {string} abbreviationPattern - pattern built for one abbreviation
 * @param {string} spaceBetween - inserted after each word except the last
 * @param {string} spaceAfter - inserted after the last abbreviated word
 * @returns {string} replacement string with `$n` group references
 */
function buildMultipleWordReplacement(
  abbreviationPattern,
  spaceBetween,
  spaceAfter
) {
  const wordCount = (abbreviationPattern.match(/\(/g) || []).length / 3;

  let replacement = "$1";
  for (let wordIndex = 0; wordIndex < wordCount - 1; wordIndex++) {
    replacement += `$${wordIndex * 3 + 2}.${spaceBetween}`;
  }

  const lastWordGroup = (wordCount - 1) * 3 + 2;
  const followingGroup = wordCount * 3 + 2;
  return `${replacement}$${lastWordGroup}.${spaceAfter}$${followingGroup}`;
}

/**
 * Fixes spaces around initials for a first name and up to two middle names.
 * It won’t fix any other abbreviation.
 *
 * Rules are applied in this order:
 * [1] "I. FullName"
 * [2] "I. I. FullName"
 * [3] "I. I. I. FullName"
 *
 * @param {string} string - input text for identification
 * @param {object} locale - locale settings; `locale.spaceAfter.abbreviation`
 *   is placed between consecutive initials
 * @returns {string} corrected output
 */
export function fixInitials(string, locale) {
  const initialPattern = `([${base.uppercaseChars}][${base.allChars}]?\\.)([${base.spaces}]?)`;
  const fullNamePattern = `([${base.allChars}]{2,}[^\\.])`;
  const spaceBetweenInitials = locale.spaceAfter.abbreviation;

  // Each initial adds two capture groups (the initial and the optional space
  // after it), so the initials sit in groups 1, 3, 5 and the name follows.
  const rules = [
    {
      // "I. FullName"
      pattern: initialPattern + fullNamePattern,
      replacement: `$1${base.nbsp}$3`,
    },
    {
      // "I. I. FullName"
      pattern: initialPattern.repeat(2) + fullNamePattern,
      replacement: `$1${spaceBetweenInitials}$3${base.space}$5`,
    },
    {
      // "I. I. I. FullName"
      pattern: initialPattern.repeat(3) + fullNamePattern,
      replacement: `$1${spaceBetweenInitials}$3${spaceBetweenInitials}$5${base.space}$7`,
    },
  ];

  return applyReplacements(string, rules, "g");
}

/**
 * Fixes spaces between multiple-word abbreviations.
 *
 * Each locale has its own pattern for fixing abbreviations, please refer to
 * the test suites:
 * - dots after each abbreviated word
 * - locale-specific space between abbreviated words
 * - normal space after the last abbreviated word
 *
 * Algorithm
 * [1] Turn the locale's multiple-word abbreviations into regex patterns
 * [2] Fix multiple-word abbreviations that are followed by a word
 * [3] Fix multiple-word abbreviations placed after a word or on their own
 *
 * @param {string} string - input text for identification
 * @param {object} locale - locale settings; reads `multipleWordAbbreviations`,
 *   `spaceAfter.abbreviation`, `leftDoubleQuote` and `leftSingleQuote`
 * @returns {string} corrected output
 */
export function fixMultipleWordAbbreviations(string, locale) {
  /* Partial patterns for a composition */
  const precedingBoundary = `([^${base.allChars}${base.enDash}${base.emDash}]|^)`;
  const followingWord = `([${base.allChars}]|\\D)`;
  const followingBoundary = `([^${base.allChars}${locale.leftDoubleQuote}${locale.leftSingleQuote}${base.backtick}\\p{Emoji}]|$)`;
  const spaceBetweenWords = locale.spaceAfter.abbreviation;

  /* [1] One pattern per abbreviation: (word)(dot)(optional space) per word */
  const abbreviationPatterns = locale.multipleWordAbbreviations.map(
    (abbreviation) =>
      abbreviation
        .split(" ")
        .map((word) => `(${word})(\\.)([${base.spaces}]?)`)
        .join("")
  );

  /* [2] Abbreviation before a word: the locale-specific space goes between
         the abbreviated words, a normal space after the last one */
  const beforeWordRules = abbreviationPatterns.map((abbreviationPattern) => ({
    pattern: `${precedingBoundary}${abbreviationPattern}${followingWord}`,
    replacement: buildMultipleWordReplacement(
      abbreviationPattern,
      spaceBetweenWords,
      " "
    ),
  }));

  /* [3] Abbreviation after a word or on its own: same as above, but no space
         after the last abbreviated word */
  const afterWordRules = abbreviationPatterns.map((abbreviationPattern) => ({
    pattern: `${precedingBoundary}${abbreviationPattern}${followingBoundary}`,
    replacement: buildMultipleWordReplacement(
      abbreviationPattern,
      spaceBetweenWords,
      ""
    ),
  }));

  // All [2] rules run before any [3] rule. The "u" flag in [3] is needed for
  // the \p{Emoji} property escape in the following boundary.
  const withSpaceBeforeWord = applyReplacements(string, beforeWordRules, "gi");
  return applyReplacements(withSpaceBeforeWord, afterWordRules, "giu");
}

/**
 * Fixes spaces between single-word abbreviations and their neighbouring words.
 *
 * Each locale has its own pattern for fixing abbreviations, please refer to
 * the test suites.
 *
 * Algorithm
 * [1] Turn the locale's single-word abbreviations into regex patterns
 * [2] Fix single-word abbreviations that are followed by a word
 * [3] Fix single-word abbreviations placed after a word or on their own
 *
 * @param {string} string - input text for identification
 * @param {object} locale - locale settings; reads `singleWordAbbreviations`
 * @returns {string} corrected output
 */
export function fixSingleWordAbbreviations(string, locale) {
  /* [1] One pattern per abbreviation: (word)(dot)(optional space) */
  const abbreviationPatterns = locale.singleWordAbbreviations.map(
    (abbreviation) => `(${abbreviation})(\\.)([${base.spaces}]?)`
  );

  /* [2] Abbreviation before a word: `base.nbsp` replaces the optional space
         captured after the dot */
  const precedingNonLatinBoundary = `([^${base.allChars}${base.enDash}${base.emDash}${base.nbsp}\\.]|^)`;
  const followingWord = `([${base.allChars}\\d]+)([^\\.]|$)`;
  const beforeWordRules = abbreviationPatterns.map((abbreviationPattern) => ({
    pattern: `${precedingNonLatinBoundary}${abbreviationPattern}${followingWord}`,
    replacement: `$1$2$3${base.nbsp}$5$6`,
  }));

  /* [3] Abbreviation after a word: `base.nbsp` replaces the space captured
         between the preceding word and the abbreviation */
  const precedingWord = `([${base.allChars}\\d])([${base.spaces}])`;
  const followingNonLatinBoundary = `([^${base.spaces}${base.allChars}\\d]|$)`;
  const afterWordRules = abbreviationPatterns.map((abbreviationPattern) => ({
    pattern: `${precedingWord}${abbreviationPattern}${followingNonLatinBoundary}`,
    replacement: `$1${base.nbsp}$3$4$5$6`,
  }));

  // All [2] rules run before any [3] rule.
  const withSpaceBeforeWord = applyReplacements(string, beforeWordRules, "gi");
  return applyReplacements(withSpaceBeforeWord, afterWordRules, "gi");
}

/**
 * Applies all abbreviation fixes in a fixed order: initials first, then
 * multiple-word abbreviations, then single-word abbreviations.
 *
 * @param {string} string - input text for identification
 * @param {object} locale - locale settings passed to every fixer
 * @returns {string} corrected output
 */
export function fixAbbreviations(string, locale) {
  const fixers = [
    fixInitials,
    fixMultipleWordAbbreviations,
    fixSingleWordAbbreviations,
  ];

  return fixers.reduce((text, fix) => fix(text, locale), string);
}