UNPKG

typopo

Version:

Fix frequent microtypography errors in multiple languages. Write neat texts without bothering about typography rules. Typopo works for English, German, Slovak, Czech and Rusyn language.

291 lines (256 loc) 8.54 kB
import { replaceWithOverlapHandling } from "../../utils/regex-overlap.js"; import { base } from "../../const.js"; // /** Identify: - improperly used hyphen with spaces around - improperly used or spaced en dash - improperly used or spaced em dash between words, or, between a word and a nummber and fix dash and spacing for given locale Example see tests Exceptions - improperly spaced dash in words such as "e-shop", e.g. "e -shop" (we fix this in hyphen.js) - hyphens at the beginning of the paragraph that indicate unordered list @param {string} string: input text for identification @param {string} locale: locale option @returns {string} output with fixed dashes and spaces between words */ export function fixDashesBetweenWords(string, locale) { // prettier-ignore return string.replace( new RegExp( `([${base.allChars}\\d])` + `(` + `[${base.spaces}]*[${base.enDash}${base.emDash}]{1,3}[${base.spaces}]*` + `|` + `[${base.spaces}]+[${base.hyphen}]{1,3}[${base.spaces}]+` + `)` + `([${base.allChars}\\d])`, "g" ), `$1${locale.dashWords.spaceBefore}${locale.dashWords.dash}${locale.dashWords.spaceAfter}$3` ); } // /** Replace hyphen or dash placed between a word and punctuation, or placed at the end of a paragaph. Examples (en-us): so there is a dash -, → so there is a dash—, so there is a dash-, → so there is a dash—, so there is a dash -? → so there is a dash—? so there is a dash - → so there is a dash— @param {string} string — input text for identification @param {string} locale: locale option @returns {string} — output with locale-specific dash and spacing between a word and a punctuation. */ export function fixDashBetweenWordAndPunctuation(string, locale) { // prettier-ignore return string.replace( new RegExp( `([${base.allChars}])` + `([${base.spaces}]?)` + `([${base.hyphen}${base.enDash}${base.emDash}]{1,3})` + `([${base.spaces}]?)` + `([${base.sentencePunctuation}\\n\\r])`, "g" ), `$1${locale.dashWords.spaceBefore}${locale.dashWords.dash}$5` ); } // /** Replace hyphen or dash, placed between words and brackets, with locale-specific dash and spacing. Examples (en-us): word-(bracket → word—(bracket bracket)-word → bracket)—word word-) → word—) (-word → (—word word)-(word → word)—(word Special case - dashes entirely within brackets preserve dash type, only remove spaces: ( - ) → (-) [ – ] → [–] { — } → {—} @param {string} string — input text for identification @param {string} locale — locale option @returns {string} — output with locale-specific dash and spacing between words and brackets */ export function fixDashBetweenWordAndBrackets(string, locale) { // Dashes entirely within brackets // Only remove spaces, but preserve the original dash type // prettier-ignore string = string.replace( new RegExp( `([${base.openingBrackets}])` + `[${base.spaces}]*` + `([${base.hyphen}${base.enDash}${base.emDash}]+)` + `[${base.spaces}]*` + `([${base.closingBrackets}])`, "g" ), `$1$2$3` ); // Fix word followed by dash followed by opening bracket // prettier-ignore string = string.replace( new RegExp( `([${base.allChars}])` + `[${base.spaces}]*` + `[${base.hyphen}${base.enDash}${base.emDash}]{1,3}` + `[${base.spaces}]*` + `([${base.openingBrackets}])`, "g" ), `$1${locale.dashWords.spaceBefore}${locale.dashWords.dash}${locale.dashWords.spaceAfter}$2` ); // Fix closing bracket followed by dash followed by word // prettier-ignore string = string.replace( new RegExp( `([${base.closingBrackets}])` + `[${base.spaces}]*` + `[${base.hyphen}${base.enDash}${base.emDash}]{1,3}` + `[${base.spaces}]*` + `([${base.allChars}])`, "g" ), `$1${locale.dashWords.spaceBefore}${locale.dashWords.dash}${locale.dashWords.spaceAfter}$2` ); // Fix word followed by dash followed by closing bracket // prettier-ignore string = string.replace( new RegExp( `([${base.allChars}])` + `[${base.spaces}]*` + `[${base.hyphen}${base.enDash}${base.emDash}]{1,3}` + `[${base.spaces}]*` + `([${base.closingBrackets}])`, "g" ), `$1${locale.dashWords.spaceBefore}${locale.dashWords.dash}${locale.dashWords.spaceAfter}$2` ); // Fix opening bracket followed by dash followed by word // prettier-ignore string = string.replace( new RegExp( `([${base.openingBrackets}])` + `[${base.spaces}]*` + `[${base.hyphen}${base.enDash}${base.emDash}]{1,3}` + `[${base.spaces}]*` + `([${base.allChars}])`, "g" ), `$1${locale.dashWords.spaceBefore}${locale.dashWords.dash}${locale.dashWords.spaceAfter}$2` ); // Fix closing bracket followed by dash followed by opening bracket // prettier-ignore string = string.replace( new RegExp( `([${base.closingBrackets}])` + `[${base.spaces}]*` + `[${base.hyphen}${base.enDash}${base.emDash}]` + `[${base.spaces}]*` + `([${base.openingBrackets}])`, "g" ), `$1${locale.dashWords.spaceBefore}${locale.dashWords.dash}${locale.dashWords.spaceAfter}$2` ); return string; } // /** Replace hyphen or dash, placed between 2 cardinal numbers, with an en dash; including cases when there is an extra space from either one side or both sides of the dash Algorithm is split in two passes, to prevent the loops of matching the already fixed en dash. [1] Match the pattern with overlap handling [2] Replace enDash adepts with actual enDashes @param {string} string — input text for identification @returns {string} — output with en dash between cardinal numbers */ export function fixDashBetweenCardinalNumbers(string) { /* [1] Match the pattern with overlap handling */ // prettier-ignore string = replaceWithOverlapHandling( string, new RegExp( `(\\d)` + `([${base.spaces}]?` + `[${base.hyphen}${base.enDash}${base.emDash}]{1,3}` + `[${base.spaces}]?)` + `(\\d)`, "g" ), `$1{{typopo__endash}}$3` ); /* [2] Replace enDash adepts with actual enDashes */ // prettier-ignore return string.replace( new RegExp( `{{typopo__endash}}`, "g" ), base.enDash ); } // /** Replace hyphen or dash, placed between percentage range, with an en dash; including cases when there is an extra space from either one side or both sides of the dash @param {string} string — input text for identification @returns {string} — output with en dash between percentage range */ export function fixDashBetweenPercentageRange(string) { // prettier-ignore return string.replace( new RegExp( `([${base.percent}${base.permille}${base.permyriad}])` + `([${base.spaces}]?[${base.hyphen}${base.enDash}${base.emDash}]{1,3}[${base.spaces}]?)` + `(\\d)`, "g" ), `$1${base.enDash}$3` ); } // /** * Replace hyphen or dash, placed between 2 ordinal numbers, with an en dash; including cases when there is an extra space from either one side or both sides of the dash @param {string} string — input text for identification @param {string} locale: locale option @returns {string} — output with dash between ordinal numbers */ export function fixDashBetweenOrdinalNumbers(string, locale) { // prettier-ignore return string.replace( new RegExp( `(\\d)` + `(${locale.ordinalIndicator})` + `([${base.spaces}]?[${base.hyphen}${base.enDash}${base.emDash}]{1,3}[${base.spaces}]?)` + `(\\d)` + `(${locale.ordinalIndicator})`, "gi" ), `$1$2${base.enDash}$4$5` ); } // /** Fixes dashes @param {string} string — input text for identification @param {string} locale: locale option @returns {string} — output with fixed dashes */ export function fixDash(string, locale) { string = fixDashesBetweenWords(string, locale); string = fixDashBetweenWordAndPunctuation(string, locale); string = fixDashBetweenWordAndBrackets(string, locale); string = fixDashBetweenCardinalNumbers(string); string = fixDashBetweenPercentageRange(string); string = fixDashBetweenOrdinalNumbers(string, locale); return string; }