UNPKG

typopo

Version:

Fix frequent microtypography errors in multiple languages. Write neat texts without bothering about typography rules. Typopo works for English, German, Slovak, Czech and Rusyn languages.

616 lines (495 loc) 16.9 kB
import { base } from "../../const.js";
import { identifyMarkdownCodeTicks, placeMarkdownCodeTicks } from "../../utils/markdown.js";

/**
  Identify ’n’ contractions as apostrophes

  Example
  rock 'n' roll → rock ’n’ roll
  fish 'n' chips → fish ’n’ chips

  Exceptions
  Press 'N' to continue (should be identified as single quotes).
  Only the word pairs listed in the function are handled, so other
  occurrences of 'n' are left for the later identification steps.

  Implementation note
  The replacement always puts base.nbsp on both sides of the contraction,
  whether or not a space was matched there.

  @param {string} string: input text for identification
  @returns {string} output with identified contractions marked with
    the temporary label {{typopo__apostrophe}}
*/
export function identifyContractedAnd(string) {
  const commonContractions = [
    ["dead", "buried"],
    ["drill", "bass"],
    ["drum", "bass"],
    ["rock", "roll"],
    ["pick", "mix"],
    ["fish", "chips"],
    ["salt", "shake"],
    ["mac", "cheese"],
    ["pork", "beans"],
    ["drag", "drop"],
    ["rake", "scrape"],
    ["hook", "kill"],
  ];

  // Apply one case-insensitive replacement per word pair, in list order.
  // prettier-ignore
  return commonContractions.reduce(
    (text, [firstWord, secondWord]) =>
      text.replace(
        new RegExp(
          `(${firstWord})`
        + `([${base.spaces}]?)`
        + `(${base.singleQuoteAdepts})`
        + `(n)`
        + `(${base.singleQuoteAdepts})`
        + `([${base.spaces}]?)`
        + `(${secondWord})`,
          "gi"
        ),
        `$1${base.nbsp}{{typopo__apostrophe}}$4{{typopo__apostrophe}}${base.nbsp}$7`
      ),
    string
  );
}

/**
  Identify common contractions at the beginning of the word as apostrophes

  Example
  ’em, ’cause,… see the list of words in the function

  Implementation note
  The match is case-insensitive and only requires the listed word to
  directly follow the single quote adept.

  @param {string} string: input text for identification
  @returns {string} output with identified contractions marked with
    the temporary label {{typopo__apostrophe}}
*/
export function identifyContractedBeginnings(string) {
  const contractedWords =
    "cause|em|mid|midst|mongst|prentice|round|sblood|ssdeath|sfoot|sheart|shun|slid|slife|slight|snails|strewth|til|tis|twas|tween|twere|twill|twixt|twould";

  // prettier-ignore
  return string.replace(
    new RegExp(
      `(${base.singleQuoteAdepts})`
    + `(${contractedWords})`,
      "gi"
    ),
    `{{typopo__apostrophe}}$2`
  );
}

/**
  Identify common contractions at the ends of the word as apostrophes

  Example
  contraction of an -ing form, e.g. nottin’

  Implementation note
  \B before “in” requires “in” to be preceded by a word character,
  so a standalone “in” followed by a quote is not matched.

  @param {string} string: input text for identification
  @returns {string} output with identified contractions marked with
    the temporary label {{typopo__apostrophe}}
*/
export function identifyContractedEnds(string) {
  // prettier-ignore
  return string.replace(
    new RegExp(
      `(\\Bin)`
    + `(${base.singleQuoteAdepts})`,
      "gi"
    ),
    `$1{{typopo__apostrophe}}`
  );
}

/**
  Identify in-word contractions as apostrophes

  Examples
  Don’t, I’m, O’Doole, 69’ers,…

  Implementation note
  One or more consecutive single quote adepts between the two characters
  are collapsed into a single apostrophe label.

  @param {string} string: input text for identification
  @returns {string} output with identified contractions marked with
    the temporary label {{typopo__apostrophe}}
*/
export function identifyInWordContractions(string) {
  // prettier-ignore
  return string.replace(
    new RegExp(
      `([\\d${base.allChars}])`
    + `(${base.singleQuoteAdepts})+`
    + `([${base.allChars}])`,
      "g"
    ),
    `$1{{typopo__apostrophe}}$3`
  );
}

/**
  Identify contracted years

  Example
  in ’70s, INCHEBA ’89,…

  Exceptions
  12 '45″ // when there is a wrongly spaced feet

  Implementation note
  The quote has to follow a space that itself follows either a non-digit
  or an uppercase letter plus a digit; this is what keeps “12 '45″” intact.

  @param {string} string: input text for identification
  @returns {string} output with identified contractions marked with
    the temporary label {{typopo__apostrophe}}
*/
export function identifyContractedYears(string) {
  // prettier-ignore
  return string.replace(
    new RegExp(
      `([^0-9]|[A-Z][0-9])`
    + `([${base.spaces}])`
    + `(${base.singleQuoteAdepts})`
    + `([\\d]{2})`,
      "g"
    ),
    `$1$2{{typopo__apostrophe}}$4`
  );
}

/**
  Identify feet and arcminutes following a digit

  Example
  12' 45″ → 12′ 45″

  Single-quotes module impact
  Function falsely identifies feet, where we are expecting quotes, e.g.
  'Konference 2020' in quotes → ‘Konference 2020’ in quotes
  → this is corrected in replaceSinglePrimeWSingleQuote

  Implementation note
  We’re not using base.singleQuoteAdepts variable as commas and
  low-positioned quotes are omitted. An optional single space between
  the digit and the quote is preserved.

  @param {string} string: input text for identification
  @returns {string} output with identified single primes as a temporary
    variable string, e.g. {{typopo__single-prime}}
*/
export function identifySinglePrimes(string) {
  return string.replace(/(\d)( ?)('|‘|’|‛|′)/g, "$1$2{{typopo__single-prime}}");
}

/**
  Identify unpaired left single quote

  Algorithm
  Find left single quotes:
  - following the start of the string, a space, en dash or em dash
  - preceding a word or an ellipsis

  Implementation note
  A comma in the quote position is treated as a quote adept as well.

  @param {string} string: input text for identification
  @returns {string} output with identified unpaired left single quote
    marked with the temporary label {{typopo__lsq--unpaired}}
*/
export function identifyUnpairedLeftSingleQuote(string) {
  // prettier-ignore
  return string.replace(
    new RegExp(
      `(^|[${base.spaces}${base.emDash}${base.enDash}])`
    + `(${base.singleQuoteAdepts}|,)`
    + `([${base.allChars}${base.ellipsis}])`,
      "g"
    ),
    `$1{{typopo__lsq--unpaired}}$3`
  );
}

/**
  Identify unpaired right single quote

  Algorithm
  Find right single quotes:
  - following a word
  - optionally, following a sentence punctuation or an ellipsis
  - optionally, preceding a space or a sentence punctuation

  @param {string} string: input text for identification
  @returns {string} output with identified unpaired right single quote
    marked with the temporary label {{typopo__rsq--unpaired}}
*/
export function identifyUnpairedRightSingleQuote(string) {
  // Groups 2 and 4 are optional; when they do not participate in a match,
  // $2 and $4 expand to an empty string in the replacement.
  // prettier-ignore
  return string.replace(
    new RegExp(
      `([${base.allChars}])`
    + `([${base.sentencePunctuation}${base.ellipsis}])?`
    + `(${base.singleQuoteAdepts})`
    + `([ ${base.sentencePunctuation}])?`,
      "g"
    ),
    `$1$2{{typopo__rsq--unpaired}}$4`
  );
}

/**
  Identify single quotes within double quotes

  Limitations
  Since it’s difficult to identify apostrophe contracting end of the word
  (e.g. “jes’”), it’s difficult to identify single quotes universally.
  Therefore we’re identifying only single quotes and single quote pairs
  that are enclosed in double quote pairs.

  Algorithm
  - find text in double quotes (shortest match between two double quote adepts)
  - in quoted text find
    - unpaired left single quote
    - unpaired right single quote
    - single quote pairs

  @param {string} string: input text for identification
  @returns {string} output where single quotes inside double-quoted text
    are marked with temporary labels; the double quote adepts themselves
    are returned unchanged
*/
export function identifySingleQuotesWithinDoubleQuotes(string) {
  // prettier-ignore
  const doubleQuotedText = new RegExp(
      `(${base.doubleQuoteAdepts})`
    + `(.*?)`
    + `(${base.doubleQuoteAdepts})`,
    "g"
  );

  return string.replace(doubleQuotedText, (_match, openingQuote, quotedText, closingQuote) => {
    // The order matters: pairs are built from the unpaired labels
    // produced by the two preceding steps.
    const withLeftQuotes = identifyUnpairedLeftSingleQuote(quotedText);
    const withRightQuotes = identifyUnpairedRightSingleQuote(withLeftQuotes);
    const withPairs = identifySingleQuotePairs(withRightQuotes);

    return openingQuote + withPairs + closingQuote;
  });
}

/**
  Identify single quote pairs

  Example
  "a 'quoted material' here" → “a ‘quoted material’ here”

  Assumptions and Limitations
  - This function assumes apostrophes and unpaired single quotes were
    identified. The function itself is part of the
    identifySingleQuotesWithinDoubleQuotes.
  - It is difficult to identify all contractions at the end of the word,
    and thus it is difficult to identify single quote pairs. This function
    therefore only identifies one single quote pair with a double quote pair

  @param {string} string: input text for identification
  @returns {string} output with identified single quote pair marked with
    the temporary labels {{typopo__lsq}} and {{typopo__rsq}}
*/
export function identifySingleQuotePairs(string) {
  // identify one phrase wrapped in single quotes
  // note the greediness is because of Rusyn contractions
  // prettier-ignore
  return string.replace(
    new RegExp(
      `({{typopo__lsq--unpaired}})`
    + `(.*)`
    + `({{typopo__rsq--unpaired}})`,
      "g"
    ),
    `{{typopo__lsq}}$2{{typopo__rsq}}`
  );
}

/**
  Identify single quote pair around a single word

  Example
  'word' → ‘word’

  @param {string} string: input text for identification
  @returns {string} output with identified single quote pairs around
    single word, marked with the temporary labels {{typopo__lsq}} and
    {{typopo__rsq}}
*/
export function identifySingleQuotePairAroundSingleWord(string) {
  // prettier-ignore
  return string.replace(
    new RegExp(
      `(\\B)`
    + `(${base.singleQuoteAdepts})`
    + `([${base.allChars}]+)`
    + `(${base.singleQuoteAdepts})`
    + `(\\B)`,
      "g"
    ),
    `$1{{typopo__lsq}}$3{{typopo__rsq}}$5`
  );
}

/**
  Identify residual apostrophes

  Finds remaining single quote adepts and changes them to apostrophes.

  Limitation
  This function runs as last in the row of identification functions as it
  catches what’s left.

  @param {string} string: input text for identification
  @returns {string} output where every remaining single quote adept is
    marked with the temporary label {{typopo__apostrophe}}
*/
export function identifyResidualApostrophes(string) {
  // prettier-ignore
  return string.replace(
    new RegExp(
      `(${base.singleQuoteAdepts})`,
      "g"
    ),
    `{{typopo__apostrophe}}`
  );
}

/**
  Replace a single quote & a single prime with a single quote pair

  Assumptions and Limitations
  This function follows previous functions that identify single primes or
  unpaired single quotes. So it may happen that previous functions falsely
  identify a single prime in situations such as:
  - He said: “What about 'Localhost 3000', is that good?”

  Algorithm
  Find unpaired single quote and single prime in pair and change them to
  a single quote pair. Both orders are handled: an unpaired left quote
  followed by a single prime, and a single prime followed by an unpaired
  right quote.

  @param {string} string: input text for identification
  @returns {string} output with a single quote pair marked with the
    temporary labels {{typopo__lsq}} and {{typopo__rsq}}
*/
export function replaceSinglePrimeWSingleQuote(string) {
  // prettier-ignore
  const leftQuoteThenPrime = new RegExp(
      `({{typopo__lsq--unpaired}})`
    + `(.*?)`
    + `({{typopo__single-prime}})`,
    "g"
  );

  // prettier-ignore
  const primeThenRightQuote = new RegExp(
      `({{typopo__single-prime}})`
    + `(.*?)`
    + `({{typopo__rsq--unpaired}})`,
    "g"
  );

  const pairedLabels = `{{typopo__lsq}}$2{{typopo__rsq}}`;

  return string
    .replace(leftQuoteThenPrime, pairedLabels)
    .replace(primeThenRightQuote, pairedLabels);
}

/**
  Swap single quotes and terminal punctuation for a quoted part

  There are two different rules to follow quotes:
  1. Quotes contain only quoted material:
     ‘Sometimes it can be a whole sentence.’
     Sometimes it can be only a ‘quoted part’.
     The difference is where the terminal and sentence pause punctuation is.
  2. American editorial style
     Similar as the first rule, but commas (,) and periods (.) go before
     closing quotation marks, regardless whether they are part of the
     quoted material.

  The aim here is to support the first rule.

  Examples
  ‘Sometimes it can be a whole sentence.’
  Sometimes it can be only a ‘quoted part’.

  So we’re looking to swap here:
  Sometimes it can be only a ‘quoted part.’ →
  Sometimes it can be only a ‘quoted part’.

  Exceptions
  Byl to ‘Karel IV.’, ktery
  (the character right before the punctuation must not be one of
  base.romanNumerals)

  Algorithm
  Three different cases, see comments in code

  @param {string} string: input text for identification
  @param {Object} locale: locale option; its leftSingleQuote and
    rightSingleQuote properties are interpolated into the patterns
  @returns {string} output with swapped single quotes and terminal
    punctuation within a quoted part
*/
export function swapSingleQuotesAndTerminalPunctuation(string, locale) {
  // [1] quoted part inside a sentence, punctuation before the closing quote
  // prettier-ignore
  const punctuationInsideQuotedPart = new RegExp(
      `([^${base.sentencePunctuation}])`
    + `([${base.spaces}])`
    + `(${locale.leftSingleQuote})`
    + `([^${locale.rightSingleQuote}]+?)`
    + `([^${base.romanNumerals}])`
    + `([${base.terminalPunctuation}${base.ellipsis}])`
    + `(${locale.rightSingleQuote})`,
    "g"
  );

  // [2] quoted sentence in the middle of a sentence: closing quote,
  // punctuation, a space and a lowercase character
  // prettier-ignore
  const punctuationOutsideMidSentence = new RegExp(
      `([^${base.sentencePunctuation}])`
    + `([${base.spaces}])`
    + `(${locale.leftSingleQuote})`
    + `(.+?)`
    + `([^${base.romanNumerals}])`
    + `(${locale.rightSingleQuote})`
    + `([${base.terminalPunctuation}${base.ellipsis}])`
    + `([${base.spaces}])`
    + `([${base.lowercaseChars}])`,
    "g"
  );

  // [3] quoted sentence following a previous sentence or starting at the
  // beginning of the string, with punctuation after the closing quote
  // prettier-ignore
  const punctuationOutsideQuotedSentence = new RegExp(
      `([${base.sentencePunctuation}][${base.spaces}]|^)`
    + `(${locale.leftSingleQuote})`
    + `([^${locale.rightSingleQuote}]+?)`
    + `([^${base.romanNumerals}])`
    + `(${locale.rightSingleQuote})`
    + `([${base.terminalPunctuation}${base.ellipsis}])`
    + `(\\B)`,
    "g"
  );

  return (
    string
      // [1] place punctuation outside of quoted part
      .replace(punctuationInsideQuotedPart, `$1$2$3$4$5$7$6`)
      // [2] place punctuation within a quoted sentence that’s in the middle of the sentence
      .replace(punctuationOutsideMidSentence, `$1$2$3$4$5$7$6$8$9`)
      // [3] place punctuation within a quoted sentence
      .replace(punctuationOutsideQuotedSentence, `$1$2$3$4$6$5$7`)
  );
}

/**
  Remove extra space before a single prime

  Example
  12 ′ 45″ → 12′ 45″

  Assumptions and Limitations
  The function runs after all single quotes and single primes have been
  identified and replaced. Only the space preceding the prime is removed.

  @param {string} string: input text for identification
  @returns {string} output with adjusted spacing before single primes
*/
export function removeExtraSpaceAroundSinglePrime(string) {
  // prettier-ignore
  return string.replace(
    new RegExp(
      `([${base.spaces}])`
    + `(${base.singlePrime})`,
      "g"
    ),
    `$2`
  );
}

/**
  Replace all identified punctuation with appropriate punctuation in given
  language

  Context
  In single-quotes module, we first identify single quote and single prime
  adepts, and then we replace them temporarily with labels as
  “{{typopo__single-prime}}”. This is the function in the sequence to swap
  temporary labels to desired quotes.

  Implementation note
  Labels of unpaired left and right single quotes that were not turned into
  a pair are replaced with base.apostrophe, the same as apostrophe labels.
  The {{typopo__markdown_syntax_highlight}} label is replaced with three
  backticks.

  @param {string} string: input text for identification
  @param {Object} locale: locale option; its leftSingleQuote and
    rightSingleQuote properties are used as replacements
  @returns {string} an output with locale-specific single quotes and
    single primes
*/
export function placeLocaleSingleQuotes(string, locale) {
  return string
    .replace(/({{typopo__single-prime}})/g, base.singlePrime)
    .replace(
      /{{typopo__apostrophe}}|{{typopo__lsq--unpaired}}|{{typopo__rsq--unpaired}}/g,
      base.apostrophe
    )
    .replace(/{{typopo__lsq}}/g, locale.leftSingleQuote)
    .replace(/{{typopo__rsq}}/g, locale.rightSingleQuote)
    .replace(/{{typopo__markdown_syntax_highlight}}/g, "```");
}

/**
  Corrects improper use of single quotes, single primes and apostrophes

  Assumptions and Limitations
  This function assumes that double quotes are always used in pair,
  i.e. authors did not forget to close double quotes in their text.

  Further, single quotes are used as secondary and they're properly spaced,
  e.g. ␣'word or sentence portion'␣ (and not like ␣'␣word␣'␣)

  Algorithm
  [0] Identify markdown code ticks
  [1] Identify common apostrophe contractions
  [2] Identify feet, arcminutes, minutes
  [3] Identify single quote pair around a single word
  [4] Identify single quotes within double quotes
  [5] Replace a single quote & a single prime with a single quote pair
  [6] Identify residual apostrophes
  [7] Replace all identified punctuation with appropriate punctuation in
      given language
  [8] Swap quotes and terminal punctuation
  [9] Consolidate spaces around single primes

  @param {string} string — input text for identification
  @param {Object} locale — locale option, passed to placeLocaleSingleQuotes
    and swapSingleQuotesAndTerminalPunctuation
  @param {Object} [configuration] — options passed to
    identifyMarkdownCodeTicks and placeMarkdownCodeTicks; a falsy value is
    replaced with an empty object
  @returns {string} — corrected output
*/
export function fixSingleQuotesPrimesAndApostrophes(string, locale, configuration) {
  // `||` (not a default parameter) so that null and other falsy values
  // keep falling back to an empty object, as callers may rely on it.
  const settings = configuration || {};
  let text = string;

  /* [0] Identify markdown code ticks */
  text = identifyMarkdownCodeTicks(text, settings);

  /* [1] Identify common apostrophe contractions */
  text = identifyContractedAnd(text);
  text = identifyContractedBeginnings(text);
  text = identifyInWordContractions(text);
  text = identifyContractedYears(text);
  text = identifyContractedEnds(text);

  /* [2] Identify feet, arcminutes, minutes */
  text = identifySinglePrimes(text);

  /* [3] Identify single quote pair around a single word */
  text = identifySingleQuotePairAroundSingleWord(text);

  /* [4] Identify single quotes within double quotes */
  text = identifySingleQuotesWithinDoubleQuotes(text);

  /* [5] Replace a single quote & a single prime with a single quote pair */
  text = replaceSinglePrimeWSingleQuote(text);

  /* [6] Identify residual apostrophes */
  text = identifyResidualApostrophes(text);

  /* [7] Replace all identified punctuation with appropriate punctuation in given language */
  text = placeLocaleSingleQuotes(text, locale);
  text = placeMarkdownCodeTicks(text, settings);

  /* [8] Swap quotes and terminal punctuation */
  text = swapSingleQuotesAndTerminalPunctuation(text, locale);

  /* [9] Consolidate spaces around single primes */
  text = removeExtraSpaceAroundSinglePrime(text);

  return text;
}