/*
 * typopo
 * Version:
 * Fix frequent microtypography errors in multiple languages. Write neat texts without bothering about typography rules. Typopo works for the English, German, Slovak, Czech and Rusyn languages.
 * 402 lines (357 loc) • 13.5 kB
 * JavaScript
 */
import {
removeNbspBetweenMultiCharWords,
addNbspAfterPreposition,
addNbspAfterAmpersand,
addNbspAfterCardinalNumber,
addNbspAfterOrdinalNumber,
addNbspWithinOrdinalDate,
addNbspAfterRomanNumeral,
fixNbspForNameWithRegnalNumber,
fixSpaceBeforePercent,
addNbspBeforeSingleLetter,
addNbspAfterSymbol,
replaceSpacesWithNbspAfterSymbol,
fixNbsp,
} from "../../src/modules/whitespace/nbsp.js";
import { supportedLocales } from "../../src/locale/locale.js";
import { createTestSuite, transformTestSet } from "../test-utils.js";
// Cases for the "Remove non-breaking space between multi-letter words" suite,
// which is run with removeNbspBetweenMultiCharWords and fixNbsp.
// Each entry is presumably `input: expected output` — confirm in ../test-utils.js.
// NOTE(review): keys and values look the same when rendered; they presumably
// differ only in the kind of space character, so edit this set with invisible
// characters made visible.
const nbspBetweenMultiCharWordsSet = {
"vo dvore": "vo dvore",
"Ku komore": "Ku komore",
"vo vo vo vo": "vo vo vo vo",
"vo vo vo": "vo vo vo",
"ňa moja": "ňa moja",
"Ťa tvoja": "Ťa tvoja",
};
// Registers the multi-letter-word suite for every supported locale.
// NOTE(review): how the `{}` and `fixNbsp` arguments are interpreted is decided
// by createTestSuite in ../test-utils.js, which is not visible here.
createTestSuite(
  "Remove non-breaking space between multi-letter words",
  nbspBetweenMultiCharWordsSet,
  removeNbspBetweenMultiCharWords,
  {},
  fixNbsp,
  supportedLocales
);
// File-name-like strings that must come out unchanged: every entry maps the
// text to itself, in both lower-case and upper-case form.
const filenameFalsePositiveSet = Object.fromEntries(
  [
    "url-to-image-5.jpg",
    "url_to_image_5.jpg",
    "url%to%image%5.jpg",
    "url to image 5.jpg",
    "URL-TO-IMAGE-5.JPG",
    "URL_TO_IMAGE_5.JPG",
    "URL%TO%IMAGE%5.JPG",
    "URL TO IMAGE 5.JPG",
  ].map((fileName) => [fileName, fileName])
);
// Locale-independent cases for the "Add non-breaking spaces after
// single-letter prepositions" suite (addNbspAfterPreposition / fixNbsp).
// Each entry is presumably `input: expected output` — confirm in ../test-utils.js.
// NOTE(review): many keys and values differ only in invisible whitespace
// characters; edit with invisible characters made visible.
const nbspAfterPrepositionSet = {
"V potoku": "V potoku",
"Koniec. V potoku": "Koniec. V potoku",
"Koniec? V potoku": "Koniec? V potoku",
"Koniec! V potoku": "Koniec! V potoku",
"Koniec… V potoku": "Koniec… V potoku",
"Koniec: V potoku": "Koniec: V potoku",
"Koniec; V potoku": "Koniec; V potoku",
"Koniec, V potoku": "Koniec, V potoku",
"© V Inc.": "© V Inc.",
"® V Inc.": "® V Inc.",
"℗ V Inc.": "℗ V Inc.",
"Skáče o tyči": "Skáče o tyči",
"v obchode a v hospode": "v obchode a v hospode",
"v a v a v": "v a v a v",
"a з коминів": "a з коминів",
"a я іду здоїти": "a я іду здоїти",
"a в хырбетї": "a в хырбетї",
"што є му вытыкане": "што є му вытыкане",
"ся ї не": "ся ї не",
"a s’a": "a s’a",
// false positives
"client’s customer": "client’s customer",
"Ctrl+I and Ctrl+B or pasting an image.": "Ctrl+I and Ctrl+B or pasting an image.",
"Ctrl-I and Ctrl-B or pasting an image.": "Ctrl-I and Ctrl-B or pasting an image.",
"získává investici $25M na něco": "získává investici $25M na něco", // no nbsp after $25M
"starŷm kresli": "starŷm kresli", // non-latin chars in word
"The product X is missing the feature Y.": "The product X is missing the feature Y.", // no nbsp after a single capital letter in the middle of the sentence
};
// Preposition cases merged into the suite only when the locale is "en-us"
// (see the per-locale loop that follows).
const nbspAfterPrepositionEnUsSet = {
"When I talk": "When I talk", // do not add nbsp before I
"I was there.": "I was there.",
};
// Preposition cases merged into the suite for every locale other than "en-us".
const nbspAfterPrepositionOtherSet = {
"Vzorka I je fajn": "Vzorka I je fajn", // remove 2nd nbsp
"I v potoku.": "I v potoku.",
"When I was there.": "When I was there.",
};
// Register the single-letter preposition suite once per locale. The shared
// cases are extended with the English-only set for "en-us" and with the
// "other" set for every remaining locale.
for (const locale of supportedLocales) {
  const localeSpecificCases =
    locale === "en-us" ? nbspAfterPrepositionEnUsSet : nbspAfterPrepositionOtherSet;
  const prepositionCases = { ...nbspAfterPrepositionSet, ...localeSpecificCases };

  createTestSuite(
    "Add non-breaking spaces after single-letter prepositions",
    prepositionCases,
    addNbspAfterPreposition,
    {},
    fixNbsp,
    locale
  );
}
// Cases for the "Add a non-breaking space after “&”" suite
// (addNbspAfterAmpersand / fixNbsp).
// NOTE(review): key and value presumably differ only in the space after "&".
const nbspAfterAmpersandSet = {
"Bed & Breakfast": "Bed & Breakfast",
};
// Registers the ampersand suite for every supported locale.
// NOTE(review): the roles of `{}` and `fixNbsp` are defined by createTestSuite
// in ../test-utils.js, which is not visible here.
createTestSuite(
  "Add a non-breaking space after “&”",
  nbspAfterAmpersandSet,
  addNbspAfterAmpersand,
  {},
  fixNbsp,
  supportedLocales
);
// Cases for the "Add a non-breaking space after a cardinal number" suite
// (addNbspAfterCardinalNumber / fixNbsp).
// The trailing comments name the kind of space used in the key on that line;
// the characters themselves are invisible when rendered.
const nbspAfterCardinalNumberSet = {
"5 mm": "5 mm",
"5 mm": "5 mm", // nbsp
"5 mm": "5 mm", // hairSpace
"5 mm": "5 mm", // narrowNbsp
"5 Kč": "5 Kč",
"15 mm": "15 mm",
// false positive
// no nbsp after 3+ digits
"152 mm": "152 mm",
"2020 rokov": "2020 rokov",
/* false positive,
* a number is already bound with abbreviation
* Na str.⎵5 je obsah → Na str.⎵5 je obsah
* !→ Na str. 5⎵je obsah
*/
"Na str. 5 je obsah": "Na str. 5 je obsah",
};
// Registers the cardinal-number suite for every supported locale.
// NOTE(review): the roles of `{}` and `fixNbsp` are defined by createTestSuite
// in ../test-utils.js, which is not visible here.
createTestSuite(
  "Add a non-breaking space after a cardinal number",
  nbspAfterCardinalNumberSet,
  addNbspAfterCardinalNumber,
  {},
  fixNbsp,
  supportedLocales
);
// English ordinal-number cases (st/nd/rd/th suffixes) for
// addNbspAfterOrdinalNumber / fixNbsp.
// NOTE(review): keys and values presumably differ only in the space after the
// ordinal; edit with invisible characters made visible.
const nbspAfterOrdinalNumberEnUsSet = {
"1st amendment": "1st amendment",
"2nd amendment": "2nd amendment",
"3rd amendment": "3rd amendment",
"4th amendment": "4th amendment",
"18th amendment": "18th amendment",
"1st March": "1st March",
"2nd March": "2nd March",
"3rd March": "3rd March",
"15th March": "15th March",
// false positive, 3+ digits
"158th amendment": "158th amendment",
"1158th amendment": "1158th amendment",
};
// English ordinal-number suite ("1st", "2nd", …). These cases only apply to
// the "en-us" locale — the sibling suite for "1."-style ordinals explicitly
// filters "en-us" out — so the locale is pinned here instead of relying on
// whatever default createTestSuite applies when the argument is omitted.
createTestSuite(
  "Add a non-breaking space after an ordinal number",
  nbspAfterOrdinalNumberEnUsSet,
  addNbspAfterOrdinalNumber,
  {},
  fixNbsp,
  "en-us"
);
// Ordinal-number cases ("1." style) for addNbspAfterOrdinalNumber / fixNbsp,
// run for every supported locale except "en-us" (see the suite that follows).
// NOTE(review): keys and values presumably differ only in the space after the
// ordinal; edit with invisible characters made visible.
const nbspAfterOrdinalNumberOtherSet = {
"1. dodatok": "1. dodatok",
"1.dodatok": "1. dodatok",
"1.štava": "1. štava",
"12. dodatok": "12. dodatok",
"12. január": "12. január",
"21. Festival otrlého diváka": "21. Festival otrlého diváka",
// false positives
"10.00": "10.00",
"Je to str. 5. Dalsia veta.": "Je to str. 5. Dalsia veta.",
"158. festival": "158. festival", // fp, 3+ digits
"…dokonce i v roce 2021. Důsledky…": "…dokonce i v roce 2021. Důsledky…",
};
// Registers the "1."-style ordinal suite for every supported locale except
// "en-us", whose ordinals are covered by a separate set.
createTestSuite(
  "Add a non-breaking space after an ordinal number",
  nbspAfterOrdinalNumberOtherSet,
  addNbspAfterOrdinalNumber,
  {},
  fixNbsp,
  supportedLocales.filter((code) => code !== "en-us")
);
// Cases for the "Fix spaces with an ordinal date" suite.
// The `${…}` markers sit inside ordinary double-quoted strings, so JavaScript
// does not interpolate them; they are presumably replaced per locale by
// transformTestSet before the suite runs — confirm in ../test-utils.js.
const nbspOrdinalDate = {
"12. 1. 2017": "12.${ordinalDateFirstSpace}1.${ordinalDateSecondSpace}2017",
"12.1.2017": "12.${ordinalDateFirstSpace}1.${ordinalDateSecondSpace}2017",
"10.00": "10.00", // false positive for the example above
};
// Register the ordinal-date suite once per locale, using the date cases
// transformed for that locale.
for (const locale of supportedLocales) {
  const localizedDateCases = transformTestSet(nbspOrdinalDate, locale);

  createTestSuite(
    "Fix spaces with an ordinal date",
    localizedDateCases,
    addNbspWithinOrdinalDate,
    {},
    fixNbsp,
    locale
  );
}
// Cases for the "Add a non-breaking space after a roman numeral" suite
// (addNbspAfterRomanNumeral / fixNbsp). The suite passes this set twice: merged
// with nbspAfterRomanNumeralUnitSet as the 2nd argument, and on its own as the
// 4th argument.
// NOTE(review): keys and values presumably differ only in invisible whitespace
// characters; edit with invisible characters made visible.
const nbspAfterRomanNumeralSet = {
"I. kapitola": "I. kapitola",
"bola to I. kapitola": "bola to I. kapitola",
"III. kapitola": "III. kapitola",
"III.kapitola": "III. kapitola",
"X. ročník": "X. ročník",
"Bol to X. ročník.": "Bol to X. ročník.",
"V. ročník": "V. ročník",
"L. ročník": "L. ročník",
"D. ročník": "D. ročník",
"8. V. 1945": "8. V. 1945",
"8. V.1945": "8. V. 1945",
// false positives
"Ch. G. D. Lambert": "Ch. G. D. Lambert",
"Ch. G. D. Lambert": "Ch. G. D. Lambert",
"G. D. Lambert": "G. D. Lambert",
"Ch. Ch. D. Lambert": "Ch. Ch. D. Lambert",
"CH. D. Lambert": "CH. D. Lambert",
"Ch. Ch. Šalda": "Ch. Ch. Šalda",
"CH. CH. Šalda": "CH. CH. Šalda",
"Ch.Ch. Šalda": "Ch.Ch. Šalda",
"CH.CH. Šalda": "CH.CH. Šalda",
};
// Extra roman-numeral case that is merged only into the 2nd argument of the
// suite (the cases run with addNbspAfterRomanNumeral); it is not part of the
// set passed as the 4th argument.
const nbspAfterRomanNumeralUnitSet = {
"Karel IV.": "Karel IV.",
};
// Registers the roman-numeral suite for every supported locale except "en-us".
// The 2nd argument is the base set plus the unit-only extras; the 4th argument
// is the base set alone.
// NOTE(review): the 4th argument presumably holds the cases run with fixNbsp —
// confirm in ../test-utils.js.
createTestSuite(
  "Add a non-breaking space after a roman numeral",
  { ...nbspAfterRomanNumeralSet, ...nbspAfterRomanNumeralUnitSet },
  addNbspAfterRomanNumeral,
  nbspAfterRomanNumeralSet,
  fixNbsp,
  supportedLocales.filter((code) => code !== "en-us")
);
// Cases for the "Fix non-breaking space around a name with a regnal number"
// suite (fixNbspForNameWithRegnalNumber / fixNbsp).
// `${romanOrdinalIndicator}` sits inside ordinary double-quoted strings, so
// JavaScript does not interpolate it; it is presumably replaced per locale by
// transformTestSet — confirm in ../test-utils.js.
// NOTE(review): some keys look identical when rendered and presumably differ
// only in invisible whitespace characters.
const nbspNameRegnalNumberSet = {
// Place non-breaking space between name and roman numeral
"Karel IV${romanOrdinalIndicator} byl římsko-německý král.": "Karel IV${romanOrdinalIndicator} byl římsko-německý král.",
"Karel IV${romanOrdinalIndicator} byl římsko-německý král.": "Karel IV${romanOrdinalIndicator} byl římsko-německý král.",
"Karel IV${romanOrdinalIndicator}": "Karel IV${romanOrdinalIndicator}",
"Karel X${romanOrdinalIndicator}": "Karel X${romanOrdinalIndicator}",
// false positives
"je to IV. cenová skupina": "je to IV. cenová skupina",
"Try Ctrl+I": "Try Ctrl+I",
// unsupported (It’s more common to use “I + verb” in text than citing regnal names so this case is unsupported for now)
"Charles I.": "Charles I.",
};
// Extra regnal-number case merged only into the 2nd argument of the suite (the
// cases run with fixNbspForNameWithRegnalNumber). It is added after
// transformTestSet has been applied, so it is not transformed itself.
const nbspNameRegnalNumberUnitSet = {
"When I talk to emerging product designers": "When I talk to emerging product designers",
};
// Register the regnal-number suite once per locale. The 2nd argument is the
// locale-transformed base set plus the unit-only extras; the 4th argument is
// the locale-transformed base set alone. transformTestSet is called once for
// each of the two arguments, so they receive separate objects.
for (const locale of supportedLocales) {
  const unitCases = {
    ...transformTestSet(nbspNameRegnalNumberSet, locale),
    ...nbspNameRegnalNumberUnitSet,
  };
  const moduleCases = transformTestSet(nbspNameRegnalNumberSet, locale);

  createTestSuite(
    "Fix non-breaking space around a name with a regnal number",
    unitCases,
    fixNbspForNameWithRegnalNumber,
    moduleCases,
    fixNbsp,
    locale
  );
}
// Cases for the "Add a locale-specific space before %, ‰, ‱" suite
// (fixSpaceBeforePercent / fixNbsp).
// `${spaceBeforePercent}` sits inside ordinary double-quoted strings, so
// JavaScript does not interpolate it; it is presumably replaced per locale by
// transformTestSet — confirm in ../test-utils.js.
const spaceBeforePercentSet = {
"20 %": "20${spaceBeforePercent}%",
"20 %–30 %": "20${spaceBeforePercent}%–30${spaceBeforePercent}%",
"20 ‰": "20${spaceBeforePercent}‰",
"20 ‰–30 ‰": "20${spaceBeforePercent}‰–30${spaceBeforePercent}‰",
"20 ‱": "20${spaceBeforePercent}‱",
"20 ‱–30 ‱": "20${spaceBeforePercent}‱–30${spaceBeforePercent}‱",
};
// Register the percent/per-mille/per-myriad suite once per locale, using the
// cases transformed for that locale.
for (const locale of supportedLocales) {
  const localizedPercentCases = transformTestSet(spaceBeforePercentSet, locale);

  createTestSuite(
    "Add a locale-specific space before %, ‰, ‱",
    localizedPercentCases,
    fixSpaceBeforePercent,
    {},
    fixNbsp,
    locale
  );
}
// Locale-independent cases for the "Add a non-breaking space before a single
// capital letter in a sentence" suite (addNbspBeforeSingleLetter / fixNbsp).
// NOTE(review): keys and values presumably differ only in invisible whitespace
// characters; edit with invisible characters made visible.
const nbspBeforeSingleLetterSet = {
"The product X is missing the feature Y.": "The product X is missing the feature Y.",
"Sputnik V": "Sputnik V",
"Človek Č": "Človek Č",
"© V Inc.": "© V Inc.",
// false positives
"bola to I. kapitola": "bola to I. kapitola",
"pán Šťastný": "pán Šťastný",
"pán ŠŤASTNÝ": "pán ŠŤASTNÝ",
"One sentence ends. A bad apple.": "One sentence ends. A bad apple.",
"One sentence ends? A bad apple.": "One sentence ends? A bad apple.",
"One sentence ends! A bad apple.": "One sentence ends! A bad apple.",
"sentence; C-level executive": "sentence; C-level executive",
"sentence: C-level executive": "sentence: C-level executive",
"sentence, C-level executive": "sentence, C-level executive",
"I’d say… A-player": "I’d say… A-player",
"sentence (brackets) A-player": "sentence (brackets) A-player",
"sentence [brackets] A-player": "sentence [brackets] A-player",
"sentence {brackets} A-player": "sentence {brackets} A-player",
"A × A": "A × A",
};
// Additional single-letter cases that are passed through transformTestSet per
// locale before being merged into the suite's cases.
// `${rdq}`, `${rsq}` and `${apos}` sit inside ordinary double-quoted strings, so
// JavaScript does not interpolate them; they are presumably replaced per locale
// by transformTestSet — confirm in ../test-utils.js.
const nbspBeforeSingleLetterUnitSet = {
"famous company — A Inc.": "famous company — A Inc.",
"quoted part${rdq} A capital letter": "quoted part${rdq} A capital letter",
"quoted part${rsq} A capital letter": "quoted part${rsq} A capital letter",
"apostrophe${apos} A capital letter": "apostrophe${apos} A capital letter",
};
// Single-letter cases merged into the suite only when the locale is "en-us".
const nbspBeforeSingleLetterEnUsSet = {
"When I talk": "When I talk", // do not add nbsp before I
};
// Single-letter cases merged into the suite for every locale other than
// "en-us". The trailing comments name the kind of space that follows "I" in
// the key on that line; the characters themselves are invisible when rendered.
const nbspBeforeSingleLetterOtherSet = {
"Vzorka I": "Vzorka I",
"Vzorka I je fajn": "Vzorka I je fajn", // remove nbsp after I
"Vzorka I je fajn": "Vzorka I je fajn", // remove hairSpace after I
"Vzorka I je fajn": "Vzorka I je fajn", // remove narrowNbsp after I
};
// Register the single-capital-letter suite once per locale. The cases are the
// shared set, then the locale-transformed extras, then either the English-only
// set ("en-us") or the "other" set; later sets win on duplicate keys.
for (const locale of supportedLocales) {
  const transformedExtras = transformTestSet(nbspBeforeSingleLetterUnitSet, locale);
  const localeSpecificCases =
    locale === "en-us" ? nbspBeforeSingleLetterEnUsSet : nbspBeforeSingleLetterOtherSet;
  const singleLetterCases = {
    ...nbspBeforeSingleLetterSet,
    ...transformedExtras,
    ...localeSpecificCases,
  };

  createTestSuite(
    "Add a non-breaking space before a single capital letter in a sentence",
    singleLetterCases,
    addNbspBeforeSingleLetter,
    {},
    fixNbsp,
    locale
  );
}
// Smoke cases for addNbspAfterSymbol, called with "©" as the symbol.
// NOTE(review): the space after "©" in the values is presumably a non-breaking
// space; edit with invisible characters made visible.
const nbspAfterSymbolSet = {
// in-depth tests are in the respective test files for symbols,
// e.g. copyrights, numero-sign, section-sign
"©2017": "© 2017",
"Company ©2017": "Company © 2017",
};
// Registers the suite for addNbspAfterSymbol with "©" fixed as the symbol.
// Unlike most suites in this file, `null` is passed where fixNbsp usually goes.
createTestSuite(
  "Add a space after a symbol, e.g. ©",
  nbspAfterSymbolSet,
  (input) => addNbspAfterSymbol(input, "©"),
  {},
  null,
  supportedLocales
);
// Smoke cases for replaceSpacesWithNbspAfterSymbol, called with "©" as the
// symbol. The trailing comments name the kind of space used after "©" in the
// key on that line.
// NOTE(review): the lines without a comment presumably use other space
// variants that are invisible when rendered — verify before editing.
const oneNbspAfterSymbolSet = {
// in-depth tests are in the respective test files for symbols,
// e.g. copyrights, numero-sign, section-sign
"Company © 2017": "Company © 2017",
"Company © 2017": "Company © 2017", // hairSpace
"Company © 2017": "Company © 2017", // narrowNbsp
"Company © 2017": "Company © 2017",
"Company © 2017": "Company © 2017",
};
// Registers the suite for replaceSpacesWithNbspAfterSymbol with "©" fixed as
// the symbol. The description is distinct from the addNbspAfterSymbol suite so
// the two suites can be told apart in test reports.
// Unlike most suites in this file, `null` is passed where fixNbsp usually goes.
createTestSuite(
  "Replace spaces after a symbol with a single non-breaking space, e.g. ©",
  oneNbspAfterSymbolSet,
  (text) => replaceSpacesWithNbspAfterSymbol(text, "©"),
  {},
  null,
  supportedLocales
);
/**
 * Combined nbsp cases exported for use by other test files.
 *
 * The sets are merged in the order listed, so for duplicate keys the entry
 * from the later set wins. Sets defined in this file that are not listed here
 * are not part of the export.
 */
export const nbspSet = {
  ...nbspBetweenMultiCharWordsSet,
  ...nbspAfterPrepositionSet,
  ...nbspAfterAmpersandSet,
  ...nbspAfterCardinalNumberSet,
  ...nbspOrdinalDate,
  ...nbspNameRegnalNumberSet,
  ...spaceBeforePercentSet,
  ...nbspBeforeSingleLetterSet,
  ...filenameFalsePositiveSet,
};