UNPKG

typopo

Version:

Fix frequent microtypography errors in multiple languages. Write neat texts without bothering about typography rules. Typopo works for English, German, Slovak, Czech and Rusyn languages.

375 lines (303 loc) 12.2 kB
#!/usr/bin/env node

import Locale from "../../src/locale/locale.js";
import { removeEmptyLines } from "../../src/modules/whitespace/lines.js";
import { fixNbsp } from "../../src/modules/whitespace/nbsp.js";
import { fixSpaces } from "../../src/modules/whitespace/spaces.js";
import { fixPeriod } from "../../src/modules/punctuation/period.js";
import { fixEllipsis } from "../../src/modules/punctuation/ellipsis.js";
import { fixDash } from "../../src/modules/punctuation/dash.js";
import { fixDoubleQuotesAndPrimes } from "../../src/modules/punctuation/double-quotes.js";
import { fixSingleQuotesPrimesAndApostrophes } from "../../src/modules/punctuation/single-quotes.js";
import { fixMultiplicationSign } from "../../src/modules/symbols/multiplication-sign.js";
import { fixSectionSign } from "../../src/modules/symbols/section-sign.js";
import { fixCopyrights } from "../../src/modules/symbols/copyrights.js";
import { fixNumeroSign } from "../../src/modules/symbols/numero-sign.js";
import { fixPlusMinus } from "../../src/modules/symbols/plus-minus.js";
import { fixMarks } from "../../src/modules/symbols/marks.js";
import { fixExponents } from "../../src/modules/symbols/exponents.js";
import { fixNumberSign } from "../../src/modules/symbols/number-sign.js";
import { fixAbbreviations } from "../../src/modules/words/abbreviations.js";
import { fixCase } from "../../src/modules/words/case.js";
import { fixPubId } from "../../src/modules/words/pub-id.js";
import { excludeExceptions, placeExceptions } from "../../src/modules/words/exceptions.js";

/* Sample input traced by the example run at the bottom of this file. */
const testString = "word - )";
const testLocale = "de-de";

/** Configuration defaults used by both tracing entry points; caller-supplied keys override them. */
const DEFAULT_CONFIGURATION = {
  removeLines: true,
  removeWhitespacesBeforeMarkdownList: true,
  keepMarkdownCodeBlocks: false,
};

/**
 * Ordered fixers applied after the optional empty-line removal and before the
 * exceptions are placed back. Each entry is `[stepName, apply]`, where `apply`
 * receives `(string, currentLocale, configuration)` and forwards only the
 * arguments that the particular fixer is called with.
 */
const FIX_STEPS = [
  ["fixEllipsis", (text, currentLocale) => fixEllipsis(text, currentLocale)],
  ["fixSpaces", (text, currentLocale, configuration) => fixSpaces(text, currentLocale, configuration)],
  ["fixPeriod", (text) => fixPeriod(text)],
  ["fixDash", (text, currentLocale) => fixDash(text, currentLocale)],
  [
    "fixSingleQuotesPrimesAndApostrophes",
    (text, currentLocale, configuration) =>
      fixSingleQuotesPrimesAndApostrophes(text, currentLocale, configuration),
  ],
  [
    "fixDoubleQuotesAndPrimes",
    (text, currentLocale, configuration) => fixDoubleQuotesAndPrimes(text, currentLocale, configuration),
  ],
  ["fixMultiplicationSign", (text) => fixMultiplicationSign(text)],
  ["fixSectionSign", (text, currentLocale) => fixSectionSign(text, currentLocale)],
  ["fixCopyrights", (text, currentLocale) => fixCopyrights(text, currentLocale)],
  ["fixNumeroSign", (text, currentLocale) => fixNumeroSign(text, currentLocale)],
  ["fixPlusMinus", (text) => fixPlusMinus(text)],
  ["fixMarks", (text) => fixMarks(text)],
  ["fixExponents", (text) => fixExponents(text)],
  ["fixNumberSign", (text) => fixNumberSign(text)],
  ["fixCase", (text) => fixCase(text)],
  ["fixPubId", (text) => fixPubId(text)],
  ["fixAbbreviations", (text, currentLocale) => fixAbbreviations(text, currentLocale)],
  ["fixNbsp", (text, currentLocale) => fixNbsp(text, currentLocale)],
];

/**
 * Lists the Unicode code point of every character in a string.
 *
 * `Array.from` splits a string by code points, so `codePointAt(0)` is used to
 * read each element; `charCodeAt(0)` would report only the high surrogate for
 * characters outside the Basic Multilingual Plane.
 *
 * @param {string} text - String to inspect.
 * @returns {number[]} One code point per character.
 */
function toCodePoints(text) {
  return Array.from(text, (character) => character.codePointAt(0));
}

/**
 * Runs the transformation steps in order and reports each one to `onStep`.
 *
 * Order: excludeExceptions, removeEmptyLines (only when
 * `configuration.removeLines` is truthy), every entry of FIX_STEPS,
 * placeExceptions. `onStep` is invoked right after each step, before the next
 * one starts.
 *
 * @param {string} inputString - Text to transform.
 * @param {Locale} currentLocale - Locale instance handed to locale-aware fixers.
 * @param {object} configuration - Configuration handed to configurable fixers.
 * @param {(stepName: string, newString: string, oldString: string) => void} onStep -
 *   Observer called once per executed step.
 * @returns {string} The text after the last step.
 */
function runTransformationSteps(inputString, currentLocale, configuration, onStep) {
  let string = inputString;

  const record = (stepName, newString) => {
    onStep(stepName, newString, string);
    string = newString;
  };

  const { processedText, exceptions } = excludeExceptions(string);
  record("excludeExceptions", processedText);

  if (configuration.removeLines) {
    record("removeEmptyLines", removeEmptyLines(string));
  }

  for (const [stepName, apply] of FIX_STEPS) {
    record(stepName, apply(string, currentLocale, configuration));
  }

  record("placeExceptions", placeExceptions(string, exceptions));

  return string;
}

/**
 * Runs the transformation steps on a string and prints a numbered trace of
 * every step to the console (before/after text, and code points when the step
 * changed the text).
 *
 * @param {string} inputString - Text to transform.
 * @param {string} [locale="en-us"] - Locale identifier passed to `new Locale()`.
 * @param {object} [configuration={}] - Overrides merged over the default configuration.
 * @returns {string} The transformed text.
 */
function debugFixTypos(inputString, locale = "en-us", configuration = {}) {
  console.log("=".repeat(80));
  console.log("DEBUG: Typopo String Transformation");
  console.log("=".repeat(80));

  const currentLocale = new Locale(locale);
  const mergedConfiguration = { ...DEFAULT_CONFIGURATION, ...configuration };

  console.log(`Input: "${inputString}"`);
  console.log(`Locale: ${locale}`);
  console.log(`Configuration:`, mergedConfiguration);
  console.log("-".repeat(80));

  // Numbering counts executed steps only, so it shifts when removeEmptyLines is skipped.
  let stepNumber = 1;

  const logStep = (stepName, newString, oldString) => {
    console.log(`${stepNumber}. ${stepName}`);
    console.log(` Before: "${oldString}"`);
    console.log(` After: "${newString}"`);
    if (newString !== oldString) {
      console.log(` ✓ CHANGED`);
      // Print code points so visually similar characters can be told apart.
      console.log(` Before codes: [${toCodePoints(oldString).join(", ")}]`);
      console.log(` After codes: [${toCodePoints(newString).join(", ")}]`);
    } else {
      console.log(` - no change`);
    }
    console.log();
    stepNumber++;
  };

  const string = runTransformationSteps(inputString, currentLocale, mergedConfiguration, logStep);

  console.log("=".repeat(80));
  console.log(`FINAL RESULT: "${string}"`);
  console.log("=".repeat(80));

  return string;
}

/**
 * Runs the same transformation steps as `debugFixTypos`, but collects them
 * into a data structure instead of printing them.
 *
 * @param {string} inputString - Text to transform.
 * @param {string} [locale="en-us"] - Locale identifier passed to `new Locale()`.
 * @param {object} [configuration={}] - Overrides merged over the default configuration.
 * @returns {{
 *   input: string,
 *   output: string,
 *   locale: string,
 *   configuration: object,
 *   steps: Array<{step: string, before: string, after: string, changed: boolean,
 *     beforeCodes: number[], afterCodes: number[]}>,
 *   changedSteps: Array<object>
 * }} The input, the final output, the effective settings, every executed step,
 *   and the subset of steps whose `changed` flag is true.
 */
function getTypoTransformationSteps(inputString, locale = "en-us", configuration = {}) {
  const currentLocale = new Locale(locale);
  const mergedConfiguration = { ...DEFAULT_CONFIGURATION, ...configuration };
  const steps = [];

  const addStep = (stepName, newString, oldString) => {
    steps.push({
      step: stepName,
      before: oldString,
      after: newString,
      changed: newString !== oldString,
      beforeCodes: toCodePoints(oldString),
      afterCodes: toCodePoints(newString),
    });
  };

  const string = runTransformationSteps(inputString, currentLocale, mergedConfiguration, addStep);

  return {
    input: inputString,
    output: string,
    locale: locale,
    configuration: mergedConfiguration,
    steps: steps,
    changedSteps: steps.filter((step) => step.changed),
  };
}

export { debugFixTypos, getTypoTransformationSteps };

/* Console output */
// NOTE: the examples below are top-level statements, so they run whenever this module is loaded.

// Example 1: Console output debug (detailed)
console.log("=== CONSOLE DEBUG EXAMPLE ===");
debugFixTypos(testString, testLocale);

console.log("\n\n=== PROGRAMMATIC API EXAMPLE ===");

// Example 2: Get steps as data structure
const result = getTypoTransformationSteps(testString, testLocale);

console.log(`Input: ${result.input}`);
console.log(`Output: ${result.output}`);
console.log(`\nOnly changed steps:`);

result.changedSteps.forEach((step, index) => {
  console.log(`${index + 1}. ${step.step}`);
  console.log(` "${step.before}""${step.after}"`);

  // Show specific character differences, compared position by position.
  const beforeChars = Array.from(step.before);
  const afterChars = Array.from(step.after);
  const differences = [];

  for (let i = 0; i < Math.max(beforeChars.length, afterChars.length); i++) {
    // Past the end of the shorter string the character is "" and its code is null.
    const beforeChar = beforeChars[i] || "";
    const afterChar = afterChars[i] || "";
    const beforeCode = beforeChar ? beforeChar.codePointAt(0) : null;
    const afterCode = afterChar ? afterChar.codePointAt(0) : null;

    if (beforeCode !== afterCode) {
      differences.push({
        position: i,
        before: beforeChar,
        after: afterChar,
        beforeCode: beforeCode,
        afterCode: afterCode,
      });
    }
  }

  if (differences.length > 0) {
    console.log(" Character differences:");
    differences.forEach((diff) => {
      console.log(
        ` Position ${diff.position}: "${diff.before}" (${diff.beforeCode}) → "${diff.after}" (${diff.afterCode})`
      );
    });
  }

  console.log();
});