UNPKG

hownz

Version:

Safely clean the copied text of hidden surprises. Checks for invisible code, hidden watermarks, and tracking symbols.

375 lines (335 loc) 16.3 kB
#!/usr/bin/env node
/**
 * hownz - command-line text cleaner.
 *
 * Reads text from the command-line arguments or from stdin, replaces currency
 * symbols, expands contractions, strips every character that is not on an
 * approved list, and prints the result or saves it to a file. With
 * `--summarize`, the cleaned text is sent to the Gemini API instead and the
 * summary is output.
 */

// Static imports are hoisted, so this assignment runs after the imported
// modules have been evaluated regardless of where it is written.
process.noDeprecation = true;

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { GoogleGenAI } from "@google/genai";
import os from 'os';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Escapes regex metacharacters so a string can be embedded literally in a
 * RegExp source.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text that is safe to interpolate into a pattern.
 */
const escapeRegex = (s) => s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');

/**
 * Loads the currency table from `usd.json` located next to this script.
 *
 * @returns {Array<object>} The parsed array, or `[]` when the file is
 *   missing, unreadable, malformed, or does not contain a JSON array.
 */
const loadCurrencyData = () => {
  try {
    const currencyPath = path.resolve(__dirname, 'usd.json');
    if (!fs.existsSync(currencyPath)) {
      return [];
    }
    const parsed = JSON.parse(fs.readFileSync(currencyPath, 'utf8'));
    // Anything other than an array cannot be iterated as a currency list.
    return Array.isArray(parsed) ? parsed : [];
  } catch (error) {
    // Silently fail if usd.json is malformed or other read errors occur
    return [];
  }
};

const CURRENCY_MAP = loadCurrencyData();

// Cyrillic letters absent from the original approved list; without them
// ordinary Russian words lose characters when cleaned.
const CYRILLIC_GAP_CHARS = 'йъьЖЦШЪЬ';

// Characters that survive cleaning when no extra dictionaries are loaded.
const DEFAULT_APPROVED_CHARS = `'\"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:,-.*[]!<>()?;&$@~/%^{}|\\-_=\t \r\nбвгджзклмнпрстфхцчшщаеёиоуыэюяБВГДЗКЛМНПРСТФХЙЧЩАЭЫОУЯЕИЁЮіўІЎґєєїҐЄЇāēīōūĀĒĪŌŪαβγδεζηθικλμνξοπρσςτυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ+-*/=≠<>≤≥≈∑∫∂π∞√∈∉∀∃∝∧∨¬⇒⇔≡∪∩⊂⊃⊆⊇∅∇×÷±∓∏∛∜∠⊥∥∋∌⊄⊅∊∉iːɪeæʌɑːɒɔːʊuːəɜːeɪaɪɔɪəʊaʊɪəeəʊəpbtbd tʃdʒkgfvθðszʃʒhmnŋlrwjIVXLCDMivxlcdm${CYRILLIC_GAP_CHARS}`;

// Characters whose consecutive repeats are collapsed to a single occurrence.
const DEFAULT_CONDENSE_CHARS = "*?!,-:;#`\t";

// Built once: matches any condensable character followed by repeats of itself.
const CONDENSE_REGEX = DEFAULT_CONDENSE_CHARS
  ? new RegExp(`([${escapeRegex(DEFAULT_CONDENSE_CHARS)}])\\1+`, 'g')
  : null;

// Lower-case contraction -> expansion. Lookups lower-case the matched text.
const CONTRACTIONS = {
  "couldn't've": "could not have",
  "mightn't've": "might not have",
  "mustn't've": "must not have",
  "shouldn't've": "should not have",
  "wouldn't've": "would not have",
  "i'm": "i am",
  "you're": "you are",
  "he's": "he is",
  "she's": "she is",
  "it's": "it is",
  "we're": "we are",
  "they're": "they are",
  "i've": "i have",
  "you've": "you have",
  "we've": "we have",
  "they've": "they have",
  "i'd": "i would",
  "you'd": "you would",
  "he'd": "he would",
  "she'd": "she would",
  "it'd": "it would",
  "we'd": "we would",
  "they'd": "they would",
  "i'll": "i will",
  "you'll": "you will",
  "he'll": "he will",
  "she'll": "she will",
  "it'll": "it will",
  "we'll": "we will",
  "they'll": "they will",
  "don't": "do not",
  "doesn't": "does not",
  "didn't": "did not",
  "isn't": "is not",
  "aren't": "are not",
  "wasn't": "was not",
  "weren't": "were not",
  "haven't": "have not",
  "hasn't": "has not",
  "hadn't": "had not",
  "won't": "will not",
  "wouldn't": "would not",
  "can't": "cannot",
  "couldn't": "could not",
  "shouldn't": "should not",
  "mightn't": "might not",
  "mustn't": "must not",
  "shan't": "shall not",
  "let's": "let us",
  "that's": "that is",
  "there's": "there is",
  "here's": "here is",
  "what's": "what is",
  "where's": "where is",
  "when's": "when is",
  "why's": "why is",
  "how's": "how is",
  "who's": "who is",
  "gonna": "going to",
  "wanna": "want to",
  "gotcha": "got you",
  "kinda": "kind of",
  "sorta": "sort of",
  // Expansions must not themselves contain a contraction: the text is only
  // scanned once, so one would be left in the output.
  "dunno": "do not know",
};

// Longest keys first so "couldn't've" wins over "couldn't". String.replace
// resets lastIndex for global regexes, so sharing one instance is safe.
const CONTRACTION_REGEX = new RegExp(
  `\\b(${Object.keys(CONTRACTIONS).sort((a, b) => b.length - a.length).join("|")})\\b`,
  "gi",
);

/**
 * Collects the characters listed in a JSON dictionary stored next to this
 * script. Every top-level value that is an array contributes its elements;
 * duplicates are dropped and the remainder is joined into one string.
 *
 * @param {string} jsonPath - File name relative to this script's directory.
 * @returns {string} The concatenated entries, or `''` (after a warning) when
 *   the file is missing or cannot be parsed.
 */
const getCharsFromJson = (jsonPath) => {
  try {
    const filePath = path.resolve(__dirname, jsonPath);
    if (!fs.existsSync(filePath)) {
      console.warn(`Warning: file not found ${jsonPath}`);
      return '';
    }
    const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    // `?? {}` keeps a top-level JSON `null` a quiet no-op.
    const allChars = Object.values(data ?? {})
      .filter((value) => Array.isArray(value))
      .flat();
    return [...new Set(allChars)].join('');
  } catch (e) {
    console.warn(`Warning: could not parse ${jsonPath}`);
    return '';
  }
};

/**
 * Reads the `version` field from the `package.json` next to this script.
 *
 * @returns {string} The version, or `'unknown'` when the file cannot be read
 *   or parsed or has no `version` field.
 */
const getPackageVersion = () => {
  try {
    const packageJsonPath = path.resolve(__dirname, 'package.json');
    const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
    return packageJson.version ?? 'unknown';
  } catch (error) {
    return 'unknown';
  }
};

const MAX_CALLS_PER_MINUTE_CLI = 20;
const ONE_MINUTE_MS_CLI = 60 * 1000;
const configDir = path.join(os.homedir(), '.hownz');
const rateLimitFile = path.join(configDir, 'ratelimit.json');

/**
 * Enforces the per-minute API call limit using a timestamp list persisted in
 * `~/.hownz/ratelimit.json`. When the call is allowed, the current time is
 * appended and the file is rewritten with only the last minute's entries.
 *
 * @returns {boolean} `false` when the limit has been reached; `true` when the
 *   call may proceed. If the file or directory cannot be managed, a warning
 *   is printed and `true` is returned (best effort).
 */
const checkRateLimitCli = () => {
  try {
    if (!fs.existsSync(configDir)) {
      fs.mkdirSync(configDir, { recursive: true });
    }
    const now = Date.now();
    let timestamps = [];
    if (fs.existsSync(rateLimitFile)) {
      try {
        const parsed = JSON.parse(fs.readFileSync(rateLimitFile, 'utf8'));
        // A valid-JSON but non-array file (e.g. `{}` or `null`) must not
        // reach `.filter`: that would throw, land in the outer catch, and
        // leave the bad file in place forever.
        if (Array.isArray(parsed)) {
          timestamps = parsed;
        }
      } catch (e) {
        timestamps = [];
      }
    }
    const recentTimestamps = timestamps.filter(
      (ts) => typeof ts === 'number' && now - ts < ONE_MINUTE_MS_CLI,
    );
    if (recentTimestamps.length >= MAX_CALLS_PER_MINUTE_CLI) {
      return false;
    }
    recentTimestamps.push(now);
    fs.writeFileSync(rateLimitFile, JSON.stringify(recentTimestamps));
    return true;
  } catch (e) {
    console.warn(`Warning: Could not manage rate limit file. Rate limiting disabled. Error: ${e.message}`);
    return true;
  }
};

/**
 * Prints the usage guide, including the package version, to stdout.
 */
const showHelp = () => {
  console.log(`hownz Text Cleaner CLI (v${getPackageVersion()})
--------------------------
A tool to clean text, removing hidden characters and formatting.

Usage:
  hownz "your text..." [options]
  echo "piped text" | hownz [options]
  hownz < input.txt [options]

Options:
  -a, --auto-file      Save output to an auto-generated file instead of console.
  -o, --output [file]  Save output to a specific file.
      --here           Force output to console (useful for piped input).
  -l, --lang           Include additional language characters from app.json.
  -e, --emojis         Include emoji characters from admin.json.
  -m, --mark           Mark unrecognized characters with '#'.
  -s, --summarize      Summarize text using the Gemini API (requires GEMINI_API_KEY).
  -t, --text           Show the list of currently approved characters.
      --list           Show the list of contraction replacements.
      --lg             Show contents of the language dictionary (app.json).
      --ej             Show contents of the emoji dictionary (admin.json).
  -v, --version        Display the current version.
  -h, --help           Display this help guide.

Examples:
  hownz "Text with potential hidden stuff"
  hownz -a "Save this to a file"
  cat report.log | hownz -s
  hownz < doc.txt --here > cleaned-doc.txt
  hownz "Clean & mark: ñ, ö, ∴" -lm
  echo "Save with ñ & 😀" | hownz -ale

For a detailed guide with more examples, visit:
  https://hownz.com/admin.html
`);
};

/**
 * Prints the approved-character list.
 *
 * @param {string} approvedChars - The characters currently allowed.
 */
const showApprovedChars = (approvedChars) => {
  console.log('Approved characters list:\n' + approvedChars);
};

/**
 * Prints every contraction together with its expansion.
 */
const showReplacements = () => {
  console.log('Contraction replacements list:');
  for (const [key, value] of Object.entries(CONTRACTIONS)) {
    console.log(` '${key}' will be replaced with '${value}'`);
  }
};

/**
 * Applies the currency table to the text: runs of two or more identical
 * symbols are deleted, then each remaining symbol is replaced by its
 * abbreviation. Longer symbols are processed before shorter ones.
 *
 * @param {string} text - Input text.
 * @returns {string} Text with currency symbols handled; unchanged when no
 *   usable currency entries are loaded.
 */
const handleCurrencySymbols = (text) => {
  // Skip entries that would corrupt the text: an empty symbol matches at
  // every position, and a missing abbreviation would insert "undefined".
  const currencies = CURRENCY_MAP
    .filter((entry) =>
      typeof entry?.symbol === 'string' &&
      entry.symbol !== '' &&
      typeof entry.abbreviation === 'string')
    .sort((a, b) => b.symbol.length - a.symbol.length);
  if (currencies.length === 0) {
    return text;
  }

  let processedText = text;
  for (const { symbol } of currencies) {
    const consecutiveRegex = new RegExp(`(?:${escapeRegex(symbol)}){2,}`, 'g');
    processedText = processedText.replace(consecutiveRegex, '');
  }
  for (const { symbol, abbreviation } of currencies) {
    const singleRegex = new RegExp(escapeRegex(symbol), 'g');
    // A function replacement keeps `$` sequences in the abbreviation literal
    // instead of being interpreted as replacement patterns.
    processedText = processedText.replace(singleRegex, () => abbreviation);
  }
  return processedText;
};

/**
 * Replaces known contractions with their expanded forms. Right single
 * quotation marks are first normalised to a straight apostrophe. An
 * all-uppercase match yields an uppercase expansion; a match with a capital
 * first letter yields a capitalised expansion.
 *
 * @param {string} text - Input text.
 * @returns {string} Text with contractions expanded.
 */
const expandContractions = (text) =>
  text.replace(/[’]/g, "'").replace(CONTRACTION_REGEX, (match) => {
    const expansion = CONTRACTIONS[match.toLowerCase()];
    if (match === match.toUpperCase()) {
      return expansion.toUpperCase();
    }
    if (match[0] === match[0].toUpperCase()) {
      return expansion.charAt(0).toUpperCase() + expansion.slice(1);
    }
    return expansion;
  });

/**
 * Cleans text by, in order: handling currency symbols, expanding
 * contractions, removing the phrases "Here is", "Let me know" and "you are"
 * (case-insensitive), turning em dashes into " - ", deleting runs of two or
 * more quote characters, dropping (or marking) characters that are not
 * approved, and collapsing repeated punctuation, spaces and '#' marks.
 *
 * Because contractions are expanded first, "you're" becomes "you are" and is
 * then removed by the phrase filter.
 *
 * @param {string} text - Text to clean.
 * @param {boolean} [markUnrecognized=false] - When true, each unapproved
 *   character is replaced by '#' instead of being dropped.
 * @param {string} [approvedChars=DEFAULT_APPROVED_CHARS] - Characters to keep.
 * @returns {string} The cleaned, trimmed text.
 */
const cleanText = (text, markUnrecognized = false, approvedChars = DEFAULT_APPROVED_CHARS) => {
  const prepared = expandContractions(handleCurrencySymbols(text))
    .replace(/\b(Here is|Let me know|you are)\b/gi, '')
    .replace(/—/g, ' - ')
    .replace(/['"]{2,}/g, '');

  // Build the set by code point, matching the `for...of` iteration below.
  // Splitting on UTF-16 code units would store lone surrogates, so approved
  // astral characters (most emoji) would never match and always be removed.
  const approvedSet = new Set(approvedChars);
  let result = '';
  for (const char of prepared) {
    if (approvedSet.has(char)) {
      result += char;
    } else if (markUnrecognized) {
      result += '#';
    }
  }

  if (CONDENSE_REGEX) {
    result = result.replace(CONDENSE_REGEX, '$1');
  }
  return result
    .replace(/ {2,}/g, ' ')
    .replace(/#{2,}/g, '#')
    .trim();
};

// Long boolean flags -> option key.
const LONG_FLAGS = new Map([
  ['--auto-file', 'autoFile'],
  ['--here', 'here'],
  ['--lang', 'lang'],
  ['--emojis', 'emojis'],
  ['--mark', 'mark'],
  ['--summarize', 'summarize'],
  ['--text', 'showText'],
  ['--list', 'showList'],
  ['--lg', 'showLg'],
  ['--ej', 'showEj'],
  ['--version', 'showVersion'],
  ['--help', 'showHelp'],
]);

// Short boolean flags -> option key. These may be combined, e.g. `-lm`.
const SHORT_FLAGS = new Map([
  ['a', 'autoFile'],
  ['l', 'lang'],
  ['e', 'emojis'],
  ['m', 'mark'],
  ['s', 'summarize'],
  ['t', 'showText'],
  ['v', 'showVersion'],
  ['h', 'showHelp'],
]);

/**
 * Parses command-line arguments into options and free text.
 *
 * `--output` / `-o` consume the next argument as a file name and exit with
 * status 1 when it is missing or starts with '-'. Unknown `--long` arguments
 * are treated as text; an unknown letter in a short-flag group prints an
 * error and exits with status 1.
 *
 * @param {string[]} args - Arguments after the script name.
 * @returns {{options: object, textInput: string[]}} Parsed options and the
 *   positional text fragments in order.
 */
const parseArgs = (args) => {
  const options = {
    autoFile: false,
    output: null,
    here: false,
    lang: false,
    emojis: false,
    mark: false,
    summarize: false,
    showText: false,
    showList: false,
    showLg: false,
    showEj: false,
    showVersion: false,
    showHelp: false,
  };
  const textInput = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '--output' || arg === '-o') {
      const next = args[i + 1];
      if (next === undefined || next.startsWith('-')) {
        console.error(`Error: ${arg} flag requires a filename.`);
        process.exit(1);
      }
      options.output = next;
      i++;
    } else if (arg.startsWith('--')) {
      const key = LONG_FLAGS.get(arg);
      if (key) {
        options[key] = true;
      } else {
        textInput.push(arg);
      }
    } else if (arg.startsWith('-') && arg.length > 1) {
      for (const flag of arg.slice(1)) {
        const key = SHORT_FLAGS.get(flag);
        if (!key) {
          console.error(`Error: Unknown short flag '${flag}' in '${arg}'`);
          process.exit(1);
        }
        options[key] = true;
      }
    } else {
      textInput.push(arg);
    }
  }

  return { options, textInput };
};

/**
 * Prints the raw contents of a dictionary file stored next to this script.
 * Exits with status 1 and a readable message when the file cannot be read.
 *
 * @param {string} fileName - Dictionary file name, e.g. `app.json`.
 */
const printDictionary = (fileName) => {
  try {
    console.log(fs.readFileSync(path.resolve(__dirname, fileName), 'utf-8'));
  } catch (error) {
    console.error(`Error: could not read ${fileName}: ${error.message}`);
    process.exit(1);
  }
};

/**
 * Reads all of stdin as UTF-8 text.
 *
 * @returns {Promise<string>} Everything received on stdin.
 */
const readStdin = async () => {
  // Decode in the stream: concatenating raw Buffer chunks would corrupt a
  // multi-byte character that happens to straddle two chunks.
  process.stdin.setEncoding('utf8');
  let input = '';
  for await (const chunk of process.stdin) {
    input += chunk;
  }
  return input;
};

/**
 * Cleans the text and asks the Gemini API for a summary of it. Exits with
 * status 1 when the local rate limit is reached or the request fails.
 *
 * @param {string} text - Raw input text.
 * @param {string} approvedChars - Characters kept while cleaning.
 * @returns {Promise<string>} The summary prefixed with a heading line.
 */
const summarizeText = async (text, approvedChars) => {
  if (!checkRateLimitCli()) {
    console.error(`\nError: API call limit reached (${MAX_CALLS_PER_MINUTE_CLI} per minute). Please try again later.`);
    process.exit(1);
  }
  process.stdout.write("Summarizing with AI... ");
  try {
    const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
    const cleanedForSummary = cleanText(text, false, approvedChars);
    const prompt = `Please provide a concise summary of the following text:\n\n${cleanedForSummary}`;
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: prompt,
      config: {
        systemInstruction: 'You summarize pre-cleaned text. Be concise.',
      },
    });
    const summary = response.text;
    // Guard before use so a response without text is reported clearly
    // rather than as a TypeError from calling `.trim()` on undefined.
    if (typeof summary !== 'string' || summary.trim() === '') {
      throw new Error('The API returned an empty response.');
    }
    process.stdout.write("Done.\n");
    return `Gemini Generated Summary:\n\n${summary.trim()}`;
  } catch (error) {
    process.stdout.write("Failed.\n");
    console.error('\nError summarizing text:', error.message);
    process.exit(1);
  }
};

/**
 * Writes the output text (plus a trailing newline) to a file and confirms on
 * stdout. Exits with status 1 when the write fails.
 *
 * @param {string} fileName - Destination path.
 * @param {string} content - Text to write.
 * @returns {Promise<void>}
 */
const writeOutputFile = async (fileName, content) => {
  try {
    await fs.promises.writeFile(fileName, `${content}\n`, 'utf8');
  } catch (err) {
    console.error(`Error writing to file: ${err.message}`);
    process.exit(1);
  }
  console.log(`Cleaned text successfully saved to ${fileName}`);
};

// Inclusive bounds of the random number used in auto-generated file names.
const AUTO_FILE_MIN = 100;
const AUTO_FILE_MAX = 1588;

/**
 * CLI entry point: parses arguments, serves the informational flags, then
 * cleans or summarizes the input and writes the result to the console or to
 * a file.
 *
 * Piped input with no text arguments and none of `--output`, `--auto-file`
 * or `--here` is saved to an auto-generated file. When auto-naming is
 * active it takes precedence over an `--output` name.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const { options, textInput } = parseArgs(process.argv.slice(2));

  if (options.showHelp) {
    showHelp();
    return;
  }
  if (options.showVersion) {
    console.log(getPackageVersion());
    return;
  }
  if (options.showList) {
    showReplacements();
    return;
  }

  let approvedChars = DEFAULT_APPROVED_CHARS;
  if (options.lang) {
    approvedChars += getCharsFromJson('app.json');
  }
  if (options.emojis) {
    approvedChars += getCharsFromJson('admin.json');
  }

  if (options.showText) {
    showApprovedChars(approvedChars);
    return;
  }
  if (options.showLg) {
    printDictionary('app.json');
    return;
  }
  if (options.showEj) {
    printDictionary('admin.json');
    return;
  }

  if (options.summarize && !process.env.GEMINI_API_KEY) {
    console.error('Error: GEMINI_API_KEY is not set. Please set it to use AI summarization.');
    process.exit(1);
  }

  const isPiped = !process.stdin.isTTY;
  const hasDirectArgs = textInput.length > 0;
  const autoGenerateFilename =
    options.autoFile ||
    (isPiped && !hasDirectArgs && !options.output && !options.here);

  const handleInput = async (text) => {
    if (!text || !text.trim()) {
      showHelp();
      return;
    }

    const outputText = options.summarize
      ? await summarizeText(text, approvedChars)
      : cleanText(text, options.mark, approvedChars);

    const saveToFile = (options.output || autoGenerateFilename) && !options.here;
    if (!saveToFile) {
      process.stdout.write(outputText + '\n');
      return;
    }

    let finalFilename = options.output;
    if (autoGenerateFilename) {
      const randomNumber =
        Math.floor(Math.random() * (AUTO_FILE_MAX - AUTO_FILE_MIN + 1)) + AUTO_FILE_MIN;
      finalFilename = `1-hownz.com-${randomNumber}.txt`;
    }
    await writeOutputFile(finalFilename, outputText);
  };

  if (hasDirectArgs) {
    await handleInput(textInput.join(' '));
  } else if (isPiped) {
    await handleInput(await readStdin());
  } else {
    showHelp();
  }
};

// Report unexpected failures as a one-line error instead of an unhandled
// promise rejection.
main().catch((error) => {
  console.error(`Error: ${error.message}`);
  process.exit(1);
});