hownz
Version:
Safely clean the copied text of hidden surprises. Checks for invisible code, hidden watermarks, and tracking symbols.
375 lines (335 loc) • 16.3 kB
JavaScript
// Ask Node.js to suppress deprecation warnings for this process.
// NOTE(review): static `import` declarations are hoisted in ES modules, so the
// imports below are evaluated before this assignment runs — confirm that any
// warnings this is meant to silence are not emitted during module loading.
process.noDeprecation = true;
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { GoogleGenAI } from "@google/genai";
import os from 'os';
// Derive this module's file path and directory from import.meta.url.
// __dirname is used below to resolve data files (usd.json, app.json,
// admin.json, package.json) relative to this script.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * Loads the currency table from `usd.json`, resolved relative to this script.
 *
 * The result is consumed by `handleCurrencySymbols`, which reads `symbol` and
 * `abbreviation` from every entry, so only entries with a non-empty string
 * `symbol` and a string `abbreviation` are returned. Anything else (missing
 * file, malformed JSON, a non-array document, malformed entries) is dropped
 * instead of being passed on and crashing the cleaner later.
 *
 * @returns {Array<{symbol: string, abbreviation: string}>} Valid currency
 *   entries, or an empty array when none could be loaded.
 */
const loadCurrencyData = () => {
  try {
    const currencyPath = path.resolve(__dirname, 'usd.json');
    if (!fs.existsSync(currencyPath)) {
      return [];
    }
    const parsed = JSON.parse(fs.readFileSync(currencyPath, 'utf8'));
    if (!Array.isArray(parsed)) {
      return [];
    }
    return parsed.filter(
      (entry) =>
        entry !== null &&
        typeof entry === 'object' &&
        typeof entry.symbol === 'string' &&
        entry.symbol.length > 0 &&
        typeof entry.abbreviation === 'string'
    );
  } catch (error) {
    // Deliberately best-effort: currency handling is optional, so a malformed
    // or unreadable usd.json simply disables it.
    return [];
  }
};
// Currency table loaded once at module load; read by handleCurrencySymbols.
const CURRENCY_MAP = loadCurrencyData();
// Default whitelist used by cleanText: characters found in this string are
// kept, every other character is dropped (or replaced with '#' when marking
// is enabled). Written as a template literal, so \t, \r and \n below are the
// actual tab, carriage-return and line-feed characters.
const DEFAULT_APPROVED_CHARS = `'\"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:,-.*[]!<>()?;&$@~/%^{}|\\-_=\t \r\nбвгджзклмнпрстфхцчшщаеёиоуыэюяБВГДЗКЛМНПРСТФХЙЧЩАЭЫОУЯЕИЁЮіўІЎґєєїҐЄЇāēīōūĀĒĪŌŪαβγδεζηθικλμνξοπρσςτυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ+-*/=≠<>≤≥≈∑∫∂π∞√∈∉∀∃∝∧∨¬⇒⇔≡∪∩⊂⊃⊆⊇∅∇×÷±∓∏∛∜∠⊥∥∋∌⊄⊅∊∉iːɪeæʌɑːɒɔːʊuːəɜːeɪaɪɔɪəʊaʊɪəeəʊəpbtbd tʃdʒkgfvθðszʃʒhmnŋlrwjIVXLCDMivxlcdm`;
// Characters whose consecutive repeats cleanText collapses to a single
// occurrence (e.g. "!!!" becomes "!").
const DEFAULT_CONDENSE_CHARS = "*?!,-:;#`\t";
/**
 * Lower-case contraction → expansion table used by expandContractions and
 * listed by showReplacements.
 *
 * Keys are matched case-insensitively; the expansion's casing is adjusted by
 * expandContractions. Expansions must not contain contractions themselves,
 * because replacement is a single pass and they would be left in the output.
 */
const CONTRACTIONS = {
  // Double contractions
  "couldn't've": "could not have",
  "mightn't've": "might not have",
  "mustn't've": "must not have",
  "shouldn't've": "should not have",
  "wouldn't've": "would not have",
  // Pronoun + be
  "i'm": "i am",
  "you're": "you are",
  "he's": "he is",
  "she's": "she is",
  "it's": "it is",
  "we're": "we are",
  "they're": "they are",
  // Pronoun + have
  "i've": "i have",
  "you've": "you have",
  "we've": "we have",
  "they've": "they have",
  // Pronoun + would
  "i'd": "i would",
  "you'd": "you would",
  "he'd": "he would",
  "she'd": "she would", // was "he would" (wrong pronoun)
  "it'd": "it would",
  "we'd": "we would",
  "they'd": "they would",
  // Pronoun + will
  "i'll": "i will",
  "you'll": "you will",
  "he'll": "he will",
  "she'll": "she will",
  "it'll": "it will",
  "we'll": "we will",
  "they'll": "they will",
  // Negations
  "don't": "do not",
  "doesn't": "does not",
  "didn't": "did not",
  "isn't": "is not",
  "aren't": "are not",
  "wasn't": "was not",
  "weren't": "were not",
  "haven't": "have not",
  "hasn't": "has not",
  "hadn't": "had not",
  "won't": "will not",
  "wouldn't": "would not",
  "can't": "cannot",
  "couldn't": "could not",
  "shouldn't": "should not",
  "mightn't": "might not",
  "mustn't": "must not",
  "shan't": "shall not",
  // Word + is / us
  "let's": "let us",
  "that's": "that is",
  "there's": "there is",
  "here's": "here is",
  "what's": "what is",
  "where's": "where is",
  "when's": "when is",
  "why's": "why is",
  "how's": "how is",
  "who's": "who is",
  // Informal spellings
  "gonna": "going to",
  "wanna": "want to",
  "gotcha": "got you",
  "kinda": "kind of",
  "sorta": "sort of",
  "dunno": "do not know" // was "don't know", which re-introduced a contraction
};
/**
 * Collects the characters listed in a JSON dictionary file.
 *
 * The file is resolved relative to this script. Every top-level value that is
 * an array contributes its elements; duplicates are removed (first occurrence
 * wins) and the remaining elements are concatenated into one string.
 *
 * @param {string} jsonPath - File name/path relative to this script.
 * @returns {string} The concatenated unique characters, or '' (after printing
 *   a warning) when the file is missing or cannot be read/parsed.
 */
const getCharsFromJson = (jsonPath) => {
  try {
    const resolvedPath = path.resolve(__dirname, jsonPath);
    if (fs.existsSync(resolvedPath) === false) {
      console.warn(`Warning: file not found ${jsonPath}`);
      return '';
    }
    const parsed = JSON.parse(fs.readFileSync(resolvedPath, 'utf-8'));
    // A Set keeps insertion order, so the first occurrence of each item wins.
    const seen = new Set();
    for (const value of Object.values(parsed ?? {})) {
      if (Array.isArray(value)) {
        value.forEach((item) => seen.add(item));
      }
    }
    return Array.from(seen).join('');
  } catch (err) {
    console.warn(`Warning: could not parse ${jsonPath}`);
    return '';
  }
};
/**
 * Reads the `version` field from the package.json located next to this script.
 *
 * @returns {string} The version string, or 'unknown' when package.json cannot
 *   be read or parsed, or does not contain a non-empty string `version`
 *   (previously that case returned `undefined`, which the help banner printed
 *   as "vundefined").
 */
const getPackageVersion = () => {
  try {
    const packageJsonPath = path.resolve(__dirname, 'package.json');
    const { version } = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
    return typeof version === 'string' && version !== '' ? version : 'unknown';
  } catch (error) {
    // Best-effort: the version is informational only.
    return 'unknown';
  }
};
// Maximum number of calls checkRateLimitCli allows within one window.
const MAX_CALLS_PER_MINUTE_CLI = 20;
// Length of the rate-limit window, in milliseconds.
const ONE_MINUTE_MS_CLI = 60 * 1000;
// Per-user state directory (".hownz" under the home directory) and the JSON
// file inside it where checkRateLimitCli stores recent call timestamps.
const configDir = path.join(os.homedir(), '.hownz');
const rateLimitFile = path.join(configDir, 'ratelimit.json');
/**
 * Records a call in the on-disk rate-limit log and reports whether it is
 * allowed.
 *
 * The log (`rateLimitFile`) holds the timestamps of recent calls. Entries
 * older than `ONE_MINUTE_MS_CLI` are discarded; if `MAX_CALLS_PER_MINUTE_CLI`
 * or more remain, the call is rejected and nothing is written.
 *
 * A log file that is unreadable, not valid JSON, or valid JSON of the wrong
 * shape (not an array of numbers) is treated as empty and overwritten.
 * Previously a non-array document made `.filter` throw, which silently
 * disabled rate limiting until the file was repaired by hand.
 *
 * @returns {boolean} `true` when the call may proceed, `false` when the limit
 *   has been reached. Also `true` (with a warning) when the log cannot be
 *   managed at all, e.g. the directory is not writable.
 */
const checkRateLimitCli = () => {
  try {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(configDir, { recursive: true });

    const now = Date.now();
    let storedTimestamps = [];
    if (fs.existsSync(rateLimitFile)) {
      try {
        const parsed = JSON.parse(fs.readFileSync(rateLimitFile, 'utf8'));
        if (Array.isArray(parsed)) {
          storedTimestamps = parsed;
        }
      } catch (e) {
        storedTimestamps = [];
      }
    }

    const recentTimestamps = storedTimestamps.filter(
      (ts) => typeof ts === 'number' && Number.isFinite(ts) && now - ts < ONE_MINUTE_MS_CLI
    );
    if (recentTimestamps.length >= MAX_CALLS_PER_MINUTE_CLI) {
      return false;
    }

    recentTimestamps.push(now);
    fs.writeFileSync(rateLimitFile, JSON.stringify(recentTimestamps));
    return true;
  } catch (e) {
    // Best-effort: failing to persist the log must not block the user.
    console.warn(`Warning: Could not manage rate limit file. Rate limiting disabled. Error: ${e.message}`);
    return true;
  }
};
/**
 * Prints the CLI usage guide (banner with the package version, usage forms,
 * options, examples and a documentation link) to stdout.
 */
const showHelp = () => {
  const helpLines = [
    `hownz Text Cleaner CLI (v${getPackageVersion()})`,
    '--------------------------',
    'A tool to clean text, removing hidden characters and formatting.',
    'Usage:',
    'hownz "your text..." [options]',
    'echo "piped text" | hownz [options]',
    'hownz < input.txt [options]',
    'Options:',
    '-a, --auto-file Save output to an auto-generated file instead of console.',
    '-o, --output [file] Save output to a specific file.',
    '--here Force output to console (useful for piped input).',
    '-l, --lang Include additional language characters from app.json.',
    '-e, --emojis Include emoji characters from admin.json.',
    "-m, --mark Mark unrecognized characters with '#'.",
    '-s, --summarize Summarize text using the Gemini API (requires GEMINI_API_KEY).',
    '-t, --text Show the list of currently approved characters.',
    '--list Show the list of contraction replacements.',
    '--lg Show contents of the language dictionary (app.json).',
    '--ej Show contents of the emoji dictionary (admin.json).',
    '-v, --version Display the current version.',
    '-h, --help Display this help guide.',
    'Examples:',
    'hownz "Text with potential hidden stuff"',
    'hownz -a "Save this to a file"',
    'cat report.log | hownz -s',
    'hownz < doc.txt --here > cleaned-doc.txt',
    'hownz "Clean & mark: ñ, ö, ∴" -lm',
    'echo "Save with ñ & 😀" | hownz -ale',
    'For a detailed guide with more examples, visit:',
    'https://hownz.com/admin.html',
    // Trailing empty item reproduces the final newline of the help text.
    '',
  ];
  console.log(helpLines.join('\n'));
};
/**
 * Prints the approved-character list under a heading line.
 *
 * @param {string} approvedChars - The characters to display.
 */
const showApprovedChars = (approvedChars) => {
  const heading = 'Approved characters list:';
  console.log(`${heading}\n${approvedChars}`);
};
/**
 * Prints every contraction in CONTRACTIONS together with its expansion,
 * one per line, after a heading.
 */
const showReplacements = () => {
  console.log('Contraction replacements list:');
  Object.keys(CONTRACTIONS).forEach((contraction) => {
    const expansion = CONTRACTIONS[contraction];
    console.log(` '${contraction}' will be replaced with '${expansion}'`);
  });
};
/**
 * Replaces currency symbols in `text` using CURRENCY_MAP.
 *
 * Two passes, both processing longer symbols first so that multi-character
 * symbols win over symbols they contain:
 *   1. runs of two or more of the same symbol are removed entirely;
 *   2. each remaining symbol is replaced by its abbreviation.
 *
 * Entries without a non-empty string `symbol` and a string `abbreviation` are
 * ignored (an empty symbol would otherwise match between every character),
 * and abbreviations are inserted literally, so `$&`, `$$` etc. inside an
 * abbreviation are not interpreted as replacement patterns.
 *
 * @param {string} text - Text to process.
 * @returns {string} The text with currency symbols handled; unchanged when no
 *   usable currency data is available.
 */
const handleCurrencySymbols = (text) => {
  if (!Array.isArray(CURRENCY_MAP) || CURRENCY_MAP.length === 0) {
    return text;
  }
  const escapeRegex = (s) => s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');

  const currencies = CURRENCY_MAP
    .filter(
      (entry) =>
        typeof entry?.symbol === 'string' &&
        entry.symbol !== '' &&
        typeof entry.abbreviation === 'string'
    )
    .map(({ symbol, abbreviation }) => ({
      abbreviation,
      symbolLength: symbol.length,
      pattern: escapeRegex(symbol),
    }))
    .sort((a, b) => b.symbolLength - a.symbolLength);

  let processedText = text;
  for (const { pattern } of currencies) {
    processedText = processedText.replace(new RegExp(`(?:${pattern}){2,}`, 'g'), '');
  }
  for (const { pattern, abbreviation } of currencies) {
    // A replacer function keeps `$` sequences in the abbreviation literal.
    processedText = processedText.replace(new RegExp(pattern, 'g'), () => abbreviation);
  }
  return processedText;
};
/**
 * Expands the contractions listed in CONTRACTIONS.
 *
 * Typographic apostrophes (’) are first normalised to a plain "'". Matching is
 * case-insensitive and on word boundaries, with longer contractions tried
 * first. The expansion mirrors the casing of the match: all upper-case input
 * gives an upper-case expansion, a capitalised first letter gives a
 * capitalised expansion, anything else gives the expansion as stored.
 *
 * @param {string} text - Text to process.
 * @returns {string} The text with contractions expanded.
 */
const expandContractions = text => {
  const normalized = text.replaceAll('’', "'");
  const alternation = Object.keys(CONTRACTIONS)
    .sort((left, right) => right.length - left.length)
    .join('|');
  const contractionPattern = new RegExp('\\b(' + alternation + ')\\b', 'gi');

  return normalized.replace(contractionPattern, (found) => {
    const expanded = CONTRACTIONS[found.toLowerCase()];
    const isAllCaps = found === found.toUpperCase();
    if (isAllCaps) {
      return expanded.toUpperCase();
    }
    const isCapitalized = found[0] === found[0].toUpperCase();
    return isCapitalized
      ? expanded.charAt(0).toUpperCase() + expanded.slice(1)
      : expanded;
  });
};
/**
 * Cleans `text` down to an approved set of characters.
 *
 * Steps, in order:
 *   1. currency symbols are handled (handleCurrencySymbols);
 *   2. contractions are expanded (expandContractions);
 *   3. the phrases "Here is", "Let me know" and "you are" are removed
 *      (case-insensitive, whole words);
 *   4. em dashes become " - " and runs of two or more quote characters are
 *      removed;
 *   5. every character not in `approvedChars` is dropped, or replaced with
 *      '#' when `markUnrecognized` is true;
 *   6. repeats of the characters in DEFAULT_CONDENSE_CHARS, of spaces and of
 *      '#' are collapsed, and the result is trimmed.
 *
 * @param {string} text - Text to clean.
 * @param {boolean} [markUnrecognized=false] - Replace unapproved characters
 *   with '#' instead of dropping them.
 * @param {string} [approvedChars=DEFAULT_APPROVED_CHARS] - Characters to keep.
 * @returns {string} The cleaned text.
 */
const cleanText = (text, markUnrecognized = false, approvedChars = DEFAULT_APPROVED_CHARS) => {
  let textToProcess = handleCurrencySymbols(text);
  textToProcess = expandContractions(textToProcess);
  textToProcess = textToProcess
    .replace(/\b(Here is|Let me know|you are)\b/gi, '')
    .replace(/—/g, ' - ')
    .replace(/['"]{2,}/g, '');

  // Build the set by Unicode code point (string iteration), matching the
  // `for...of` loop below. Splitting with split('') yields UTF-16 code units,
  // so approved characters outside the BMP (most emoji) were stored as two
  // surrogate halves and never matched the whole character.
  const approvedSet = new Set(approvedChars);

  let result = '';
  for (const char of textToProcess) {
    if (approvedSet.has(char)) {
      result += char;
    } else if (markUnrecognized) {
      result += '#';
    }
  }

  const escapedChars = DEFAULT_CONDENSE_CHARS.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
  if (escapedChars) {
    const condenseRegex = new RegExp(`([${escapedChars}])\\1+`, 'g');
    result = result.replace(condenseRegex, '$1');
  }
  result = result.replace(/ {2,}/g, ' ');
  result = result.replace(/#{2,}/g, '#');
  return result.trim();
};
/**
 * CLI entry point.
 *
 * Parses `process.argv`, handles the informational flags (help, version,
 * contraction list, approved characters, dictionary dumps), then cleans or
 * summarizes the input text and writes the result to stdout or to a file.
 *
 * Input comes from the non-flag arguments when present, otherwise from piped
 * stdin. Piped input with no explicit destination (`-o`, `-a`, `--here`) is
 * saved to an auto-generated file. When both `-a` and `-o` are given the
 * auto-generated name is used.
 *
 * Arguments starting with `--` that are not known options are treated as
 * input text; unknown short flags are an error.
 *
 * @returns {Promise<void>} Resolves once the input has been processed.
 */
const main = async () => {
  const args = process.argv.slice(2);
  const textInput = [];
  const options = {
    autoFile: false, output: null, here: false, lang: false, emojis: false,
    mark: false, summarize: false, showText: false, showList: false,
    showLg: false, showEj: false, showVersion: false, showHelp: false
  };

  // Boolean switches, keyed by flag. Maps avoid accidental hits on
  // Object.prototype members (e.g. an argument of "--constructor").
  const longFlags = new Map([
    ['--auto-file', 'autoFile'], ['--here', 'here'], ['--lang', 'lang'],
    ['--emojis', 'emojis'], ['--mark', 'mark'], ['--summarize', 'summarize'],
    ['--text', 'showText'], ['--list', 'showList'], ['--lg', 'showLg'],
    ['--ej', 'showEj'], ['--version', 'showVersion'], ['--help', 'showHelp']
  ]);
  const shortFlags = new Map([
    ['a', 'autoFile'], ['l', 'lang'], ['e', 'emojis'], ['m', 'mark'],
    ['s', 'summarize'], ['t', 'showText'], ['v', 'showVersion'], ['h', 'showHelp']
  ]);

  // Returns the filename following an output flag, or exits when it is
  // missing or looks like another flag.
  const readOutputFilename = (index, flagName) => {
    const candidate = args[index + 1];
    if (candidate === undefined || candidate.startsWith('-')) {
      console.error(`Error: ${flagName} flag requires a filename.`);
      process.exit(1);
    }
    return candidate;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--output' || arg === '-o') {
      options.output = readOutputFilename(i, arg);
      i += 1; // the filename has been consumed
    } else if (arg.startsWith('--')) {
      const optionName = longFlags.get(arg);
      if (optionName === undefined) {
        textInput.push(arg);
      } else {
        options[optionName] = true;
      }
    } else if (arg.startsWith('-') && arg.length > 1) {
      // Combined short flags, e.g. "-ale".
      for (const flag of arg.slice(1)) {
        const optionName = shortFlags.get(flag);
        if (optionName === undefined) {
          console.error(`Error: Unknown short flag '${flag}' in '${arg}'`);
          process.exit(1);
        }
        options[optionName] = true;
      }
    } else {
      textInput.push(arg);
    }
  }

  // Prints a bundled data file verbatim; a missing/unreadable file is
  // reported as an error instead of surfacing as an unhandled rejection.
  const printDataFile = (fileName) => {
    try {
      console.log(fs.readFileSync(path.resolve(__dirname, fileName), 'utf-8'));
    } catch (err) {
      console.error(`Error: could not read ${fileName}: ${err.message}`);
      process.exit(1);
    }
  };

  if (options.showHelp) { showHelp(); return; }
  if (options.showVersion) { console.log(getPackageVersion()); return; }
  if (options.showList) { showReplacements(); return; }

  let approvedChars = DEFAULT_APPROVED_CHARS;
  if (options.lang) approvedChars += getCharsFromJson('app.json');
  if (options.emojis) approvedChars += getCharsFromJson('admin.json');

  if (options.showText) { showApprovedChars(approvedChars); return; }
  if (options.showLg) { printDataFile('app.json'); return; }
  if (options.showEj) { printDataFile('admin.json'); return; }

  if (options.summarize && !process.env.GEMINI_API_KEY) {
    console.error(`Error: GEMINI_API_KEY is not set. Please set it to use AI summarization.`);
    process.exit(1);
  }

  const isPiped = !process.stdin.isTTY;
  const hasDirectArgs = textInput.length > 0;
  let autoGenerateFilename = options.autoFile;
  // Piped input with no explicit destination defaults to an auto-named file.
  if (isPiped && !hasDirectArgs && !options.output && !autoGenerateFilename && !options.here) {
    autoGenerateFilename = true;
  }

  // Cleans or summarizes `text` and delivers the result.
  const handleInput = async (text) => {
    if (!text || !text.trim()) {
      showHelp();
      return;
    }

    let outputText;
    if (options.summarize) {
      if (!checkRateLimitCli()) {
        console.error(`\nError: API call limit reached (${MAX_CALLS_PER_MINUTE_CLI} per minute). Please try again later.`);
        process.exit(1);
      }
      // NOTE(review): progress is written to stdout, so it ends up in the
      // output when stdout is redirected; kept for backward compatibility.
      process.stdout.write("Summarizing with AI... ");
      try {
        const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
        const cleanedForSummary = cleanText(text, false, approvedChars);
        const prompt = `Please provide a concise summary of the following text:\n\n${cleanedForSummary}`;
        const response = await ai.models.generateContent({
          model: 'gemini-2.5-flash',
          contents: prompt,
          config: {
            systemInstruction: 'You summarize pre-cleaned text. Be concise.'
          }
        });
        // Guard against a response that carries no text.
        const summary = (response.text ?? '').trim();
        process.stdout.write("Done.\n");
        outputText = `Gemini Generated Summary:\n\n${summary}`;
      } catch (error) {
        process.stdout.write("Failed.\n");
        console.error('\nError summarizing text:', error.message);
        process.exit(1);
      }
    } else {
      outputText = cleanText(text, options.mark, approvedChars);
    }

    const saveToFile = (options.output || autoGenerateFilename) && !options.here;
    if (!saveToFile) {
      process.stdout.write(outputText + '\n');
      return;
    }

    let finalFilename = options.output;
    if (autoGenerateFilename) {
      const randomNumber = Math.floor(Math.random() * (1588 - 100 + 1)) + 100;
      finalFilename = `1-hownz.com-${randomNumber}.txt`;
    }
    fs.writeFile(finalFilename, outputText + '\n', 'utf8', (err) => {
      if (err) {
        console.error(`Error writing to file: ${err.message}`);
        process.exit(1);
      }
      console.log(`Cleaned text successfully saved to ${finalFilename}`);
    });
  };

  if (hasDirectArgs) {
    await handleInput(textInput.join(' '));
  } else if (isPiped) {
    // Decode as UTF-8 at the stream level: concatenating raw Buffer chunks
    // stringifies each chunk separately and corrupts multi-byte characters
    // that straddle a chunk boundary.
    process.stdin.setEncoding('utf8');
    let input = '';
    for await (const chunk of process.stdin) {
      input += chunk;
    }
    await handleInput(input);
  } else {
    showHelp();
  }
};
// Run the CLI. The promise returned by main() is handled explicitly so an
// unexpected failure is reported as a short error message with a non-zero
// exit status instead of an unhandled promise rejection.
main().catch((error) => {
  console.error(`Error: ${error?.message ?? error}`);
  process.exitCode = 1;
});