extra-amazontranslate
Version:
Translate long text from one language to another, via Amazon Translate.
187 lines (158 loc) • 5.88 kB
JavaScript
const cp = require('child_process');
const fs = require('fs');
const _ = require('lodash');
const boolean = require('boolean').boolean;
const AWS = require('aws-sdk');
const awsconfig = require('extra-awsconfig');
var iso6391 = null;
var getStdin = null;
var got = null;
// Global variables
const E = process.env;
const STDIO = [0, 1, 2];
const OPTIONS = {
log: boolean(E['TRANSLATE_LOG'] || '0'),
retries: parseInt(E['TRANSLATE_RETRIES'] || '8', 10),
source: E['TRANSLATE_SOURCE'] || 'auto',
target: E['TRANSLATE_TARGET'] || 'en',
service: {
url: E['TRANSLATE_SERVICE_URL'] || null
},
block: {
separator: E['TRANSLATE_BLOCK_SEPARATOR'] || '.',
length: parseInt(E['TRANSLATE_BLOCK_LENGTH'] || '2500', 10)
},
config: null
};
// ES modules dependency loader :).
async function importDependencies() {
if (iso6391!=null) return;
var $ = await Promise.all([
import('iso-639-1'),
import('get-stdin'),
import('got')
]);
iso6391 = $[0].default;
getStdin = $[1].default;
got = $[2].default;
}
// Shorten text.
function shorten(txt, len=50) {
return txt.length>len? txt.substring(0, len-4)+' ...':txt;
}
// Get text split by topics.
function splitTopic(txt, z=[]) {
var re = /(\s*)(=+)(\s*)(.*?)\3\2(\s*\r?\n)/g;
for(var m=null, mi=0; (m=re.exec(txt))!=null;) {
z.push(txt.substring(mi, m.index), m[1]+m[2]+m[3], m[4], m[3]+m[2]+m[5]);
mi = m.index+m[0].length;
}
if(mi<txt.length) z.push(txt.substring(mi));
return z;
}
// Get text split by blocks.
function splitBlock(txt, siz, sep, z=[]) {
if(txt.length<siz) { z.push(txt); return z; }
for(;txt;) {
var i = txt.lastIndexOf(sep, siz);
if(i<0) z.push(txt.substring(0, siz), '');
else z.push(txt.substring(0, i), sep);
txt = txt.substring(i<0? siz:i+1);
}
z.push('');
return z;
}
// Get text split.
function split(txt, siz=2500, sep='.', z=[]) {
var tops = splitTopic(txt);
for(var i=0, I=tops.length; i<I; i+=2) {
splitBlock(tops[i], siz, sep, z);
z.push(tops[i+1]||'');
}
return z;
}
// Translate text.
async function translate(aws, txt, o) {
var params = {Text: txt, SourceLanguageCode: o.source||'auto', TargetLanguageCode: o.target||'en'};
if(o.service.url) {
var res = await got(o.service.url, {body: JSON.stringify({method: 'translateText', params})});
return JSON.parse(res.body).data.TranslatedText;
}
return new Promise((fres, frej) => aws.translateText(params, (err, data) => {
return err? frej(err):fres(data.TranslatedText);
}));
}
// Translate text with retries.
async function translateRetry(aws, txt, id, o) {
for(var i=0, I=o.retries||8, err=null; i<I; i++) {
try {
var z = await translate(aws, txt, o);
if(o.log) console.log('-translateRetry', id, shorten(txt));
return z;
}
catch(e) { err = e; }
}
throw err;
}
// Get language code from name.
function langCode(nam) {
return nam!=='auto' && !iso6391.validate(nam)? iso6391.getCode(nam):nam;
}
/**
* Translate long text from one language to another (via "Amazon Translate").
* @param {string} txt Input text to be translated.
* @param {object} [o] Translation and config options.
* @returns {Promise<string>} Translated text.
*/
async function amazontranslate(txt, o) {
await importDependencies();
var o = _.merge({}, OPTIONS, o), z=[];
o.source = langCode(o.source); o.target = langCode(o.target);
var aws = new AWS.Translate(awsconfig(o.config)), txts=[];
split(txt, o.block.length, o.block.separator, txts);
if(o.log) console.log('@amazontranslate:', Math.floor(txts.length/2), shorten(txt));
for(var i=0, I=txts.length; i<I; i+=2) {
if(txts[i]) z.push(translateRetry(aws, txts[i], i/2, o));
if(txts[i+1]) z.push(txts[i+1]);
}
return (await Promise.all(z)).join('');
}
// Get options from arguments.
function options(o, k, a, i) {
o.config = o.config||{};
var e = k.indexOf('='), v = null, bool = () => true, str = () => a[++i];
if(e>=0) { v = k.substring(e+1); bool = () => boolean(v); str = () => v; k = k.substring(o, e); }
if(k==='--help') o.help = bool();
else if(k==='-l' || k==='--log') o.log = bool();
else if(k==='-o' || k==='--output') o.output = str();
else if(k==='-t' || k==='--text') o.text = str();
else if(k==='-r' || k==='--retries') o.retries = parseInt(str(), 10);
else if(k==='-os' || k==='--source') o.source = str();
else if(k==='-ot' || k==='--target') o.target = str();
else if(k==='-bs' || k==='--block_separator') _.set(o, 'block.separator', str());
else if(k==='-bl' || k==='--block_length') _.set(o, 'block.length', parseInt(str(), 10));
else if(k==='-su' || k==='--service_url') _.set(o, 'service.url', str());
else if(k.startsWith('-c')) return awsconfig.options(o.config, '-'+k.substring(2), a, i);
else if(k.startsWith('--config_')) return awsconfig.options(o.config, '--'+k.substring(9), a, i);
else o.argv = a[i];
return i+1;
}
amazontranslate.options = options;
module.exports = amazontranslate;
// Run on shell.
async function shell(a) {
await importDependencies();
var o = {argv: await getStdin()};
for(var i=2, I=a.length; i<I;)
i = options(o, a[i], a, i);
if(o.help) return cp.execSync('less README.md', {cwd: __dirname, stdio: STDIO});
try {
var txt = o.text? fs.readFileSync(o.text, 'utf8'):o.argv||'';
var out = await amazontranslate(txt, o);
if(o.output) fs.writeFileSync(o.output, out);
else console.log(out);
}
catch(err) { console.error(err.message); }
}
if(require.main===module) shell(process.argv);