// twi2fido
// Version:
// Aggregates microblog entries from Twitter and prepares them for being posted to Fidonet.
// 410 lines (362 loc) • 15.9 kB
// JavaScript
var fs = require('fs');
var path = require('path');
var util = require('util');
var async = require('async');
var cl = require('ciel');
var escapeStringRegExp = require('escape-string-regexp');
var fiunis = require('fiunis');
var iconv = require('iconv-lite');
var isgd = require('isgd');
var moment = require('moment');
var simteconf = require('simteconf');
var twitter = require('twitter');
var unescapeHTML = require('lodash.unescape');
var XRegExp = require('xregexp');
var config = simteconf( path.join(__dirname, 'twi2fido.config') );
/**
 * Reads the stored "last read" value from the given file.
 *
 * @param {string} filename - path of the file to read.
 * @returns {string|null} the file's content with the surrounding whitespace
 *    removed, or `null` if the file cannot be read or holds only whitespace.
 */
var getLastReadFromFile = filename => {
   try {
      // Trimming matters: a trailing newline (e.g. left by a text editor)
      // must not become a part of the value used as `since_id` by the caller.
      var readData = fs.readFileSync(filename, {encoding: 'utf8'}).trim();
      return readData === '' ? null : readData;
   } catch(e) {
      // best effort: an absent or unreadable file means "nothing read yet"
      return null;
   }
};
/**
 * Deletes the given file on a best-effort basis.
 * Any failure (a missing file included) is deliberately ignored.
 *
 * @param {string} filename - path of the file to delete.
 */
var eraseFile = function(filename){
   try {
      fs.unlinkSync(filename);
   } catch(ignoredError) {
      // intentionally empty: the file is either gone or cannot be removed
   }
};
var limit = 78; // length limit for lines of runes and runewords

/**
 * Builds a hyperlinked image rune, `[![alt](imageURL)](linkURL "zoom")`,
 * choosing the variant with the fewest line breaks whose every chunk
 * (a run of characters between spaces or newlines) fits in `limit`.
 *
 * @param {string} imageURL - URL of the image to display.
 * @param {string} linkURL - URL the image links to.
 * @param {string} [srcAltText] - alternative text; when it is not a string,
 *    the text `(image)` is used. A `]` inside it is escaped as `\]`.
 * @returns {string|null} the rune, or `null` if no variant fits the limit.
 */
var getShortImageRune = (imageURL, linkURL, srcAltText) => {
   var altText = '(image)';
   if( typeof srcAltText === 'string' ){ // escape runic special characters
      altText = `(${srcAltText.replace( /]/g, '\\]' )})`;
   }

   var fitsLimit = rune => rune.split(/[ \n]/).every(
      chunk => chunk.length <= limit
   );

   // ordered from the most compact form to the most broken-up one
   var candidates = [
      // step 1, almost always fails (both URLs share one chunk)
      `[![${altText}](${imageURL})](${linkURL} "zoom")`,
      // step 2, almost always works (each URL starts its own line)
      `[![${altText}](${imageURL})\n](${linkURL} "zoom")`,
      // step 3, should always work (alt text is also on its own line)
      `[![${altText}\n](${imageURL})\n](${linkURL} "zoom")`
   ];

   var shortestFit = candidates.find(fitsLimit);
   if( typeof shortestFit === 'string' ) return shortestFit;
   return null; // URLs too large
};
/**
 * Builds an animation runeword, `[(animation)](URL "runeanim")`, from the
 * first video variant of the given media object.
 *
 * @param {Object} mediaURL - media object; `video_info.variants[0]` is read
 *    and must have `content_type === 'video/mp4'` and a non-empty `url`.
 * @returns {string|null} the runeword, or `null` if the source data is
 *    unusable or the URL does not fit in the `limit` (defined in this file).
 */
var getAnimRuneword = mediaURL => {
   var videoInfo = mediaURL.video_info;
   // `typeof null` is also 'object', hence the explicit null check
   if( typeof videoInfo !== 'object' || videoInfo === null ) return null;
   if( !Array.isArray(videoInfo.variants) ) return null;

   var zeroVariant = videoInfo.variants[0];
   // an empty `variants` array yields `undefined` here
   if( typeof zeroVariant !== 'object' || zeroVariant === null ) return null;
   if( zeroVariant.content_type !== 'video/mp4' ) return null;
   if( typeof zeroVariant.url !== 'string' ) return null;

   var linkURL = zeroVariant.url;
   if( linkURL.length < 1 ) return null;

   var fitsLimit = runeword => runeword.split(/[ \n]/).every(
      chunk => chunk.length <= limit
   );

   // step 1, almost always fails
   var oneLiner = `[(animation)](${linkURL} "runeanim")`;
   if( fitsLimit(oneLiner) ) return oneLiner;

   // step 2, should always work: chunk = linkURL + 2 characters
   var twoLiner = `[(animation)\n](${linkURL} "runeanim")`;
   if( fitsLimit(twoLiner) ) return twoLiner;

   return null; // URL too large
};
/**
 * Builds a video runeword, `[(video)](URL "runevideo")`, for the variant
 * with the largest bitrate; the variant's URL is shortened via `isgd`.
 *
 * @param {Object} mediaURL - media object; `video_info.variants` is read,
 *    only variants with a numeric `bitrate` are considered.
 * @param {function(?string)} cbRuneword - called with the runeword,
 *    or with `null` if the source data is unusable, the shortening fails
 *    (reported through `cl.fail`), or the URL does not fit in the `limit`.
 */
var getVideoRuneword = (mediaURL, cbRuneword) => {
   var videoInfo = mediaURL.video_info;
   // `typeof null` is also 'object', hence the explicit null check
   if(
      typeof videoInfo !== 'object' || videoInfo === null
   ) return cbRuneword(null);
   if( !Array.isArray(videoInfo.variants) ) return cbRuneword(null);

   var vidVariants = videoInfo.variants.filter(
      nextVariant => typeof nextVariant === 'object' &&
         nextVariant !== null &&
         typeof nextVariant.bitrate === 'number'
   );
   if( vidVariants.length < 1 ) return cbRuneword(null);

   // `.filter` has produced a fresh array, sorting it in place is safe
   var sourceVideoURL = vidVariants.sort(
      (a, b) => b.bitrate - a.bitrate // [0] is to contain the largest bitrate
   )[0].url;
   if( typeof sourceVideoURL !== 'string' ) return cbRuneword(null);
   if( sourceVideoURL.length < 4 ) return cbRuneword(null); // 'ftp:'.length

   var fitsLimit = runeword => runeword.split(/[ \n]/).every(
      chunk => chunk.length <= limit
   );

   isgd.shorten(sourceVideoURL, linkURL => {
      var isShortened = typeof linkURL === 'string' && (
         linkURL.startsWith('https://is.gd/') ||
         linkURL.startsWith('http://is.gd/')
      );
      if( !isShortened ){
         cl.fail('Cannot shorten ' + sourceVideoURL);
         cl.fail(String(linkURL)); // is likely to contain an error message
         return cbRuneword(null);
      }

      // step 1, might fail (though likely to work because of shortening)
      var oneLiner = `[(video)](${linkURL} "runevideo")`;
      if( fitsLimit(oneLiner) ) return cbRuneword(oneLiner);

      // step 2, should always work: chunk = linkURL + 2 characters
      var twoLiner = `[(video)\n](${linkURL} "runevideo")`;
      if( fitsLimit(twoLiner) ) return cbRuneword(twoLiner);

      return cbRuneword(null); // URL too large
   });
};
/**
 * Renders one tweet as a portion of the message's text and passes it
 * to the given callback as `cbContent(null, text)`.
 *
 * The portion consists of a headline (user's name, screen name, UTC time),
 * the tweet's permalink, the given text, and four trailing newlines.
 *
 * @param {Object} source - tweet; `user.name`, `user.screen_name`,
 *    `created_at` and `id_str` are read.
 * @param {string} sourceText - already prepared text of the tweet.
 * @param {function} cbContent - node-style callback.
 * @returns whatever `cbContent` returns.
 */
var cbTweetToContent = (source, sourceText, cbContent) => {
   var displayName = source.user.name;
   var screenName = source.user.screen_name;

   var timestampUTC = moment(
      source.created_at,
      'ddd MMM DD HH:mm:ss ZZ YYYY'
   ).utc().format('YYYY-MM-DD HH:mm:ss');

   // `.join('')` (unlike a template literal) renders `undefined` as ''
   var headline = [
      displayName, ' (@', screenName, ') ', timestampUTC, ' (UTC)'
   ].join('');
   var permalink = [
      'https://twitter.com/', screenName, '/status/', source.id_str
   ].join('');

   return cbContent(
      null,
      [headline, '\n\n', permalink, '\n\n', sourceText, '\n\n\n\n'].join('')
   );
};
/**
 * Builds a regular expression (via `XRegExp`, flags `giA`) that matches
 * any of the given hashtags when it is followed by the end of the input
 * or by a character matching `\PL`.
 *
 * @param {string[]} hashtags - hashtags to look for; each one is escaped
 *    with `escapeStringRegExp` and so is matched literally.
 * @returns the value returned by `XRegExp`.
 */
var getHashtagRegExp = hashtags => {
   var alternatives = hashtags.map(
      nextHashtag => escapeStringRegExp(nextHashtag)
   ).join('|');
   return XRegExp(`(?:${alternatives})(?=$|\\PL)`, 'giA');
};
/**
 * Requests the timeline of the given Twitter user, converts the received
 * tweets to text and writes that text to `options.textOutput`; afterwards
 * the ID of the first received tweet is stored in `options.fileLastRead`.
 *
 * Both paths are resolved relative to this module's directory.
 * The process is terminated (`process.exit`) if `options.CHRS` is unusable,
 * after a debug dump (`options.debug`), or after counting
 * (`options.counting`).
 *
 * @param {string} loginName - screen name of the Twitter user.
 * @param {Object} options - settings; the properties read here are
 *    `textOutput`, `fileLastRead`, `CHRS`, `debug`, `hashtags`, `counting`
 *    and `noRunes`.
 */
module.exports = (loginName, options) => {
var textOutput = path.resolve(__dirname, options.textOutput);
var fileLastRead = path.resolve(__dirname, options.fileLastRead);
var debugOutput = path.resolve(__dirname, 'debug.json');
// `options.CHRS` has to contain a space; the part before it is the encoding:
var spaceIDX = options.CHRS.indexOf(' ');
if( spaceIDX < 0 ){
cl.fail(
`The given charset "${options.CHRS
}" does not have an <encoding><whitespace><level> form.`
);
cl.fail([
'The standard http://ftsc.org/docs/fts-5003.001',
' does not currently recommend it.'
].join(''));
if(!( options.debug )) eraseFile(textOutput);
process.exit(1);
}
var encodingCHRS = options.CHRS.slice(0, spaceIDX);
if(!( iconv.encodingExists(encodingCHRS) )){
cl.fail(`The given encoding "${encodingCHRS}" is unknown.`);
cl.fail([
'The module https://github.com/ashtuchkin/iconv-lite',
' does not support it.'
].join(''));
if(!( options.debug )) eraseFile(textOutput);
process.exit(1);
}
// in UTF-8 mode the final `fiunis.encode` step is skipped (see below);
// the comparison is case-sensitive:
var modeUTF8 = (encodingCHRS === 'UTF-8' || encodingCHRS === 'UTF8');
// Twitter credentials are taken from the configuration file:
var twi = new twitter({
consumer_key: config.last('ConsumerKey'),
consumer_secret: config.last('ConsumerSecret'),
access_token_key: config.last('AccessTokenKey'),
access_token_secret: config.last('AccessTokenSecret')
});
var tweeOptions = {
// include_rts: false, ← can become a future setting!
count: 100,
include_ext_alt_text: true,
screen_name: loginName,
tweet_mode: 'extended'
};
// outside of the debug mode only the tweets after the stored ID are requested:
if(!( options.debug )){
var lastRead = getLastReadFromFile(fileLastRead);
if( lastRead !== null ) tweeOptions.since_id = lastRead;
}
twi.get('statuses/user_timeline', tweeOptions, (err, tweetList) => {
var lastreadID = null;
if( err ) throw new Error( util.inspect(err, { depth: null }) );
// debug mode: dump the raw response to `debug.json` and stop:
if( options.debug ){
fs.writeFileSync(
debugOutput, util.inspect(tweetList, { depth: null })
);
cl.ok('Debug output has been written: ' + debugOutput);
process.exit();
}
// non-zero length before filtering → lastread update is needed:
// (the first element is presumably the newest tweet, because the list
// is said below to be in reverse chronological order — TODO confirm)
if( tweetList.length > 0 ) lastreadID = tweetList[0].id_str;
// filtering:
if( options.hashtags.length > 0 ){
tweetList = tweetList.filter(nextTweet => {
// same text extraction as in the iterator given to `async.map` below:
var sourceText = unescapeHTML(
(
nextTweet.retweeted_status || nextTweet
).full_text
);
// a fresh regular expression is built for every tweet:
return getHashtagRegExp(options.hashtags).test(sourceText);
});
}
// counting mode: report the number of tweets and stop,
// neither the output file nor the lastread file is written:
if( options.counting ){
if( tweetList.length < 1 ){
cl.skip('Zero tweets are waiting to be reposted in Fidonet.');
} else {
cl.status([
tweetList.length,
' tweet',
(tweetList.length > 1) ? 's are' : ' is',
' waiting to be reposted in Fidonet.'
].join(''));
}
process.exit();
}
// zero length after filtering → nothing to do, immediate exit:
if( tweetList.length < 1 ){
// exiting sequence initiated:
if( lastreadID !== null ) fs.writeFileSync(fileLastRead, lastreadID);
eraseFile(textOutput);
cl.skip('Zero tweets received, output file erased.');
return;
}
// the list of microblog entries is not empty → processing:
tweetList.reverse(); // undo reverse chronological order
async.map(
tweetList, // `tweetList` elements → Fidonet message's text portions
(tweet, cbContent) => {
// same text extraction as in the hashtag filter above;
// for a retweet the retweeted status is used instead of the tweet:
var source = tweet.retweeted_status || tweet;
var sourceText = unescapeHTML(source.full_text);
// expand simple URLs in `sourceText`:
// (an expanded URL longer than 78 characters is not substituted)
if(
typeof source.entities !== 'undefined' &&
Array.isArray(source.entities.urls)
) sourceText = source.entities.urls.reduce((txt, objURL) => {
if(
typeof objURL.url === 'string' &&
typeof objURL.expanded_url === 'string' &&
objURL.expanded_url.length <= 78
) return txt.split(objURL.url).join(objURL.expanded_url);
return txt;
}, sourceText);
if(
typeof source.extended_entities === 'undefined' ||
!Array.isArray(source.extended_entities.media)
) return cbTweetToContent(source, sourceText, cbContent);
//(cannot expand media URLs in `sourceText` → nothing else to do)
// expand media URLs in `sourceText`:
// (one by one, because every step reads and reassigns `sourceText`)
var arrMediaURLs = source.extended_entities.media;
async.eachSeries(
arrMediaURLs,
(mediaURL, doneMediaURL) => {
if(
typeof mediaURL.url !== 'string' ||
typeof mediaURL.display_url !== 'string' ||
('https://' + mediaURL.display_url).length > 78
) return doneMediaURL(null);
// ( cannot do anything with such `mediaURL` )
// `HTTPSURL` replaces `mediaURL.url` inside `sourceText`,
// though the last `mediaURL.url` can be replaced by rune(s)
// later at the end of `sourceText` (see details below):
var HTTPSURL = 'https://' + mediaURL.display_url;
var frags = sourceText.split(mediaURL.url);
if(
options.noRunes ||
frags.length < 2 || frags[frags.length-1] !== ''
){
// either Fidonet runes are disabled,
// or the tweet does not end with `mediaURL.url`,
// therefore cannot cause rune(s) or a runeword:
sourceText = frags.join(HTTPSURL);
return doneMediaURL(null);
}
// create a separator to insert before the final rune(s):
var separuner = '\n\n';
// and a special case when the tweet contains only rune(s):
if( frags.length === 2 && frags[0] === '' ) separuner = '';
// ( in that case frags[1] is also '' since the prev check )
// detect and render the necessary runes or runewords:
if( mediaURL.type === 'photo' ){
// all media entries sharing this `display_url` get their runes at once:
var imageRunes = arrMediaURLs.filter(nextMediaURL =>
nextMediaURL.display_url === mediaURL.display_url
).map(nextMediaURL => getShortImageRune(
nextMediaURL.media_url_https,
nextMediaURL.media_url_https + ':orig',
nextMediaURL.ext_alt_text
)).filter(nextRune => nextRune !== null);
if( imageRunes.length > 0 ){
// drop the trailing empty fragment, so the final `mediaURL.url`
// gives place to the runes instead of `HTTPSURL`:
frags.pop();
frags[
frags.length-1
] += separuner + imageRunes.join('\n\n');
}
sourceText = frags.join(HTTPSURL);
return doneMediaURL(null);
} else if( mediaURL.type === 'animated_gif' ){
var animRuneword = getAnimRuneword(mediaURL);
if( typeof animRuneword === 'string' ){
frags.pop();
frags[frags.length-1] += separuner + animRuneword;
}
sourceText = frags.join(HTTPSURL);
return doneMediaURL(null);
} else if( mediaURL.type === 'video' ){
// the only branch where `doneMediaURL` is called from a callback:
getVideoRuneword(mediaURL, videoRuneword => {
if( typeof videoRuneword === 'string' ){
frags.pop();
frags[frags.length-1] += separuner + videoRuneword;
}
sourceText = frags.join(HTTPSURL);
return doneMediaURL(null);
});
} else { // unknown mediaURL type, nothing to do:
sourceText = frags.join(HTTPSURL);
return doneMediaURL(null);
}
},
err => {
if( err ) return cbContent(err);
return cbTweetToContent(source, sourceText, cbContent);
}
);
}, // converted `tweetList` to portions of Fidonet message's content
(err, arrContent) => {
if( err ) throw err;
// add an empty line “after kludges” (though they're added later):
var content = '\u00A0\n' + arrContent.join('');
twi.get( // trying to get an avatar for the corresponding kludge
'users/show',
{ screen_name: loginName },
(err, userdata) => {
// a failure of this request is not fatal: the AVATAR line is just omitted
if(
!err &&
typeof userdata.profile_image_url_https === 'string'
){
content = [
'\x01AVATAR: ',
// the `_normal` suffix before the file extension is removed
// (presumably that yields the full-sized picture — TODO confirm):
userdata.profile_image_url_https.replace(
/_normal\.(jpe?g|png|gif|svg|webp)$/,
'.$1'
),
'\n',
content
].join('');
}
// CHRS and SOURCESITE lines are prepended before everything else:
content = `\x01CHRS: ${options.CHRS
}\n\x01SOURCESITE: Twitter\n${content}`;
if( !modeUTF8 ) content = fiunis.encode(
content, encodingCHRS
);
fs.writeFileSync(textOutput, content);
// everything is OK → exiting sequence initiated:
if(
lastreadID !== null
) fs.writeFileSync(fileLastRead, lastreadID);
cl.ok([
tweetList.length,
' tweet',
(tweetList.length > 1) ? 's' : '',
' written.'
].join(''));
}
);
}
);
});
};