feedr
Version:
Use feedr to fetch the data from a remote url, respect its caching, and parse its data. Despite its name, it's not just for feed data but also for all data that you can feed into it (including binary data).
773 lines (680 loc) • 16.7 kB
JavaScript
// Requires
const extendr = require('extendr')
const eachr = require('eachr')
const { TaskGroup } = require('taskgroup')
const typeChecker = require('typechecker')
const safefs = require('safefs')
const safeps = require('safeps')
const pathUtil = require('path')
const request = require('request')
// Define
// Feedr fetches the data behind a url, keeps a cached copy (data file plus a
// `-meta.json` file holding the response headers) inside a temp directory,
// and runs the data through parse and check plugins.
class Feedr {
  // Helpers

  // Create a new Feedr instance, forwarding all arguments to the constructor
  static create(...args) {
    return new Feedr(...args)
  }

  // Check to see if the cached copy of a feed is still relevant
  // feed={cache}, cache=boolean/`preferred`/number (milliseconds added to metaData.date)
  // metaData={expires, date}, the response headers that were stored with the cache
  // return boolean
  static isFeedCacheStillRelevant(feed, metaData) {
    // The cache is disabled (or zero), so it is never relevant
    if (!feed.cache) return false
    // User always wants to use the cache
    if (feed.cache === 'preferred') return true
    const now = new Date()
    // If the cache is still relevant according to the website
    if (metaData.expires && now < new Date(metaData.expires)) return true
    // If the cache is still relevant according to the user
    // Coerced, as the operands would otherwise leak undefined/null through
    return Boolean(
      typeChecker.isNumber(feed.cache) &&
        metaData.date &&
        now < new Date(new Date(metaData.date).getTime() + feed.cache)
    )
  }

  // Constructor
  // config={log, cache, tmpPath, requestOptions, plugins}
  constructor(config = {}) {
    // Prepare
    const me = this

    // Extend and dereference our configuration
    this.config = extendr.deep(
      {
        log: null,
        cache: 1000 * 60 * 60 * 24, // one day by default
        tmpPath: null,
        requestOptions: null,
        plugins: null
      },
      this.config || {},
      config
    )

    // Get the temp path right away
    // but never overwrite a temp path that the user has supplied
    safeps.getTmpPath(function(err, tmpPath) {
      if (err) {
        console.error(err)
      } else if (me.config.tmpPath == null) {
        me.config.tmpPath = tmpPath
      }
    })
  }

  // Log
  // Forwards the arguments to the configured log function, if there is one
  log(...args) {
    if (this.config.log) this.config.log(...args)
    return this
  }

  // Read Feeds
  // feeds = {feedName:feed} or [feed], where feed is a url string or a feed object
  // a plain object passed after the first argument is used as defaults for every feed
  // next(err,result), result is keyed by the feed's name/index, failed feeds are omitted
  readFeeds(...args) {
    // Prepare
    const me = this
    const failures = []

    // Prepare options
    let feeds = null
    const defaultfeed = {} // defaults that are merged underneath every feed
    let next = null

    // Extract the configuration from the arguments
    args.forEach(function(arg, index) {
      if (typeChecker.isFunction(arg)) {
        next = arg
      } else if (typeChecker.isArray(arg)) {
        feeds = arg
      } else if (typeChecker.isPlainObject(arg)) {
        if (index === 0) {
          feeds = arg
        } else {
          extendr.extend(defaultfeed, arg)
        }
      }
    })

    // Extract
    const results = {}

    // Tasks
    const tasks = TaskGroup.create({
      concurrency: 0,
      abortOnError: false
    }).done(function() {
      let message = 'Feedr finished fetching'
      let err = null
      if (failures.length !== 0) {
        message +=
          ` with ${failures.length} failures:\n` +
          failures
            .map(function(i) {
              return i.message
            })
            .join('\n')
        err = new Error(message)
        me.log('warn', err)
      } else {
        me.log('debug', message)
      }
      next(err, results)
    })

    // Feeds
    eachr(feeds, function(feed, index) {
      tasks.addTask(function(complete) {
        // Prepare
        if (typeChecker.isString(feed)) {
          feed = { url: feed }
        }
        // The prepared feed is written back into the supplied feeds
        feeds[index] = feed = extendr.deep({}, defaultfeed, feed)

        // Read
        me.readFeed(feed, function(err, data) {
          // Handle
          if (err) {
            me.log(
              'warn',
              `Feedr failed to fetch [${feed.url}] to [${feed.path}]`,
              err.stack
            )
            failures.push(err)
          } else {
            results[index] = data
          }

          // Complete
          complete(err)
        })
      })
    })

    // Start
    tasks.run()

    // Chain
    return this
  }

  // Prepare Feed Details
  // Fills in every feed option that has not been supplied, modifying the feed in place
  // return the same feed object
  prepareFeed(feed) {
    // Set defaults
    if (feed.hash == null) {
      feed.hash = require('crypto')
        .createHash('md5')
        .update(`feedr-${JSON.stringify(feed.url)}`)
        .digest('hex')
    }
    if (feed.basename == null) {
      // Strip the query string and the fragment before taking the basename
      feed.basename = pathUtil.basename(feed.url.replace(/[?#].*/, ''))
    }
    if (feed.extension == null) feed.extension = pathUtil.extname(feed.basename)
    if (feed.name == null) feed.name = feed.hash + feed.extension
    if (feed.path == null || feed.metaPath == null) {
      // The configured temp path is filled in by a callback and stays null if
      // detection failed, so fall back to the operating system's temp directory
      const tmpPath = this.config.tmpPath || require('os').tmpdir()
      if (feed.path == null) feed.path = pathUtil.join(tmpPath, feed.name)
      if (feed.metaPath == null) {
        feed.metaPath = pathUtil.join(tmpPath, feed.name) + '-meta.json'
      }
    }
    if (feed.cache == null) feed.cache = this.config.cache
    if (feed.parse == null) feed.parse = true
    if (feed.parse === 'raw') feed.parse = false
    if (feed.check == null) feed.check = true
    if (feed.plugins == null) {
      feed.plugins = this.config.plugins || 'github xml cson json yaml string'
    }
    if (feed.metaData == null) feed.metaData = {}

    // Return
    return feed
  }

  // Cleanup response data
  // Replaces any object whose only key is `_content` with the value of that key
  cleanData(data) {
    // Prepare
    const me = this
    const keys = []

    // Discover the keys inside data, and delve deeper
    eachr(data, function(value, key) {
      // NOTE(review): this tests `data` rather than `value`, so every value of
      // a plain object is recursed into whatever its type - confirm intended
      if (typeChecker.isPlainObject(data)) {
        data[key] = me.cleanData(value)
      }
      keys.push(key)
    })

    // Check if we are a simple rest object
    // If so, make it a simple value
    if (keys.length === 1 && keys[0] === '_content') {
      data = data._content
    }

    // Return the result
    return data
  }

  // Read Feed
  // Accepts, in any order: a url string, a feed object, and the completion callback
  // next(err,data,headers)
  readFeed(...args) {
    // Prepare
    const me = this
    let url, feed, next

    // Extract the configuration from the arguments
    args.forEach(function(arg) {
      if (typeChecker.isString(arg)) {
        url = arg
      } else if (typeChecker.isFunction(arg)) {
        next = arg
      } else if (typeChecker.isPlainObject(arg)) {
        feed = arg
      }
    })

    // Check for url
    if (!feed) feed = {}
    if (url) feed.url = url
    if (!feed.url) {
      next(new Error('Feed url was not supplied'))
      return this
    }

    // Check deprecations
    // The misspelt name is what used to be checked, so it is still rejected too
    if (feed.checkResponse || feed.checkReponse) {
      next(new Error('Feed checkResponse option is deprecated for check'))
      return this
    }

    // Ensure optional
    feed = this.prepareFeed(feed)

    // Plugins
    // Load each named plugin from the plugins directory
    const plugins = {}
    if (typeChecker.isString(feed.plugins)) {
      feed.plugins = feed.plugins.split(' ')
    }
    if (typeChecker.isArray(feed.plugins)) {
      for (let i = 0; i < feed.plugins.length; ++i) {
        const name = feed.plugins[i]
        try {
          plugins[name] = require('./plugins/' + name)
        } catch (err) {
          next(err)
          return this
        }
      }
    }

    // Generators
    // Run a parse method, and store its result on opts.data if it returned one
    function generateParser(name, method, opts, complete) {
      me.log('debug', `Feedr parse [${feed.url}] with ${name} attempt`)
      method(opts, function(err, data) {
        if (err) {
          complete(err)
          return
        }
        if (data) {
          me.log(
            'debug',
            `Feedr parse [${feed.url}] with ${name} attempt, used`
          )
          opts.data = data
        } else {
          me.log(
            'debug',
            `Feedr parse [${feed.url}] with ${name} attempt, ignored`
          )
        }
        complete(null, data)
      })
    }

    // Run a check method, and forward its result
    function generateChecker(name, method, opts, complete) {
      me.log('debug', `Feedr check [${feed.url}] with ${name} attempt`)
      method(opts, function(err, data) {
        if (err) {
          complete(err)
          return
        }
        me.log(
          'debug',
          `Feedr check [${feed.url}] with ${name} attempt, success`
        )
        complete(null, data)
      })
    }

    // ------------------------------
    // Parser

    let parseResponse = null

    // Specific
    if (typeChecker.isString(feed.parse)) {
      // Exists
      if (
        typeChecker.isFunction(plugins[feed.parse] && plugins[feed.parse].parse)
      ) {
        parseResponse = generateParser.bind(
          null,
          feed.parse,
          plugins[feed.parse].parse
        )
      }
      // Missing
      else {
        next(new Error('Invalid parse value: ' + feed.parse))
        return this
      }
    }
    // Custom
    else if (typeChecker.isFunction(feed.parse)) {
      parseResponse = generateParser.bind(null, 'custom', feed.parse)
    }
    // Auto
    // Try each plugin's parser, and drop the remaining ones once one returns data
    else if (feed.parse === true) {
      parseResponse = function(opts, parseComplete) {
        const checkTasks = new TaskGroup().done(parseComplete)
        eachr(plugins, function(value, key) {
          if (value.parse != null) {
            checkTasks.addTask(function(parseTaskComplete) {
              generateParser(key, value.parse, opts, function(err, data) {
                if (data) {
                  checkTasks.clear()
                }
                parseTaskComplete(err)
              })
            })
          }
        })
        checkTasks.run()
      }
    }
    // Raw
    else {
      parseResponse = function(opts, parseComplete) {
        parseComplete()
      }
    }

    // ------------------------------
    // Checker

    let checkResponse = null

    // Specific
    if (typeChecker.isString(feed.check)) {
      // Exists
      if (
        typeChecker.isFunction(plugins[feed.check] && plugins[feed.check].check)
      ) {
        checkResponse = generateChecker.bind(
          null,
          feed.check,
          plugins[feed.check].check
        )
      }
      // Missing
      else {
        next(new Error('Invalid check value: ' + feed.check))
        return this
      }
    }
    // Custom
    else if (typeChecker.isFunction(feed.check)) {
      checkResponse = generateChecker.bind(null, 'custom', feed.check)
    }
    // Auto
    // Run the checker of every plugin that has one
    else if (feed.check) {
      checkResponse = function(opts, checkComplete) {
        const checkTasks = new TaskGroup().done(checkComplete)
        eachr(plugins, function(value, key) {
          if (value.check != null) {
            checkTasks.addTask(function(checkTaskComplete) {
              generateChecker(key, value.check, opts, checkTaskComplete)
            })
          }
        })
        checkTasks.run()
      }
    }
    // Raw
    else {
      checkResponse = function(opts, checkComplete) {
        checkComplete()
      }
    }

    // Request options
    // Defaults first, then the instance's options, then the feed's options
    const requestOptions = extendr.deep(
      {
        url: feed.url,
        timeout: 1 * 60 * 1000,
        encoding: null,
        headers: {
          'User-Agent': 'Wget/1.14 (linux-gnu)'
        }
      },
      me.config.requestOptions || {},
      feed.requestOptions || {}
    )

    // Read a file
    // readFileComplete(err, rawData), rawData is not supplied if the file does not exist
    function readFile(path, readFileComplete) {
      // Log
      me.log(
        'debug',
        `Feedr is reading [${feed.url}] on [${path}], checking exists`
      )

      // Check that the file exists
      safefs.exists(path, function(exists) {
        // Check it exists
        if (!exists) {
          // Log
          me.log(
            'debug',
            `Feedr is reading [${feed.url}] on [${path}], it doesn't exist`
          )

          // Exit
          readFileComplete()
          return
        }

        // Log
        me.log(
          'debug',
          `Feedr is reading [${feed.url}] on [${path}], it exists, now reading`
        )

        // It does exist, so let's continue to read the cached file
        safefs.readFile(path, null, function(err, rawData) {
          // Check
          if (err) {
            // Log
            me.log(
              'debug',
              `Feedr is reading [${feed.url}] on [${path}], it exists, read failed`,
              err.stack
            )

            // Exit
            readFileComplete(err)
            return
          }

          // Log
          me.log(
            'debug',
            `Feedr is reading [${feed.url}] on [${path}], it exists, read completed`
          )

          // Return the raw cached data
          readFileComplete(null, rawData)
        })
      })
    }

    // Read and parse a meta file
    // readMetaFileComplete(err, data), data is not supplied if the file does not exist
    function readMetaFile(path, readMetaFileComplete) {
      // Log
      me.log('debug', `Feedr is parsing meta file [${feed.url}] on [${path}]`)

      // Parse
      readFile(path, function(err, rawData) {
        // Check
        if (err || !rawData) {
          // Log
          me.log(
            'debug',
            `Feedr is parsing meta file [${feed.url}] on [${path}], read failed`,
            err && err.stack
          )

          // Exit
          readMetaFileComplete(err)
          return
        }

        // Attempt
        let data = null
        try {
          data = JSON.parse(rawData.toString())
        } catch (err) {
          // Log
          me.log(
            'warn',
            `Feedr is parsing meta file [${feed.url}] on [${path}], parse failed`,
            err.stack
          )

          // Exit
          readMetaFileComplete(err)
          return
        }

        // Log
        me.log(
          'debug',
          `Feedr is parsing meta file [${feed.url}] on [${path}], parse completed`
        )

        // Exit
        readMetaFileComplete(null, data)
      })
    }

    // Write the feed
    // Stores the response headers in the meta file, and the data in the data file
    // writeFeedComplete(err, data)
    function writeFeed(response, data, writeFeedComplete) {
      // Log
      me.log('debug', `Feedr is writing [${feed.url}] to [${feed.path}]`)

      // Prepare
      const writeTasks = TaskGroup.create({ concurrency: 0 }).done(function(
        err
      ) {
        if (err) {
          // Log
          me.log(
            'warn',
            `Feedr is writing [${feed.url}] to [${feed.path}], write failed`,
            err.stack
          )

          // Exit
          writeFeedComplete(err)
          return
        }

        // Log
        me.log(
          'debug',
          `Feedr is writing [${feed.url}] to [${feed.path}], write completed`
        )

        // Exit
        writeFeedComplete(null, data)
      })

      writeTasks.addTask('store the meta data in a cache somewhere', function(
        writeTaskComplete
      ) {
        const writeData = JSON.stringify(
          {
            headers: response.headers,
            parse: feed.parse
          },
          null,
          '  '
        )
        safefs.writeFile(feed.metaPath, writeData, writeTaskComplete)
      })

      writeTasks.addTask('store the parsed data in a cache somewhere', function(
        writeTaskComplete
      ) {
        // Parsed data is stored as JSON, unparsed data is stored as it is
        const writeData = feed.parse ? JSON.stringify(data) : data
        safefs.writeFile(feed.path, writeData, writeTaskComplete)
      })

      // Fire the write tasks
      writeTasks.run()
    }

    // Get the file via reading the cached copy
    // viaCacheComplete(err, data, headers), data stays null if there is no cached copy
    function viaCache(viaCacheComplete) {
      // Log
      me.log('debug', `Feedr is remembering [${feed.url}] from cache`)

      // Prepare
      let meta = null
      let data = null
      const readTasks = TaskGroup.create().done(function(err) {
        viaCacheComplete(err, data, meta && meta.headers)
      })

      readTasks.addTask('read the meta data in a cache somewhere', function(
        viaCacheTaskComplete
      ) {
        readMetaFile(feed.metaPath, function(err, result) {
          if (err || !result) {
            viaCacheTaskComplete(err)
            return
          }
          meta = result
          viaCacheTaskComplete()
        })
      })

      readTasks.addTask('read the parsed data in a cache somewhere', function(
        viaCacheTaskComplete
      ) {
        readFile(feed.path, function(err, rawData) {
          if (err || !rawData) {
            viaCacheTaskComplete(err)
            return
          }
          // meta is still null if the meta file was missing, so guard the lookup
          if (
            feed.parse === false ||
            (feed.parse === true && meta && meta.parse === false)
          ) {
            data = rawData
          } else {
            try {
              data = JSON.parse(rawData.toString())
            } catch (err) {
              viaCacheTaskComplete(err)
              return
            }
          }
          viaCacheTaskComplete()
        })
      })

      // Fire the read tasks
      readTasks.run()
    }

    // Get the file via performing a fresh request
    // viaRequestComplete(err, data, headers)
    // NOTE(review): on this path the headers are the request's headers, whereas
    // viaCache supplies the stored response headers - confirm which is intended
    function viaRequest(viaRequestComplete) {
      // Log
      me.log(
        'debug',
        `Feedr is fetching [${feed.url}] to [${feed.path}], requesting`
      )

      // Add etag if we have it
      if (feed.cache && feed.metaData.etag) {
        if (requestOptions.headers['If-None-Match'] == null) {
          requestOptions.headers['If-None-Match'] = feed.metaData.etag
        }
      }

      // Fetch and Save
      request(requestOptions, function(err, response, data) {
        // Log
        const opts = { feedr: me, feed, response, data }
        me.log(
          'debug',
          `Feedr is fetching [${feed.url}] to [${feed.path}], requested`
        )

        // What should happen if an error occurs
        function handleError(err) {
          // Log
          me.log(
            'warn',
            `Feedr is fetching [${feed.url}] to [${feed.path}], failed`,
            err.stack
          )

          // Without the cache there is nothing to fall back to
          if (!feed.cache) {
            viaRequestComplete(err, opts.data, requestOptions.headers)
            return
          }

          // Fall back to the cached copy, and if there is no usable cached copy
          // report the failure rather than completing with nothing at all
          viaCache(function(cacheErr, cacheData, cacheHeaders) {
            if (cacheErr || cacheData == null) {
              viaRequestComplete(err, opts.data, requestOptions.headers)
              return
            }
            viaRequestComplete(null, cacheData, cacheHeaders)
          })
        }

        // Check error
        if (err) {
          handleError(err)
          return
        }

        // Check cache
        // The server says our cached copy is still current
        if (feed.cache && response.statusCode === 304) {
          viaCache(viaRequestComplete)
          return
        }

        // Determine Parse Type
        parseResponse(opts, function(err) {
          if (err) {
            handleError(err)
            return
          }

          // Log
          me.log(
            'debug',
            `Feedr is fetching [${feed.url}] to [${feed.path}], requested, checking`
          )

          // Exit
          checkResponse(opts, function(err) {
            if (err) {
              handleError(err)
              return
            }

            writeFeed(response, opts.data, function(err) {
              viaRequestComplete(err, opts.data, requestOptions.headers)
            })
          })
        })
      })
    }

    // Refresh if we don't want to use the cache
    if (feed.cache === false) {
      viaRequest(next)
      return this
    }

    // Fetch the latest cache data to check if it is still valid
    readMetaFile(feed.metaPath, function(err, metaData) {
      // There isn't a cache file
      if (err || !metaData) {
        viaRequest(next)
        return
      }

      // Apply to the feed details
      // The meta file holds {headers, parse}, and it is the headers that carry
      // expires, date and etag, a meta file without headers is used as it is
      const headers = metaData.headers || metaData
      feed.metaData = headers

      // There is an expires header and it is still valid
      // cache preferred, use cache if exists, otherwise fall back to relevant
      // cache number, use cache if within number, otherwise fall back to relevant
      if (Feedr.isFeedCacheStillRelevant(feed, headers)) {
        viaCache(next)
        return
      }

      // The cache is no longer relevant
      viaRequest(next)
    })

    // Chain
    return this
  }
}
// Exports
module.exports = Feedr