// kasha
// Version:
// Pre-render your Single-Page Application.
// 404 lines (323 loc) • 9.99 kB
// JavaScript
const cuuid = require('cuuid')
const config = require('../lib/config')
const logger = require('../lib/logger')
const mongo = require('../lib/mongo')
const nsqWriter = require('../lib/nsqWriter')
const nsqReader = require('../lib/nsqReader')
const RESTError = require('../lib/RESTError')
const normalizeDoc = require('../lib/normalizeDoc')
const callback = require('../lib/callback')
const poll = require('../lib/poll')
const prerenderer = require('./prerenderer')
const updateSitemap = require('./updateSitemap')
const validHTTPStatus = require('../lib/validHTTPStatus')
// Time budget (ms) for a job: passed to the prerenderer as render timeout and
// used to drop jobs with a `replyTo` that waited too long in the queue.
const JOB_TIMEOUT = 15 * 1000

// NSQ reader handle, assigned during startup.
let reader
// Number of jobs currently being processed; checked by the graceful exit.
let jobCounter = 0
// Set once a shutdown has been requested.
let stopping = false

// Startup sequence: connect the stores and the queue writer, launch Chromium,
// subscribe to the job queue, then install the shutdown signal handlers.
;(async function bootstrap() {
  try {
    const { url, database, workerOptions } = config.mongodb
    await mongo.connect(url, database, workerOptions)
    await nsqWriter.connect()

    logger.warn('Launching chromium...')
    await prerenderer.launch()
    prerenderer.on('disconnected', () => {
      logger.error('Chromium disconnected')
    })
    logger.warn('Chromium launched')

    // The --async flag selects which queue this worker consumes.
    const topic = global.argv.async ? 'kasha-async-queue' : 'kasha-sync-queue'
    reader = nsqReader.connect(topic, 'worker', config.nsq.reader)

    main()

    for (const signal of ['SIGINT', 'SIGTERM']) {
      process.once(signal, exit)
    }

    logger.warn('Kasha Worker started')
  } catch (err) {
    // Startup failed: report, release whatever was opened, and flag a failure exit code.
    logger.error(err)
    await closeConnections()
    process.exitCode = 1
  }
})()
/**
 * Graceful shutdown handler (registered for SIGINT / SIGTERM).
 *
 * Pauses the queue reader so no new jobs arrive, then checks once per second
 * whether all in-flight jobs have finished; when `jobCounter` reaches zero the
 * connections are closed. Repeated invocations after the first are ignored.
 */
async function exit() {
  if (stopping) {
    return
  }
  stopping = true

  logger.warn('Closing the worker... Please wait for finishing the in-flight jobs...')
  reader.pause()

  const drainTimer = setInterval(async() => {
    // Still busy: look again on the next tick.
    if (jobCounter !== 0) {
      return
    }

    // Stop polling before the asynchronous cleanup so it runs only once.
    clearInterval(drainTimer)
    await closeConnections()
    logger.warn('exit successfully')
  }, 1000)
}
/**
 * Closes the worker's external resources one after another:
 * MongoDB, the NSQ writer, the NSQ reader and finally the prerenderer.
 * Each close is awaited before the next one starts.
 */
async function closeConnections() {
  const connections = [mongo, nsqWriter, nsqReader]
  for (const connection of connections) {
    await connection.close()
  }

  if (!prerenderer) {
    return
  }

  logger.warn('Closing prerenderer...')
  await prerenderer.close()
  logger.warn('Prerender closed')
}
/**
 * Subscribes to the NSQ reader and processes one prerender job per message.
 *
 * For every message the handler:
 *   1. drops jobs carrying a `replyTo` that waited in the queue longer than JOB_TIMEOUT,
 *   2. tries to take the lock on the page's `snapshots` document (upserting it if missing),
 *   3. renders the page with the prerenderer,
 *   4. derives status, redirect and cache expiry from the rendered meta tags,
 *   5. stores the snapshot, updates the sitemap and reports the result through
 *      `callbackURL` or the `replyTo` topic.
 *
 * Uses the module-level `reader`, `mongo` connection and `jobCounter`; it must be
 * called after they have been initialised.
 */
function main() {
  /*
  snapshots schema:
  site: String
  path: String
  profile: String
  status: Number
  redirect: String
  meta: Object
  openGraph: Object
  links: Array
  html: String
  staticHTML: String
  error: String
  renderTimes: Number
  updatedAt: Date
  privateExpires: Date
  sharedExpires: Date
  removeAt: Date
  lock: String
  */
  const snapshots = mongo.db.collection('snapshots')

  // NOTE(review): an exception thrown synchronously inside this handler (e.g. from
  // msg.json()) skips handleResult(), so jobCounter is not decremented — confirm
  // whether the graceful exit needs to tolerate that.
  reader.on('message', async msg => {
    jobCounter++

    const req = msg.json()
    logger.debug(req, 'receive job')

    const {
      replyTo,
      correlationId,
      site,
      path,
      profile,
      userAgent,
      callbackURL,
      metaOnly,
      rewrites
    } = req

    const url = site + path
    // cacheStatus is reassigned to 'STALE' when a stale snapshot is served instead.
    let { cacheStatus } = req

    // msg.timestamp is scaled down by 1e6 so it can be compared with Date.now().
    const msgTimestamp = msg.timestamp.dividedBy(1000000).integerValue().toNumber()
    const msgAttemps = msg.attempts
    const jobStartTime = Date.now()

    if (replyTo) {
      // The job already waited longer than the time budget: answer right away.
      if (msgTimestamp + JOB_TIMEOUT < Date.now()) {
        logger.debug(`drop job: ${url} @${profile}`)
        return handleResult({ error: new RESTError('WORKER_BUSY').toJSON() })
      }
    }

    const lock = cuuid()

    // Only an unlocked document may be locked.
    const lockQuery = {
      site,
      path,
      profile,
      lock: null
    }

    if (cacheStatus !== 'BYPASS') {
      // expired
      lockQuery.privateExpires = { $lt: new Date() }
    }

    try {
      logger.debug(`lock: ${url} @${profile} with ${lock}`)
      await snapshots.updateOne(lockQuery, {
        $set: {
          updatedAt: new Date(),
          lock
        },
        $setOnInsert: {
          renderTimes: 0,
          privateExpires: new Date(),
          sharedExpires: new Date(),
          removeAt: new Date(Date.now() + 30 * 1000) // set to 30 secs later, prevent from cache cleaning
        }
      }, { upsert: true })
    } catch (err) {
      // don't block the queue
      msg.finish()

      // 11000: duplicate key on upsert
      if (err.code !== 11000) {
        const id = cuuid()
        logger.error({ err, id })
        return handleResult({ error: new RESTError('INTERNAL_ERROR', id).toJSON() })
      }

      // the document maybe locked by others, or is valid
      let doc
      try {
        doc = await poll(site, path, profile)
      } catch (e) {
        return handleResult({ error: e.toJSON() })
      }

      return handleResult(doc)
    }

    let doc

    try {
      logger.debug(`prerender ${url} @${profile}`)
      doc = await prerenderer.render(url, {
        timeout: JOB_TIMEOUT,
        userAgent,
        // always followRedirect when caching pages
        // in case of a request with followRedirect=true waits a cache lock of request with followRedirect=false
        followRedirect: true,
        extraMeta: {
          status: { selector: 'meta[http-equiv="Status" i]', property: 'content' },
          location: { selector: 'meta[http-equiv="Location" i]', property: 'content' },
          lastModified: { selector: 'meta[http-equiv="Last-Modified" i]', property: 'content' },
          cacheControl: { selector: 'meta[http-equiv="Cache-Control" i]', property: 'content' },
          expires: { selector: 'meta[http-equiv="Expires" i]', property: 'content' },
          error: { selector: 'meta[name="error"]', property: 'content' }
        },
        rewrites
      })
      logger.debug(`prerender ${url} @${profile} successfully`)
    } catch (e) {
      logger.debug(`prerender ${url} @${profile} failed.`, e)

      doc = {
        error: new RESTError('RENDER_ERROR', e.message).toJSON(),
        updatedAt: new Date()
      }

      updateSitemap(site, path, doc)
      updateSnapshot(doc)

      if (cacheStatus === 'BYPASS') {
        return handleResult(doc)
      }

      // Rendering failed: fall back to a previously stored snapshot if one exists.
      const staleDoc = await fetchStaleDoc()

      if (staleDoc) {
        cacheStatus = 'STALE'
        return handleResult(staleDoc)
      }

      return handleResult(doc)
    }

    doc.updatedAt = new Date()

    // A <meta http-equiv="Status"> tag overrides the status of the rendered page.
    if (doc.meta && doc.meta.status) {
      const s = parseInt(doc.meta.status, 10)

      if (!isNaN(s) && s >= 100 && s < 600) {
        doc.status = s

        if ([301, 302].includes(doc.status) && doc.meta.location) {
          doc.redirect = doc.meta.location
        } else if (doc.status === 503) {
          // Record the outage on the site document. This is fire-and-forget, so a
          // failure is logged instead of surfacing as an unhandled rejection.
          mongo.db.collection('sites').updateOne(
            { host: new URL(site).host },
            {
              $set: { [profile ? `profiles.${profile}.serviceUnavailable` : 'serviceUnavailable']: new Date() }
            }
          ).catch(e => logger.error(e))
        }
      }
    }

    if (!validHTTPStatus.includes(doc.status)) {
      let message = 'HTTP ' + doc.status

      if (doc.meta && doc.meta.error) {
        message += '. ' + doc.meta.error
      }

      doc.error = new RESTError('FETCH_ERROR', url, message).toJSON()
      updateSitemap(site, path, doc)

      const staleDoc = await fetchStaleDoc()

      if (staleDoc && validHTTPStatus.includes(staleDoc.status)) {
        // Keep the stored (valid) snapshot content; only record the new error on it.
        updateSnapshot({
          error: doc.error,
          updatedAt: doc.updatedAt
        })

        if (cacheStatus === 'BYPASS') {
          return handleResult(doc)
        }

        cacheStatus = 'STALE'
        return handleResult(staleDoc)
      }

      updateSnapshot(doc)
      return handleResult(doc)
    }

    doc.error = null

    // Cache lifetime from the page's own meta tags, when present.
    if (doc.meta) {
      if (doc.meta.cacheControl) {
        const maxageMatch = doc.meta.cacheControl.match(/max-age=(\d+)/)

        if (maxageMatch) {
          const maxage = parseInt(maxageMatch[1], 10)

          if (maxage >= 0) {
            doc.privateExpires = new Date(Date.now() + maxage * 1000)
          }
        }

        const sMaxageMatch = doc.meta.cacheControl.match(/s-maxage=(\d+)/)

        if (sMaxageMatch) {
          const sMaxage = parseInt(sMaxageMatch[1], 10)

          if (sMaxage >= 0) {
            doc.sharedExpires = new Date(Date.now() + sMaxage * 1000)
          }
        }
      }

      if (!doc.privateExpires && doc.meta.expires) {
        const d = new Date(doc.meta.expires)

        if (!isNaN(d.getTime())) {
          doc.privateExpires = d
        }
      }
    }

    // Defaults: configured lifetimes for statuses below 400, 10 seconds otherwise.
    if (!doc.privateExpires) {
      doc.privateExpires = new Date(Date.now() + (doc.status < 400 ? config.cache.maxage : 10) * 1000)
    }

    if (!doc.sharedExpires) {
      doc.sharedExpires = new Date(Date.now() + (doc.status < 400 ? config.cache.sMaxage : 10) * 1000)
    }

    // The shared expiry is never earlier than the private one.
    if (doc.sharedExpires < doc.privateExpires) {
      doc.sharedExpires = doc.privateExpires
    }

    // Use the numeric timestamp: `Date + number` would concatenate strings, so the
    // removeAfter offset would not be applied as milliseconds.
    doc.removeAt = new Date(doc.sharedExpires.getTime() + config.cache.removeAfter * 1000)

    updateSitemap(site, path, doc)
    updateSnapshot(doc)
    return handleResult(doc)

    /**
     * Logs the job outcome, delivers the result to `callbackURL` or the `replyTo`
     * topic (if either was requested), finishes the message if it has not been
     * responded to yet, and decrements the in-flight job counter.
     * A doc with a `status` is passed through normalizeDoc; otherwise its `error` is reported.
     */
    function handleResult(doc) {
      logger.info({
        url,
        profile,
        code: doc.error && doc.error.code,
        status: doc.status,
        queued: jobStartTime - msgTimestamp + 'ms',
        render: Date.now() - jobStartTime + 'ms',
        attemps: msgAttemps
      })

      if (callbackURL || replyTo) {
        let error = null

        if (doc.status) {
          doc = normalizeDoc(doc, metaOnly)
        } else {
          error = doc.error
        }

        if (callbackURL) {
          callback(callbackURL, error, doc, cacheStatus)
        } else if (replyTo) {
          nsqWriter.writer.publish(replyTo, {
            correlationId,
            error,
            doc,
            cacheStatus
          })
        }
      }

      if (!msg.hasResponded) {
        msg.finish()
      }

      jobCounter--
    }

    /**
     * Writes `doc` into the snapshot this job has locked, releases the lock and
     * increments `renderTimes`. Errors are logged, never thrown.
     */
    function updateSnapshot(doc) {
      const query = { site, path, profile, lock }
      logger.debug('update snapshot:', query)

      return snapshots.updateOne(query, {
        $set: {
          ...doc,
          lock: null
        },
        $inc: {
          renderTimes: 1
        }
      }).catch(e => logger.error(e))
    }

    /**
     * Looks up a stored snapshot for this page whose `status` is an int.
     * Resolves to undefined (after logging) when the query fails.
     */
    function fetchStaleDoc() {
      return snapshots.findOne({ site, path, profile, status: { $type: 'int' } }).catch(e => logger.error(e))
    }
  })
}