gatsby-transformer-open-graph
Version:
Gatsby plugin to add Open Graph data to MarkdownRemark nodes
214 lines (179 loc) • 4.56 kB
JavaScript
const crypto = require(`crypto`)
const Queue = require(`better-queue`)
const { createRemoteFileNode } = require(`gatsby-source-filesystem`)
/**
 * Downloads a page and extracts its metadata with the configured
 * metascraper instance.
 *
 * @param {string} targetUrl - Address of the page to fetch.
 * @returns {Promise<object>} Whatever metascraper resolves with for the
 *   fetched HTML and the `url` reported on the response.
 */
async function getMetadata(targetUrl) {
  const response = await got(targetUrl)
  const scraped = await metascraper({
    html: response.body,
    url: response.url,
  })
  return scraped
}
// Shared metascraper instance, configured with the image, title, date, url,
// description, publisher and author rule bundles listed below.
const metascraper = require('metascraper')([
require('metascraper-image')(),
require('metascraper-title')(),
require('metascraper-date')(),
require('metascraper-url')(),
require('metascraper-description')(),
require('metascraper-publisher')(),
require('metascraper-author')(),
])
const got = require('got')
/**
 * Differently-cased alias of `getMetadata`.
 *
 * The previous body referenced an undefined `targetUrl`, shadowed its own
 * `url` parameter and dropped the promise created by its inner async IIFE,
 * so it always returned `undefined` and triggered an unhandled rejection.
 * It now simply delegates to `getMetadata` so both spellings behave the same.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<object>} The metadata resolved by `getMetadata`.
 */
function getMetaData(url) {
  return getMetadata(url)
}
// Work queue through which every Open Graph lookup is funnelled.
// Each queued input is handed to createOpengraphNode; its resolved value is
// reported through the node-style callback as the result, and a rejection
// (or an exception thrown while reporting success) is reported as the error.
const opengraphQueue = new Queue(
(input, cb) => {
createOpengraphNode(input)
.then(r => cb(null, r))
.catch(e => cb(e))
},
// Queue options: up to 20 concurrent tasks, 2 retries per task,
// retryDelay of 1000 (presumably milliseconds — see better-queue docs).
{ concurrent: 20, maxRetries: 2, retryDelay: 1000 }
)
/**
 * Produces the hex-encoded MD5 hash of a value's JSON serialisation.
 *
 * @param {*} obj - Any value accepted by `JSON.stringify`.
 * @returns {string} 32-character lowercase hex digest.
 */
const createContentDigest = obj => {
  const hasher = crypto.createHash(`md5`)
  const serialized = JSON.stringify(obj)
  hasher.update(serialized)
  return hasher.digest(`hex`)
}
exports.onPreBootstrap = (
{ store, cache, actions, createNodeId, getNodes },
pluginOptions
) => {
const { createNode, touchNode } = actions
const opengraphNodes = getNodes().filter(n => n.internal.type === `Opengraph`)
if (opengraphNodes.length === 0) {
return null
}
let anyQueued = false
opengraphNodes.forEach(n => {
anyQueued = true
opengraphQueue.push({
url: n.url,
parent: n.parent,
store,
cache,
createNode,
createNodeId,
})
})
if (!anyQueued) {
return null
}
return new Promise((resolve, reject) => {
opengraphQueue.on(`drain`, () => {
resolve()
})
})
}
exports.onCreateNode = async ({
node,
actions,
store,
cache,
createNodeId,
}) => {
const { createNode, createParentChildLink } = actions
//console.log("node.internal.type=", node.internal.type)
// Only get MarkdownRemark nodes
if (node.internal.type !== `MarkdownRemark`) {
return
} else {
if (!node.frontmatter.link) {
return
}
}
const opengraphNode = await new Promise((resolve, reject) => {
opengraphQueue
.push({
url: node.frontmatter.link,
parent: node.id,
store,
cache,
createNode,
createNodeId,
})
.on(`finish`, r => {
resolve(r)
})
.on(`failed`, e => {
reject(e)
})
})
if (opengraphNode && opengraphNode.id){
createParentChildLink({
parent: node,
child: opengraphNode,
});
}
else {
return;
}
}
/**
 * Builds and registers an `Opengraph` node for a URL.
 *
 * Scrapes the page metadata, downloads the metadata image as a remote file
 * node and creates an `Opengraph` node that references it. If any step
 * throws, the failure is logged and a minimal `Opengraph` node (id, url,
 * parent only) is created instead, so a failed lookup is skipped rather
 * than propagated.
 *
 * @param {object} input
 * @param {string} input.url - Page to scrape.
 * @param {string} input.parent - Id of the node this Opengraph node belongs to.
 * @param {object} input.store - Passed through to `createRemoteFileNode`.
 * @param {object} input.cache - Passed through to `createRemoteFileNode`.
 * @param {Function} input.createNode - Gatsby action used to register nodes.
 * @param {Function} input.createNodeId - Gatsby node id generator.
 * @returns {Promise<object>} The node that was passed to `createNode`.
 */
const createOpengraphNode = async ({
  url,
  parent,
  store,
  cache,
  createNode,
  createNodeId,
}) => {
  // Stamps the content digest, registers the node and hands it back.
  // The digest is computed before `contentDigest` is assigned, so it covers
  // every other field of the node.
  const publishNode = opengraphNode => {
    opengraphNode.internal.contentDigest = createContentDigest(opengraphNode)
    createNode(opengraphNode)
    return opengraphNode
  }

  try {
    console.info('TEST process opengraph data for = ', url)
    const metadata = await getMetadata(url)

    // Image URLs containing "wsj" get a fake `?image.jpg` query appended
    // (presumably so the downloaded file ends up with an image extension —
    // TODO confirm).
    let fixedImageUrl = metadata.image
    if (metadata.image && metadata.image.includes('wsj')) {
      fixedImageUrl = fixedImageUrl + '?image.jpg'
    }

    const fileNode = await createRemoteFileNode({
      url: fixedImageUrl,
      store,
      cache,
      createNode,
      createNodeId,
    })
    if (!fileNode) {
      // The image URL is interpolated into the message: a second positional
      // argument to `Error` is not part of the message and was being lost.
      throw new Error(`Remote file node is null for image ${metadata.image}`)
    }

    return publishNode({
      id: createNodeId(`${parent} >>> Opengraph`),
      url,
      description: metadata.description,
      publisher: metadata.publisher,
      title: metadata.title,
      date: metadata.date,
      imageUrl: metadata.image,
      parent,
      children: [],
      internal: {
        type: `Opengraph`,
      },
      image___NODE: fileNode.id,
    })
  } catch (e) {
    // Deliberate best-effort: log and fall back to a bare node so one bad
    // link does not abort processing.
    console.log(`Failed to opengraph ${url} due to ${e}. Will skip OpenGraph for this article...`)
    return publishNode({
      id: createNodeId(`${parent} >>> Opengraph`),
      url,
      parent,
      children: [],
      internal: {
        type: `Opengraph`,
      },
    })
  }
}