/*
 * ig-headless-driver
 * Version:
 * Scrape Instagram posts by username, shortcode, or hashtag
 * 148 lines (130 loc) • 5.75 kB
 * JavaScript
 */
// Root address that every Instagram endpoint used by this module is built from.
const igBaseURL = 'https://www.instagram.com/'
// Profile pages sit directly under the root: <root><username>/
const userURL = igBaseURL
// Hashtag listings: <root>explore/tags/<tag>/
const listURL = `${igBaseURL}explore/tags/`
// Single posts: <root>p/<shortcode>/
const postURL = `${igBaseURL}p/`
// Location listings; not referenced by any function in the visible source.
const locURL = `${igBaseURL}explore/locations/`
// Captures everything from the opening "{" that follows `window._sharedData =`
// up to the last "</script>" on the same line. Also not referenced in the visible source.
const dataExp = /window\._sharedData\s?=\s?({.+)<\/script>/
const fetch = require('node-fetch')
/**
 * Fetch the JSON representation of a profile page and return its user record
 * together with the timeline and video-timeline edges.
 *
 * The request goes to `userURL + <username> + '/?__a=1'` with a mobile Safari
 * User-Agent header.
 *
 * @param {string} username - Profile name; URL-encoded before it is placed in the path.
 * @returns {Promise<Object|Error>} Resolves with
 *   `{ user, post, story }` where `user` is the response's `graphql.user` object,
 *   `post` is `user.edge_owner_to_timeline_media.edges` and
 *   `story` is `user.edge_felix_video_timeline.edges`.
 *   When `username` is falsy it resolves with an Error instance (see note below).
 * @throws {Error} (as a rejection) when the HTTP response reports failure or the
 *   payload has no `graphql.user`; JSON parse failures from `fetch` propagate as-is.
 */
exports.getUser = async function (username) {
  // NOTE: a missing argument resolves with an Error instead of rejecting. That is
  // unusual, but it is the established contract, so it is kept for existing callers.
  if (!username) return (new Error('Argument "username" must be specified'))
  const options = {
    headers: {
      'User-Agent': 'Mozilla/5.0 (iPhone CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4'
    }
  }
  // Encode the name so characters such as "/", "?" or "#" cannot change the request path.
  const result = await fetch(userURL + encodeURIComponent(username) + '/?__a=1', options)
  // Compared against `false` so a response object without an `ok` flag is not rejected.
  if (result.ok === false) {
    throw new Error(`Instagram request for user "${username}" failed with HTTP status ${result.status}`)
  }
  const res = await result.json()
  const obj = res && res.graphql && res.graphql.user
  if (!obj) {
    throw new Error(`Instagram response for user "${username}" has no graphql.user`)
  }
  return {
    user: obj,
    post: obj.edge_owner_to_timeline_media.edges,
    story: obj.edge_felix_video_timeline.edges
  }
}
/**
 * Fetch a single post by shortcode and flatten it into a summary object.
 *
 * The request goes to `postURL + <shortcode> + '/?__a=1'` with a mobile Safari
 * User-Agent header. The summary always carries the fields read from
 * `graphql.shortcode_media` (ids, caption text, comment/like counts, URLs, timestamp)
 * and a `type` that depends on `__typename`:
 *   - 'GraphImage'   -> type 'image'
 *   - 'GraphSidecar' -> type 'sidecar' plus `gallery`, one `getPostData` result per child edge
 *     (a child that fails to convert becomes `{ code: 0, error }`)
 *   - 'GraphVideo'   -> type 'video' plus the fields `getPostData` adds for videos
 *   - anything else  -> `type` is left unset
 *
 * @param {string} shortcode - Post shortcode; URL-encoded before it is placed in the path.
 * @returns {Promise<Object|Error>} The summary object, or an Error instance when
 *   `shortcode` is falsy (see note below).
 * @throws {Error} (as a rejection) when the HTTP response reports failure or the
 *   payload has no `graphql.shortcode_media`; JSON parse failures propagate as-is.
 */
exports.getPost = async function (shortcode) {
  // NOTE: a missing argument resolves with an Error instead of rejecting. That is
  // unusual, but it is the established contract, so it is kept for existing callers.
  if (!shortcode) return (new Error('Argument "shortcode" must be specified'))
  const options = {
    headers: {
      'User-Agent': 'Mozilla/5.0 (iPhone CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4'
    }
  }
  // Encode the shortcode so characters such as "/", "?" or "#" cannot change the request path.
  const result = await fetch(postURL + encodeURIComponent(shortcode) + '/?__a=1', options)
  // Compared against `false` so a response object without an `ok` flag is not rejected.
  if (result.ok === false) {
    throw new Error(`Instagram request for post "${shortcode}" failed with HTTP status ${result.status}`)
  }
  const res = await result.json()
  const obj = res && res.graphql && res.graphql.shortcode_media
  if (!obj) {
    throw new Error(`Instagram response for post "${shortcode}" has no graphql.shortcode_media`)
  }
  // A post without a caption has an empty edge list; report that as an empty string.
  const captionEdge = obj.edge_media_to_caption.edges[0]
  const text = captionEdge ? captionEdge.node.text : ''
  const parentComments = obj.edge_media_to_parent_comment
  let response = {
    media_id: obj.id,
    shortcode: obj.shortcode,
    text: text,
    comment_count: (parentComments && parentComments.count) || 0,
    comments: (parentComments && parentComments.edges) || [],
    like_count: obj.edge_media_preview_like.count,
    display_url: obj.display_url,
    owner_id: obj.owner.id,
    date: obj.taken_at_timestamp,
    media_preview: obj.media_preview,
    display_resources: obj.display_resources,
    accessibility_caption: obj.accessibility_caption
  }
  if (obj.__typename === 'GraphImage') {
    // A single image needs nothing beyond the fields collected above.
    response.type = 'image'
  } else if (obj.__typename === 'GraphSidecar') {
    // Convert every child edge; one bad child must not fail the whole post.
    response.type = 'sidecar'
    response.gallery = obj.edge_sidecar_to_children.edges.map((post) => {
      try {
        return exports.getPostData(post, 'GraphImage')
      } catch (err) {
        return { code: 0, error: err }
      }
    })
  } else if (obj.__typename === 'GraphVideo') {
    // Merge instead of replace: getPostData contributes the thumbnail and video fields,
    // while owner_id, comments, media_preview, etc. collected above are kept.
    // For caption and counts, use whichever side actually found a value.
    const video = exports.getPostData(obj, obj.__typename)
    response = Object.assign({}, response, video, {
      text: video.text || response.text,
      comment_count: video.comment_count || response.comment_count,
      like_count: video.like_count || response.like_count
    })
    response.type = 'video'
  }
  return response
}
/**
 * Fetch the JSON representation of a hashtag page and return the tag name with
 * its media edges.
 *
 * The request goes to `listURL + <tag> + '/?__a=1'` with a mobile Safari
 * User-Agent header.
 *
 * @param {string} tag - Hashtag text; URL-encoded before it is placed in the path.
 * @returns {Promise<Object|Error>} Resolves with `{ hashtag: { name }, post }` where
 *   `post` is `graphql.hashtag.edge_hashtag_to_media.edges`, or with an Error
 *   instance when `tag` is falsy (see note below).
 * @throws {Error} (as a rejection) when the HTTP response reports failure or the
 *   payload has no `graphql.hashtag`; JSON parse failures propagate as-is.
 */
exports.getTag = async function (tag) {
  // NOTE: a missing argument resolves with an Error instead of rejecting. That is
  // unusual, but it is the established contract, so it is kept for existing callers.
  if (!tag) return (new Error('Argument "tag" must be specified'))
  const options = {
    headers: {
      'User-Agent': 'Mozilla/5.0 (iPhone CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4'
    }
  }
  // Encode the tag so characters such as "/", "?" or "#" cannot change the request path.
  const result = await fetch(listURL + encodeURIComponent(tag) + '/?__a=1', options)
  // Compared against `false` so a response object without an `ok` flag is not rejected.
  if (result.ok === false) {
    throw new Error(`Instagram request for tag "${tag}" failed with HTTP status ${result.status}`)
  }
  const res = await result.json()
  const obj = res && res.graphql && res.graphql.hashtag
  if (!obj) {
    throw new Error(`Instagram response for tag "${tag}" has no graphql.hashtag`)
  }
  return {
    hashtag: { name: obj.name },
    post: obj.edge_hashtag_to_media.edges
  }
}
exports.getPostData = function (postData, type) {
let response
if (type === 'GraphImage') {
response = {
media_id: postData.node.id,
shortcode: postData.node.shortcode,
text: postData.node.edge_media_to_caption && postData.node.edge_media_to_caption.edges[0] && postData.node.edge_media_to_caption.edges[0].node.text,
comment_count: postData.node.edge_media_to_comment && postData.node.edge_media_to_comment.count ? postData.node.edge_media_to_comment.count : 0,
like_count: postData.node.edge_liked_by && postData.node.edge_liked_by.count ? postData.node.edge_liked_by.count : 0,
display_url: postData.node.display_url,
date: postData.node.taken_at_timestamp,
thumbnail: postData.node.thumbnail_src,
thumbnail_resource: postData.node.thumbnail_resources
}
} else {
response = {
media_id: postData.id,
shortcode: postData.shortcode,
text: postData.edge_media_to_caption && postData.edge_media_to_caption.edges[0] && postData.edge_media_to_caption.edges[0].text,
comment_count: postData.edge_media_to_comment && postData.edge_media_to_comment.count ? postData.edge_media_to_comment.count : 0,
like_count: postData.edge_liked_by && postData.edge_liked_by.count ? postData.edge_liked_by.count : 0,
display_url: postData.display_url,
date: postData.taken_at_timestamp,
thumbnail: postData.thumbnail_src,
thumbnail_resource: postData.thumbnail_resources
}
response.video_view_count = postData.video_view_count
response.video_url = postData.video_url
}
return response
}