UNPKG

ig-headless-driver

Version:

Scrape Instagram posts by username, shortcode, or hashtag

148 lines (130 loc) 5.75 kB
const userURL = 'https://www.instagram.com/' const listURL = 'https://www.instagram.com/explore/tags/' const postURL = 'https://www.instagram.com/p/' const locURL = 'https://www.instagram.com/explore/locations/' const dataExp = /window\._sharedData\s?=\s?({.+)<\/script>/ const fetch = require('node-fetch') exports.getUser = async function (username) { if (!username) return (new Error('Argument "username" must be specified')) let options = { headers: { 'User-Agent': 'Mozilla/5.0 (iPhone CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4' } } let result = await fetch(userURL + username + '/?__a=1', options) let res = await result.json() let obj = res.graphql.user let user = { bio: obj.biography, followed_by: obj.edge_followed_by.count, following: obj.edge_follow.count, full_name: obj.full_name, user_id: obj.id, is_business: obj.is_business_account, username: obj.username, connected_fb: obj.connected_fb_page, profile_pic: obj.profile_pic_url } let response = { user: res.graphql.user, post: obj.edge_owner_to_timeline_media.edges, story: obj.edge_felix_video_timeline.edges } return response } exports.getPost = async function (shortcode) { if (!shortcode) return (new Error('Argument "shortcode" must be specified')) let options = { headers: { 'User-Agent': 'Mozilla/5.0 (iPhone CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4' } } let result = await fetch(postURL + shortcode + '/?__a=1', options) let res = await result.json() let obj = res.graphql.shortcode_media let text = obj.edge_media_to_caption.edges[0] ? obj.edge_media_to_caption.edges[0].node.text : '' let response = { media_id: obj.id, shortcode: obj.shortcode, text: text, comment_count: obj.edge_media_to_parent_comment && obj.edge_media_to_parent_comment.count ? 
obj.edge_media_to_parent_comment.count : 0, comments: obj.edge_media_to_parent_comment && obj.edge_media_to_parent_comment.edges ? obj.edge_media_to_parent_comment.edges : [], like_count: obj.edge_media_preview_like.count, display_url: obj.display_url, owner_id: obj.owner.id, date: obj.taken_at_timestamp, media_preview: obj.media_preview, display_resources: obj.display_resources, accessibility_caption: obj.accessibility_caption, } if (obj.__typename === 'GraphImage') { // there's only one image, it's loaded response.type = 'image' } else if (obj.__typename === 'GraphSidecar') { // go grab the sidecar images response.type = 'sidecar' response.gallery = obj.edge_sidecar_to_children.edges.map((post) => { let res try { res = exports.getPostData(post, 'GraphImage') } catch (err) { res = { code: 0, error: err } } return res }) } else if (obj.__typename === 'GraphVideo') { response = exports.getPostData(obj, obj.__typename) response.type = 'video' } return response } exports.getTag = async function (tag) { if (!tag) return (new Error('Argument "tag" must be specified')) let options = { headers: { 'User-Agent': 'Mozilla/5.0 (iPhone CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4' } } let result = await fetch(listURL + tag + '/?__a=1', options) let res = await result.json() let obj = res.graphql.hashtag let hashtag = { name: obj.name, } let response = { hashtag, post: obj.edge_hashtag_to_media.edges, } return response } exports.getPostData = function (postData, type) { let response if (type === 'GraphImage') { response = { media_id: postData.node.id, shortcode: postData.node.shortcode, text: postData.node.edge_media_to_caption && postData.node.edge_media_to_caption.edges[0] && postData.node.edge_media_to_caption.edges[0].node.text, comment_count: postData.node.edge_media_to_comment && postData.node.edge_media_to_comment.count ? 
postData.node.edge_media_to_comment.count : 0, like_count: postData.node.edge_liked_by && postData.node.edge_liked_by.count ? postData.node.edge_liked_by.count : 0, display_url: postData.node.display_url, date: postData.node.taken_at_timestamp, thumbnail: postData.node.thumbnail_src, thumbnail_resource: postData.node.thumbnail_resources } } else { response = { media_id: postData.id, shortcode: postData.shortcode, text: postData.edge_media_to_caption && postData.edge_media_to_caption.edges[0] && postData.edge_media_to_caption.edges[0].text, comment_count: postData.edge_media_to_comment && postData.edge_media_to_comment.count ? postData.edge_media_to_comment.count : 0, like_count: postData.edge_liked_by && postData.edge_liked_by.count ? postData.edge_liked_by.count : 0, display_url: postData.display_url, date: postData.taken_at_timestamp, thumbnail: postData.thumbnail_src, thumbnail_resource: postData.thumbnail_resources } response.video_view_count = postData.video_view_count response.video_url = postData.video_url } return response }