UNPKG

instamancer

Version:

Scrape the Instagram API with Puppeteer

196 lines (195 loc) 5.2 kB
import { Type } from "io-ts";
import { Page } from "puppeteer";
import * as winston from "winston";
import { IOptions } from "./api";

/**
 * Wrapper around the Instagram API.
 *
 * @typeParam PostType the shape of the items yielded by {@link Instagram.generator}
 */
export declare class Instagram<PostType> {
    /** Fills in defaults for any options left undefined. */
    private static defaultOptions;

    // Resource being scraped.
    id: string;
    url: string;

    // Lifecycle flags.
    started: boolean;
    paused: boolean;
    finished: boolean;
    /** Why collection ended; see {@link FinishedReasons}. */
    finishedReason: FinishedReasons;

    // URLs used by the scraper.
    catchURL: string;
    postURL: string;
    defaultPostURL: string;

    // Jump tuning values.
    jumpMod: number;
    jumpSize: number;

    /** Puppeteer page handle. */
    page: Page;
    /** winston logger instance. */
    logger: winston.Logger;
    /** Zero-argument callbacks; NOTE(review): name suggests they run in the page — confirm in the implementation. */
    defaultPageFunctions: Array<() => void>;

    // Private state. Declaration files omit the types of private members,
    // so only names and modifiers are visible here.
    private readonly strict;
    private readonly validator;
    private browser;
    private browserDisconnected;
    private readonly browserInstance?;
    private readonly headless;
    private postBuffer;
    private postBufferLock;
    private requestBuffer;
    private requestBufferLock;
    private responseBuffer;
    private responseBufferLock;
    private readonly fullAPI;
    private pagePromises;
    private readonly enableGrafting;
    private readonly sameBrowser;
    private graft;
    private graftURL;
    private graftHeaders;
    private foundGraft;
    private hibernate;
    private readonly hibernationTime;
    private failedJumps;
    private responseFromAPI;
    private readonly pageQuery;
    private readonly edgeQuery;
    private postIds;
    private readonly total;
    private index;
    private jumps;
    private readonly maxPageUrlAttempts;
    private pageUrlAttempts;
    private postPageRetries;
    private readonly silent;
    private writeLock;
    private sleepRemaining;
    private readonly sleepTime;
    private readonly proxyURL;
    private readonly executablePath;
    private pluginFunctions;

    /**
     * Builds an API wrapper instance.
     *
     * @param endpoint url for the type of resource to scrape
     * @param id identifier of the resource
     * @param pageQuery query locating future pages in the nested API structure
     * @param edgeQuery query locating posts in the nested API structure
     * @param options configuration details
     * @param validator validator for the response type
     */
    constructor(
        endpoint: string,
        id: string,
        pageQuery: string,
        edgeQuery: string,
        options: IOptions,
        validator: Type<unknown>,
    );

    /** Toggles pausing of data collection. */
    pause(): void;

    /** Toggles prolonged pausing. */
    toggleHibernation(): void;

    /**
     * Forces the API to stop.
     *
     * @param force optional flag; its effect is not visible from this declaration
     */
    forceStop(force?: boolean): Promise<void>;

    /** Async generator over the posts on the page. */
    generator(): AsyncIterableIterator<PostType>;

    /** Constructs the page and attaches listeners. */
    start(): Promise<void>;

    /**
     * Matches a url against the url used in API requests.
     *
     * @param url the url to test
     */
    matchURL(url: string): boolean;

    /** Closes the page and the browser. */
    protected stop(): Promise<void>;

    /**
     * Finishes retrieving data for the generator.
     *
     * @param reason why collection is finishing
     */
    protected finish(reason: FinishedReasons): void;

    /** Processes the requests held in the request buffer. */
    protected processRequests(): Promise<void>;

    /** Processes the responses held in the response buffer. */
    protected processResponses(): Promise<void>;

    // NOTE(review): undocumented upstream; presumably handles one response payload — confirm.
    protected processResponseData(data: unknown): Promise<void>;

    /**
     * Opens a post in a new page, then extracts its metadata.
     *
     * @param post the post to open
     * @param retries retry count (NOTE(review): remaining vs. attempted is not stated — confirm)
     */
    protected postPage(post: string, retries: number): Promise<void>;

    // NOTE(review): undocumented upstream.
    private handlePostPageError;

    // NOTE(review): undocumented upstream; presumably checks `post` with the validator — confirm.
    protected validatePost(post: PostType): Promise<void>;

    /** Stimulates the page until responses have been gathered. */
    protected getNext(): Promise<void>;

    /**
     * Halts execution.
     *
     * @param time duration in seconds
     */
    protected sleep(time: number): Promise<void>;

    /** Creates the browser and page, then visits the url. */
    private constructPage;

    /** Handles errors raised during page construction. */
    private handleConstructionError;

    /** Pauses and waits until resumed. */
    private waitResume;

    /** Pops a post off the postBuffer (using locks); yields null when the buffer has no posts. */
    private postPop;

    /** Prints progress to stderr. */
    private progress;

    /** Adds a request to the request buffer. */
    private interceptRequest;

    /** Adds a response to the response buffer. */
    private interceptResponse;

    /** Logs failed requests. */
    private interceptFailure;

    /** Adds a post to the buffer. */
    private addToPostBuffer;

    /** Manipulates the page to stimulate a request. */
    private jump;

    /** Clears the request and response buffers. */
    private initiateGraft;

    /** Reads the posts that are pre-loaded on the page. */
    private scrapeDefaultPosts;

    // NOTE(review): the two plugin hooks below are undocumented upstream.
    private addPlugins;
    private executePlugins;
}

/**
 * Reasons why the collection finished.
 */
declare enum FinishedReasons {
    FORCED_STOP = 0,
    API_FINISHED = 1,
    TOTAL_REACHED_API = 2,
    TOTAL_REACHED_PAGE = 3,
    NO_RESPONSE = 4,
    NO_INCREMENT = 5,
}

export {};