/*
 * instamancer
 * Version:
 * Scrape the Instagram API with Puppeteer
 * 196 lines (195 loc) • 5.2 kB
 * TypeScript
 */
import { Type } from "io-ts";
import { Page } from "puppeteer";
import * as winston from "winston";
import { IOptions } from "./api";
/**
* Instagram API wrapper.
*
* This is an ambient (`declare`) class: only signatures are present here.
* Private members are listed without type annotations, so their shapes
* cannot be read from this declaration.
*
* @typeParam PostType the post type yielded by `generator()` and accepted by
* `validatePost()`
*/
export declare class Instagram<PostType> {
/**
* Apply defaults to undefined options
*/
private static defaultOptions;
/**
* Resource identifier.
* NOTE(review): presumably the `id` passed to the constructor - confirm.
*/
id: string;
/**
* NOTE(review): presumably the URL of the scraped resource, derived from the
* constructor's `endpoint` and `id` - confirm against the implementation.
*/
url: string;
/**
* Lifecycle flags.
* NOTE(review): presumably driven by `start()`, `pause()` and `finish()`
* respectively - confirm.
*/
started: boolean;
paused: boolean;
finished: boolean;
/**
* Why the collection finished, expressed as a `FinishedReasons` member.
*/
finishedReason: FinishedReasons;
/**
* URL strings whose exact roles are not visible in this declaration.
* NOTE(review): `catchURL` is presumably what `matchURL()` compares
* against - TODO confirm.
*/
catchURL: string;
postURL: string;
defaultPostURL: string;
/**
* Numeric tuning values.
* NOTE(review): presumably consumed by the private `jump` member - confirm.
*/
jumpMod: number;
jumpSize: number;
/**
* Puppeteer page handle.
*/
page: Page;
/**
* winston logger instance.
*/
logger: winston.Logger;
/**
* Zero-argument callbacks that return nothing.
* NOTE(review): when and where these run is not visible here - confirm.
*/
defaultPageFunctions: (() => void)[];
// Private state follows. Types are omitted in this declaration, so nothing
// beyond each member's name and `readonly` modifier can be relied on here.
private readonly strict;
private readonly validator;
private browser;
private browserDisconnected;
private readonly browserInstance?;
private readonly headless;
private postBuffer;
private postBufferLock;
private requestBuffer;
private requestBufferLock;
private responseBuffer;
private responseBufferLock;
private readonly fullAPI;
private pagePromises;
private readonly enableGrafting;
private readonly sameBrowser;
private graft;
private graftURL;
private graftHeaders;
private foundGraft;
private hibernate;
private readonly hibernationTime;
private failedJumps;
private responseFromAPI;
private readonly pageQuery;
private readonly edgeQuery;
private postIds;
private readonly total;
private index;
private jumps;
private readonly maxPageUrlAttempts;
private pageUrlAttempts;
private postPageRetries;
private readonly silent;
private writeLock;
private sleepRemaining;
private readonly sleepTime;
private readonly proxyURL;
private readonly executablePath;
private pluginFunctions;
/**
* Create API wrapper instance
* @param endpoint the url for the type of resource to scrape
* @param id the identifier for the resource
* @param pageQuery the query to identify future pages in the nested API structure
* @param edgeQuery the query to identify posts in the nested API structure
* @param options configuration details
* @param validator response type validator
*/
constructor(endpoint: string, id: string, pageQuery: string, edgeQuery: string, options: IOptions, validator: Type<unknown>);
/**
* Toggle pausing data collection
*/
pause(): void;
/**
* Toggle prolonged pausing
*/
toggleHibernation(): void;
/**
* Force the API to stop
* @param force optional flag; its effect is not visible in this
* declaration - TODO confirm against the implementation
* @returns a promise that settles once the stop request has been handled
*/
forceStop(force?: boolean): Promise<void>;
/**
* Generator of posts on page
* @returns an async iterator yielding `PostType` values
*/
generator(): AsyncIterableIterator<PostType>;
/**
* Construct page and add listeners
*/
start(): Promise<void>;
/**
* Match the url to the url used in API requests
* @param url the url to test
* @returns whether the url matched
*/
matchURL(url: string): boolean;
/**
* Close the page and browser
*/
protected stop(): Promise<void>;
/**
* Finish retrieving data for the generator
* @param reason the `FinishedReasons` member describing why collection ended
*/
protected finish(reason: FinishedReasons): void;
/**
* Process the requests in the request buffer
*/
protected processRequests(): Promise<void>;
/**
* Process the responses in the response buffer
*/
protected processResponses(): Promise<void>;
/**
* Handle one untyped response payload.
* NOTE(review): presumably called from `processResponses()` for each
* buffered response - confirm.
* @param data payload typed as `unknown`; callers get no shape guarantee
*/
protected processResponseData(data: unknown): Promise<void>;
/**
* Open a post in a new page, then extract its metadata
* @param post string identifying the post; presumably a post id or
* shortcode - TODO confirm
* @param retries retry count; whether this is attempts made or attempts
* remaining is not visible here - TODO confirm
*/
protected postPage(post: string, retries: number): Promise<void>;
// NOTE(review): by name, the error path for `postPage()` - confirm.
private handlePostPageError;
/**
* Validate a single post.
* NOTE(review): presumably checks the post with the `validator` supplied to
* the constructor; what happens on failure is not visible here - confirm.
* @param post the post to validate
*/
protected validatePost(post: PostType): Promise<void>;
/**
* Stimulate the page until responses gathered
*/
protected getNext(): Promise<void>;
/**
* Halt execution
* @param time Seconds
*/
protected sleep(time: number): Promise<void>;
/**
* Create the browser and page, then visit the url
*/
private constructPage;
/**
* Handle errors that occur during page construction
*/
private handleConstructionError;
/**
* Pause and wait until resumed
*/
private waitResume;
/**
* Pop a post off the postBuffer (using locks). Returns null if no posts in buffer
*/
private postPop;
/**
* Print progress to stderr
*/
private progress;
/**
* Add request to the request buffer
*/
private interceptRequest;
/**
* Add the response to the response buffer
*/
private interceptResponse;
/**
* Log failed requests
*/
private interceptFailure;
/**
* Add post to buffer
*/
private addToPostBuffer;
/**
* Manipulate the page to stimulate a request
*/
private jump;
/**
* Clear request and response buffers
*
* NOTE(review): the member name suggests this also starts a graft; the
* description above only mentions buffer clearing - confirm against the
* implementation.
*/
private initiateGraft;
/**
* Read the posts that are pre-loaded on the page
*/
private scrapeDefaultPosts;
// NOTE(review): by name, registration and execution of plugin callbacks
// (see `pluginFunctions`) - confirm.
private addPlugins;
private executePlugins;
}
/**
* Reasons why the collection finished
*
* Used as the type of `Instagram.finishedReason` and of the `reason`
* parameter of `Instagram.finish()`. Declared without `export`, so it is
* referenced only from within this module.
*
* The numeric values are part of the declared contract. The per-member notes
* below are inferred from the member names only - confirm against the
* implementation.
*/
declare enum FinishedReasons {
// presumably: stopped through `forceStop()`
FORCED_STOP = 0,
// presumably: the API reported no further pages
API_FINISHED = 1,
// presumably: requested total reached using API responses
TOTAL_REACHED_API = 2,
// presumably: requested total reached using posts read from the page
TOTAL_REACHED_PAGE = 3,
// presumably: no response was received
NO_RESPONSE = 4,
// presumably: the collected post count stopped increasing
NO_INCREMENT = 5
}
export {};