/*
 * @skypilot/scraper — Node-based scriptable web scraper
 * Version: (not specified)
 * Source listing: 78 lines (77 loc), 3.4 kB, TypeScript
 */
import type { Integer } from '@skypilot/common-types';
import { DirectoryLike } from '@skypilot/sugarbowl';
import type { Browser, BrowserContext, ElementHandle, Page } from 'playwright';
import type { Dict, Href, Html } from 'src/_types/pseudotypes';
import type { BrowserOptions, ClickOptions, NavOptions, Query, QueryDict, QueryOptions, QueryResult, RunOnOptions, RunOptions, ScraperCommand, ScraperContext, StateOptions } from 'src/_types/scraper';
import type { ScriptBuilder } from 'src/scraper/ScriptBuilder';
import { Logger } from 'src/utils/Logger';
/**
 * Shape of the database handle that a scraper instance can hold.
 *
 * The handle must expose four members — `add`, `get`, `native` and `update` —
 * each of which is typed `any`, so this declaration constrains only their
 * presence, not their signatures.
 * NOTE(review): the members are presumably callables / a native client
 * reference; confirm against the implementation before tightening the types.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- members are untyped in the original declaration
declare type Database = Record<'add' | 'get' | 'native' | 'update', any>;
/**
 * Scraper-specific constructor options; every field is optional.
 */
interface PlaywrightScraperOptions {
  /** Database handle to attach to the scraper. */
  database?: Database;

  /**
   * Directory-like value used for logging.
   * NOTE(review): presumably the location log files are written to — confirm.
   */
  logDir?: DirectoryLike;

  /** Name of the log file (presumably relative to `logDir` — confirm). */
  logFileName?: string;

  /** Verbosity flag; its exact effect is not visible from this declaration. */
  verbose?: boolean;
}
/**
 * Public type surface of the Playwright-backed scraper.
 *
 * This is a declaration only: it lists the members and their types, not their
 * behavior. Comments marked "presumably" are inferences from names and
 * signatures and should be confirmed against the implementation.
 */
export declare class PlaywrightScraper {
  /** Playwright browser handle, or `null` when none is held. */
  browser: Browser | null;

  /** Browser options held by the instance. */
  browserOptions: BrowserOptions;

  /** Playwright browser context, or `null` when none is held. */
  context: BrowserContext | null;

  /** Logger instance; cannot be reassigned. */
  readonly logger: Logger;

  /** Attached database handle, or `null`; cannot be reassigned. */
  readonly database: Database | null;

  /** Playwright pages tracked by this instance. */
  pages: Page[];

  /** Numeric run level (presumably the nesting depth of `run` calls — confirm). */
  runLevel: number;

  private readonly state;

  private readonly variableDelayInMs;

  /**
   * @param options - Optional combination of scraper options and browser options.
   */
  constructor(options?: PlaywrightScraperOptions & BrowserOptions);

  /** A page, or `undefined` (presumably the most recent entry of `pages` — confirm). */
  get currentPage(): Page | undefined;

  /**
   * Accepts a log message as either a string or a record.
   *
   * @param message - Text or key/value record to log.
   * @param options - Optional settings; `prefix` is an optional string.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- record values are untyped in the original declaration
  addToLog(message: string | Record<string, any>, options?: { prefix?: string }): void;

  /** Synchronous; takes no arguments and returns nothing. */
  clearState(): void;

  /**
   * @param query - Query identifying the click target.
   * @param options - Optional click options.
   */
  click(query: Query, options?: ClickOptions): Promise<void>;

  /** Asynchronous; resolves with no value. */
  close(): Promise<void>;

  /** Asynchronous; resolves with no value. */
  closeAllPages(): Promise<void>;

  /** Asynchronous; resolves with no value. */
  closeLatestPage(): Promise<void>;

  /**
   * @param query - Query to evaluate.
   * @returns An integer (presumably the number of matches — confirm).
   */
  count(query: Query): Promise<Integer>;

  /**
   * @param query - Query to evaluate.
   * @param navOptions - Optional navigation options.
   * @returns A scraper context.
   */
  follow(query: Query, navOptions?: NavOptions): Promise<ScraperContext>;

  /**
   * Overload for queries whose `scope` is the literal `'all'`: resolves to an
   * array of nullable strings.
   */
  get(query: Query & { scope: 'all' }): Promise<(string | null)[]>;

  /** General overload: resolves to a single nullable string. */
  get(query: Query): Promise<string | null>;

  /** @returns A scraper context. */
  getContext(): Promise<ScraperContext>;

  /** @returns HTML, or `null`. */
  getPageContent(): Promise<Html | null>;

  /** @returns A dictionary, synchronously. */
  getState(): Dict;

  /** @returns A single nullable string for the query. */
  getAttribute(query: Query): Promise<string | null>;

  /** @returns An array of nullable strings for the query. */
  getAttributeAll(query: Query): Promise<(string | null)[]>;

  /** @returns A single nullable string for the query. */
  getOuterHtml(query: Query): Promise<string | null>;

  /** @returns An array of nullable strings for the query. */
  getOuterHtmlAll(query: Query): Promise<(string | null)[]>;

  /** @returns A single nullable string for the query. */
  getTextContent(query: Query): Promise<string | null>;

  /**
   * @returns An array of strings for the query.
   * NOTE(review): unlike the other `*All` getters, elements here are not
   * nullable — confirm this is intentional.
   */
  getTextContentAll(query: Query): Promise<string[]>;

  /** @returns A single nullable string for the query. */
  getTextValue(query: Query): Promise<string | null>;

  /** @returns An array of nullable strings for the query. */
  getTextValueAll(query: Query): Promise<(string | null)[]>;

  /**
   * @param url - Target href.
   * @param options - Optional navigation options.
   * @returns A scraper context.
   */
  goTo(url: Href, options?: NavOptions): Promise<ScraperContext>;

  /** @returns A boolean for the query (presumably whether it matches — confirm). */
  has(query: Query): Promise<boolean>;

  /** Synchronous boolean check. */
  isConnected(): boolean;

  /** @returns An object carrying a `version` string. */
  launch(): Promise<{ version: string }>;

  /** @returns A Playwright page. */
  newPage(): Promise<Page>;

  /**
   * Overload taken when `options` is supplied with a `collectionName`:
   * resolves to a string-keyed map of `QueryResult<Q>`.
   */
  query<Q extends QueryDict>(
    queryDict: Q,
    options: QueryOptions & { collectionName: string },
  ): Promise<Record<string, QueryResult<Q>>>;

  /**
   * General overload: resolves to a record with the same keys as `queryDict`,
   * each holding a string or an array of strings.
   */
  query<Q extends QueryDict>(
    queryDict: Q,
    options?: QueryOptions,
  ): Promise<Record<keyof Q, string | string[]>>;

  /**
   * @typeParam Result - Shape of the resolved value; defaults to `Dict`.
   * @param commandsOrBuilder - A read-only list of commands, or a script builder.
   * @param options - Optional run options.
   */
  run<Result extends Dict = Dict>(
    commandsOrBuilder: readonly ScraperCommand[] | ScriptBuilder,
    options?: RunOptions,
  ): Promise<Result>;

  /**
   * @typeParam Result - Shape of each resolved element; defaults to `Dict`.
   * @param query - Query to evaluate.
   * @param commands - Read-only list of commands.
   * @param options - Optional run-on options.
   * @returns An array of results.
   */
  runOnAll<Result extends Dict = Dict>(
    query: Query,
    commands: readonly ScraperCommand[],
    options?: RunOnOptions,
  ): Promise<Result[]>;

  /** @returns A Playwright element handle, or `null`. */
  select(query: Query): Promise<ElementHandle | null>;

  /** @returns An array of Playwright element handles. */
  selectAll(query: Query): Promise<ElementHandle[]>;

  /**
   * @param data - Dictionary of values.
   * @param options - Optional state options.
   * @returns A dictionary, synchronously.
   */
  updateState(data: Dict, options?: StateOptions): Dict;

  /**
   * @param collectionName - Name of the target collection.
   * @param options - Optional settings.
   *   NOTE(review): typed as `NavOptions`, which looks unusual for a write
   *   operation — confirm this is the intended options type.
   * @returns An untyped value.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- result is untyped in the original declaration
  write(collectionName: string, options?: NavOptions): Promise<any>;
}
// Empty export list: exports no additional names from this file.
export {};