hammer-scrape
Version:
Unifies Cheerio and Puppeteer for the most streamlined scraping experience
125 lines • 4.2 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const request_group_puppeteer_1 = require("request-group-puppeteer");
const web_scraping_engine_1 = require("../web_scraping_engine");
const core_errors_1 = require("../core_errors");
/**
 * Default configuration options for this manipulation core.
 *
 * `sharedRequest: null` means no existing request is supplied. When
 * `PuppeteerManipulatingCore#initialize` receives options whose
 * `sharedRequest` is null, it creates and runs its own `PuppeteerRequest`.
 *
 * NOTE(review): presumably callers pass this object to `initialize` when they
 * have no request to share - confirm against the call sites.
 */
exports.PUPPETEER_MANIPULATING_CORE_DEFAULT = {
sharedRequest: null,
};
/**
 * Manipulation core that drives a page obtained through `request-group-puppeteer`.
 *
 * Lifecycle: construct, `await initialize(data)`, call the page helpers
 * (`select`, `click`, `type`, `raw`, `getDocumentHtml`), then `await dispose()`.
 *
 * Every asynchronous method returns a promise that REJECTS when the underlying
 * operation fails. (Earlier revisions wrapped the work in
 * `new Promise(async (resolve) => ...)`, which dropped such failures and left
 * the returned promise pending forever.)
 */
class PuppeteerManipulatingCore extends web_scraping_engine_1.ManipulationCore {
    /**
     * @param url Forwarded to the base class; read back through `getUrl()` when
     *   this core creates its own request.
     * @param sharedManager Optional manager to reuse. When it is `undefined`, a
     *   new `PuppeteerManager` configured with width 1920 and height 1080 is
     *   created and considered owned by this core.
     */
    constructor(url, sharedManager) {
        super(url);
        this.request = null;
        this.initialized = false;
        // Remember whether the manager came from the caller: dispose() only
        // disposes managers that were not passed in here.
        this.sharingManager = sharedManager !== undefined;
        this.manager =
            sharedManager !== undefined
                ? sharedManager
                : new request_group_puppeteer_1.PuppeteerManager({
                    width: 1920,
                    height: 1080,
                });
    }

    /**
     * @returns {boolean} `true` once `initialize()` has completed and until
     *   `dispose()` completes.
     */
    isInitialized() {
        return this.initialized;
    }

    /**
     * Returns the request currently attached to this core.
     *
     * @throws {CoreRequestNotCreatedError} when no request is attached yet
     *   (before `initialize()` or after `dispose()`).
     */
    getRequest() {
        if (this.request === null) {
            throw new core_errors_1.CoreRequestNotCreatedError();
        }
        return this.request;
    }

    /**
     * Prepares the core for use. Calling it again while initialized is a no-op.
     *
     * @param data Options object. If `data.sharedRequest` is not null, that
     *   request and its manager (`getManager()`) are adopted; otherwise a new
     *   `PuppeteerRequest` for `getUrl()` is created and run.
     * @returns {Promise<void>} Resolves once initialized; rejects if the
     *   manager initialization or the request run fails.
     */
    async initialize(data) {
        if (this.isInitialized()) {
            return;
        }
        if (data.sharedRequest !== null) {
            // NOTE(review): `sharingManager` is not updated here, so dispose()
            // will later dispose this adopted request and - unless a manager
            // was passed to the constructor - its manager as well. Confirm
            // that this ownership transfer is intended.
            this.request = data.sharedRequest;
            this.manager = data.sharedRequest.getManager();
        }
        // as of request-group-puppeteer version 1.2.0 there is no need to worry about reinitializing the puppeteer instance
        await this.manager.initialize();
        if (data.sharedRequest === null) {
            this.request = new request_group_puppeteer_1.PuppeteerRequest(this.getUrl(), this.manager);
            await this.request.run();
        }
        this.initialized = true;
    }

    /**
     * Calls `select(querySelector, value)` on the request's page.
     *
     * @returns {Promise<void>} Rejects if the page operation fails.
     * @throws {CoreNotInitializedError} synchronously, when not initialized.
     */
    select(querySelector, value) {
        if (!this.isInitialized()) {
            throw new core_errors_1.CoreNotInitializedError();
        }
        // The async arrow starts the page call immediately and turns any
        // failure into a rejection of the returned promise.
        return (async () => {
            await this.request.getPage().select(querySelector, value);
        })();
    }

    /**
     * Calls `click(querySelector)` on the request's page.
     *
     * @returns {Promise<void>} Rejects if the page operation fails.
     * @throws {CoreNotInitializedError} synchronously, when not initialized.
     */
    click(querySelector) {
        if (!this.isInitialized()) {
            throw new core_errors_1.CoreNotInitializedError();
        }
        return (async () => {
            await this.request.getPage().click(querySelector);
        })();
    }

    /**
     * Calls `type(querySelector, value)` on the request's page.
     *
     * @returns {Promise<void>} Rejects if the page operation fails.
     * @throws {CoreNotInitializedError} synchronously, when not initialized.
     */
    type(querySelector, value) {
        if (!this.isInitialized()) {
            throw new core_errors_1.CoreNotInitializedError();
        }
        return (async () => {
            await this.request.getPage().type(querySelector, value);
        })();
    }

    /**
     * Returns the page object of the attached request (`request.getPage()`)
     * for direct use.
     *
     * @throws {CoreNotInitializedError} when not initialized.
     */
    raw() {
        if (!this.isInitialized()) {
            throw new core_errors_1.CoreNotInitializedError();
        }
        return this.request.getPage();
    }

    /**
     * Releases the resources held by this core. A no-op when not initialized.
     *
     * Disposes the attached request, then the manager unless a manager was
     * supplied to the constructor.
     *
     * @returns {Promise<void>} Rejects if disposing the request or the manager
     *   fails; in that case the core stays marked as initialized.
     */
    async dispose() {
        if (!this.isInitialized()) {
            return;
        }
        if (this.request !== null) {
            await this.request.dispose();
            this.request = null;
        }
        if (this.manager !== null && !this.sharingManager) {
            await this.manager.dispose();
        }
        this.initialized = false;
    }

    /**
     * Evaluates `document.body.outerHTML` through the page's `evaluate()`.
     *
     * @returns {Promise<*>} Resolves with the value produced by `evaluate()`;
     *   rejects if the evaluation fails.
     * @throws {CoreNotInitializedError} synchronously, when not initialized.
     */
    getDocumentHtml() {
        if (!this.isInitialized()) {
            throw new core_errors_1.CoreNotInitializedError();
        }
        return (async () => {
            // `document` is resolved by evaluate(), not by this module.
            const html = await this.raw().evaluate(() => document.body.outerHTML);
            return html;
        })();
    }
}
// Expose the class as both a named export and the module's default export.
exports.PuppeteerManipulatingCore = PuppeteerManipulatingCore;
exports.default = PuppeteerManipulatingCore;
//# sourceMappingURL=puppeteer_manipulate.js.map