hammer-scrape
Version:
Unifies Cheerio and Puppeteer for the most streamlined scraping experience
175 lines • 5.69 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const engine_mode_1 = require("./engine_mode");
const engine_errors_1 = require("./engine_errors");
/**
 * Runs `task` while `engine` is locked into `activeMode`, then puts the engine back into Idling.
 *
 * The mode switch and the call to `task` happen synchronously; only the completion is awaited.
 * The `finally` guarantees the engine returns to Idling even when `task` throws or rejects, so a
 * failing callback can no longer leave the engine permanently locked.
 * @param engine The engine whose mode is being managed
 * @param activeMode The mode to hold while `task` is running
 * @param getCore Produces the core handed to `task`; called after the mode switch
 * @param task The user callback; may return a promise or a plain value
 * @returns A promise that resolves with no value once `task` has finished, or rejects with its error
 */
async function runInExclusiveMode(engine, activeMode, getCore, task) {
    engine.setEngineMode(activeMode);
    try {
        await task(getCore());
    }
    finally {
        engine.setEngineMode(engine_mode_1.default.Idling);
    }
}
/**
 * A standardized abstract class that represents what we want in our web scraping engines.
 *
 * The engine tracks a single mode at a time (Off, Loading, Idling, Parsing or Manipulating).
 * `startup()` calls `this.load()`, which is not defined on this class and therefore has to be
 * supplied by the concrete engine.
 */
class WebScrapingEngine {
    /**
     * Construct our basic implementation of our engine.
     * The engine starts in the Off mode with no parsing or manipulation core.
     * @param engineType The type of engine we are aiming for
     * @param engineCoreType The core type that we want to utilize
     */
    constructor(engineType, engineCoreType) {
        this.engineType = engineType;
        this.engineCore = engineCoreType;
        this.engineMode = engine_mode_1.default.Off;
        this.parsingCore = null;
        this.manipulationCore = null;
    }
    // any method that we want to relay information back should go here
    /**
     * Gets the mode the engine is currently in.
     */
    getEngineMode() {
        return this.engineMode;
    }
    /**
     * Gets the engine type that was passed to the constructor.
     */
    getEngineType() {
        return this.engineType;
    }
    /**
     * Gets the core type that was passed to the constructor.
     */
    getEngineCoreType() {
        return this.engineCore;
    }
    /**
     * Determine if the engine is currently running or not.
     * Any mode other than Off counts as running.
     */
    isRunning() {
        return this.engineMode !== engine_mode_1.default.Off;
    }
    /**
     * Decides what mode the engine should be in.
     * The engine should only be in one mode at a time. Either Parsing or manipulating
     * @param newMode The new mode that we want to lock the engine into
     */
    setEngineMode(newMode) {
        this.engineMode = newMode;
    }
    /**
     * Determines if the engine is in the given mode.
     * @param targetMode The mode we are expecting to be in
     */
    isCorrectEngineMode(targetMode) {
        return this.engineMode === targetMode;
    }
    /**
     * Start the engine up and get ready to process what comes in.
     *
     * When the engine is Off it switches to Loading, awaits `this.load()` and then switches to
     * Idling. When the engine is in any other mode the call resolves immediately without loading.
     * If `load()` fails, the engine is put back into Off (so startup can be retried) and the
     * returned promise rejects with the original error.
     * @returns A promise that resolves with no value once the engine is ready
     */
    async startup() {
        if (!this.isCorrectEngineMode(engine_mode_1.default.Off)) {
            // if we are already started up just simply resolve. No need to complain
            return;
        }
        // switch to loading mode and then load anything we have to load in
        this.setEngineMode(engine_mode_1.default.Loading);
        try {
            await this.load();
        }
        catch (error) {
            // do not stay stuck in Loading; a stuck engine would make later startups no-ops
            this.setEngineMode(engine_mode_1.default.Off);
            throw error;
        }
        // switch to idling mode and then we should be off to the races
        this.setEngineMode(engine_mode_1.default.Idling);
    }
    /**
     * Get the parsing core that we have decided to utilize
     */
    getParsingCore() {
        return this.parsingCore;
    }
    /**
     * Get the manipulation core that we have decided to utilize
     */
    getManipulationCore() {
        return this.manipulationCore;
    }
    /**
     * Safely enters manipulation mode.
     *
     * Only an Idling engine can enter manipulation mode. The engine is Manipulating while the
     * callback runs and returns to Idling afterwards, whether the callback succeeds or fails.
     * NOTE(review): an Idling engine whose manipulation core is still null invokes the callback
     * with null; the null check below is only reached when the engine is not Idling - confirm
     * this is intended.
     * @param callback Lets you safely manipulate the page this engine is processing; receives the manipulation core
     * @returns A promise that resolves with no value when the callback finishes, or rejects with its error
     * @throws EngineModeError (synchronously) when the engine is not Idling and has no manipulation core
     * @throws EngineCannotSwitchModeError (synchronously) when the engine is not Idling
     */
    manipulate(callback) {
        if (this.isCorrectEngineMode(engine_mode_1.default.Idling)) {
            return runInExclusiveMode(this, engine_mode_1.default.Manipulating, () => this.getManipulationCore(), callback);
        }
        if (this.manipulationCore === null) {
            throw new engine_errors_1.EngineModeError('There is no manipulation core defined or created');
        }
        throw new engine_errors_1.EngineCannotSwitchModeError();
    }
    /**
     * Provides a mechanism to safely parse the core contents.
     *
     * Only an Idling engine can enter parsing mode. The engine is Parsing while the callback
     * runs and returns to Idling afterwards, whether the callback succeeds or fails.
     * NOTE(review): an Idling engine whose parsing core is still null invokes the callback with
     * null; the null check below is only reached when the engine is not Idling - confirm this
     * is intended.
     * @param callback This is where you will be able to safely parse the contents; receives the parsing core
     * @returns A promise that resolves with no value when the callback finishes, or rejects with its error
     * @throws EngineModeError (synchronously) when the engine is not Idling and has no parsing core
     * @throws EngineCannotSwitchModeError (synchronously) when the engine is not Idling
     */
    parse(callback) {
        if (this.isCorrectEngineMode(engine_mode_1.default.Idling)) {
            return runInExclusiveMode(this, engine_mode_1.default.Parsing, () => this.getParsingCore(), callback);
        }
        if (this.parsingCore === null) {
            throw new engine_errors_1.EngineModeError('There is no parsing core defined or created');
        }
        throw new engine_errors_1.EngineCannotSwitchModeError();
    }
}
// Named export of the engine base class.
exports.WebScrapingEngine = WebScrapingEngine;
/**
 * Base for cores that handle page manipulation.
 * Remembers the url it was created with; `core` starts out as null
 * (presumably filled in by the concrete core - verify against subclasses).
 */
class ManipulationCore {
    /**
     * @param url The url this core was created for
     */
    constructor(url) {
        Object.assign(this, { core: null, url });
    }
    /**
     * Get the url that was passed at the time of creation
     */
    getUrl() {
        const { url } = this;
        return url;
    }
}
// Named export of the manipulation core base class.
exports.ManipulationCore = ManipulationCore;
/**
 * Base for cores that support parsing the page.
 * Remembers the url it was created with; `core` starts out as null
 * (presumably filled in by the concrete core - verify against subclasses).
 */
class ParsingCore {
    /**
     * @param url The url this core was created for
     */
    constructor(url) {
        Object.assign(this, { core: null, url });
    }
    /**
     * Get the url that was passed at the time of creation
     */
    getUrl() {
        const { url } = this;
        return url;
    }
    /**
     * Returns a promise that resolves (with no value) once the timer set for
     * `timeoutInterval` fires.
     * @param timeoutInterval Delay handed straight to setTimeout
     */
    waitForTimeout(timeoutInterval) {
        const scheduleWakeUp = (wakeUp) => setTimeout(wakeUp, timeoutInterval);
        return new Promise(scheduleWakeUp);
    }
}
// Named export of the parsing core base class.
exports.ParsingCore = ParsingCore;
// The engine base class is the module's default export.
exports.default = WebScrapingEngine;
//# sourceMappingURL=web_scraping_engine.js.map