UNPKG

hammer-scrape

Version:

Unifies Cheerio and Puppeteer for the most streamlined scraping experience

175 lines 5.69 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const engine_mode_1 = require("./engine_mode");
const engine_errors_1 = require("./engine_errors");

/**
 * Locks `engine` into `mode`, runs `callback` with the requested core and
 * always puts the engine back into Idling afterwards.
 *
 * The mode switch and the callback invocation both happen synchronously
 * (before the first `await`), so callers observe the new mode as soon as
 * `manipulate()` / `parse()` returns.
 *
 * @param engine The engine whose mode is being switched
 * @param mode The mode to hold while the callback is running
 * @param getCore Returns the core to hand to the callback; evaluated after the mode switch
 * @param callback The user callback; may return a promise or a plain value
 * @returns A promise that resolves with no value once the callback has settled,
 *          or rejects with whatever the callback threw / rejected with
 */
async function runInMode(engine, mode, getCore, callback) {
    engine.setEngineMode(mode);
    try {
        await callback(getCore());
    }
    finally {
        // Restore Idling even when the callback fails; otherwise the engine
        // would stay locked in `mode` and every later call would throw.
        engine.setEngineMode(engine_mode_1.default.Idling);
    }
}

/**
 * A standardized abstract class that represents what we want in our web scraping engines.
 *
 * `startup()` calls `this.load()`, which is not defined in this class and is
 * presumably supplied by concrete engines (TODO confirm against subclasses).
 */
class WebScrapingEngine {
    /**
     * Construct our basic implementation of our engine.
     * The engine starts in the Off mode with no parsing or manipulation core.
     * @param engineType The type of engine we are aiming for
     * @param engineCoreType The core type that we want to utilize
     */
    constructor(engineType, engineCoreType) {
        this.engineType = engineType;
        this.engineCore = engineCoreType;
        this.engineMode = engine_mode_1.default.Off;
        this.parsingCore = null;
        this.manipulationCore = null;
    }

    // any method that we want to relay information back should go here

    /**
     * Gets the mode the engine is currently in
     */
    getEngineMode() {
        return this.engineMode;
    }

    /**
     * Gets the engine type that best represents this engine
     */
    getEngineType() {
        return this.engineType;
    }

    /**
     * Gets the core type that was passed to the constructor
     */
    getEngineCoreType() {
        return this.engineCore;
    }

    /**
     * Determine if the engine is currently running or not.
     * Any mode other than Off counts as running.
     */
    isRunning() {
        return this.engineMode !== engine_mode_1.default.Off;
    }

    /**
     * Decides what mode the engine should be in.
     * The engine should only be in one mode at a time.
     * Either Parsing or manipulating
     * @param newMode The new mode that we want to lock the engine into
     */
    setEngineMode(newMode) {
        this.engineMode = newMode;
    }

    /**
     * Determines if the engine is in the current mode
     * @param targetMode The mode we are expecting to be in
     */
    isCorrectEngineMode(targetMode) {
        return this.engineMode === targetMode;
    }

    /**
     * Start the engine up and get ready to process what comes in.
     *
     * When the engine is Off it switches to Loading, awaits `this.load()` and
     * then switches to Idling. When the engine is in any other mode this is a
     * no-op that simply resolves.
     *
     * @returns A promise that resolves with no value once the engine is ready.
     *          If `this.load()` fails, the engine is put back into Off (so
     *          startup can be retried) and the promise rejects with that error.
     */
    async startup() {
        if (!this.isCorrectEngineMode(engine_mode_1.default.Off)) {
            // if we are already started up just simply resolve. No need to complain
            return;
        }
        // switch to loading mode and then load anything we have to load in
        this.setEngineMode(engine_mode_1.default.Loading);
        try {
            await this.load();
        }
        catch (err) {
            // Do not leave the engine stranded in Loading after a failed load.
            this.setEngineMode(engine_mode_1.default.Off);
            throw err;
        }
        // switch to idling mode and then we should be off to the races
        this.setEngineMode(engine_mode_1.default.Idling);
    }

    /**
     * Get the parsing core that we have decided to utilize
     */
    getParsingCore() {
        return this.parsingCore;
    }

    /**
     * Get the manipulation core that we have decided to utilize
     */
    getManipulationCore() {
        return this.manipulationCore;
    }

    /**
     * Safely enters manipulation mode
     * @param callback Let's you safely manipulate the page this engine is processing.
     *                 It receives the manipulation core.
     * @returns A promise that resolves with no value when the callback has finished
     *          and the engine is back in Idling; it rejects if the callback fails.
     * @throws EngineModeError synchronously, when the engine is not Idling and no
     *         manipulation core is set
     * @throws EngineCannotSwitchModeError synchronously, when the engine is not
     *         Idling but a manipulation core is set
     */
    manipulate(callback) {
        if (!this.isCorrectEngineMode(engine_mode_1.default.Idling)) {
            // NOTE(review): the missing-core check is only reached when the engine
            // is not Idling; an Idling engine hands the callback whatever the core
            // currently is (possibly null). Kept as-is for compatibility.
            if (this.manipulationCore === null) {
                throw new engine_errors_1.EngineModeError('There is no manipulation core defined or created');
            }
            throw new engine_errors_1.EngineCannotSwitchModeError();
        }
        return runInMode(this, engine_mode_1.default.Manipulating, () => this.getManipulationCore(), callback);
    }

    /**
     * Provides a mechanism to safely parse the core contents
     * @param callback This is where you will be able to safely parse the contents.
     *                 It receives the parsing core.
     * @returns A promise that resolves with no value when the callback has finished
     *          and the engine is back in Idling; it rejects if the callback fails.
     * @throws EngineModeError synchronously, when the engine is not Idling and no
     *         parsing core is set
     * @throws EngineCannotSwitchModeError synchronously, when the engine is not
     *         Idling but a parsing core is set
     */
    parse(callback) {
        if (!this.isCorrectEngineMode(engine_mode_1.default.Idling)) {
            // NOTE(review): same ordering caveat as manipulate() - a null parsing
            // core is only reported when the engine is not Idling.
            if (this.parsingCore === null) {
                throw new engine_errors_1.EngineModeError('There is no parsing core defined or created');
            }
            throw new engine_errors_1.EngineCannotSwitchModeError();
        }
        return runInMode(this, engine_mode_1.default.Parsing, () => this.getParsingCore(), callback);
    }
}
exports.WebScrapingEngine = WebScrapingEngine;

/**
 * Defines the methods that should be implemented in a core that handles manipulation
 */
class ManipulationCore {
    /**
     * @param url The url this core is created for
     */
    constructor(url) {
        // Starts out as null; nothing in this class assigns it afterwards.
        this.core = null;
        this.url = url;
    }

    /**
     * Get the url that was passed at the time of creation
     */
    getUrl() {
        return this.url;
    }
}
exports.ManipulationCore = ManipulationCore;

/**
 * Defines the methods that should be implemented in an engine that supports parsing the page
 */
class ParsingCore {
    /**
     * @param url The url this core is created for
     */
    constructor(url) {
        // Starts out as null; nothing in this class assigns it afterwards.
        this.core = null;
        this.url = url;
    }

    /**
     * Get the url that was passed at the time of creation
     */
    getUrl() {
        return this.url;
    }

    /**
     * Wait for a timeout operation, return a promise that completes when the timeout is done
     * @param timeoutInterval Delay handed to `setTimeout` (milliseconds)
     */
    waitForTimeout(timeoutInterval) {
        return new Promise((resolve) => {
            setTimeout(resolve, timeoutInterval);
        });
    }
}
exports.ParsingCore = ParsingCore;
exports.default = WebScrapingEngine;
//# sourceMappingURL=web_scraping_engine.js.map