UNPKG

@skypilot/scraper

Version:
67 lines (55 loc) 2.94 kB
"use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.createScraperWithDb = createScraperWithDb;

var _sugarbowl = require("@skypilot/sugarbowl");
var _LowDb = require("../clients/lowdb/LowDb");
var _PlaywrightScraper = require("../clients/playwright/PlaywrightScraper");
var _readConfigs = require("./readConfigs");

/**
 * Collect the own keys of `object`: its enumerable string keys plus its
 * symbol keys (when the runtime supports symbols).
 *
 * @param {object} object - Object whose keys are collected.
 * @param {boolean} [enumerableOnly] - When truthy, symbol keys whose property
 *   descriptor is not enumerable are dropped.
 * @returns {Array<string|symbol>} String keys first, then symbol keys.
 */
function ownKeys(object, enumerableOnly) {
  const collected = Object.keys(object);
  if (!Object.getOwnPropertySymbols) {
    return collected;
  }

  let symbolKeys = Object.getOwnPropertySymbols(object);
  if (enumerableOnly) {
    symbolKeys = symbolKeys.filter(
      (sym) => Object.getOwnPropertyDescriptor(object, sym).enumerable
    );
  }
  collected.push(...symbolKeys);
  return collected;
}

/**
 * Compiler-emitted object-spread helper: copy properties from each source onto
 * `target`, mutating and returning `target`.
 *
 * Sources in odd argument positions (1st, 3rd, ... after `target`) are copied
 * by value via `_defineProperty`; sources in even positions are copied by
 * property descriptor. `null`/`undefined` sources are treated as `{}`.
 *
 * @param {object} target - Object receiving the properties.
 * @param {...object} sources - Objects to copy from, applied left to right.
 * @returns {object} The same `target` object.
 */
function _objectSpread(target, ...sources) {
  sources.forEach((candidate, index) => {
    const source = candidate != null ? candidate : {};
    // Position of this source in the original argument list (target is 0).
    const position = index + 1;

    if (position % 2 === 1) {
      ownKeys(Object(source), true).forEach((key) => {
        _defineProperty(target, key, source[key]);
      });
      return;
    }

    if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
      return;
    }

    ownKeys(Object(source)).forEach((key) => {
      Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
    });
  });
  return target;
}

/**
 * Set `obj[key]` to `value`. If the key is already present (own or inherited),
 * it is (re)defined as an enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @param {object} obj - Object to modify.
 * @param {string|symbol} key - Property key.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function _defineProperty(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }
  Object.defineProperty(obj, key, {
    value,
    enumerable: true,
    configurable: true,
    writable: true
  });
  return obj;
}

// The `(0, module.fn)(...)` form is how the compiler calls an imported
// function without binding `this` to the module object; it is kept as-is.

// Base directory for database files, read from the 'databases.directory'
// config key with a fallback of 'scraped-data'.
const defaultDbDir = (0, _readConfigs.readConfigs)('databases.directory', {
  ignoreEmpty: true,
  defaultValue: 'scraped-data'
});

// Base directory for log files, read from the 'logs.directory' config key;
// falls back to the database directory (or 'scraped-data' if that is falsy).
const defaultLogDir = (0, _readConfigs.readConfigs)('logs.directory', {
  ignoreEmpty: true,
  defaultValue: defaultDbDir || 'scraped-data'
});

/**
 * Build a `PlaywrightScraper` wired to a `LowDb` database file.
 *
 * @param {object} [params] - Scraper/database settings.
 * @param {string} [params.dateTimeFormat='slug'] - Format handed to
 *   `makeDateTimeStamp` for the default log file name; a falsy value selects
 *   the un-stamped `<siteName>.log` default instead.
 * @param {string} [params.siteName='demo'] - Used in default directory and
 *   file names.
 * @param {string} [params.collectionName='records'] - Key of the empty array
 *   placed in the database's default data.
 * @param {*} [params.dbDir=[defaultDbDir, siteName]] - Passed to `Directory`.
 * @param {*} [params.logDir=[defaultLogDir, siteName]] - Passed to `Directory`
 *   and forwarded to the scraper.
 * @param {string} [params.dbFileName=`${siteName}.db.json`] - Database file name.
 * @param {string} [params.logFileName] - Log file name; forwarded to the scraper.
 * @param {boolean} [params.verbose] - Passed to `consoleIf` (presumably gates
 *   the console messages) and forwarded to the scraper.
 * @param {boolean} [params.wipeData] - When truthy, the database file is
 *   deleted before the database is opened.
 * @param {object} [browserOptions] - Extra options merged over
 *   `{ database, logDir, logFileName, verbose }`; same-named keys win.
 * @returns {PlaywrightScraper} The newly constructed scraper.
 */
function createScraperWithDb(params = {}, browserOptions = {}) {
  // Defaults are evaluated lazily, left to right, so later defaults may use
  // `siteName` and `dateTimeFormat`.
  const {
    dateTimeFormat = 'slug',
    siteName = 'demo',
    collectionName = 'records',
    dbDir = [defaultDbDir, siteName],
    logDir = [defaultLogDir, siteName],
    dbFileName = `${siteName}.db.json`,
    logFileName = dateTimeFormat
      ? (0, _sugarbowl.composeFileName)([
          siteName,
          (0, _sugarbowl.makeDateTimeStamp)(dateTimeFormat),
          '.log'
        ])
      : `${siteName}.log`,
    verbose,
    wipeData
  } = params;

  // Resolve the database file location (`makeSync()` presumably creates the
  // directory on disk — behavior is defined in @skypilot/sugarbowl).
  const dbDirectory = new _sugarbowl.Directory(dbDir).makeSync();
  const dbFilePath = dbDirectory.join(dbFileName);
  (0, _sugarbowl.consoleIf)(verbose)(`Database:\n ${dbFilePath}`);

  if (wipeData) {
    (0, _sugarbowl.deleteFileSync)(dbFilePath);
  }

  const defaultData = { [collectionName]: [] };
  const database = new _LowDb.LowDb(dbFilePath, { defaultData });

  // The log directory handle is only used to compute the path shown in the
  // message below; unlike the database directory, `makeSync()` is not called.
  const logDirectory = new _sugarbowl.Directory(logDir);
  let logFilePath = '';
  if (logFileName) {
    logFilePath = logDirectory.join(logFileName);
  }
  (0, _sugarbowl.consoleIf)(verbose)(`Log file:\n ${logFilePath || 'none'}`);

  const scraperOptions = _objectSpread(
    { database, logDir, logFileName, verbose },
    browserOptions
  );
  return new _PlaywrightScraper.PlaywrightScraper(scraperOptions);
}