UNPKG

@crawlee/core

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

331 lines • 11.7 kB
"use strict";
var _ErrorTracker_options;
Object.defineProperty(exports, "__esModule", { value: true });
exports.ErrorTracker = void 0;
const tslib_1 = require("tslib");
const node_util_1 = require("node:util");
const error_snapshotter_1 = require("./error_snapshotter");
/**
 * Returns the text between the last `(` of a stack trace line and the first `)` after it.
 * When the line has no such bracket pair, the whole line is returned unchanged.
 *
 * @param {string} line - A single (already trimmed) stack trace line.
 * @returns {string} The bracketed part of the line, or the line itself.
 */
const extractPathFromStackTraceLine = (line) => {
    const lastStartingRoundBracketIndex = line.lastIndexOf('(');
    if (lastStartingRoundBracketIndex !== -1) {
        const closingRoundBracketIndex = line.indexOf(')', lastStartingRoundBracketIndex);
        if (closingRoundBracketIndex !== -1) {
            return line.slice(lastStartingRoundBracketIndex + 1, closingRoundBracketIndex);
        }
    }
    return line;
};
// https://v8.dev/docs/stack-trace-api#appendix%3A-stack-trace-format
/**
 * Picks the first stack frame whose path is neither a `node:` internal nor inside `node_modules`.
 * Falls back to the very first frame when every frame is filtered out.
 *
 * @param {string[]} stack - Stack trace lines, starting at the first frame.
 * @returns {string} The extracted path of the chosen frame.
 */
const getPathFromStackTrace = (stack) => {
    for (const line of stack) {
        const path = extractPathFromStackTraceLine(line);
        if (path.startsWith('node:') || path.includes('/node_modules/') || path.includes('\\node_modules\\')) {
            continue;
        }
        return path;
    }
    return extractPathFromStackTraceLine(stack[0]);
};
/**
 * Returns (creating it if needed) the sub-group of `storage` keyed by the error's stack trace.
 * The key is either the full trimmed stack (`showFullStack`) or only the path of the first
 * relevant frame; `'missing stack trace'` is used when no frame can be found.
 *
 * @param {object} error - The tracked error; only its `stack` property is read.
 * @param {object} storage - Prototype-less dictionary holding the groups of this level.
 * @param {boolean} showFullStack - Whether to key by the whole stack instead of one path.
 * @returns {object} The group for this stack trace.
 */
const getStackTraceGroup = (error, storage, showFullStack) => {
    const stack = error.stack?.split('\n').map((line) => line.trim());
    // The stack starts with the error message, which may span several lines; skip to the first frame.
    const sliceAt = stack
        ? stack.findIndex((line) => line.startsWith('at ') || line.startsWith('eval at '))
        : -1;
    let normalizedStackTrace = null;
    if (sliceAt !== -1) {
        // Lines are already trimmed above, so they can be joined directly.
        normalizedStackTrace = showFullStack
            ? stack.slice(sliceAt).join('\n')
            : getPathFromStackTrace(stack.slice(sliceAt));
    }
    if (!normalizedStackTrace) {
        normalizedStackTrace = 'missing stack trace';
    }
    if (!(normalizedStackTrace in storage)) {
        storage[normalizedStackTrace] = Object.create(null);
    }
    return storage[normalizedStackTrace];
};
/**
 * Returns (creating it if needed) the sub-group of `storage` keyed by the error's `code`.
 * An `undefined` code is grouped under `'missing error code'`.
 *
 * @param {object} error - The tracked error; only its `code` property is read.
 * @param {object} storage - Prototype-less dictionary holding the groups of this level.
 * @returns {object} The group for this error code.
 */
const getErrorCodeGroup = (error, storage) => {
    const { code } = error;
    // Normalize once so the membership test, the write and the read all use the same key.
    const key = code === undefined ? 'missing error code' : String(code);
    if (!(key in storage)) {
        storage[key] = Object.create(null);
    }
    return storage[key];
};
/**
 * Returns (creating it if needed) the sub-group of `storage` keyed by the error's `name`.
 *
 * @param {object} error - The tracked error; only its `name` property is read.
 * @param {object} storage - Prototype-less dictionary holding the groups of this level.
 * @returns {object} The group for this error name.
 */
const getErrorNameGroup = (error, storage) => {
    const { name } = error;
    if (!(name in storage)) {
        storage[name] = Object.create(null);
    }
    return storage[name];
};
/**
 * Finds the longest run of consecutive equal words shared by two word arrays.
 *
 * @param {string[]} a - First list of words.
 * @param {string[]} b - Second list of words.
 * @returns {{ maxStreak: number, aStreakIndex: number, bStreakIndex: number }}
 *   Length of the longest common run and where it starts in `a` and `b`
 *   (`maxStreak` is 0 and both indexes are -1 when nothing is shared).
 */
const findBiggestWordIntersection = (a, b) => {
    let maxStreak = 0;
    let bStreakIndex = -1;
    let aStreakIndex = -1;
    for (let aIndex = 0; aIndex < a.length; aIndex++) {
        let bIndex = -1;
        // Try every occurrence of a[aIndex] in b as a possible start of the common run.
        do {
            let aWalkIndex = aIndex;
            bIndex = b.indexOf(a[aIndex], bIndex + 1);
            let bWalkIndex = bIndex;
            let streak = 0;
            while (aWalkIndex < a.length && bWalkIndex < b.length && b[bWalkIndex++] === a[aWalkIndex++]) {
                streak++;
            }
            if (streak > maxStreak) {
                maxStreak = streak;
                aStreakIndex = aIndex;
                bStreakIndex = bIndex;
            }
        } while (bIndex !== -1);
    }
    return {
        maxStreak,
        aStreakIndex,
        bStreakIndex,
    };
};
/**
 * Counts how many items of `array` are strictly equal to `target`.
 *
 * @param {Array} array - Items to scan.
 * @param {*} target - Value to count.
 * @returns {number} Number of occurrences.
 */
const arrayCount = (array, target) => {
    let result = 0;
    for (const item of array) {
        if (item === target) {
            result++;
        }
    }
    return result;
};
/**
 * Builds a word template common to `a` and `b`: the shared runs of words are kept and
 * every differing stretch is replaced by a single `'_'` placeholder.
 *
 * @param {string[]} a - Words of the first message.
 * @param {string[]} b - Words of the second message.
 * @returns {string[]} The template words; `['_']` when nothing is shared.
 */
const calculatePlaceholder = (a, b) => {
    const { maxStreak, aStreakIndex, bStreakIndex } = findBiggestWordIntersection(a, b);
    if (maxStreak === 0) {
        return ['_'];
    }
    const leftA = a.slice(0, aStreakIndex);
    const leftB = b.slice(0, bStreakIndex);
    const rightA = a.slice(aStreakIndex + maxStreak);
    const rightB = b.slice(bStreakIndex + maxStreak);
    const output = [];
    // Recurse on what is left on each side of the longest common run.
    if (leftA.length !== 0 || leftB.length !== 0) {
        output.push(...calculatePlaceholder(leftA, leftB));
    }
    output.push(...a.slice(aStreakIndex, aStreakIndex + maxStreak));
    if (rightA.length !== 0 || rightB.length !== 0) {
        output.push(...calculatePlaceholder(rightA, rightB));
    }
    return output;
};
/**
 * Same as `calculatePlaceholder`, but collapses the template to `['_']` when half or more
 * of its words are placeholders.
 *
 * @param {string[]} a - Words of the first message.
 * @param {string[]} b - Words of the second message.
 * @returns {string[]} The template words, or `['_']` when the template is too generic.
 */
const normalizedCalculatePlaceholder = (a, b) => {
    const output = calculatePlaceholder(a, b);
    // We can't be too general
    if (arrayCount(output, '_') / output.length >= 0.5) {
        return ['_'];
    }
    return output;
};
/**
 * Merges the groups stored under messages `a` and `b` into one group keyed by their
 * common template.
 *
 * Merge A (missing placeholders) into B (can contain placeholders but does not have to).
 *
 * @param {string} a - Key of the first message group in `storage`.
 * @param {string} b - Key of the second message group in `storage`.
 * @param {object} storage - Prototype-less dictionary of message groups; mutated in place.
 * @returns {string|undefined} The key of the merged group, or `undefined` when the two
 *   messages are too different and nothing was changed.
 */
const mergeMessages = (a, b, storage) => {
    const placeholder = normalizedCalculatePlaceholder(a.split(' '), b.split(' ')).join(' ');
    if (placeholder === '_') {
        return undefined;
    }
    const groupA = storage[a];
    const groupB = storage[b];
    delete storage[a];
    delete storage[b];
    // Read after the deletes: anything still stored under the placeholder is a third group.
    // Fold it in instead of silently overwriting it.
    const collidingGroup = storage[placeholder];
    const count = groupA.count + groupB.count + (collidingGroup?.count ?? 0);
    // Carry over the other properties of the merged groups (e.g. the snapshot URLs of the
    // first occurrence) so that they are not lost when messages are generalized.
    storage[placeholder] = Object.assign(Object.create(null), collidingGroup, groupA, groupB, {
        count,
    });
    return placeholder;
};
/**
 * Returns (creating it if needed) the group of `storage` for the error's message.
 * A new message is first compared with the existing ones and merged into a common
 * template (with `_` placeholders) when they are similar enough.
 *
 * @param {*} error - The tracked error; its `message` is read, non-error values are stringified.
 * @param {object} storage - Prototype-less dictionary of message groups; mutated in place.
 * @param {boolean} showFullMessage - When false, only the first line of the message is used.
 * @returns {object} The message group (an object with a numeric `count`).
 */
const getErrorMessageGroup = (error, storage, showFullMessage) => {
    let { message } = error;
    if (!message) {
        try {
            message =
                typeof error === 'string'
                    ? error
                    : `Unknown error message. Received non-error object: ${JSON.stringify(error)}`;
        }
        catch {
            message = `Unknown error message. Received non-error object, and could not stringify it: ${(0, node_util_1.inspect)(error, {
                depth: 0,
            })}`;
        }
    }
    else if (typeof message !== 'string') {
        // A non-string message (number, object, ...) would crash on the string methods below.
        message = (0, node_util_1.inspect)(message, { depth: 0 });
    }
    if (!showFullMessage) {
        const newLineIndex = message.indexOf('\n');
        message = message.slice(0, newLineIndex === -1 ? undefined : newLineIndex);
    }
    if (!(message in storage)) {
        storage[message] = Object.assign(Object.create(null), {
            count: 0,
        });
        // This actually safe, since we Object.create(null) so no prototype pollution can happen.
        // eslint-disable-next-line no-restricted-syntax, guard-for-in
        for (const existingMessage in storage) {
            // Merging the fresh entry with itself only re-creates it; skip the wasted work.
            if (existingMessage === message) {
                continue;
            }
            const newMessage = mergeMessages(message, existingMessage, storage);
            if (newMessage !== undefined) {
                message = newMessage;
                break;
            }
        }
    }
    return storage[message];
};
/**
 * Increments the `count` of a group, initializing it to 0 first when missing.
 *
 * @param {object} group - The deepest group reached for an error; mutated in place.
 */
const increaseCount = (group) => {
    if (!('count' in group)) {
        // In case users don't want to display error message
        group.count = 0;
    }
    group.count++;
};
/**
 * This class tracks errors and computes a summary of information like:
 * - where the errors happened
 * - what the error names are
 * - what the error codes are
 * - what is the general error message
 *
 * This is extremely useful when there are dynamic error messages, such as argument validation.
 *
 * Since the structure of the `tracker.result` object differs when using different options,
 * it's typed as `Record<string, unknown>`. The most deep object has a `count` property, which is a number.
 *
 * It's possible to get the total amount of errors via the `tracker.total` property.
 */
class ErrorTracker {
    /**
     * @param {object} [options] - Overrides for the defaults below: `showErrorCode`,
     *   `showErrorName`, `showStackTrace`, `showErrorMessage` (all `true`), `showFullStack`,
     *   `showFullMessage`, `saveErrorSnapshots` (all `false`).
     */
    constructor(options = {}) {
        _ErrorTracker_options.set(this, void 0);
        Object.defineProperty(this, "result", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "total", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "errorSnapshotter", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        tslib_1.__classPrivateFieldSet(this, _ErrorTracker_options, {
            showErrorCode: true,
            showErrorName: true,
            showStackTrace: true,
            showFullStack: false,
            showErrorMessage: true,
            showFullMessage: false,
            saveErrorSnapshots: false,
            ...options,
        }, "f");
        if (tslib_1.__classPrivateFieldGet(this, _ErrorTracker_options, "f").saveErrorSnapshots) {
            this.errorSnapshotter = new error_snapshotter_1.ErrorSnapshotter();
        }
        this.result = Object.create(null);
        this.total = 0;
    }
    /**
     * Walks (and creates on demand) the nested groups enabled by the options, in the order
     * stack trace -> error code -> error name -> error message, and increments the count
     * of the deepest one.
     *
     * @param {*} error - The error to classify.
     * @returns {object} The deepest group, whose `count` was just incremented.
     */
    updateGroup(error) {
        const options = tslib_1.__classPrivateFieldGet(this, _ErrorTracker_options, "f");
        let group = this.result;
        if (options.showStackTrace) {
            group = getStackTraceGroup(error, group, options.showFullStack);
        }
        if (options.showErrorCode) {
            group = getErrorCodeGroup(error, group);
        }
        if (options.showErrorName) {
            group = getErrorNameGroup(error, group);
        }
        if (options.showErrorMessage) {
            group = getErrorMessageGroup(error, group, options.showFullMessage);
        }
        increaseCount(group);
        return group;
    }
    /**
     * Tracks an error and, one after another, every object found along its `cause` chain.
     * Each of them increments `total`.
     *
     * @param {*} error - The error to track.
     */
    add(error) {
        // Guards against cyclic `cause` chains, which would otherwise never terminate.
        const seen = new Set();
        let current = error;
        for (;;) {
            this.total++;
            this.updateGroup(current);
            seen.add(current);
            const { cause } = current;
            if (typeof cause !== 'object' || cause === null || seen.has(cause)) {
                return;
            }
            current = cause;
        }
    }
    /**
     * This method is async, because it captures a snapshot of the error context.
     * We added this new method to avoid breaking changes.
     *
     * Behaves like `add`, and additionally captures a snapshot for the top-level error when
     * it is the first one of its group and a `context` is given. Causes are tracked without
     * a context, so no snapshot is taken for them.
     *
     * @param {*} error - The error to track.
     * @param {object} [context] - Context handed over to the error snapshotter.
     * @returns {Promise<void>}
     */
    async addAsync(error, context) {
        // Guards against cyclic `cause` chains, which would otherwise never terminate.
        const seen = new Set();
        let current = error;
        let currentContext = context;
        for (;;) {
            this.total++;
            const group = this.updateGroup(current);
            // Capture a snapshot (screenshot and HTML) on the first occurrence of an error
            if (group.count === 1 && currentContext) {
                // Best effort on purpose: a failed snapshot must not break error tracking.
                await this.captureSnapshot(group, current, currentContext).catch(() => { });
            }
            seen.add(current);
            const { cause } = current;
            if (typeof cause !== 'object' || cause === null || seen.has(cause)) {
                return;
            }
            current = cause;
            currentContext = undefined;
        }
    }
    /**
     * Counts the groups of `result` that carry a `count` property, i.e. the number of
     * distinct errors tracked so far.
     *
     * @returns {number} Number of distinct error groups.
     */
    getUniqueErrorCount() {
        let count = 0;
        const goDeeper = (group) => {
            if ('count' in group) {
                count++;
                return;
            }
            // eslint-disable-next-line guard-for-in, no-restricted-syntax
            for (const key in group) {
                goDeeper(group[key]);
            }
        };
        goDeeper(this.result);
        return count;
    }
    /**
     * Lists the most frequent errors, most frequent first.
     *
     * @param {number} count - Maximum number of entries to return.
     * @returns {Array<[number, string[]]>} Pairs of occurrence count and the path of group
     *   keys leading to the counted group.
     */
    getMostPopularErrors(count) {
        const result = [];
        const goDeeper = (group, path) => {
            if ('count' in group) {
                result.push([group.count, path]);
                return;
            }
            // eslint-disable-next-line guard-for-in, no-restricted-syntax
            for (const key in group) {
                goDeeper(group[key], [...path, key]);
            }
        };
        goDeeper(this.result, []);
        return result.sort((a, b) => b[0] - a[0]).slice(0, count);
    }
    /**
     * Captures a snapshot through the error snapshotter and stores the returned URLs on
     * `storage` as `firstErrorScreenshotUrl` and `firstErrorHtmlUrl`.
     * Does nothing when no snapshotter was created (`saveErrorSnapshots` disabled).
     *
     * @param {object} storage - The group to annotate; mutated in place.
     * @param {*} error - The error being tracked.
     * @param {object} context - Context handed over to the error snapshotter.
     * @returns {Promise<void>}
     */
    async captureSnapshot(storage, error, context) {
        if (!this.errorSnapshotter) {
            return;
        }
        const { screenshotFileUrl, htmlFileUrl } = await this.errorSnapshotter.captureSnapshot(error, context);
        storage.firstErrorScreenshotUrl = screenshotFileUrl;
        storage.firstErrorHtmlUrl = htmlFileUrl;
    }
    /**
     * Forgets every tracked error: empties `result` in place (the object identity is kept)
     * and sets `total` back to 0 so both stay consistent.
     */
    reset() {
        // This actually safe, since we Object.create(null) so no prototype pollution can happen.
        // eslint-disable-next-line no-restricted-syntax, guard-for-in
        for (const key in this.result) {
            delete this.result[key];
        }
        this.total = 0;
    }
}
exports.ErrorTracker = ErrorTracker;
_ErrorTracker_options = new WeakMap();
//# sourceMappingURL=error_tracker.js.map