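// UNPKG listing of package "xenforo-dl".
// Version: (not captured in this listing)
// Size reported by UNPKG for this file: 433 lines, 24.5 kB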
import deepFreeze from 'deep-freeze';
import { getDownloaderConfig } from './DownloaderOptions.js';
import Fetcher, { FetcherError } from './utils/Fetcher.js';
import { commonLog } from './utils/logging/Logger.js';
import URLHelper from './utils/URLHelper.js';
import Bottleneck from 'bottleneck';
import { AbortError } from 'node-fetch';
import path from 'path';
import sanitizeFilename from 'sanitize-filename';
import { existsSync } from 'fs';
import fse from 'fs-extra';
import Parser from './parsers/Parser.js';
import MessageTemplate from './templates/Message.js';
import ThreadHeaderTemplate from './templates/Thread.js';

/**
 * Message of the error that the limiters reject dropped jobs with when they
 * are stopped from `start()`. Such errors are neither logged nor counted.
 */
const LIMITER_STOP_ERROR_MESSAGE = 'LimiterStopOnError';

/**
 * Counters accumulated over one `start()` run.
 *
 * @typedef {object} DownloadStats
 * @property {number} processedForumCount
 * @property {number} processedThreadCount
 * @property {number} processedMessageCount
 * @property {number} skippedExistingAttachmentCount
 * @property {number} downloadedAttachmentCount
 * @property {number} errorCount
 */

/**
 * Downloads threads (messages and their attachments) starting from a target
 * URL, which is dispatched as a thread, a forum or a generic page depending
 * on what `URLHelper.getTargetTypeByURL()` reports for it.
 */
export default class XenForoDownloader {
    /** Lazily created fetcher instance; see `getFetcher()`. */
    #fetcher;

    /**
     * @param {string} url - Target URL; passed to `getDownloaderConfig()`.
     * @param {object} [options] - Downloader options; passed to
     *   `getDownloaderConfig()`. `options.logger`, when present, receives the
     *   log output.
     */
    constructor(url, options) {
        this.name = 'XenForoDownloader';
        // The resolved config is deep-frozen so it cannot be changed mid-run.
        this.config = deepFreeze({
            ...getDownloaderConfig(url, options)
        });
        // Pages are fetched one at a time.
        this.pageFetchLimiter = new Bottleneck({
            maxConcurrent: 1,
            minTime: this.config.request.minTime.page
        });
        this.attachmentDownloadLimiter = new Bottleneck({
            maxConcurrent: this.config.request.maxConcurrent,
            minTime: this.config.request.minTime.attachment
        });
        this.logger = options?.logger;
        this.parser = new Parser(this.logger);
    }

    /**
     * Runs the download for the configured target URL and logs a summary of
     * the collected stats when finished, aborted or failed.
     *
     * Errors thrown by the download itself are caught here: on any error both
     * limiters are stopped and their waiting jobs dropped.
     *
     * @param {{ signal?: AbortSignal }} [params] - Optional; `signal` is
     *   forwarded to every fetch / download request.
     * @returns {Promise<void>}
     */
    async start(params) {
        /** @type {DownloadStats} */
        const stats = {
            processedForumCount: 0,
            processedThreadCount: 0,
            processedMessageCount: 0,
            skippedExistingAttachmentCount: 0,
            downloadedAttachmentCount: 0,
            errorCount: 0
        };
        try {
            // `params` may be omitted entirely; treat that as "no abort signal".
            await this.#process(this.config.targetURL, stats, params?.signal);
            this.log('info', 'Download complete');
        }
        catch (error) {
            const stopLimiters = () => Promise.all([
                this.pageFetchLimiter.stop({
                    dropErrorMessage: LIMITER_STOP_ERROR_MESSAGE,
                    dropWaitingJobs: true
                }),
                this.attachmentDownloadLimiter.stop({
                    dropErrorMessage: LIMITER_STOP_ERROR_MESSAGE,
                    dropWaitingJobs: true
                })
            ]);
            if (error instanceof AbortError) {
                this.log('info', 'Aborting...');
                await stopLimiters();
                this.log('info', 'Download aborted');
            }
            else {
                this.log('error', 'Unhandled error: ', error);
                this.#updateStatsOnError(error, stats);
                await stopLimiters();
            }
        }
        this.log('info', '--------------');
        this.log('info', 'Download stats');
        this.log('info', '--------------');
        this.log('info', `Processed forums: ${stats.processedForumCount}`);
        this.log('info', `Processed threads: ${stats.processedThreadCount}`);
        this.log('info', `Processed messages: ${stats.processedMessageCount}`);
        this.log('info', `Downloaded attachments: ${stats.downloadedAttachmentCount}`);
        this.log('info', `Skipped existing attachments: ${stats.skippedExistingAttachmentCount}`);
        this.log('info', `Errors: ${stats.errorCount}`);
    }

    /**
     * Logs through `commonLog()` under this downloader's name. The call is
     * dropped when any argument is the error produced by stopping a limiter.
     *
     * @param {string} level - Log level passed through to `commonLog()`.
     * @param {...*} msg - Message parts.
     */
    log(level, ...msg) {
        const hasLimiterStopError = msg.some((m) => m instanceof Error && m.message === LIMITER_STOP_ERROR_MESSAGE);
        if (hasLimiterStopError) {
            return;
        }
        commonLog(this.logger, level, this.name, ...msg);
    }

    /**
     * @returns {object} The deep-frozen downloader config.
     */
    getConfig() {
        return this.config;
    }

    /**
     * Returns the fetcher, creating it on first use via `Fetcher.getInstance()`
     * with the configured request cookie.
     *
     * @returns {Promise<Fetcher>}
     */
    async getFetcher() {
        if (!this.#fetcher) {
            this.#fetcher = await Fetcher.getInstance(this.logger, this.config.request.cookie);
        }
        return this.#fetcher;
    }

    /**
     * Increments `stats.errorCount` unless `error` is the error produced by
     * stopping a limiter.
     *
     * @param {*} error
     * @param {DownloadStats} stats
     */
    #updateStatsOnError(error, stats) {
        if (!(error instanceof Error) || error.message !== LIMITER_STOP_ERROR_MESSAGE) {
            stats.errorCount++;
        }
    }

    /**
     * Dispatches `url` to the thread, forum or generic handler according to
     * `URLHelper.getTargetTypeByURL()`.
     *
     * @param {string} url
     * @param {DownloadStats} stats
     * @param {AbortSignal} [signal]
     * @returns {Promise<void>}
     */
    async #process(url, stats, signal) {
        const targetType = URLHelper.getTargetTypeByURL(url);
        switch (targetType) {
            case 'thread':
                await this.#downloadThread(url, stats, signal);
                break;
            case 'forum':
                await this.#downloadForum(url, stats, signal);
                break;
            default:
                await this.#downloadGeneric(url, stats, signal);
        }
    }

    /**
     * Downloads one page of a thread (message file plus attachments) and then
     * recurses into the next page, if there is one.
     *
     * Non-continuable errors (see `#isErrorNonContinuable()`) are rethrown;
     * any other error is logged and counted, after which the next page (if
     * already known) is still attempted.
     *
     * @param {string} url - URL of the thread page.
     * @param {DownloadStats} stats
     * @param {AbortSignal} [signal]
     * @param {{ continued?: boolean, continueFromMessageID?: * }} [context] -
     *   Set on recursive calls. `continued` suppresses the previous-download
     *   check; `continueFromMessageID` is the last message saved by a previous
     *   run, up to and including which messages are skipped.
     * @returns {Promise<void>}
     */
    async #downloadThread(url, stats, signal, context) {
        let threadPage = null;
        this.log('info', `Fetching thread content from "${url}"`);
        try {
            const { html } = await this.#fetchPage(url, signal);
            threadPage = this.parser.parseThreadPage(html, url);
            if (threadPage) {
                this.log('info', `Fetched "${threadPage.title}" (page ${threadPage.currentPage} / ${threadPage.totalPages})`);
                if (!context?.continued && this.config.continue) {
                    try {
                        const prevDownload = this.#checkPreviousDownload(threadPage, this.#getThreadSavePath(threadPage));
                        if (!prevDownload) {
                            this.log('debug', `Previous download not found for "${threadPage.title}"`);
                        }
                        else {
                            this.log('info', 'Continuing from previous download');
                            this.log('debug', 'Previous download status:', prevDownload);
                            // Restart from the page recorded in the status file. Deliberately
                            // returned without `await`, so a rejection goes straight to our
                            // caller instead of being reported as a "checking previous
                            // download" failure by the catch below.
                            return this.#downloadThread(prevDownload.url, stats, signal, {
                                continued: true,
                                continueFromMessageID: prevDownload.messageID
                            });
                        }
                    }
                    catch (error) {
                        this.log('error', 'Error occurred while checking previous download:', error);
                        this.log('warn', 'Ignoring \'continue\' flag');
                    }
                }
                if (context?.continueFromMessageID) {
                    const i = threadPage.messages.findIndex((msg) => msg.id === context.continueFromMessageID);
                    if (i >= 0) {
                        // Drop everything up to and including the last saved message.
                        const removed = threadPage.messages.splice(0, i + 1);
                        this.log('debug', `Removed ${removed.length} previously downloaded messages from thread`);
                    }
                    if (threadPage.messages.length === 0) {
                        this.log('info', 'No new messages since previous download');
                    }
                }
                // Handle attachments without filenames (usually non-images)
                const attachmentsWithoutFilenames = threadPage.messages
                    .flatMap((msg) => msg.attachments.filter((attachment) => !attachment.filename));
                if (attachmentsWithoutFilenames.length > 0) {
                    this.log('debug', `${attachmentsWithoutFilenames.length} attachments do not have filenames - obtaining them by HEAD requests`);
                    const setAttachmentFilename = async (attachment) => {
                        try {
                            const fetcher = await this.getFetcher();
                            const filename = await fetcher.fetchFilenameByHeaders({
                                url: attachment.url,
                                maxRetries: this.config.request.maxRetries,
                                retryInterval: this.config.request.minTime.page,
                                signal
                            });
                            attachment.filename = filename || undefined;
                            this.log('debug', `Set filename of attachment #${attachment.id} to "${attachment.filename}"`);
                        }
                        catch (error) {
                            if (this.#isErrorNonContinuable(error)) {
                                throw error;
                            }
                            this.log('warn', 'Failed to obtain filename from headers:', error);
                        }
                    };
                    await Promise.all(attachmentsWithoutFilenames.map((attachment) => setAttachmentFilename(attachment)));
                }
                this.log('debug', 'Parsed thread page:', {
                    'Thread ID': threadPage.id,
                    Title: threadPage.title,
                    Page: `${threadPage.currentPage} / ${threadPage.totalPages}`,
                    Messages: threadPage.messages.length,
                    Attachments: threadPage.messages.reduce((c, m) => c + m.attachments.length, 0)
                });
                const threadSavePath = this.#getThreadSavePath(threadPage);
                this.log('info', `Save directory: "${threadSavePath}"`);
                const attachmentSavePath = this.config.dirStructure.attachments ?
                    path.resolve(threadSavePath, 'attachments') : threadSavePath;
                fse.ensureDirSync(threadSavePath);
                // Keep (append to) an existing message file only when resuming part-way
                // through this page; otherwise start the file afresh.
                const overwriteMessageFile = !(context?.continued && context?.continueFromMessageID);
                const messageFile = this.#createMessageFile(threadPage, threadSavePath, overwriteMessageFile);
                for (const message of threadPage.messages) {
                    const hasAttachments = message.attachments.length > 0;
                    if (hasAttachments) {
                        this.log('info', `Processing message ${message.index} - ${message.attachments.length} attachments to download`);
                    }
                    else {
                        this.log('info', `Processing message ${message.index}`);
                    }
                    if (hasAttachments) {
                        fse.ensureDirSync(attachmentSavePath);
                        await Promise.all(message.attachments.map((attachment) => this.#downloadMessageAttachment(attachment, attachmentSavePath, stats, signal)));
                    }
                    this.#saveMessage(message, messageFile);
                    stats.processedMessageCount++;
                    // Record progress after every message so a later run can resume here.
                    this.#saveDownloadStatus(threadPage, message, threadSavePath);
                }
            }
        }
        catch (error) {
            if (this.#isErrorNonContinuable(error)) {
                throw error;
            }
            this.log('error', error);
            this.#updateStatsOnError(error, stats);
        }
        if (threadPage?.nextURL) {
            this.log('info', 'Proceeding to next batch of messages');
            // Following pages must not trigger the previous-download check again.
            const nextContext = this.config.continue ? { continued: true } : undefined;
            await this.#downloadThread(threadPage.nextURL, stats, signal, nextContext);
        }
        else if (threadPage) {
            this.log('info', `Done downloading thread "${threadPage.title}"`);
            stats.processedThreadCount++;
        }
    }

    /**
     * Downloads every thread listed on one forum page, then recurses into the
     * next page; after the last page, recurses into each subforum listed on
     * that page.
     *
     * Non-continuable errors are rethrown; other errors are logged and counted.
     *
     * @param {string} url - URL of the forum page.
     * @param {DownloadStats} stats
     * @param {AbortSignal} [signal]
     * @returns {Promise<void>}
     */
    async #downloadForum(url, stats, signal) {
        let forumPage = null;
        this.log('info', `Fetching forum content from "${url}"`);
        try {
            const { html } = await this.#fetchPage(url, signal);
            forumPage = this.parser.parseForumPage(html, url);
            if (forumPage) {
                this.log('info', `Fetched "${forumPage.title}" (page ${forumPage.currentPage} / ${forumPage.totalPages})`);
                this.log('debug', 'Parsed forum page:', {
                    'Forum ID': forumPage.id,
                    Title: forumPage.title,
                    Page: `${forumPage.currentPage} / ${forumPage.totalPages}`,
                    Subforums: forumPage.subforums.length,
                    Threads: forumPage.threads.length
                });
                // Download threads
                if (forumPage.threads.length > 0) {
                    this.log('info', `This page has ${forumPage.threads.length} threads`);
                    for (const thread of forumPage.threads) {
                        await this.#process(thread.url, stats, signal);
                    }
                }
            }
        }
        catch (error) {
            if (this.#isErrorNonContinuable(error)) {
                throw error;
            }
            this.log('error', error);
            this.#updateStatsOnError(error, stats);
        }
        if (forumPage?.nextURL) {
            this.log('info', 'Proceeding to next batch of threads');
            await this.#downloadForum(forumPage.nextURL, stats, signal);
        }
        else if (forumPage) {
            this.log('info', `All threads in "${forumPage.title}" downloaded.`);
            if (forumPage.subforums.length > 0) {
                this.log('info', `Now proceeding to subforums (total ${forumPage.subforums.length})`);
                for (const subforum of forumPage.subforums) {
                    this.log('info', `Processing "${subforum.title}"`);
                    await this.#downloadForum(subforum.url, stats, signal);
                }
            }
            stats.processedForumCount++;
        }
    }

    /**
     * Fetches a page that is neither a thread nor a forum and processes every
     * forum link the parser finds on it.
     *
     * Non-continuable errors are rethrown; other errors are logged and counted.
     *
     * @param {string} url
     * @param {DownloadStats} stats
     * @param {AbortSignal} [signal]
     * @returns {Promise<void>}
     */
    async #downloadGeneric(url, stats, signal) {
        this.log('info', `Fetching "${url}"`);
        try {
            const { html } = await this.#fetchPage(url, signal);
            const page = this.parser.parseGenericPage(html, url);
            if (page && page.forums.length > 0) {
                this.log('info', `Found ${page.forums.length} forums on page`);
                for (const forum of page.forums) {
                    await this.#process(forum.url, stats, signal);
                }
            }
        }
        catch (error) {
            if (this.#isErrorNonContinuable(error)) {
                throw error;
            }
            this.log('error', error);
            this.#updateStatsOnError(error, stats);
        }
    }

    /**
     * Schedules an HTML fetch of `url` on the page limiter.
     *
     * @param {string} url
     * @param {AbortSignal} [signal]
     * @returns {Promise<{ html: string }>} Result of `fetcher.fetchHTML()`;
     *   callers read its `html` property.
     */
    async #fetchPage(url, signal) {
        const fetcher = await this.getFetcher();
        return this.pageFetchLimiter.schedule(() => {
            this.log('debug', `Fetch page "${url}"`);
            return fetcher.fetchHTML({
                url,
                maxRetries: this.config.request.maxRetries,
                retryInterval: this.config.request.minTime.page,
                signal
            });
        });
    }

    /**
     * @param {*} error
     * @returns {boolean} True for an `AbortError` or a `FetcherError` whose
     *   `fatal` flag is set; such errors must stop the whole download.
     */
    #isErrorNonContinuable(error) {
        return error instanceof AbortError || (error instanceof FetcherError && error.fatal);
    }

    /**
     * Builds the absolute directory a thread is saved to, from the thread's
     * breadcrumbs and the `dirStructure` config flags, rooted at
     * `config.outDir`. Every path part is passed through `sanitizeFilename()`.
     *
     * @param {object} thread - Parsed thread page; `breadcrumbs`, `title` and
     *   `id` are read.
     * @returns {string}
     */
    #getThreadSavePath(thread) {
        const pathParts = [];
        const pushPathPart = (title, id) => {
            pathParts.push(sanitizeFilename(id ? `${title}.${id}` : title));
        };
        const pushForumCrumb = (crumb) => {
            const id = URLHelper.parseForumURL(crumb.url)?.id;
            pushPathPart(crumb.title, id);
        };
        const { dirStructure } = this.config;
        // The first breadcrumb supplies the site-level directory.
        if (dirStructure.site && thread.breadcrumbs[0]?.title) {
            pushPathPart(thread.breadcrumbs[0].title);
        }
        if (thread.breadcrumbs.length > 1) {
            if (dirStructure.parentForumsAndSections === 'all') {
                thread.breadcrumbs.forEach((crumb, i) => {
                    if (i > 0 && crumb.title) {
                        pushForumCrumb(crumb);
                    }
                });
            }
            else if (dirStructure.parentForumsAndSections === 'immediate') {
                const crumb = thread.breadcrumbs.at(-1);
                if (crumb?.title) {
                    pushForumCrumb(crumb);
                }
            }
        }
        if (dirStructure.thread && thread.title) {
            pushPathPart(thread.title, thread.id);
        }
        return path.resolve(this.config.outDir, pathParts.join(path.sep));
    }

    /**
     * Downloads one attachment into `destDir` through the attachment limiter.
     * An existing destination file is skipped unless `config.overwrite` is
     * set. Non-continuable errors are rethrown; others are logged and counted.
     *
     * @param {object} attachment - `url`, `id`, `index` and `filename` are read.
     * @param {string} destDir
     * @param {DownloadStats} stats
     * @param {AbortSignal} [signal]
     * @returns {Promise<void>}
     */
    async #downloadMessageAttachment(attachment, destDir, stats, signal) {
        const filename = this.#getMessageAttachmentFilename(attachment);
        const destPath = path.resolve(destDir, filename);
        if (existsSync(destPath) && !this.config.overwrite) {
            this.log('info', `Skipped existing "${filename}"`);
            stats.skippedExistingAttachmentCount++;
            return;
        }
        try {
            const fetcher = await this.getFetcher();
            await this.attachmentDownloadLimiter.schedule(() => fetcher.downloadAttachment({
                src: attachment.url,
                dest: destPath,
                maxRetries: this.config.request.maxRetries,
                retryInterval: this.config.request.minTime.attachment,
                signal
            }));
            this.log('info', `Downloaded "${filename}"`);
            stats.downloadedAttachmentCount++;
        }
        catch (error) {
            if (this.#isErrorNonContinuable(error)) {
                throw error;
            }
            this.log('error', `Error downloading "${filename}" from "${attachment.url}": `, error);
            this.#updateStatsOnError(error, stats);
        }
    }

    /**
     * @param {object} attachment
     * @returns {string} Sanitized on-disk filename: based on the attachment's
     *   id and filename, or on its id and index when it has no filename.
     */
    #getMessageAttachmentFilename(attachment) {
        if (attachment.filename) {
            return sanitizeFilename(`attach-${attachment.id} - ${attachment.filename}`);
        }
        return sanitizeFilename(`attach-${attachment.id}-${attachment.index}`);
    }

    /**
     * Ensures the message file for a thread page exists and returns its path.
     * The file is (re)written with the thread header unless it already exists
     * and `overwrite` is false.
     *
     * @param {object} threadPage
     * @param {string} destDir
     * @param {boolean} [overwrite=false]
     * @returns {string} Absolute path of the message file.
     */
    #createMessageFile(threadPage, destDir, overwrite = false) {
        const filename = sanitizeFilename(`messages-${threadPage.id}-p${threadPage.currentPage} - ${threadPage.title}.txt`);
        const destPath = path.resolve(destDir, filename);
        if (!overwrite && fse.existsSync(destPath)) {
            return destPath;
        }
        const threadHeader = ThreadHeaderTemplate.format(threadPage);
        fse.writeFileSync(destPath, threadHeader);
        this.log('info', `Created message file "${destPath}"`);
        return destPath;
    }

    /**
     * Appends the formatted message to `file`. Attachments are written with
     * the same filenames they are saved under on disk.
     *
     * @param {object} message
     * @param {string} file - Path of the message file.
     */
    #saveMessage(message, file) {
        if (!message) {
            return;
        }
        const attachments = message.attachments.map((attachment) => ({
            ...attachment,
            filename: this.#getMessageAttachmentFilename(attachment)
        }));
        const messageOut = MessageTemplate.format({ ...message, attachments });
        fse.appendFileSync(file, messageOut);
        this.log('info', `Saved message ${message.index} to "${path.parse(file).base}"`);
    }

    /**
     * @param {object} thread
     * @param {string} threadSavePath
     * @returns {string} Absolute path of the thread's download-status file.
     */
    #getDownloadStatusFilePath(thread, threadSavePath) {
        const filename = sanitizeFilename(`.dl-status-${thread.id}`);
        return path.resolve(threadSavePath, filename);
    }

    /**
     * Reads the download-status file left for this thread, if any.
     *
     * @param {object} thread
     * @param {string} threadSavePath
     * @returns {{ threadID: *, url: string, messageID: * } | false} The saved
     *   status, or `false` when there is no status file.
     * @throws {Error} When the file exists but lacks `threadID`, `url` or
     *   `messageID` (or cannot be read as JSON).
     */
    #checkPreviousDownload(thread, threadSavePath) {
        const file = this.#getDownloadStatusFilePath(thread, threadSavePath);
        if (!fse.existsSync(file)) {
            return false;
        }
        const json = fse.readJSONSync(file);
        if (!json.threadID || !json.url || !json.messageID) {
            throw new Error(`Failed to read previous download status from "${file}": invalid format`);
        }
        return json;
    }

    /**
     * Writes the download-status file recording the last saved message.
     * Best-effort: a write failure is logged, not thrown.
     *
     * @param {object} thread - Thread page the message belongs to.
     * @param {object} lastSavedMessage
     * @param {string} threadSavePath
     */
    #saveDownloadStatus(thread, lastSavedMessage, threadSavePath) {
        const status = {
            threadID: thread.id,
            url: thread.url,
            messageID: lastSavedMessage.id
        };
        const file = this.#getDownloadStatusFilePath(thread, threadSavePath);
        try {
            fse.writeJSONSync(file, status);
            this.log('debug', `Saved download status to "${file}"`);
        }
        catch (error) {
            this.log('error', `Failed to save download status to "${file}"`, error);
        }
    }
}
//# sourceMappingURL=XenForoDownloader.js.map