xenforo-dl
Version:
XenForo Forum Downloader
433 lines • 24.5 kB
JavaScript
/**
 * Reads an emulated private class member.
 *
 * @param receiver - object the member is read from
 * @param state - a collection exposing `has`/`get` that tracks the declaring
 *   instances, or the class constructor itself (any function) for the static form
 * @param kind - "m" returns `f` itself, "a" calls `f` as a getter on
 *   `receiver`, anything else reads a field
 * @param f - the method / getter, or a `{ value }` holder for the static form
 * @throws {TypeError} when an accessor has no getter, or when `receiver` was
 *   not registered with `state`
 */
var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (receiver, state, kind, f) {
    const isAccessor = kind === "a";
    if (isAccessor && !f) {
        throw new TypeError("Private accessor was defined without a getter");
    }
    // Function state: receiver must be that exact function and `f` must be supplied.
    // Otherwise: receiver must be present in the collection.
    const isDeclared = typeof state === "function"
        ? receiver === state && Boolean(f)
        : state.has(receiver);
    if (!isDeclared) {
        throw new TypeError("Cannot read private member from an object whose class did not declare it");
    }
    if (kind === "m") {
        return f;
    }
    if (isAccessor) {
        return f.call(receiver);
    }
    if (f) {
        return f.value;
    }
    return state.get(receiver);
};
/**
 * Writes an emulated private class member and returns the written value.
 *
 * @param receiver - object the member is written to
 * @param state - a collection exposing `has`/`set` that tracks the declaring
 *   instances, or the class constructor itself (any function) for the static form
 * @param value - value to store
 * @param kind - "m" always throws, "a" calls `f` as a setter on `receiver`,
 *   anything else writes a field
 * @param f - the setter, or a `{ value }` holder for the static form
 * @throws {TypeError} for methods, for accessors without a setter, or when
 *   `receiver` was not registered with `state`
 */
var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
    if (kind === "m") {
        throw new TypeError("Private method is not writable");
    }
    const viaAccessor = kind === "a";
    if (viaAccessor && !f) {
        throw new TypeError("Private accessor was defined without a setter");
    }
    // Function state: receiver must be that exact function and `f` must be supplied.
    // Otherwise: receiver must be present in the collection.
    const isDeclared = typeof state === "function"
        ? receiver === state && Boolean(f)
        : state.has(receiver);
    if (!isDeclared) {
        throw new TypeError("Cannot write private member to an object whose class did not declare it");
    }
    if (viaAccessor) {
        f.call(receiver, value);
    }
    else if (f) {
        f.value = value;
    }
    else {
        state.set(receiver, value);
    }
    return value;
};
// Module-scoped handles for XenForoDownloader's emulated private members.
// They are only declared here; every one of them is assigned in the statement that follows the class body.
var _XenForoDownloader_instances, _XenForoDownloader_fetcher, _XenForoDownloader_updateStatsOnError, _XenForoDownloader_process, _XenForoDownloader_downloadThread, _XenForoDownloader_downloadForum, _XenForoDownloader_downloadGeneric, _XenForoDownloader_fetchPage, _XenForoDownloader_isErrorNonContinuable, _XenForoDownloader_getThreadSavePath, _XenForoDownloader_downloadMessageAttachment, _XenForoDownloader_getMessageAttachmentFilename, _XenForoDownloader_createMessageFile, _XenForoDownloader_saveMessage, _XenForoDownloader_getDownloadStatusFilePath, _XenForoDownloader_checkPreviousDownload, _XenForoDownloader_saveDownloadStatus;
import deepFreeze from 'deep-freeze';
import { getDownloaderConfig } from './DownloaderOptions.js';
import Fetcher, { FetcherError } from './utils/Fetcher.js';
import { commonLog } from './utils/logging/Logger.js';
import URLHelper from './utils/URLHelper.js';
import Bottleneck from 'bottleneck';
import { AbortError } from 'node-fetch';
import path from 'path';
import sanitizeFilename from 'sanitize-filename';
import { existsSync } from 'fs';
import fse from 'fs-extra';
import Parser from './parsers/Parser.js';
import MessageTemplate from './templates/Message.js';
import ThreadHeaderTemplate from './templates/Thread.js';
/**
 * Downloads content starting from a target URL: a single thread, a forum
 * (its threads and subforums), or the forums linked from a generic page.
 *
 * The private methods reached through the `_XenForoDownloader_*` handles are
 * defined after the class body in this file.
 */
export default class XenForoDownloader {
    /**
     * @param url - target URL; forwarded to `getDownloaderConfig`
     * @param options - downloader options; forwarded to `getDownloaderConfig`.
     *   `options.logger`, when present, receives all log output.
     */
    constructor(url, options) {
        _XenForoDownloader_instances.add(this);
        this.name = 'XenForoDownloader';
        _XenForoDownloader_fetcher.set(this, void 0);
        this.config = deepFreeze({
            ...getDownloaderConfig(url, options)
        });
        // Pages are fetched one at a time.
        this.pageFetchLimiter = new Bottleneck({
            maxConcurrent: 1,
            minTime: this.config.request.minTime.page
        });
        // Attachments may be downloaded concurrently, up to the configured limit.
        this.attachmentDownloadLimiter = new Bottleneck({
            maxConcurrent: this.config.request.maxConcurrent,
            minTime: this.config.request.minTime.attachment
        });
        this.logger = options?.logger;
        this.parser = new Parser(this.logger);
    }
    /**
     * Processes `config.targetURL` and then logs a summary of the collected
     * stats. Errors raised while processing are caught here and logged; in
     * that case both limiters are stopped and their waiting jobs dropped.
     *
     * @param [params] - optional; `params.signal` (an abort signal) is passed
     *   on to the processing methods. Omitting `params` runs without one.
     * @returns {Promise<void>}
     */
    async start(params) {
        const stats = {
            processedForumCount: 0,
            processedThreadCount: 0,
            processedMessageCount: 0,
            skippedExistingAttachmentCount: 0,
            downloadedAttachmentCount: 0,
            errorCount: 0
        };
        try {
            // `params` is optional: reading `.signal` off `undefined` would throw here,
            // be reported as an unhandled error and stop the limiters before any work is done.
            await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_process).call(this, this.config.targetURL, stats, params?.signal);
            this.log('info', 'Download complete');
        }
        catch (error) {
            // Jobs dropped by stop() fail with an Error whose message is
            // 'LimiterStopOnError'; log() filters those out.
            const stopLimiters = () => Promise.all([
                this.pageFetchLimiter.stop({
                    dropErrorMessage: 'LimiterStopOnError',
                    dropWaitingJobs: true
                }),
                this.attachmentDownloadLimiter.stop({
                    dropErrorMessage: 'LimiterStopOnError',
                    dropWaitingJobs: true
                })
            ]);
            if (error instanceof AbortError) {
                this.log('info', 'Aborting...');
                await stopLimiters();
                this.log('info', 'Download aborted');
            }
            else {
                this.log('error', 'Unhandled error: ', error);
                __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_updateStatsOnError).call(this, error, stats);
                await stopLimiters();
            }
        }
        this.log('info', '--------------');
        this.log('info', 'Download stats');
        this.log('info', '--------------');
        this.log('info', `Processed forums: ${stats.processedForumCount}`);
        this.log('info', `Processed threads: ${stats.processedThreadCount}`);
        this.log('info', `Processed messages: ${stats.processedMessageCount}`);
        this.log('info', `Downloaded attachments: ${stats.downloadedAttachmentCount}`);
        this.log('info', `Skipped existing attachments: ${stats.skippedExistingAttachmentCount}`);
        this.log('info', `Errors: ${stats.errorCount}`);
    }
    /**
     * Forwards a message to `commonLog` under this downloader's name.
     * Calls whose arguments include an Error with the message
     * 'LimiterStopOnError' are dropped entirely.
     *
     * @param level - log level passed through to `commonLog`
     * @param msg - message parts
     */
    log(level, ...msg) {
        const isLimiterStopNoise = msg.some((part) => part instanceof Error && part.message === 'LimiterStopOnError');
        if (isLimiterStopNoise) {
            return;
        }
        commonLog(this.logger, level, this.name, ...msg);
    }
    /**
     * @returns the deep-frozen configuration built in the constructor
     */
    getConfig() {
        return this.config;
    }
    /**
     * Returns the Fetcher for this downloader, creating it on first use with
     * the configured logger and request cookie.
     */
    async getFetcher() {
        if (!__classPrivateFieldGet(this, _XenForoDownloader_fetcher, "f")) {
            __classPrivateFieldSet(this, _XenForoDownloader_fetcher, await Fetcher.getInstance(this.logger, this.config.request.cookie), "f");
        }
        return __classPrivateFieldGet(this, _XenForoDownloader_fetcher, "f");
    }
}
// Backing stores for the emulated private members (a WeakMap for the fetcher field, a WeakSet
// marking instances that own the private methods), followed by the private method implementations.
_XenForoDownloader_fetcher = new WeakMap(), _XenForoDownloader_instances = new WeakSet(), _XenForoDownloader_updateStatsOnError = function _XenForoDownloader_updateStatsOnError(error, stats) {
// Increments stats.errorCount, except for Errors whose message is 'LimiterStopOnError'
// (the message start() asks the limiters to use for dropped jobs).
if (!(error instanceof Error) || error.message !== 'LimiterStopOnError') {
stats.errorCount++;
}
}, _XenForoDownloader_process = async function _XenForoDownloader_process(url, stats, signal) {
// Routes `url` by the target type URLHelper reports: 'thread', 'forum', or anything else (generic page).
const targetType = URLHelper.getTargetTypeByURL(url);
switch (targetType) {
case 'thread':
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadThread).call(this, url, stats, signal);
break;
case 'forum':
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadForum).call(this, url, stats, signal);
break;
default:
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadGeneric).call(this, url, stats, signal);
}
}, _XenForoDownloader_downloadThread = async function _XenForoDownloader_downloadThread(url, stats, signal, context) {
// Fetches and parses one page of a thread, saves its messages and attachments,
// then recurses into threadPage.nextURL until there is none.
// `context` is optional: { continued, continueFromMessageID }. It is set when resuming from a
// previous download, and ({ continued: true } only) when moving to later pages with config.continue on.
let threadPage = null;
this.log('info', `Fetching thread content from "${url}"`);
try {
const { html } = await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_fetchPage).call(this, url, signal);
threadPage = this.parser.parseThreadPage(html, url);
if (threadPage) {
this.log('info', `Fetched "${threadPage.title}" (page ${threadPage.currentPage} / ${threadPage.totalPages})`);
// Only a call without context.continued looks for the status file of a previous download.
if (!context?.continued && this.config.continue) {
try {
const prevDownload = __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_checkPreviousDownload).call(this, threadPage, __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_getThreadSavePath).call(this, threadPage));
if (!prevDownload) {
this.log('debug', `Previous download not found for "${threadPage.title}"`);
}
else {
this.log('info', 'Continuing from previous download');
this.log('debug', 'Previous download status:', prevDownload);
// Restart at the page recorded in the status file. The promise is returned without
// `await`, so a rejection from the resumed download is not handled by either
// enclosing catch block - it propagates to the caller.
return __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadThread).call(this, prevDownload.url, stats, signal, { continued: true, continueFromMessageID: prevDownload.messageID });
}
}
catch (error) {
// A failed status check is not fatal: fall through and download this page normally.
this.log('error', 'Error occurred while checking previous download:', error);
this.log('warn', 'Ignoring \'continue\' flag');
}
}
// When resuming, drop every message up to and including the last one saved previously.
// If that message is not on this page, nothing is removed.
if (context?.continueFromMessageID) {
const i = threadPage.messages.findIndex((msg) => msg.id === context.continueFromMessageID);
if (i >= 0) {
const removed = threadPage.messages.splice(0, i + 1);
this.log('debug', `Removed ${removed.length} previously downloaded messages from thread`);
}
if (threadPage.messages.length === 0) {
this.log('info', 'No new messages since previous download');
}
}
// Handle attachments without filenames (usually non-images)
const attachmentsWithoutFilenames = threadPage.messages
.reduce((result, msg) => {
const filtered = msg.attachments.filter((attachment) => !attachment.filename);
result.push(...filtered);
return result;
}, []);
if (attachmentsWithoutFilenames.length > 0) {
this.log('debug', `${attachmentsWithoutFilenames.length} attachments do not have filenames - obtaining them by HEAD requests`);
// Fills in attachment.filename in place. Non-continuable errors are rethrown;
// other failures only produce a warning and leave the filename unset.
const __setAttachmentFilename = async (attachment) => {
try {
const filename = await (await this.getFetcher()).fetchFilenameByHeaders({
url: attachment.url,
maxRetries: this.config.request.maxRetries,
retryInterval: this.config.request.minTime.page,
signal
});
attachment.filename = filename || undefined;
this.log('debug', `Set filename of attachment #${attachment.id} to "${attachment.filename}"`);
}
catch (error) {
if (__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_isErrorNonContinuable).call(this, error)) {
throw error;
}
this.log('warn', 'Failed to obtain filename from headers:', error);
}
};
// These lookups are started together and are not scheduled through either limiter.
await Promise.all(attachmentsWithoutFilenames.map((attachment) => __setAttachmentFilename(attachment)));
}
this.log('debug', 'Parsed thread page:', {
'Thread ID': threadPage.id,
Title: threadPage.title,
Page: `${threadPage.currentPage} / ${threadPage.totalPages}`,
Messages: threadPage.messages.length,
Attachments: threadPage.messages.reduce((c, m) => c + m.attachments.length, 0)
});
const threadSavePath = __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_getThreadSavePath).call(this, threadPage);
this.log('info', `Save directory: "${threadSavePath}"`);
const attachmentSavePath = this.config.dirStructure.attachments ? path.resolve(threadSavePath, 'attachments') : threadSavePath;
fse.ensureDirSync(threadSavePath);
// Third argument is `overwrite`: it is false only when resuming within a page (continued
// with a continueFromMessageID), so an existing message file is kept and appended to.
const messageFile = __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_createMessageFile).call(this, threadPage, threadSavePath, !(context?.continued && context?.continueFromMessageID));
if (threadPage.messages.length > 0) {
for (const message of threadPage.messages) {
const hasAttachments = message.attachments.length > 0;
if (hasAttachments) {
this.log('info', `Processing message ${message.index} - ${message.attachments.length} attachments to download`);
}
else {
this.log('info', `Processing message ${message.index}`);
}
if (hasAttachments) {
fse.ensureDirSync(attachmentSavePath);
// All attachments of a message are awaited before the message itself is saved.
await Promise.all(message.attachments.map((attachment) => __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadMessageAttachment).call(this, attachment, attachmentSavePath, stats, signal)));
}
__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_saveMessage).call(this, message, messageFile);
stats.processedMessageCount++;
// Status is written after every saved message; it is what the 'continue' check above reads back.
__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_saveDownloadStatus).call(this, threadPage, message, threadSavePath);
}
}
}
}
catch (error) {
// Abort / fatal fetch errors propagate; anything else is logged and counted, and processing moves on.
if (__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_isErrorNonContinuable).call(this, error)) {
throw error;
}
this.log('error', error);
__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_updateStatsOnError).call(this, error, stats);
}
// Reached even when the page failed part-way, as long as it had been parsed.
if (threadPage?.nextURL) {
this.log('info', 'Proceeding to next batch of messages');
const context = this.config.continue ? { continued: true } : undefined;
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadThread).call(this, threadPage.nextURL, stats, signal, context);
}
else if (threadPage) {
// No next page: the thread is counted as processed.
this.log('info', `Done downloading thread "${threadPage.title}"`);
stats.processedThreadCount++;
}
}, _XenForoDownloader_downloadForum = async function _XenForoDownloader_downloadForum(url, stats, signal) {
// Fetches and parses one forum page and processes its threads one after another, then recurses
// into the next page. On the page without a nextURL it recurses into that page's subforums
// and counts the forum as processed.
let forumPage = null;
this.log('info', `Fetching forum content from "${url}"`);
try {
const { html } = await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_fetchPage).call(this, url, signal);
forumPage = this.parser.parseForumPage(html, url);
if (forumPage) {
this.log('info', `Fetched "${forumPage.title}" (page ${forumPage.currentPage} / ${forumPage.totalPages})`);
this.log('debug', 'Parsed forum page:', {
'Forum ID': forumPage.id,
Title: forumPage.title,
Page: `${forumPage.currentPage} / ${forumPage.totalPages}`,
Subforums: forumPage.subforums.length,
Threads: forumPage.threads.length
});
// Download threads
if (forumPage.threads.length > 0) {
this.log('info', `This page has ${forumPage.threads.length} threads`);
for (const thread of forumPage.threads) {
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_process).call(this, thread.url, stats, signal);
}
}
}
}
catch (error) {
// Same policy as for threads: rethrow non-continuable errors, log and count the rest.
if (__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_isErrorNonContinuable).call(this, error)) {
throw error;
}
this.log('error', error);
__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_updateStatsOnError).call(this, error, stats);
}
if (forumPage?.nextURL) {
this.log('info', 'Proceeding to next batch of threads');
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadForum).call(this, forumPage.nextURL, stats, signal);
}
else if (forumPage) {
this.log('info', `All threads in "${forumPage.title}" downloaded.`);
if (forumPage.subforums.length > 0) {
this.log('info', `Now proceeding to subforums (total ${forumPage.subforums.length})`);
for (const subforum of forumPage.subforums) {
this.log('info', `Processing "${subforum.title}"`);
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_downloadForum).call(this, subforum.url, stats, signal);
}
}
stats.processedForumCount++;
}
}, _XenForoDownloader_downloadGeneric = async function _XenForoDownloader_downloadGeneric(url, stats, signal) {
// Fetches a page that is neither a thread nor a forum and processes every forum the parser finds on it.
this.log('info', `Fetching "${url}"`);
try {
const { html } = await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_fetchPage).call(this, url, signal);
const page = this.parser.parseGenericPage(html, url);
if (page) {
if (page.forums.length > 0) {
this.log('info', `Found ${page.forums.length} forums on page`);
for (const forum of page.forums) {
await __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_process).call(this, forum.url, stats, signal);
}
}
}
}
catch (error) {
if (__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_isErrorNonContinuable).call(this, error)) {
throw error;
}
this.log('error', error);
__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_updateStatsOnError).call(this, error, stats);
}
}, _XenForoDownloader_fetchPage = async function _XenForoDownloader_fetchPage(url, signal) {
// Fetches `url` via fetcher.fetchHTML, scheduled through pageFetchLimiter.
// Callers destructure `html` from the resolved value.
const fetcher = await this.getFetcher();
return this.pageFetchLimiter.schedule(() => {
this.log('debug', `Fetch page "${url}"`);
return fetcher.fetchHTML({
url,
maxRetries: this.config.request.maxRetries,
retryInterval: this.config.request.minTime.page,
signal
});
});
}, _XenForoDownloader_isErrorNonContinuable = function _XenForoDownloader_isErrorNonContinuable(error) {
// True for errors that callers rethrow instead of logging: an AbortError, or a FetcherError flagged `fatal`.
return error instanceof AbortError || (error instanceof FetcherError && error.fatal);
}, _XenForoDownloader_getThreadSavePath = function _XenForoDownloader_getThreadSavePath(thread) {
// Builds the directory a thread is saved into: config.outDir plus path segments chosen by
// config.dirStructure from the thread's breadcrumbs and title.
const pathParts = [];
// Appends "<title>.<id>" (or just "<title>" when no id is given) as a sanitized segment.
const __pushPathPart = (title, id) => {
if (id) {
pathParts.push(sanitizeFilename(`${title}.${id}`));
}
else {
pathParts.push(sanitizeFilename(title));
}
};
// The first breadcrumb supplies the site-level directory.
if (this.config.dirStructure.site && thread.breadcrumbs[0]?.title) {
__pushPathPart(thread.breadcrumbs[0].title);
}
if (thread.breadcrumbs.length > 1) {
// 'all': one segment per breadcrumb after the first; 'immediate': only the last breadcrumb.
if (this.config.dirStructure.parentForumsAndSections === 'all') {
thread.breadcrumbs.forEach((crumb, i) => {
if (i > 0 && crumb.title) {
const id = URLHelper.parseForumURL(crumb.url)?.id;
__pushPathPart(crumb.title, id);
}
});
}
else if (this.config.dirStructure.parentForumsAndSections === 'immediate') {
const crumb = thread.breadcrumbs.at(-1);
if (crumb?.title) {
const id = URLHelper.parseForumURL(crumb.url)?.id;
__pushPathPart(crumb.title, id);
}
}
}
if (this.config.dirStructure.thread && thread.title) {
__pushPathPart(thread.title, thread.id);
}
return path.resolve(this.config.outDir, pathParts.join(path.sep));
}, _XenForoDownloader_downloadMessageAttachment = async function _XenForoDownloader_downloadMessageAttachment(attachment, destDir, stats, signal) {
// Downloads one attachment into destDir through attachmentDownloadLimiter and updates stats.
// A file that already exists is skipped unless config.overwrite is set.
const filename = __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_getMessageAttachmentFilename).call(this, attachment);
const destPath = path.resolve(destDir, filename);
if (existsSync(destPath) && !this.config.overwrite) {
this.log('info', `Skipped existing "${filename}"`);
stats.skippedExistingAttachmentCount++;
return Promise.resolve();
}
try {
const fetcher = await this.getFetcher();
await this.attachmentDownloadLimiter.schedule(() => fetcher.downloadAttachment({
src: attachment.url,
dest: destPath,
maxRetries: this.config.request.maxRetries,
retryInterval: this.config.request.minTime.attachment,
signal
}));
this.log('info', `Downloaded "${filename}"`);
stats.downloadedAttachmentCount++;
}
catch (error) {
// Non-continuable errors abort the whole run; other failures are logged and counted per attachment.
if (__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_isErrorNonContinuable).call(this, error)) {
throw error;
}
this.log('error', `Error downloading "${filename}" from "${attachment.url}": `, error);
__classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_updateStatsOnError).call(this, error, stats);
}
}, _XenForoDownloader_getMessageAttachmentFilename = function _XenForoDownloader_getMessageAttachmentFilename(attachment) {
// Local filename for an attachment: "attach-<id> - <filename>" when a filename is known,
// otherwise "attach-<id>-<index>"; sanitized either way.
if (attachment.filename) {
return sanitizeFilename(`attach-${attachment.id} - ${attachment.filename}`);
}
return sanitizeFilename(`attach-${attachment.id}-${attachment.index}`);
}, _XenForoDownloader_createMessageFile = function _XenForoDownloader_createMessageFile(threadPage, destDir, overwrite = false) {
// Returns the path of the message file for this thread page inside destDir.
// An existing file is left untouched when `overwrite` is false; otherwise the file is
// (re)written containing only the formatted thread header.
const filename = sanitizeFilename(`messages-${threadPage.id}-p${threadPage.currentPage} - ${threadPage.title}.txt`);
const destPath = path.resolve(destDir, filename);
if (!overwrite && fse.existsSync(destPath)) {
return destPath;
}
const threadHeader = ThreadHeaderTemplate.format(threadPage);
fse.writeFileSync(destPath, threadHeader);
this.log('info', `Created message file "${destPath}"`);
return destPath;
}, _XenForoDownloader_saveMessage = function _XenForoDownloader_saveMessage(message, file) {
// Appends the formatted message to `file`. Attachment filenames are replaced by the
// local names produced by getMessageAttachmentFilename before formatting.
if (!message) {
return;
}
const attachments = message.attachments.map((attachment) => {
const filename = __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_getMessageAttachmentFilename).call(this, attachment);
return { ...attachment, filename };
});
const messageOut = MessageTemplate.format({ ...message, attachments });
fse.appendFileSync(file, messageOut);
this.log('info', `Saved message ${message.index} to "${path.parse(file).base}"`);
}, _XenForoDownloader_getDownloadStatusFilePath = function _XenForoDownloader_getDownloadStatusFilePath(thread, threadSavePath) {
// Path of the ".dl-status-<thread id>" file (name passed through sanitizeFilename) inside threadSavePath.
const filename = sanitizeFilename(`.dl-status-${thread.id}`);
return path.resolve(threadSavePath, filename);
}, _XenForoDownloader_checkPreviousDownload = function _XenForoDownloader_checkPreviousDownload(thread, threadSavePath) {
// Returns the parsed status file of a previous download, or false when no status file exists.
// Throws when the file lacks a truthy threadID, url or messageID.
const file = __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_getDownloadStatusFilePath).call(this, thread, threadSavePath);
if (fse.existsSync(file)) {
const json = fse.readJSONSync(file);
if (!json.threadID || !json.url || !json.messageID) {
throw Error(`Failed to read previous download status from "${file}": invalid format`);
}
return json;
}
return false;
}, _XenForoDownloader_saveDownloadStatus = function _XenForoDownloader_saveDownloadStatus(thread, lastSavedMessage, threadSavePath) {
// Writes { threadID, url, messageID } for the last saved message to the status file.
// A write failure is logged and not rethrown.
const status = {
threadID: thread.id,
url: thread.url,
messageID: lastSavedMessage.id
};
const file = __classPrivateFieldGet(this, _XenForoDownloader_instances, "m", _XenForoDownloader_getDownloadStatusFilePath).call(this, thread, threadSavePath);
try {
fse.writeJSONSync(file, status);
this.log('debug', `Saved download status to "${file}"`);
}
catch (error) {
this.log('error', `Failed to save download status to "${file}"`, error);
}
};
//# sourceMappingURL=XenForoDownloader.js.map