UNPKG

rsshub

Version:
135 lines (121 loc) 6.51 kB
import { load } from 'cheerio'; import getIllustDetail from '../../api/get-illust-detail'; import pixivUtils from '../../utils'; import logger from '@/utils/logger'; export function convertPixivProtocolExtended(caption: string): string { const protocolMap = new Map([ [/pixiv:\/\/novels\/(\d+)/g, 'https://www.pixiv.net/novel/show.php?id=$1'], [/pixiv:\/\/illusts\/(\d+)/g, 'https://www.pixiv.net/artworks/$1'], [/pixiv:\/\/users\/(\d+)/g, 'https://www.pixiv.net/users/$1'], [/pixiv:\/\/novel\/series\/(\d+)/g, 'https://www.pixiv.net/novel/series/$1'], ]); let convertedText = caption; for (const [pattern, replacement] of protocolMap) { convertedText = convertedText.replace(pattern, replacement); } return convertedText; } // docs: https://www.pixiv.help/hc/ja/articles/235584168-小説作品の本文内に使える特殊タグとは export async function parseNovelContent(content: string, images: Record<string, string>, token?: string): Promise<string> { try { // 如果有 token,處理 pixiv 圖片引用 // If token exists, process pixiv image references if (token) { const imageMatches = [...content.matchAll(/\[pixivimage:(\d+)(?:-(\d+))?\]/g)]; const imageIdToUrl = new Map<string, string>(); // 批量獲取圖片資訊 // Batch fetch image information await Promise.all( imageMatches.map(async ([, illustId, pageNum]) => { if (!illustId) { return; } try { const illust = (await getIllustDetail(illustId, token)).data.illust; const pixivimages = pixivUtils.getImgs(illust).map((img) => img.match(/src="([^"]+)"/)?.[1] || ''); const imageUrl = pixivimages[Number(pageNum) || 0]; if (imageUrl) { imageIdToUrl.set(pageNum ? `${illustId}-${pageNum}` : illustId, imageUrl); } } catch (error) { // 記錄錯誤但不中斷處理 // Log error but don't interrupt processing logger.warn(`Failed to fetch illust detail for ID ${illustId}: ${error instanceof Error ? error.message : String(error)}`); } }) ); // 替換 pixiv 圖片引用為 img 標籤 // Replace pixiv image references with img tags content = content.replaceAll(/\[pixivimage:(\d+)(?:-(\d+))?\]/g, (match, illustId, pageNum) => { const key = pageNum ? `${illustId}-${pageNum}` : illustId; const imageUrl = imageIdToUrl.get(key); return imageUrl ? `<img src="${imageUrl}" alt="pixiv illustration ${illustId}${pageNum ? ` page ${pageNum}` : ''}">` : match; }); } else { /* * 處理 get-novels-sfw 的情況 * 當沒有 PIXIV_REFRESHTOKEN 時,將 [pixivimage:(\d+)] 格式轉換為 artwork 連結 * 因無法獲取 Pixiv 作品詳情,改為提供直接連結到原始作品頁面 * * Handle get-novels-sfw case * When PIXIV_REFRESHTOKEN is not available, convert [pixivimage:(\d+)] format to artwork link * Provide direct link to original artwork page since artwork details cannot be retrieved */ content = content.replaceAll(/\[pixivimage:(\d+)(?:-(\d+))?\]/g, (_, illustId) => `<a href="https://www.pixiv.net/artworks/${illustId}" target="_blank" rel="noopener noreferrer">Pixiv Artwork #${illustId}</a>`); } // 處理作者上傳的圖片 // Process author uploaded images content = content.replaceAll(/\[uploadedimage:(\d+)\]/g, (match, imageId) => { if (images[imageId]) { return `<img src="${pixivUtils.getProxiedImageUrl(images[imageId])}" alt="novel illustration ${imageId}">`; } return match; }); // 基本格式處理 // Basic formatting content = content // 換行轉換為 HTML 換行 // Convert newlines to HTML breaks .replaceAll('\n', '<br>') // 連續換行轉換為段落 // Convert consecutive breaks to paragraphs .replaceAll(/(<br>){2,}/g, '</p><p>') // ruby 標籤(為日文漢字標註讀音) // ruby tags (for Japanese kanji readings) .replaceAll(/\[\[rb:(.*?)>(.*?)\]\]/g, '<ruby>$1<rt>$2</rt></ruby>') // 外部連結 // external links .replaceAll(/\[\[jumpuri:(.*?)>(.*?)\]\]/g, '<a href="$2" target="_blank" rel="noopener noreferrer">$1</a>') // 頁面跳轉,但由於 [newpage] 使用 hr 分隔,沒有頁數,沒必要跳轉,所以只顯示文字 // Page jumps, but since [newpage] uses hr separators, without the page numbers, jumping isn't needed, so just display text .replaceAll(/\[jump:(\d+)\]/g, 'Jump to page $1') // 章節標題 // chapter titles .replaceAll(/\[chapter:(.*?)\]/g, '<h2>$1</h2>') // 分頁符 // page breaks .replaceAll('[newpage]', '<hr>'); // 使用 cheerio 進行 HTML 清理和優化 // Use cheerio for HTML cleanup and optimization const $content = load(`<article><p>${content}</p></article>`); // 處理嵌套段落:移除多餘的嵌套 // Handle nested paragraphs: remove unnecessary nesting $content('p p').each((_, elem) => { const $elem = $content(elem); $elem.replaceWith($elem.html() || ''); }); // 處理段落中的標題:確保正確的 HTML 結構 // Handle headings in paragraphs: ensure correct HTML structure $content('p h2').each((_, elem) => { const $elem = $content(elem); const $parent = $elem.parent('p'); const html = $elem.prop('outerHTML'); if ($parent.length && html) { $parent.replaceWith(`</p>${html}<p>`); } }); return $content.html() || ''; } catch (error) { throw new Error(`Error parsing novel content: ${error instanceof Error ? error.message : String(error)}`); } }