/*
 * stihirus-reader
 * Version:
 * stihirus.ru parser
 * 498 lines (435 loc) • 18.2 kB
 * JavaScript
 */
import fetch from 'node-fetch';
import * as cheerio from 'cheerio';
// Site root; used below to build absolute profile, rubric, poem and image URLs.
const BASE_URL = 'https://stihirus.ru';
// Base path of the JSON endpoints that fetchApi() posts to.
const API_BASE_URL = `${BASE_URL}/-zbb/api`;
// Page size used to convert page numbers into `from` offsets and to detect a short (last) page.
const POEMS_PER_PAGE = 20;
// Default pause between sequential API requests, in milliseconds.
const DEFAULT_REQUEST_DELAY_MS = 200;
// Numeric codes placed in the `code` field of error results and on thrown errors' `statusCode`.
// Note that API_ERROR, PARSING_ERROR and UNKNOWN_ERROR share the same value (500).
const ERROR_CODES = {
NETWORK_ERROR: 503,
API_ERROR: 500,
PARSING_ERROR: 500,
NOT_FOUND: 404,
INVALID_INPUT: 400,
UNKNOWN_ERROR: 500,
};
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Builds the plain error payload used in `{ status: 'error', error }` results.
 *
 * @param {string} message - Human-readable description.
 * @param {number} code - Numeric error code.
 * @param {?{message: string}} [originalError=null] - Optional underlying error;
 *   when truthy, its `message` is copied into `originalError`.
 * @returns {{code: number, message: string, originalError?: string}}
 */
function createErrorObject(message, code, originalError = null) {
  return originalError
    ? { code, message, originalError: originalError.message }
    : { code, message };
}
/**
 * POSTs a form-encoded request to a site API endpoint and returns the parsed JSON body.
 *
 * Failures are tagged with a `statusCode` so callers can report them:
 *  - transport failure (fetch itself rejects)      -> ERROR_CODES.NETWORK_ERROR
 *  - non-2xx HTTP response                          -> the HTTP status
 *  - body that is not JSON / not a JSON object      -> ERROR_CODES.PARSING_ERROR
 *  - `{ status: 'error' }` mentioning access denial -> ERROR_CODES.NOT_FOUND
 *  - any other `{ status: 'error' }`                -> ERROR_CODES.API_ERROR
 *
 * @param {string} endpoint - Endpoint name appended to API_BASE_URL.
 * @param {?Object} data - Form fields; own properties whose value is null/undefined are skipped.
 * @param {string} [refererUrl=BASE_URL] - Value sent in the Referer header.
 * @returns {Promise<Object>} Parsed JSON response.
 * @throws {Error} With a numeric `statusCode` as described above.
 */
async function fetchApi(endpoint, data, refererUrl = BASE_URL) {
  const apiUrl = `${API_BASE_URL}/${endpoint}`;

  const params = new URLSearchParams();
  // Object.entries only visits own properties, so inherited keys never leak into the form body.
  for (const [key, value] of Object.entries(data ?? {})) {
    if (value !== null && value !== undefined) {
      params.append(key, value);
    }
  }

  const headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'User-Agent': 'Mozilla/5.0 (Node.js) stihirus-reader/1.5.0',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': refererUrl,
    'Origin': BASE_URL,
  };

  let response;
  try {
    response = await fetch(apiUrl, { method: 'POST', headers, body: params });
  } catch (error) {
    // The request never produced a response: report it the same way fetchHtml does.
    if (!error.statusCode) error.statusCode = ERROR_CODES.NETWORK_ERROR;
    throw error;
  }

  if (!response.ok) {
    const error = new Error(`HTTP ${response.status}`);
    error.statusCode = response.status;
    throw error;
  }

  let result;
  try {
    result = await response.json();
  } catch (error) {
    if (!error.statusCode) error.statusCode = ERROR_CODES.PARSING_ERROR;
    throw error;
  }
  if (result === null || typeof result !== 'object') {
    // Callers read `result.data`; a bare null/number/string body cannot be used.
    const error = new Error('Unexpected API response');
    error.statusCode = ERROR_CODES.PARSING_ERROR;
    throw error;
  }

  if (result.status === 'error') {
    // Coerce so a non-string message cannot crash the `.includes` checks below.
    const msg = String(result.message || 'Unknown API Error');
    if (msg.includes('denied') || msg.includes('доступ запрещен')) {
      const error = new Error('Access denied or Content not found');
      error.statusCode = ERROR_CODES.NOT_FOUND;
      throw error;
    }
    const error = new Error(`API Error: ${msg}`);
    error.statusCode = ERROR_CODES.API_ERROR;
    throw error;
  }

  return result;
}
/**
 * Downloads a page and returns its body as text.
 *
 * Any non-2xx response is rejected instead of being returned as if it were page
 * content; otherwise an error page (403/5xx) would be parsed downstream and
 * misreported, e.g. as a missing author.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<string>} Response body.
 * @throws {Error} `statusCode` is 404 for a missing page, the HTTP status for other
 *   non-2xx responses, or ERROR_CODES.NETWORK_ERROR when no status is known.
 */
async function fetchHtml(url) {
  try {
    const response = await fetch(url, {
      headers: { 'User-Agent': 'Mozilla/5.0 (Node.js) stihirus-reader/1.5.0' },
    });
    if (response.status === 404) {
      const err = new Error('Page not found');
      err.statusCode = 404;
      throw err;
    }
    if (!response.ok) {
      const err = new Error(`HTTP ${response.status}`);
      err.statusCode = response.status;
      throw err;
    }
    return await response.text();
  } catch (error) {
    // Errors without a status came from the transport layer (DNS, reset, ...).
    if (!error.statusCode) error.statusCode = ERROR_CODES.NETWORK_ERROR;
    throw error;
  }
}
/**
 * Converts one raw poem record from the API into the library's poem shape.
 *
 * Numeric fields are parsed in base 10, `<br>` tags and CRLF in the body are
 * turned into `\n`, relative image paths are made absolute, and a missing
 * collection is reported as `null`.
 *
 * @param {?Object} poemApiData - Raw record as returned in the API `data` array.
 * @returns {?Object} Normalized poem, or `null` when no record was given.
 */
function mapApiPoem(poemApiData) {
  if (!poemApiData) return null;

  const toInt = (value) => parseInt(value, 10);

  const text = String(poemApiData.body || '')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/\r\n/g, '\n')
    .trim();

  // 'proizv_like' is treated as "no gifts" rather than as a gift name.
  const rawGifts = poemApiData.podarki;
  const gifts = typeof rawGifts === 'string' && rawGifts !== 'proizv_like'
    ? rawGifts.split(',').map((gift) => gift.trim()).filter(Boolean)
    : [];

  // 'не в сборнике' is the marker value for "not in a collection"; absent names map to null too.
  const collectionName = poemApiData.urazd_name;
  const collection = collectionName && collectionName !== 'не в сборнике' ? collectionName : null;

  let imageUrl = null;
  if (poemApiData.background) {
    const background = String(poemApiData.background);
    imageUrl = background.startsWith('http') ? background : `${BASE_URL}${background}`;
  }

  return {
    id: toInt(poemApiData.id),
    title: poemApiData.title || '***',
    text,
    created: poemApiData.created,
    rating: toInt(poemApiData.rating || '0'),
    commentsCount: toInt(poemApiData.comments_count || '0'),
    rubric: {
      name: poemApiData.razd_name,
      url: poemApiData.razd_url ? `${BASE_URL}/razdel/${poemApiData.razd_url}` : null,
    },
    collection,
    imageUrl,
    uniquenessStatus: toInt(poemApiData.text_unique),
    // Accept both '1' and 1 so the flag survives a change in JSON typing.
    hasCertificate: String(poemApiData.have_certificate) === '1',
    gifts,
    contest: poemApiData.contest_id
      ? { id: toInt(poemApiData.contest_id), name: poemApiData.contest_name }
      : null,
    holidaySection: null,
    author: {
      id: toInt(poemApiData.avtor_id || poemApiData.user_id),
      username: poemApiData.username,
      profileUrl: poemApiData.useruri ? `${BASE_URL}/avtor/${poemApiData.useruri}` : null,
    },
  };
}
/**
 * Resolves an author identifier to the author's numeric ID and profile URL.
 *
 * Accepted identifiers:
 *  - a number: treated as the author ID and looked up through the API;
 *  - a string containing 'stihirus.ru' or '/': treated as a profile URL
 *    (scheme optional) whose host must be stihirus.ru or a subdomain of it;
 *  - any other string: treated as a username made of letters, digits, '-' and '_'.
 *
 * @param {number|string} identifier - Author ID, profile URL or username.
 * @returns {Promise<{authorId: number, username: string, profileUrl: string, html: ?string}>}
 *   `html` is the downloaded profile page, or `null` for numeric identifiers.
 * @throws {Error} `statusCode` 400 for malformed input, 404 when the author cannot
 *   be found; errors from fetchHtml propagate unchanged.
 */
async function resolveIdentifier(identifier) {
  const makeError = (message, statusCode, cause = null) => {
    const err = new Error(message);
    err.statusCode = statusCode;
    if (cause) err.cause = cause;
    return err;
  };

  if (typeof identifier === 'number') {
    let lookupError = null;
    try {
      const apiRes = await fetchApi('pr_read_avtor', { id: identifier, from: 0 });
      if (apiRes.data && apiRes.data.length > 0) {
        const item = apiRes.data[0];
        return {
          authorId: identifier,
          username: item.useruri,
          profileUrl: `${BASE_URL}/avtor/${item.useruri}`,
          html: null,
        };
      }
    } catch (e) {
      // A failed lookup is still reported as "not found", but the reason is kept on `cause`.
      lookupError = e;
    }
    throw makeError(`Author ID ${identifier} not found`, 404, lookupError);
  }

  if (typeof identifier !== 'string') {
    // Without this guard null/undefined/objects crash on `.includes` and surface as a 500.
    throw makeError('Invalid identifier format', 400);
  }

  let url;
  if (identifier.includes('stihirus.ru') || identifier.includes('/')) {
    url = /^https?:\/\//i.test(identifier) ? identifier : `https://${identifier}`;
    // Validate the actual host: a substring check would accept e.g. evil.example/stihirus.ru.
    let hostname = null;
    try {
      hostname = new URL(url).hostname.toLowerCase();
    } catch {
      hostname = null;
    }
    if (hostname !== 'stihirus.ru' && !(hostname && hostname.endsWith('.stihirus.ru'))) {
      throw makeError('Invalid URL format', 400);
    }
  } else {
    if (!/^[a-zA-Z0-9_-]+$/.test(identifier)) {
      throw makeError('Invalid identifier format', 400);
    }
    url = `${BASE_URL}/avtor/${identifier}`;
  }

  const html = await fetchHtml(url);
  const $ = cheerio.load(html);
  const idStr = $('.avtorinfo').attr('data-userid');
  const authorId = parseInt(idStr, 10);
  if (!idStr || Number.isNaN(authorId)) {
    throw makeError('Author not found', 404);
  }

  // Take the path segment after /avtor/, dropping any trailing path, query or fragment.
  const slug = url.split('/avtor/')[1]?.split(/[/?#]/)[0];
  return {
    authorId,
    html,
    username: slug || identifier,
    profileUrl: url,
  };
}
/**
 * Extracts the profile details that are only available in the author page HTML.
 *
 * @param {string} html - Markup of an author profile page.
 * @returns {{description: string, headerUrl: ?string, stats: {poems: number,
 *   reviewsSent: number, reviewsReceived: number}, collections: Array<{name: string,
 *   url: ?string}>, lastVisit: string, status: string, isPremium: boolean,
 *   avatarUrl: ?string}}
 */
function parseProfileExtras(html) {
  const $ = cheerio.load(html);
  // Site-relative paths (leading '/') are prefixed with the site root.
  const absolutize = (src) => (src.startsWith('/') ? `${BASE_URL}${src}` : src);

  // Header and avatar: the placeholder images are reported as "no image".
  const headerSrc = $('.page_header_img').attr('src');
  const hasCustomHeader = Boolean(headerSrc) && !headerSrc.includes('none_header');

  const avatarSrc = $('.page_avatar_img').attr('src');
  const hasCustomAvatar = Boolean(avatarSrc) && avatarSrc !== '/img/profile/none.jpg';

  // Counters are read from the first three progress bars of the stats card.
  const stats = { poems: 0, reviewsSent: 0, reviewsReceived: 0 };
  const bars = $('#show_stat').find('.progress-bar');
  if (bars.length >= 3) {
    const readBar = (index) => parseInt($(bars[index]).attr('aria-valuenow') || 0, 10);
    stats.poems = readBar(0);
    stats.reviewsSent = readBar(1);
    stats.reviewsReceived = readBar(2);
  }

  const collections = $('#show_sborniki a').toArray().map((anchor) => {
    const href = $(anchor).attr('href');
    let url = null;
    if (href) {
      url = href.startsWith('http') ? href : `${BASE_URL}${href}`;
    }
    return { name: $(anchor).text().trim(), url };
  });

  // Footer notes carry the last visit, the status line and the premium marker.
  let lastVisit = '';
  let status = '';
  let isPremium = false;
  for (const el of $('.card-footer .small').toArray()) {
    const note = $(el);
    const txt = note.text().trim();
    if (txt.includes('Последний визит:')) {
      lastVisit = txt.replace('Последний визит:', '').trim();
    }
    if (txt.includes('Статус:')) {
      status = note.find('b').text().trim();
    }
    if (txt.includes('Премиум доступ')) {
      isPremium = true;
    }
  }

  return {
    description: $('.avtorinfo__userinfo').text().trim(),
    headerUrl: hasCustomHeader ? absolutize(headerSrc) : null,
    stats,
    collections,
    lastVisit,
    status,
    isPremium,
    avatarUrl: hasCustomAvatar ? absolutize(avatarSrc) : null,
  };
}
/**
 * Fetches one page of an author's raw poem records.
 * Returns an empty array when the response carries no `data` array.
 */
async function fetchAuthorPoemPage(apiParams, from, refererUrl) {
  const res = await fetchApi('pr_read_avtor', { ...apiParams, from }, refererUrl);
  return res && Array.isArray(res.data) ? res.data : [];
}

/**
 * Pages through an author's poems one request at a time until a short or empty
 * page is returned (or `maxPages` requests were made), pausing `delayMs` between
 * requests. With `bestEffort`, a failed request ends paging instead of throwing.
 */
async function fetchAuthorPoemsSequentially(apiParams, refererUrl, delayMs, options = {}) {
  const { startFrom = 0, maxPages = Infinity, bestEffort = false } = options;
  const collected = [];
  let from = startFrom;
  for (let pageIndex = 0; pageIndex < maxPages; pageIndex++) {
    if (pageIndex > 0) await sleep(delayMs);
    let batch;
    try {
      batch = await fetchAuthorPoemPage(apiParams, from, refererUrl);
    } catch (error) {
      if (!bestEffort) throw error;
      break;
    }
    collected.push(...batch);
    if (batch.length < POEMS_PER_PAGE) break;
    from += batch.length;
  }
  return collected;
}

/**
 * Loads all poems of an author without filters: pages implied by the profile
 * counter are requested concurrently (failed pages are skipped), then paging
 * continues sequentially while full pages keep coming. At most 100 pages are
 * requested in total. Records are de-duplicated by `id` and mapped with mapApiPoem.
 */
async function fetchAllAuthorPoems(apiParams, refererUrl, expectedTotal, delayMs) {
  const MAX_PAGES = 100;
  const plannedPages = expectedTotal > 0
    ? Math.min(Math.ceil(expectedTotal / POEMS_PER_PAGE), MAX_PAGES)
    : 0;

  const requests = [];
  for (let i = 0; i < plannedPages; i++) {
    requests.push(
      fetchAuthorPoemPage(apiParams, i * POEMS_PER_PAGE, refererUrl).catch(() => [])
    );
    // Light throttle so the concurrent requests are not all issued in one burst.
    if (i % 5 === 0) await sleep(50);
  }
  const pages = await Promise.all(requests);
  const rawPoems = pages.flat();

  // The profile counter can be missing or lower than the real total; a full last
  // page means there may be more poems than planned for.
  const mayHaveMore = plannedPages === 0 || pages[pages.length - 1].length >= POEMS_PER_PAGE;
  if (mayHaveMore && plannedPages < MAX_PAGES) {
    const remainder = await fetchAuthorPoemsSequentially(apiParams, refererUrl, delayMs, {
      startFrom: plannedPages * POEMS_PER_PAGE,
      maxPages: MAX_PAGES - plannedPages,
      bestEffort: true,
    });
    rawPoems.push(...remainder);
  }

  const uniqueById = new Map();
  for (const poem of rawPoems) {
    if (poem && poem.id) uniqueById.set(poem.id, poem);
  }
  return Array.from(uniqueById.values()).map(mapApiPoem);
}

/**
 * Loads an author's profile and, depending on `page`, their poems.
 *
 * @param {number|string} identifier - Author ID, profile URL or username.
 * @param {?number} [page=null] - `0`: profile only; positive: that single page of
 *   POEMS_PER_PAGE poems; `null` (or anything else): all poems.
 * @param {number|Object} [requestDelayMs=DEFAULT_REQUEST_DELAY_MS] - Pause between
 *   sequential page requests. For convenience a filter object may be passed in
 *   this position instead, in which case the default delay is used.
 * @param {?{rubricId?: number, year?: number, month?: number}} [filterOptions=null]
 *   - Optional filters forwarded to the API.
 * @returns {Promise<{status: 'success', data: Object}|{status: 'error', error: Object}>}
 *   Never rejects; failures are reported through the `error` result.
 */
export async function getAuthorData(identifier, page = null, requestDelayMs = DEFAULT_REQUEST_DELAY_MS, filterOptions = null) {
  // Support the (identifier, page, filterOptions) call form.
  if (typeof requestDelayMs === 'object' && requestDelayMs !== null) {
    filterOptions = requestDelayMs;
    requestDelayMs = DEFAULT_REQUEST_DELAY_MS;
  }

  try {
    const authorMeta = await resolveIdentifier(identifier);
    // Numeric identifiers are resolved through the API, so the profile page is fetched here.
    const profileHtml = authorMeta.html || await fetchHtml(authorMeta.profileUrl);
    const profileExtras = parseProfileExtras(profileHtml);

    const authorData = {
      authorId: authorMeta.authorId,
      username: authorMeta.username,
      profileUrl: authorMeta.profileUrl,
      canonicalUsername: authorMeta.username,
      ...profileExtras,
      avatarUrl: profileExtras.avatarUrl,
      poems: [],
    };

    if (page === 0) {
      return { status: 'success', data: authorData };
    }

    const apiParams = { id: authorMeta.authorId };
    let isFiltered = false;
    if (filterOptions) {
      if (filterOptions.rubricId) { apiParams.razdel_id = filterOptions.rubricId; isFiltered = true; }
      if (filterOptions.year) { apiParams.year = filterOptions.year; isFiltered = true; }
      if (filterOptions.month) { apiParams.month = filterOptions.month; isFiltered = true; }
    }

    if (page !== null && page > 0) {
      const rawPage = await fetchAuthorPoemPage(apiParams, (page - 1) * POEMS_PER_PAGE, authorMeta.profileUrl);
      authorData.poems = rawPage.map(mapApiPoem);
    } else if (isFiltered) {
      // The profile counter does not describe a filtered list, so page until a short page.
      const rawPoems = await fetchAuthorPoemsSequentially(apiParams, authorMeta.profileUrl, requestDelayMs);
      authorData.poems = rawPoems.map(mapApiPoem);
    } else {
      const counter = Number(authorData.stats?.poems);
      const expectedTotal = Number.isFinite(counter) && counter > 0 ? counter : 0;
      authorData.poems = await fetchAllAuthorPoems(apiParams, authorMeta.profileUrl, expectedTotal, requestDelayMs);
    }

    return { status: 'success', data: authorData };
  } catch (error) {
    return {
      status: 'error',
      error: createErrorObject(error.message, error.statusCode || ERROR_CODES.UNKNOWN_ERROR, error),
    };
  }
}
/**
 * Loads a single poem by its numeric ID.
 *
 * @param {number} poemId - Positive integer poem ID. NaN, Infinity, fractions,
 *   non-numbers and values <= 0 are rejected without contacting the API.
 * @returns {Promise<{status: 'success', data: Object}|{status: 'error', error: Object}>}
 *   Error code is ERROR_CODES.INVALID_INPUT for a bad ID and 404 when the API
 *   returns no record. Never rejects.
 */
export async function getPoemById(poemId) {
  // Number.isInteger also rules out NaN and Infinity, which `typeof === 'number'` lets through.
  if (!Number.isInteger(poemId) || poemId <= 0) {
    return { status: 'error', error: createErrorObject('Invalid ID', ERROR_CODES.INVALID_INPUT) };
  }
  try {
    const result = await fetchApi('pr_read_avtor', { proizv_id: poemId });
    if (!result.data || !Array.isArray(result.data) || result.data.length === 0) {
      const err = new Error(`Poem ${poemId} not found`);
      err.statusCode = 404;
      throw err;
    }
    const poemData = mapApiPoem(result.data[0]);
    return { status: 'success', data: poemData };
  } catch (error) {
    return {
      status: 'error',
      error: createErrorObject(error.message, error.statusCode || ERROR_CODES.UNKNOWN_ERROR, error),
    };
  }
}
/**
 * Loads the filter options (rubrics and year/month buckets) available for an author.
 *
 * @param {number|string} identifier - Author ID, profile URL or username.
 * @returns {Promise<{status: 'success', data: {rubrics: Array<{id: number, name: string,
 *   count: number}>, dates: Array<{year: number, month: number, count: number}>}}
 *   |{status: 'error', error: Object}>} Never rejects.
 */
export async function getAuthorFilters(identifier) {
  const toInt = (value) => parseInt(value, 10);

  function toRubric(r) {
    return { id: toInt(r.id), name: r.razd_name, count: toInt(r.cnt) };
  }

  function toDateBucket(d) {
    return { year: toInt(d.year), month: toInt(d.month), count: toInt(d.cnt) };
  }

  let apiResponse;
  try {
    const resolved = await resolveIdentifier(identifier);
    apiResponse = await fetchApi('pr_read_avtor_prozv_filter', { for_user_id: resolved.authorId });
    // Missing lists are treated as empty.
    const rubrics = (apiResponse.razd || []).map(toRubric);
    const dates = (apiResponse.year_month || []).map(toDateBucket);
    return { status: 'success', data: { rubrics, dates } };
  } catch (error) {
    const code = error.statusCode ? error.statusCode : 500;
    return { status: 'error', error: createErrorObject(error.message, code) };
  }
}
/**
 * Extracts author cards from one homepage section.
 *
 * @param {string} html - Homepage markup.
 * @param {string} selector - CSS selector of the section that contains the cards.
 * @returns {Array<{username: string, profileUrl: string, avatarUrl: ?string,
 *   poemsCount: number, rating: number}>} One entry per card that has a link.
 */
function parseHomepageAuthors(html, selector) {
  const $ = cheerio.load(html);
  const authors = [];

  $(selector).find('.friends-window__friend-card').each((_, el) => {
    const card = $(el);
    const href = card.find('a').attr('href');
    if (!href) return; // cards without a profile link are skipped

    // The avatar is given as a CSS background: style="... url('/path') ...".
    let avatarUrl = null;
    const style = card.find('.avatarimg').attr('style');
    const match = style ? style.match(/url\(([^)]+)\)/) : null;
    if (match && match[1]) {
      const raw = match[1].replace(/['"]/g, '').trim();
      avatarUrl = raw.startsWith('/') ? `${BASE_URL}${raw}` : raw;
    }

    // NOTE(review): both counters are read from the same badge, so they are always
    // equal; which of the two the badge actually represents depends on the section.
    const badgeValue = parseInt(card.find('.u-badge').text().trim(), 10) || 0;

    authors.push({
      username: card.find('.friends-window__fname').text().trim(),
      // Absolute links are kept as-is, matching how the other parsers build URLs.
      profileUrl: href.startsWith('http') ? href : `${BASE_URL}${href}`,
      avatarUrl,
      poemsCount: badgeValue,
      rating: badgeValue,
    });
  });

  return authors;
}
/**
 * Loads the homepage and returns the authors listed in the `.card-recomended` section.
 *
 * @returns {Promise<{status: 'success', data: Array<Object>}|{status: 'error', error: Object}>}
 *   On failure the error code is the failure's `statusCode` when it has one,
 *   otherwise ERROR_CODES.UNKNOWN_ERROR. Never rejects.
 */
export async function getRecommendedAuthors() {
  try {
    const html = await fetchHtml(BASE_URL);
    return { status: 'success', data: parseHomepageAuthors(html, '.card-recomended') };
  } catch (e) {
    // Keep the status attached by fetchHtml instead of flattening everything to 500.
    return { status: 'error', error: createErrorObject(e.message, e.statusCode || ERROR_CODES.UNKNOWN_ERROR) };
  }
}
/**
 * Loads the homepage and returns the authors listed in the `.card-week-rating` section.
 *
 * @returns {Promise<{status: 'success', data: Array<Object>}|{status: 'error', error: Object}>}
 *   On failure the error code is the failure's `statusCode` when it has one,
 *   otherwise ERROR_CODES.UNKNOWN_ERROR. Never rejects.
 */
export async function getWeeklyRatedAuthors() {
  try {
    const html = await fetchHtml(BASE_URL);
    return { status: 'success', data: parseHomepageAuthors(html, '.card-week-rating') };
  } catch (e) {
    // Keep the status attached by fetchHtml instead of flattening everything to 500.
    return { status: 'error', error: createErrorObject(e.message, e.statusCode || ERROR_CODES.UNKNOWN_ERROR) };
  }
}
/**
 * Loads the homepage and returns the authors listed in the `.card-active-avtors` section.
 *
 * @returns {Promise<{status: 'success', data: Array<Object>}|{status: 'error', error: Object}>}
 *   On failure the error code is the failure's `statusCode` when it has one,
 *   otherwise ERROR_CODES.UNKNOWN_ERROR. Never rejects.
 */
export async function getActiveAuthors() {
  try {
    const html = await fetchHtml(BASE_URL);
    return { status: 'success', data: parseHomepageAuthors(html, '.card-active-avtors') };
  } catch (e) {
    // Keep the status attached by fetchHtml instead of flattening everything to 500.
    return { status: 'error', error: createErrorObject(e.message, e.statusCode || ERROR_CODES.UNKNOWN_ERROR) };
  }
}
/**
 * Loads the homepage and returns the poems listed in the `.card-recomended-proizv` section.
 *
 * Rows that carry no numeric `data-proizvid` are skipped, so every returned entry
 * has a usable `id` and `url`.
 *
 * @returns {Promise<{status: 'success', data: Array<{id: number, title: string,
 *   authorUsername: string, authorProfileUrl: string, rating: ?number,
 *   commentsCount: ?number, url: string}>}|{status: 'error', error: Object}>}
 *   `rating` / `commentsCount` stay `null` when the row shows no such counter.
 *   On failure the error code is the failure's `statusCode` when it has one,
 *   otherwise ERROR_CODES.UNKNOWN_ERROR. Never rejects.
 */
export async function getPromoPoems() {
  try {
    const html = await fetchHtml(BASE_URL);
    const $ = cheerio.load(html);
    const poems = [];

    $('.card-recomended-proizv .border-bottom').each((_, el) => {
      const row = $(el);
      const link = row.find('span.link');
      const id = parseInt(link.attr('data-proizvid'), 10);
      if (Number.isNaN(id)) return; // not a poem row; would otherwise yield /proizv/NaN

      const authorDiv = row.find('.small.text-right');
      const authorUsername = authorDiv.text().trim();
      // Prefer the real profile link; fall back to a URL built from the displayed name.
      let authorProfileUrl = `${BASE_URL}/avtor/${encodeURIComponent(authorUsername)}`;
      const authorHref = authorDiv.find('a').attr('href');
      if (authorHref) {
        authorProfileUrl = authorHref.startsWith('http') ? authorHref : `${BASE_URL}${authorHref}`;
      }

      // Likes and comment counters are told apart by the icon next to the number.
      let rating = null;
      let commentsCount = null;
      row.find('span.text-nowrap.small').each((__, span) => {
        const icon = $(span).find('i');
        const value = parseInt($(span).text().trim(), 10);
        const count = Number.isNaN(value) ? 0 : value;
        if (icon.hasClass('fa-heart-o') || icon.hasClass('fa-heart')) {
          rating = count;
        } else if (icon.hasClass('fa-comments-o')) {
          commentsCount = count;
        }
      });

      poems.push({
        id,
        title: link.text().trim(),
        authorUsername,
        authorProfileUrl,
        rating,
        commentsCount,
        url: `${BASE_URL}/proizv/${id}`,
      });
    });

    return { status: 'success', data: poems };
  } catch (e) {
    // Keep the status attached by fetchHtml instead of flattening everything to 500.
    return { status: 'error', error: createErrorObject(e.message, e.statusCode || ERROR_CODES.UNKNOWN_ERROR) };
  }
}