rsshub
Version:
Make RSS Great Again!
169 lines (149 loc) • 5.07 kB
text/typescript
// import cache from '@/utils/cache';
/* eslint-disable unicorn/no-empty-file */
/* Removed due to news.gdut.edu.cn no longer exists.
import got from '@/utils/got';
import { load } from 'cheerio';
const site = 'http://news.gdut.edu.cn';
async function getCookie() {
const login = '/UserLogin.aspx';
// 获取登录页面
const loginResp = await got({
method: 'get',
url: site + login,
});
// 获取cookie
let cookie = '';
if (loginResp.headers['set-cookie'] !== undefined) {
loginResp.headers['set-cookie'].forEach((element) => {
cookie += element.split(';')[0] + ';';
});
}
const postdata = {
ctl00$ContentPlaceHolder1$userEmail: 'gdutnews',
ctl00$ContentPlaceHolder1$userPassWord: 'newsgdut',
ctl00$ContentPlaceHolder1$CheckBox1: 'on',
ctl00$ContentPlaceHolder1$Button1: '登录',
};
const $ = load(loginResp.data);
$('input[type=hidden]').each((index, element) => {
postdata[$(element).attr('name')] = $(element).attr('value');
});
// 登录
await got({
method: 'post',
url: site + login,
headers: {
Cookie: cookie,
Referer: site + login,
},
form: postdata,
}).catch((e) => {
if (e.statusCode === 302) {
if (/UserLogin/.test(e.headers.location)) {
throw new Error('新闻网登录失败');
}
} else {
throw e;
}
});
return cookie;
}
export default async (ctx) => {
const page = '/ArticleList.aspx?category=4';
// 缓存cookie
let cookie = await cache.get(site + page);
if (!cookie) {
cookie = await getCookie();
cache.set(site + page, cookie);
}
let pageResp = await got({
method: 'get',
url: site + page,
headers: {
Cookie: cookie,
},
}).catch(async (e) => {
// cookie 失效了
if (e.statusCode === 302) {
if (/UserLogin/.test(e.headers.location)) {
cookie = await getCookie();
cache.set(site + page, cookie);
pageResp = await got({
method: 'get',
url: site + page,
headers: {
Cookie: cookie,
},
});
} else if (/waf_verify/.test(e.headers.location)) {
throw new Error('访问量过大,触发验证码');
} else {
throw e;
}
} else {
throw e;
}
});
// 解析列表数据
const articleList = [];
const $ = load(pageResp.data);
$('#ContentPlaceHolder1_ListView1_ItemPlaceHolderContainer p a').each((index, element) => {
let url = $(element).attr('href');
if (url.startsWith('.')) {
url = url.substr(1);
}
const title = $(element).attr('title');
articleList.push({ title, link: site + url });
});
// 根据列表数据查找对应内容
const out = await Promise.all(
articleList.map(async (data) => {
const link = data.link;
const cacheIn = await cache.get(link);
if (cacheIn) {
return Promise.resolve(JSON.parse(cacheIn));
}
// 获取数据
const response = await got({
method: 'get',
url: link,
headers: {
Cookie: cookie,
},
});
const $ = load(response.data);
// 文字内容
const content = $('#articleBody').html();
let offset = content.indexOf('</center>'); // 删除标题
let description = content.substr(offset + 9).trim();
// 删除“单 位:XXXXXX”
const startIndex = description.indexOf('单 位:') + 4;
if (startIndex > 0 && startIndex < 50) {
offset = description.indexOf('<br>', startIndex) + 4;
description = description.substr(offset).trim();
}
// 获取文章数据
const articleinfos = $('.articleinfos').text();
const date = /\[发布日期:(.+?)\]/.exec(articleinfos)[1];
const author = /\[所属部门:(.+?)\]/.exec(articleinfos)[1];
const category = /\[文章分类:(.+?)\]/.exec(articleinfos)[1].split(' ');
const single = {
title: data.title,
link,
description,
pubDate: new Date(date).toUTCString(),
author,
category,
};
// 将文章结果缓存
cache.set(link, JSON.stringify(single));
return Promise.resolve(single);
})
);
return ({
title: `广东工业大学校内新闻网`,
link: 'http://news.gdut.edu.cn',
item: out.length > 0 ? out : articleList,
});
};
*/