// @synstack/web
// Version:
// Web scraping utilities
// 53 lines (51 loc) • 1.47 kB
// JavaScript
var __defProp = Object.defineProperty;
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// src/web.bundle.ts
// Namespace object for the bundle; the export list at the end of this file
// re-exports it under the name `web`.
var web_bundle_exports = {};
// Each entry is a thunk rather than a direct reference: the functions are
// assigned to `var` bindings further down the file, so the getter reads the
// binding only when the property is accessed, not at registration time.
__export(web_bundle_exports, {
fetchArticle: () => fetchArticle,
fetchJson: () => fetchJson,
fetchText: () => fetchText
});
// src/web.lib.ts
import { Readability } from "@mozilla/readability";
import { parseHTML } from "linkedom";
var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
var fetchText = (url) => fetch(url).then((response) => response.text());
/**
 * Downloads the page at `url`, parses it into a DOM with linkedom's
 * `parseHTML`, and runs Mozilla Readability over the resulting document.
 *
 * @param url - Page to download; also echoed back in the result.
 * @returns An object with `url` plus the `content`, `title`, `byline`,
 *   `siteName`, `lang` and `publishedTime` values taken from the parsed article.
 * @throws ArticleNotFoundException when Readability yields no result or the
 *   result has no (truthy) `content`.
 */
var fetchArticle = async (url) => {
  const html = await fetchText(url);
  const { window: domWindow } = parseHTML(html, { url });
  const parsed = new Readability(domWindow.document).parse();

  if (!parsed?.content) {
    throw new ArticleNotFoundException(url);
  }

  const { content, title, byline, siteName, lang, publishedTime } = parsed;
  return { url, content, title, byline, siteName, lang, publishedTime };
};
var ArticleNotFoundException = class extends Error {
constructor(url) {
super(
`
No article found at the URL
URL: ${url}
`.trim()
);
}
};
export {
ArticleNotFoundException,
fetchArticle,
fetchJson,
fetchText,
web_bundle_exports as web
};
//# sourceMappingURL=web.index.js.map