extract-hrefs
Version:
This library extracts the href values of anchor elements from a given HTML string and returns them as normalized URLs.
32 lines • 810 B
JavaScript
// src/index.ts
import * as cheerio from "cheerio";
import normalizeUrl from "normalize-url";
var normalizeUrls = (links) => {
return links.map((link) => {
try {
return normalizeUrl(link, { stripWWW: false });
} catch (error) {
console.error(`Error normalizing URL: ${link}`, error);
return link;
}
});
};
var extractHrefs = (htmlString) => {
if (!htmlString || typeof htmlString !== "string") {
throw new Error("HTML string is required");
}
const $ = cheerio.load(htmlString);
const urls = [];
$("a").each((index, element) => {
const href = $(element).attr("href");
if (href) {
urls.push(href);
}
});
return normalizeUrls(urls);
};
// Expose extractHrefs as this module's default export.
var index_default = extractHrefs;
export {
index_default as default
};
//# sourceMappingURL=index.mjs.map