UNPKG

@langchain/community

Version:
22 lines (21 loc) 1 kB
import { expect, test } from "@jest/globals";
import { CheerioWebBaseLoader } from "../web/cheerio.js";

// Integration tests for CheerioWebBaseLoader.
// Every case below constructs a loader for a live URL and calls load(), so
// these tests need network access and depend on the remote pages' content.

test("Test cheerio web scraper loader", async () => {
  const loader = new CheerioWebBaseLoader(
    "https://news.ycombinator.com/item?id=34817881"
  );

  const docs = await loader.load();

  // Smoke test with default options: beyond load() not rejecting, require that
  // it yields at least one document whose page content is a string, so an
  // empty or malformed result cannot pass silently.
  expect(docs.length).toBeGreaterThan(0);
  expect(typeof docs[0].pageContent).toBe("string");
});

test("Test cheerio web scraper loader with selector", async () => {
  // Pass a `selector` option and check the first document's trimmed content
  // against the expected heading text.
  const loader = new CheerioWebBaseLoader("https://about.google/commitments/", {
    selector: "h1",
  });

  const docs = await loader.load();

  expect(docs[0].pageContent.trim()).toBe(
    "Committed to significantly improving the lives of as many people as possible."
  );
});

test("Test cheerio web scraper loader with textDecoder", async () => {
  // Supply a GBK TextDecoder via the `textDecoder` option.
  // NOTE(review): presumably the target page is served GBK-encoded, so finding
  // the Chinese text below indicates the bytes were decoded correctly — confirm.
  const loader = new CheerioWebBaseLoader(
    "https://corp.163.com/gb/about/management.html",
    {
      textDecoder: new TextDecoder("gbk"),
    }
  );

  const docs = await loader.load();

  expect(docs[0].pageContent.trim()).toEqual(expect.stringContaining("网易"));
});