supercrawler
A web crawler. Supercrawler automatically crawls websites, and you define custom handlers to parse content. It obeys robots.txt, rate limits, and concurrency limits.
70 lines (60 loc) • 1.99 kB
JavaScript
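Before the test file itself, a minimal usage sketch may help set context. This is assumed from Supercrawler's README; the option names, handler registration, and the shape of the handler's context argument are taken from its documented API but should be treated as illustrative rather than authoritative:

var supercrawler = require("supercrawler");

// Crawler that waits 1000 ms between requests and allows at most
// 5 concurrent requests; robots.txt is respected automatically.
var crawler = new supercrawler.Crawler({
  interval: 1000,
  concurrentRequestsLimit: 5
});

// Handlers fire per content type; htmlLinkParser discovers links
// on the listed hostnames and queues them for crawling.
crawler.addHandler("text/html", supercrawler.handlers.htmlLinkParser({
  hostnames: ["example.com"]
}));

// A custom handler to parse content yourself (signature assumed
// from the README: a single context object per crawled page).
crawler.addHandler("text/html", function (context) {
  console.log("Crawled", context.url);
});

// Seed the queue and start crawling.
crawler.getUrlList()
  .insertIfNotExists(new supercrawler.Url("https://example.com/"))
  .then(function () {
    crawler.start();
  });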
var Url = require("../lib/Url"),
    expect = require("chai").expect;

describe("Url", function () {
  it("returns an instance when called as a function", function () {
    expect(Url({
      url: "https://example.com"
    })).to.be.an.instanceOf(Url);
  });

  it("uses URL as the unique ID", function () {
    expect(new Url({
      url: "https://example.com"
    }).getUniqueId()).to.equal("https://example.com");
  });

  it("accepts a string URL as the only argument", function () {
    expect(new Url("https://example.com").getUniqueId()).to.equal("https://example.com");
  });

  describe("#getStatusCode", function () {
    it("returns the statusCode if specified", function () {
      expect(new Url({
        url: "https://example.com",
        statusCode: 201
      }).getStatusCode()).to.equal(201);
    });

    it("defaults to null if statusCode not specified", function () {
      expect(new Url({
        url: "https://example.com"
      }).getStatusCode()).to.equal(null);
    });
  });

  describe("#getUrl", function () {
    it("returns the URL", function () {
      expect(new Url({
        url: "https://example.com"
      }).getUrl()).to.equal("https://example.com");
    });
  });

  describe("#getErrorCode", function () {
    it("returns the error code", function () {
      expect(new Url({
        url: "https://example.com",
        errorCode: "ROBOTS_NOT_ALLOWED"
      }).getErrorCode()).to.equal("ROBOTS_NOT_ALLOWED");
    });
  });

  describe("#getErrorMessage", function () {
    it("returns the error message", function () {
      expect(new Url({
        url: "https://example.com",
        errorCode: "ROBOTS_NOT_ALLOWED",
        errorMessage: "A robots.txt error"
      }).getErrorMessage()).to.equal("A robots.txt error");
    });

    it("returns null if no error message", function () {
      expect(new Url({
        url: "https://example.com",
        errorCode: "ROBOTS_NOT_ALLOWED"
      }).getErrorMessage()).to.equal(null);
    });
  });
});
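Read together, these tests pin down the shape of lib/Url: a constructor that works with or without new, accepts either an options object or a bare URL string, and exposes simple getters that default to null. A minimal sketch that would satisfy the assertions above (inferred purely from the tests, not the package's actual source):

// Hypothetical lib/Url sketch, reconstructed from the test expectations only.
function Url(opts) {
  // Allow calling without `new` (first test above).
  if (!(this instanceof Url)) {
    return new Url(opts);
  }
  // Accept a plain string URL as the only argument.
  if (typeof opts === "string") {
    opts = { url: opts };
  }
  this._url = opts.url;
  this._statusCode = typeof opts.statusCode === "undefined" ? null : opts.statusCode;
  this._errorCode = typeof opts.errorCode === "undefined" ? null : opts.errorCode;
  this._errorMessage = typeof opts.errorMessage === "undefined" ? null : opts.errorMessage;
}

// The URL doubles as the unique ID in the crawl queue.
Url.prototype.getUniqueId = function () { return this._url; };
Url.prototype.getUrl = function () { return this._url; };
Url.prototype.getStatusCode = function () { return this._statusCode; };
Url.prototype.getErrorCode = function () { return this._errorCode; };
Url.prototype.getErrorMessage = function () { return this._errorMessage; };

module.exports = Url;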