UNPKG

website-validator

Version:
656 lines (655 loc) 17.3 kB
import { DeepReadonly } from "ts-essentials";
import { Pool } from "./worker-runner.js";
import Ajv from "ajv";

/** Schema argument accepted by Ajv's `compile` (first parameter of the method). */
type AjvSchema = Parameters<InstanceType<typeof Ajv.default>["compile"]>[0];

/** A single entry of the `errors` array exposed by a compiled Ajv validator. */
type AjvErrorObject = NonNullable<ReturnType<InstanceType<typeof Ajv.default>["compile"]>["errors"]>[number];

/** Deeply read-only list of start URLs, each paired with the role it is processed as. */
type FetchBases = DeepReadonly<{ url: string; role: UrlRole }[]>;

/** Package logger, typed as Node's `util.DebugLogger`. */
export declare const log: import("util").DebugLogger;

/**
 * Outcome of fetching one URL.
 *
 * `data` is either a `{ path, mtime }` record or `null`.
 * NOTE(review): `null` presumably means "no file backs this URL" — confirm against the implementation.
 */
export type FileFetchResult = {
  url: string;
  headers: { [name: string]: string };
  status: number;
  data: { path: string; mtime: number } | null;
};

/** Same shape as {@link FileFetchResult}, but `data` is guaranteed to be non-null. */
export type FoundPageFetchResult = {
  url: FileFetchResult["url"];
  headers: FileFetchResult["headers"];
  data: NonNullable<FileFetchResult["data"]>;
  status: FileFetchResult["status"];
};

/**
 * Asynchronously derives a string (or `undefined`) from a found fetch result.
 * NOTE(review): by its name this is the redirect target; the implementation is not visible here.
 */
export declare const getRedirect: (res: FoundPageFetchResult) => Promise<string | undefined>;

/** Curried: given a base URL and an index file name, returns a `string -> string` URL mapper. */
export declare const toCanonical: (baseUrl: string, indexName: string) => (url: string) => string;

/** Curried: given a base URL, returns a predicate over URLs. */
export declare const isInternalLink: (baseUrl: string) => (url: string) => boolean;

/**
 * Discriminated union (tag: `type`) describing how a URL is treated.
 *
 * The `"json"` variant is recursive: each extract config carries a JMESPath
 * expression, the assertions for what it extracts, and a nested `UrlRole`.
 */
export type UrlRole =
  | { type: "document" }
  | { type: "stylesheet" }
  | { type: "asset" }
  | { type: "sitemap" }
  | { type: "robotstxt" }
  | { type: "rss" }
  | { type: "atom" }
  | {
      type: "json";
      extractConfigs: {
        jmespath: string;
        asserts: Assertion[];
        role: UrlRole;
      }[];
    };

type AssertImage = { type: "image" };
type AssertVideo = { type: "video" };
type AssertFont = { type: "font" };
type AssertImageSize = { type: "imageSize"; width: number; height: number };
type AssertContentType = { type: "content-type"; contentType: readonly string[] };
type AssertPermanent = { type: "permanent" };
type AssertDocument = { type: "document" };

/** Discriminated union (tag: `type`) of the expectations that can be attached to a link. */
export type Assertion =
  | AssertImage
  | AssertVideo
  | AssertFont
  | AssertImageSize
  | AssertContentType
  | AssertPermanent
  | AssertDocument;

/** One reported message of an EPUB check, with the file locations it applies to. */
export type EpubcheckError = {
  ID: string;
  severity: string;
  message: string;
  locations: Array<{
    path: string;
    line: number;
    column: number;
  }>;
};

/**
 * A positioned validator message: either an `"error"` (optionally `"fatal"`)
 * or an `"info"` (optionally a `"warning"`). Both variants carry the same
 * position/extract fields.
 */
export type VnuReportedError =
  | {
      type: "error";
      subtype?: "fatal";
      message: string;
      extract: string;
      firstLine?: number;
      lastLine: number;
      firstColumn: number;
      lastColumn?: number;
      hiliteStart?: number;
      hiliteLength?: number;
    }
  | {
      type: "info";
      subtype?: "warning";
      message: string;
      extract: string;
      firstLine?: number;
      lastLine: number;
      firstColumn: number;
      lastColumn?: number;
      hiliteStart?: number;
      hiliteLength?: number;
    };

/** Validator output: a list of messages, each either a non-document error or a {@link VnuReportedError}. */
export type VnuResult = {
  messages: Array<
    | {
        type: "non-document-error";
        message: string;
      }
    | VnuReportedError
  >;
};

/** Discriminated union (tag: `type`) identifying where a link was found. */
export type LinkLocation =
  | {
      type: "html";
      element: { outerHTML: string; selector: string };
    }
  | { type: "robotssitemap"; index: number }
  | {
      type: "sitemaptxt";
      sitemaplocation: { url: string } | { extrasitemapIndex: number };
      index: number;
    }
  | {
      type: "sitemapxml";
      sitemaplocation: { url: string } | { extrasitemapIndex: number };
      urlsetIndex: number;
      urlIndex: number;
    }
  | { type: "rss"; rssurl: string; channelIndex: number; linkIndex: number }
  | { type: "atom"; atomurl: string; entryIndex: number; linkIndex: number }
  | { type: "json"; jsonurl: string; jmespath: string; index: number }
  | { type: "css"; position: string; target: string }
  | { type: "extraurl"; index: number }
  | { type: "redirect" };

/** The string-literal tags of every link error variant. */
export type LinkErrorTypes = LinkError["type"];

/** Link-level problems; every variant records the URL and the place the link was found. */
type LinkError =
  | {
      type: "TARGET_NOT_FOUND";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "HASH_POINTS_TO_NON_DOCUMENT";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "HASH_TARGET_NOT_FOUND";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "LINK_POINTS_TO_NON_DOCUMENT";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "CONTENT_TYPE_MISMATCH";
      expectedContentTypes: string[];
      actualContentType: string;
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "REDIRECT_CHAIN";
      targetUrl: string;
      location: { url: string; location: LinkLocation };
    };

/** Reported for an entry of the fetch bases (identified by its `index`). */
type NotFoundError = {
  type: "NOT_FOUND";
  location: {
    url: string;
    location: { type: "fetchBase"; index: number };
  };
};

/** Problems found inside a single fetched file. */
type DocumentErrors =
  | {
      type: "JSON_LD_UNPARSEABLE";
      location: {
        url: string;
        location: { outerHTML: string; selector: string };
      };
    }
  | {
      type: "VNU";
      object: VnuReportedError;
      location: { url: string };
    }
  | {
      type: "EPUBCHECK";
      object: EpubcheckError;
      location: { url: string };
    }
  | {
      type: "PDF_CAN_NOT_BE_PARSED";
      message: string;
      location: { url: string };
    }
  | {
      type: "JSON_FILE_UNPARSEABLE";
      location: { url: string };
    }
  | {
      type: "XML_FILE_UNPARSEABLE";
      location: { url: string };
    }
  | {
      type: "MULTIPLE_CANONICAL_LINKS";
      canonicalLinks: Array<{ outerHTML: string; selector: string }>;
    }
  | {
      type: "NON_REDIRECT_DIFFERENT_CANONICAL";
      canonicalLink: string;
      location: { url: string };
    }
  | {
      type: "REDIRECT_DIFFERENT_CANONICAL";
      redirectTarget: string;
      canonicalTarget: string;
    }
  | {
      type: "IMG_SRC_INVALID";
      location: {
        url: string;
        location: { outerHTML: string; selector: string };
      };
      // Dimensions are only present for non-external images.
      src:
        | ({ url: string } & (
            | { external: false; width: number; height: number }
            | { external: true }
          ))
        | undefined;
      srcset:
        | Array<
            {
              url: string;
              descriptor: { density: number } | { width: number };
            } & (
              | { external: false; width: number; height: number }
              | { external: true }
            )
          >
        | undefined;
      sizes: string | undefined;
    };

/** Errors produced by user-supplied {@link AdditionalValidator}s. */
type AdditionalValidatorError =
  | {
      type: "JSON_DOES_NOT_MATCH_SCHEMA";
      result: AjvErrorObject;
      schema: AjvSchema;
      url: string;
    }
  | {
      type: "JSON_LD_DOES_NOT_MATCH_SCHEMA";
      filter: AjvSchema;
      result: AjvErrorObject;
      schema: AjvSchema;
      url: string;
    }
  | {
      type: "JSON_LD_DOES_NOT_MATCH_OCCURRENCE_REQUIREMENT";
      filter: AjvSchema;
      minOccurrence: number | undefined;
      maxOccurrence: number | undefined;
      actualOccurrence: number;
      url: string;
    }
  | {
      type: "ADDITIONAL_VALIDATOR_MATCH_NUMBER_OUTSIDE_EXPECTED_RANGE";
      minMatches: number | undefined;
      maxMatches: number | undefined;
      actualMatches: number;
      urlPattern: string;
    };

/** Deeply read-only union of every error that {@link validate} can report. */
export type ValidationResultType = DeepReadonly<
  | LinkError
  | DocumentErrors
  | NotFoundError
  | {
      type: "ROBOTS_TXT_HOST_INVALID";
      expectedHost: string;
      actualHost: string;
    }
  | {
      type: "ROBOTS_TXT_SITEMAP_INVALID";
      sitemapUrl: string;
    }
  | {
      type: "SITEMAP_LINK_INVALID";
      sitemapUrl: string;
      url: string;
    }
  | AdditionalValidatorError
>;

/** Optional extra inputs: additional text sitemaps, XML sitemaps, and plain URLs. */
type ExtraTypes = DeepReadonly<{
  extraTxtSitemaps?: string[] | undefined;
  extraXmlSitemaps?: string[] | undefined;
  extraUrls?: string[] | undefined;
}>;

/** Role attached to an entry of the file graph (deeply read-only, fully typed when nested). */
type FileGraphRole = DeepReadonly<UrlRole>;

/** Links attached to a file graph entry, each with its role, assertions, and origin. */
type FileGraphLinks = DeepReadonly<
  {
    url: string;
    role: UrlRole;
    asserts: Assertion[];
    location: LinkLocation;
  }[]
>;

/**
 * One entry of the file graph: either a found page together with its links,
 * or a fetch result (whose `data` may be `null`) with `links: null`.
 */
type FileGraphEntry =
  | {
      url: string;
      role: FileGraphRole;
      res: FoundPageFetchResult;
      links: FileGraphLinks;
    }
  | {
      url: string;
      role: FileGraphRole;
      res: DeepReadonly<FileFetchResult>;
      links: null;
    };

/**
 * Curried entry point: worker pool, then site (base URL + target config),
 * then start URLs and extras. Resolves to the list of graph entries.
 *
 * The return type is spelled with the named types declared in this module
 * rather than a compiler-expanded inline type, so that the nested `role` of
 * JSON extract configs is fully typed instead of degrading to `any`.
 */
export declare const fetchFileGraph: (
  pool: Pool,
) => (
  baseUrl: string,
  targetConfig: TargetConfig,
) => (fetchBases: FetchBases, extras: ExtraTypes) => Promise<FileGraphEntry[]>;

/**
 * Describes the site to process: a directory, an optional index file name,
 * and an optional callback returning response headers and status for a path.
 */
type TargetConfig = {
  dir: string;
  indexName?: string;
  responseMeta?: (path: string) => {
    headers: { [name: string]: string };
    status: number;
  };
};

type JSONAdditionalValidator = {
  type: "json";
  schema: AjvSchema;
};

/** Makes every key in `R` optional while requiring that at least one of them is present. */
type RequireAtLeastOne<T, R extends keyof T = keyof T> = Omit<T, R> &
  {
    [P in R]: Required<Pick<T, P>> & Partial<Omit<T, P>>;
  }[R];

/** JSON-LD validator: a filter plus at least one of `minOccurrence`, `maxOccurrence`, `schema`. */
type JSONLDAdditionalValidator = {
  type: "json-ld";
  filter: AjvSchema;
} & RequireAtLeastOne<{
  minOccurrence: number;
  maxOccurrence: number;
  schema: AjvSchema;
}>;

/** A user-supplied validator selected by URL pattern, with optional bounds on the match count. */
export type AdditionalValidator = {
  urlPattern: RegExp;
  minMatches?: number;
  maxMatches?: number;
  config: JSONAdditionalValidator | JSONLDAdditionalValidator;
};

/**
 * Curried entry point: options, then site, then start URLs, extras and
 * additional validators. Resolves to the list of validation results.
 */
export declare const validate: (options?: {
  concurrency?: number;
}) => (
  baseUrl: string,
  targetConfig: TargetConfig,
) => (
  fetchBases: FetchBases,
  extras: ExtraTypes,
  additionalValidators: DeepReadonly<AdditionalValidator[]>,
) => Promise<Array<ValidationResultType>>;

/**
 * Curried entry point taking two site descriptions in sequence: first the
 * current one (base URL, target config, start URLs, extras), then the
 * "original" one with the same four inputs. Resolves to three lists:
 * removed permanent URLs, non-forward-compatible JSON links, and changed feed GUIDs.
 */
export declare const compareVersions: (options?: {
  concurrency?: number;
}) => (
  baseUrl: string,
  targetConfig: TargetConfig,
) => (
  fetchBases: FetchBases,
  extras: ExtraTypes,
) => (
  originalBaseUrl: string,
  originalTargetConfig: TargetConfig,
) => (
  originalFetchBases: FetchBases,
  originalExtras: ExtraTypes,
) => Promise<{
  removedPermanentUrls: DeepReadonly<
    {
      url: string;
      location: LinkLocation;
    }[]
  >;
  nonForwardCompatibleJsonLinks: DeepReadonly<
    {
      url: string;
      location: LinkLocation;
    }[]
  >;
  feedGuidsChanged: DeepReadonly<
    {
      url: string;
      feedUrl: string;
      originalGuid: string;
      newGuid: string;
    }[]
  >;
}>;

export {};