/*
 * website-validator
 * Comprehensive website validation
 * Type declarations (TypeScript); 656 lines (655 loc), 17.3 kB
 */
import { DeepReadonly } from "ts-essentials";
import { Pool } from "./worker-runner.js";
import Ajv from "ajv";
/**
 * Module-level logger, typed as `DebugLogger` from the `util` module.
 *
 * This is an ambient declaration only: it describes the exported binding but
 * carries no implementation.
 */
export declare const log: import("util").DebugLogger;
/**
 * Outcome of fetching a single URL.
 *
 * - `url`: the URL this result belongs to.
 * - `headers`: string-to-string map of headers.
 * - `status`: numeric status.
 * - `data`: either a `{ path, mtime }` pair or `null`.
 *
 * NOTE(review): presumably `data` is `null` when no backing file exists and
 * `mtime` is a modification timestamp — confirm against the implementation.
 */
export type FileFetchResult = {
  url: string;
  headers: { [name: string]: string };
  status: number;
  data: null | {
    path: string;
    mtime: number;
  };
};
/**
 * A `FileFetchResult` whose `data` is guaranteed to be present.
 *
 * Every field is derived from `FileFetchResult` by indexed access; the only
 * difference is that `data` has `null` removed via `NonNullable`.
 */
export type FoundPageFetchResult = {
  url: FileFetchResult["url"];
  status: FileFetchResult["status"];
  headers: FileFetchResult["headers"];
  data: NonNullable<FileFetchResult["data"]>;
};
/**
 * Asynchronous lookup on a found page.
 *
 * @param res - a fetch result whose `data` is non-null.
 * @returns a promise resolving to a string, or `undefined`.
 *
 * NOTE(review): judging by the name, the string is the redirect target and
 * `undefined` means "no redirect" — confirm against the implementation.
 */
export declare const getRedirect: (
  res: FoundPageFetchResult
) => Promise<string | undefined>;
/**
 * Curried string transformer: first takes `baseUrl` and `indexName`, then
 * returns a function mapping a `url` string to a string.
 *
 * NOTE(review): presumably the result is the canonical form of `url` relative
 * to `baseUrl`, with `indexName` being the directory index file name —
 * confirm against the implementation.
 */
export declare const toCanonical: (
  baseUrl: string,
  indexName: string
) => (url: string) => string;
/**
 * Curried predicate: first takes `baseUrl`, then returns a function that maps
 * a `url` string to a boolean.
 *
 * NOTE(review): presumably `true` means `url` belongs to the site rooted at
 * `baseUrl` — confirm against the implementation.
 */
export declare const isInternalLink: (
  baseUrl: string
) => (url: string) => boolean;
/**
 * Discriminated union (on `type`) of the roles a URL can be given.
 *
 * Only the `"json"` variant carries extra data: a list of `extractConfigs`,
 * each pairing a `jmespath` string with a list of `asserts` and a nested
 * `UrlRole` (the type is recursive).
 *
 * NOTE(review): presumably each config extracts links from the JSON document
 * using the JMESPath expression — confirm against the implementation.
 */
export type UrlRole =
  | { type: "document" }
  | { type: "stylesheet" }
  | { type: "asset" }
  | { type: "sitemap" }
  | { type: "robotstxt" }
  | { type: "rss" }
  | { type: "atom" }
  | {
      type: "json";
      extractConfigs: Array<{
        jmespath: string;
        asserts: Assertion[];
        role: UrlRole;
      }>;
    };
/** Assertion variant tagged `"image"`; carries no further data. */
type AssertImage = { type: "image" };
/** Assertion variant tagged `"video"`; carries no further data. */
type AssertVideo = { type: "video" };
/** Assertion variant tagged `"font"`; carries no further data. */
type AssertFont = { type: "font" };
/**
 * Assertion variant tagged `"imageSize"`, carrying a numeric `width` and
 * `height`.
 * NOTE(review): units are not stated here; presumably pixels — confirm.
 */
type AssertImageSize = {
  type: "imageSize";
  width: number;
  height: number;
};
/**
 * Assertion variant tagged `"content-type"`, carrying a read-only list of
 * content-type strings.
 * NOTE(review): presumably the list enumerates the acceptable values —
 * confirm against the implementation.
 */
type AssertContentType = {
  type: "content-type";
  contentType: ReadonlyArray<string>;
};
/** Assertion variant tagged `"permanent"`; carries no further data. */
type AssertPermanent = { type: "permanent" };
/** Assertion variant tagged `"document"`; carries no further data. */
type AssertDocument = { type: "document" };
/**
 * Union of every assertion variant, discriminated by the `type` field of
 * each member.
 */
export type Assertion =
  | AssertImage
  | AssertVideo
  | AssertFont
  | AssertImageSize
  | AssertContentType
  | AssertPermanent
  | AssertDocument;
/**
 * Shape of one reported message: an `ID`, a `severity`, a `message`, and a
 * list of `locations` (each a `path` with `line` and `column`).
 *
 * NOTE(review): by its name this mirrors an entry of EPUBCheck's report
 * output — confirm against the tool's format.
 */
export type EpubcheckError = {
  ID: string;
  severity: string;
  message: string;
  locations: {
    path: string;
    line: number;
    column: number;
  }[];
};
/**
 * A located message, in one of two flavours discriminated by `type`:
 *
 * - `"error"`, optionally with `subtype: "fatal"`;
 * - `"info"`, optionally with `subtype: "warning"`.
 *
 * Both flavours share the same remaining fields: `message`, `extract`, the
 * required `lastLine` / `firstColumn`, and the optional `firstLine`,
 * `lastColumn`, `hiliteStart` and `hiliteLength`.
 *
 * NOTE(review): by its name this mirrors a message of the Nu Html Checker
 * (vnu) JSON output — confirm against the tool's format.
 */
export type VnuReportedError =
  | {
      type: "error";
      subtype?: "fatal";
      message: string;
      extract: string;
      firstLine?: number;
      lastLine: number;
      firstColumn: number;
      lastColumn?: number;
      hiliteStart?: number;
      hiliteLength?: number;
    }
  | {
      type: "info";
      subtype?: "warning";
      message: string;
      extract: string;
      firstLine?: number;
      lastLine: number;
      firstColumn: number;
      lastColumn?: number;
      hiliteStart?: number;
      hiliteLength?: number;
    };
/**
 * Container for a list of `messages`. Each message is either a
 * `VnuReportedError` or a `"non-document-error"` entry that has only a
 * `message` string.
 */
export type VnuResult = {
  messages: (
    | { type: "non-document-error"; message: string }
    | VnuReportedError
  )[];
};
/**
 * Discriminated union (on `type`) with one variant per kind of source:
 * HTML element, robots.txt sitemap entry, text sitemap, XML sitemap, RSS,
 * Atom, JSON, CSS, extra URL, or redirect.
 *
 * Each variant carries the coordinates needed for its kind (indices, the
 * containing URL, a selector, ...); `"redirect"` carries none.
 *
 * NOTE(review): by its name and its use alongside `url` fields elsewhere,
 * this presumably pinpoints where a link was discovered — confirm.
 */
export type LinkLocation =
  | {
      type: "html";
      element: {
        outerHTML: string;
        selector: string;
      };
    }
  | {
      type: "robotssitemap";
      index: number;
    }
  | {
      type: "sitemaptxt";
      // Either the sitemap's own URL or an index into the extra sitemaps.
      sitemaplocation: { url: string } | { extrasitemapIndex: number };
      index: number;
    }
  | {
      type: "sitemapxml";
      // Either the sitemap's own URL or an index into the extra sitemaps.
      sitemaplocation: { url: string } | { extrasitemapIndex: number };
      urlsetIndex: number;
      urlIndex: number;
    }
  | {
      type: "rss";
      rssurl: string;
      channelIndex: number;
      linkIndex: number;
    }
  | {
      type: "atom";
      atomurl: string;
      entryIndex: number;
      linkIndex: number;
    }
  | {
      type: "json";
      jsonurl: string;
      jmespath: string;
      index: number;
    }
  | {
      type: "css";
      position: string;
      target: string;
    }
  | {
      type: "extraurl";
      index: number;
    }
  | { type: "redirect" };
/** Union of the `type` discriminator strings of every `LinkError` variant. */
export type LinkErrorTypes =
  LinkError["type"];
/**
 * Discriminated union (on `type`) of link-related findings.
 *
 * Every variant has a `location` made of a `url` plus a `LinkLocation`.
 * Two variants add detail:
 * - `CONTENT_TYPE_MISMATCH` records the expected and actual content types;
 * - `REDIRECT_CHAIN` records a `targetUrl`.
 */
type LinkError =
  | {
      type: "TARGET_NOT_FOUND";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "HASH_POINTS_TO_NON_DOCUMENT";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "HASH_TARGET_NOT_FOUND";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "LINK_POINTS_TO_NON_DOCUMENT";
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "CONTENT_TYPE_MISMATCH";
      expectedContentTypes: Array<string>;
      actualContentType: string;
      location: { url: string; location: LinkLocation };
    }
  | {
      type: "REDIRECT_CHAIN";
      targetUrl: string;
      location: { url: string; location: LinkLocation };
    };
/**
 * Finding tagged `"NOT_FOUND"`. Its location is a `url` plus a
 * `"fetchBase"` marker carrying a numeric `index`.
 *
 * NOTE(review): presumably `index` points into the `fetchBases` list passed
 * to the validator — confirm against the implementation.
 */
type NotFoundError = {
  type: "NOT_FOUND";
  location: {
    url: string;
    location: { type: "fetchBase"; index: number };
  };
};
/**
 * Discriminated union (on `type`) of per-document findings.
 *
 * Most variants carry a `location` with at least a `url`. Two do not:
 * `MULTIPLE_CANONICAL_LINKS` (which lists the offending elements instead)
 * and `REDIRECT_DIFFERENT_CANONICAL` (which lists the two targets).
 */
type DocumentErrors =
  | {
      type: "JSON_LD_UNPARSEABLE";
      location: {
        url: string;
        location: { outerHTML: string; selector: string };
      };
    }
  | {
      type: "VNU";
      object: VnuReportedError;
      location: { url: string };
    }
  | {
      type: "EPUBCHECK";
      object: EpubcheckError;
      location: { url: string };
    }
  | {
      type: "PDF_CAN_NOT_BE_PARSED";
      message: string;
      location: { url: string };
    }
  | {
      type: "JSON_FILE_UNPARSEABLE";
      location: { url: string };
    }
  | {
      type: "XML_FILE_UNPARSEABLE";
      location: { url: string };
    }
  | {
      type: "MULTIPLE_CANONICAL_LINKS";
      canonicalLinks: { outerHTML: string; selector: string }[];
    }
  | {
      type: "NON_REDIRECT_DIFFERENT_CANONICAL";
      canonicalLink: string;
      location: { url: string };
    }
  | {
      type: "REDIRECT_DIFFERENT_CANONICAL";
      redirectTarget: string;
      canonicalTarget: string;
    }
  | {
      type: "IMG_SRC_INVALID";
      location: {
        url: string;
        location: { outerHTML: string; selector: string };
      };
      // A URL that is either internal (with dimensions) or external; may be absent.
      src:
        | ({ url: string } & (
            | { external: false; width: number; height: number }
            | { external: true }
          ))
        | undefined;
      // Candidate list; each has a density or width descriptor and the same
      // internal/external split as `src`. May be absent.
      srcset:
        | Array<
            {
              url: string;
              descriptor: { density: number } | { width: number };
            } & (
              | { external: false; width: number; height: number }
              | { external: true }
            )
          >
        | undefined;
      sizes: string | undefined;
    };
/**
 * Discriminated union (on `type`) of findings produced by additional
 * validators.
 *
 * `schema` and `filter` are typed as the first parameter of Ajv's `compile`;
 * `result` is typed as one element of the compiled validator's non-null
 * `errors` array. The last variant reports a match count (`actualMatches`)
 * together with optional bounds and the `urlPattern` as a string.
 */
type AdditionalValidatorError =
  | {
      type: "JSON_DOES_NOT_MATCH_SCHEMA";
      result: NonNullable<ReturnType<InstanceType<typeof Ajv.default>["compile"]>["errors"]>[number];
      schema: Parameters<InstanceType<typeof Ajv.default>["compile"]>[0];
      url: string;
    }
  | {
      type: "JSON_LD_DOES_NOT_MATCH_SCHEMA";
      filter: Parameters<InstanceType<typeof Ajv.default>["compile"]>[0];
      result: NonNullable<ReturnType<InstanceType<typeof Ajv.default>["compile"]>["errors"]>[number];
      schema: Parameters<InstanceType<typeof Ajv.default>["compile"]>[0];
      url: string;
    }
  | {
      type: "JSON_LD_DOES_NOT_MATCH_OCCURRENCE_REQUIREMENT";
      filter: Parameters<InstanceType<typeof Ajv.default>["compile"]>[0];
      minOccurrence: number | undefined;
      maxOccurrence: number | undefined;
      actualOccurrence: number;
      url: string;
    }
  | {
      type: "ADDITIONAL_VALIDATOR_MATCH_NUMBER_OUTSIDE_EXPECTED_RANGE";
      minMatches: number | undefined;
      maxMatches: number | undefined;
      actualMatches: number;
      urlPattern: string;
    };
/**
 * Deeply read-only union of every finding the validator can report: link
 * errors, document errors, not-found errors, additional-validator errors,
 * plus three inline variants for robots.txt and sitemap findings.
 */
export type ValidationResultType = DeepReadonly<
  | LinkError
  | DocumentErrors
  | NotFoundError
  | {
      type: "ROBOTS_TXT_HOST_INVALID";
      expectedHost: string;
      actualHost: string;
    }
  | {
      type: "ROBOTS_TXT_SITEMAP_INVALID";
      sitemapUrl: string;
    }
  | {
      type: "SITEMAP_LINK_INVALID";
      sitemapUrl: string;
      url: string;
    }
  | AdditionalValidatorError
>;
/**
 * Deeply read-only bag of optional string lists: extra text sitemaps, extra
 * XML sitemaps, and extra URLs. Each key may be omitted or explicitly
 * `undefined`.
 */
type ExtraTypes = DeepReadonly<{
  extraTxtSitemaps?: Array<string> | undefined;
  extraXmlSitemaps?: Array<string> | undefined;
  extraUrls?: Array<string> | undefined;
}>;
/**
 * Ambient declaration of a three-stage curried function:
 * `(pool) => (baseUrl, targetConfig) => (fetchBases, extras) => Promise<...>`.
 *
 * The promise resolves to an array whose elements take one of two shapes,
 * both carrying `url` and `role`:
 *  - `res` is a `FoundPageFetchResult` and `links` is a read-only array of
 *    `{ url, role, asserts, location }` entries;
 *  - `res` is a read-only fetch result whose `data` may be `null`, and
 *    `links` is `null`.
 *
 * The inline object types below look like compiler-expanded, read-only
 * copies of `UrlRole`, `Assertion` and `LinkLocation`. Where the role type
 * recurses, the expansion stops at `any` (marked `elided`), so they are not
 * interchangeable with the named aliases.
 *
 * NOTE(review): presumably the `links: null` shape is produced for URLs
 * whose file was not found — confirm against the implementation.
 */
export declare const fetchFileGraph: (pool: Pool) => (baseUrl: string, targetConfig: TargetConfig) => (fetchBases: DeepReadonly<{
url: string;
role: UrlRole;
}[]>, extras: ExtraTypes) => Promise<({
url: string;
role: {
readonly type: "document";
} | {
readonly type: "stylesheet";
} | {
readonly type: "asset";
} | {
readonly type: "sitemap";
} | {
readonly type: "robotstxt";
} | {
readonly type: "rss";
} | {
readonly type: "atom";
} | {
readonly type: "json";
readonly extractConfigs: readonly {
readonly jmespath: string;
readonly asserts: readonly ({
readonly type: "image";
} | {
readonly type: "video";
} | {
readonly type: "font";
} | {
readonly type: "imageSize";
readonly width: number;
readonly height: number;
} | {
readonly type: "content-type";
readonly contentType: readonly string[];
} | {
readonly type: "permanent";
} | {
readonly type: "document";
})[];
readonly role: {
readonly type: "document";
} | {
readonly type: "stylesheet";
} | {
readonly type: "asset";
} | {
readonly type: "sitemap";
} | {
readonly type: "robotstxt";
} | {
readonly type: "rss";
} | {
readonly type: "atom";
} | /*elided*/ any;
}[];
};
res: FoundPageFetchResult;
links: readonly {
readonly url: string;
readonly role: {
readonly type: "document";
} | {
readonly type: "stylesheet";
} | {
readonly type: "asset";
} | {
readonly type: "sitemap";
} | {
readonly type: "robotstxt";
} | {
readonly type: "rss";
} | {
readonly type: "atom";
} | {
readonly type: "json";
readonly extractConfigs: readonly {
readonly jmespath: string;
readonly asserts: readonly ({
readonly type: "image";
} | {
readonly type: "video";
} | {
readonly type: "font";
} | {
readonly type: "imageSize";
readonly width: number;
readonly height: number;
} | {
readonly type: "content-type";
readonly contentType: readonly string[];
} | {
readonly type: "permanent";
} | {
readonly type: "document";
})[];
readonly role: {
readonly type: "document";
} | {
readonly type: "stylesheet";
} | {
readonly type: "asset";
} | {
readonly type: "sitemap";
} | {
readonly type: "robotstxt";
} | {
readonly type: "rss";
} | {
readonly type: "atom";
} | /*elided*/ any;
}[];
};
readonly asserts: readonly ({
readonly type: "image";
} | {
readonly type: "video";
} | {
readonly type: "font";
} | {
readonly type: "imageSize";
readonly width: number;
readonly height: number;
} | {
readonly type: "content-type";
readonly contentType: readonly string[];
} | {
readonly type: "permanent";
} | {
readonly type: "document";
})[];
readonly location: {
readonly type: "html";
readonly element: {
readonly outerHTML: string;
readonly selector: string;
};
} | {
readonly type: "robotssitemap";
readonly index: number;
} | {
readonly type: "sitemaptxt";
readonly sitemaplocation: {
readonly url: string;
} | {
readonly extrasitemapIndex: number;
};
readonly index: number;
} | {
readonly type: "sitemapxml";
readonly sitemaplocation: {
readonly url: string;
} | {
readonly extrasitemapIndex: number;
};
readonly urlsetIndex: number;
readonly urlIndex: number;
} | {
readonly type: "rss";
readonly rssurl: string;
readonly channelIndex: number;
readonly linkIndex: number;
} | {
readonly type: "atom";
readonly atomurl: string;
readonly entryIndex: number;
readonly linkIndex: number;
} | {
readonly type: "json";
readonly jsonurl: string;
readonly jmespath: string;
readonly index: number;
} | {
readonly type: "css";
readonly position: string;
readonly target: string;
} | {
readonly type: "extraurl";
readonly index: number;
} | {
readonly type: "redirect";
};
}[];
} | {
url: string;
role: {
readonly type: "document";
} | {
readonly type: "stylesheet";
} | {
readonly type: "asset";
} | {
readonly type: "sitemap";
} | {
readonly type: "robotstxt";
} | {
readonly type: "rss";
} | {
readonly type: "atom";
} | {
readonly type: "json";
readonly extractConfigs: readonly {
readonly jmespath: string;
readonly asserts: readonly ({
readonly type: "image";
} | {
readonly type: "video";
} | {
readonly type: "font";
} | {
readonly type: "imageSize";
readonly width: number;
readonly height: number;
} | {
readonly type: "content-type";
readonly contentType: readonly string[];
} | {
readonly type: "permanent";
} | {
readonly type: "document";
})[];
readonly role: {
readonly type: "document";
} | {
readonly type: "stylesheet";
} | {
readonly type: "asset";
} | {
readonly type: "sitemap";
} | {
readonly type: "robotstxt";
} | {
readonly type: "rss";
} | {
readonly type: "atom";
} | /*elided*/ any;
}[];
};
res: {
readonly url: string;
readonly headers: {
readonly [x: string]: string;
};
readonly status: number;
readonly data: {
readonly path: string;
readonly mtime: number;
} | null;
};
links: null;
})[]>;
/**
 * Describes a target to validate.
 *
 * - `dir`: a directory string (required).
 * - `indexName`: optional string.
 * - `responseMeta`: optional callback mapping a `path` to the `headers` and
 *   `status` for that path.
 *
 * NOTE(review): presumably `dir` is the on-disk site root and `indexName`
 * the directory index file name — confirm against the implementation.
 */
type TargetConfig = {
  dir: string;
  indexName?: string;
  responseMeta?: (path: string) => {
    headers: { [name: string]: string };
    status: number;
  };
};
/**
 * Validator configuration tagged `"json"`. `schema` is typed as the first
 * parameter of Ajv's `compile` method.
 */
type JSONAdditionalValidator = {
  type: "json";
  schema: Parameters<
    InstanceType<typeof Ajv.default>["compile"]
  >[0];
};
/**
 * Makes at least one of the selected keys of `T` required.
 *
 * Keys of `T` outside the selection are kept unchanged (`Omit`). For the
 * selected keys, the result is a union with one member per key: that key is
 * required and all other keys of `T` are optional.
 *
 * The second type parameter is the selection and defaults to every key of `T`.
 */
type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Omit<T, Keys> &
  {
    [Key in Keys]: Required<Pick<T, Key>> & Partial<Omit<T, Key>>;
  }[Keys];
/**
 * Validator configuration tagged `"json-ld"`.
 *
 * `filter` is always required. In addition, at least one of `minOccurrence`,
 * `maxOccurrence` or `schema` must be supplied (enforced through
 * `RequireAtLeastOne`). `filter` and `schema` are typed as the first
 * parameter of Ajv's `compile` method.
 */
type JSONLDAdditionalValidator = {
  type: "json-ld";
  filter: Parameters<InstanceType<typeof Ajv.default>["compile"]>[0];
} & RequireAtLeastOne<{
  minOccurrence: number;
  maxOccurrence: number;
  schema: Parameters<InstanceType<typeof Ajv.default>["compile"]>[0];
}>;
/**
 * One additional validator: a `urlPattern` regular expression, optional
 * `minMatches` / `maxMatches` bounds, and a `config` that is either a JSON
 * or a JSON-LD validator configuration.
 *
 * NOTE(review): presumably the bounds limit how many URLs may match
 * `urlPattern` — confirm against the implementation.
 */
export type AdditionalValidator = {
  urlPattern: RegExp;
  minMatches?: number;
  maxMatches?: number;
  config:
    | JSONAdditionalValidator
    | JSONLDAdditionalValidator;
};
/**
 * Ambient declaration of a three-stage curried function:
 *
 * 1. `(options?)` — optional settings; the only field is `concurrency`.
 * 2. `(baseUrl, targetConfig)` — a base URL string and a target config.
 * 3. `(fetchBases, extras, additionalValidators)` — read-only lists of
 *    `{ url, role }` entry points, extras, and additional validators.
 *
 * The final call returns a promise of an array of `ValidationResultType`.
 */
export declare const validate: (
  options?: { concurrency?: number }
) => (
  baseUrl: string,
  targetConfig: TargetConfig
) => (
  fetchBases: DeepReadonly<{ url: string; role: UrlRole }[]>,
  extras: ExtraTypes,
  additionalValidators: DeepReadonly<AdditionalValidator[]>
) => Promise<ValidationResultType[]>;
/**
 * Ambient declaration of a five-stage curried function:
 *
 * 1. `(options?)` — optional settings; the only field is `concurrency`.
 * 2. `(baseUrl, targetConfig)` and
 * 3. `(fetchBases, extras)` — one site version.
 * 4. `(originalBaseUrl, originalTargetConfig)` and
 * 5. `(originalFetchBases, originalExtras)` — the "original" site version.
 *
 * The final call returns a promise of an object with three read-only lists:
 * `removedPermanentUrls` and `nonForwardCompatibleJsonLinks` (each entry a
 * `url` plus a `LinkLocation`), and `feedGuidsChanged` (each entry a `url`,
 * a `feedUrl`, and the original and new GUID strings).
 */
export declare const compareVersions: (
  options?: { concurrency?: number }
) => (
  baseUrl: string,
  targetConfig: TargetConfig
) => (
  fetchBases: DeepReadonly<{ url: string; role: UrlRole }[]>,
  extras: ExtraTypes
) => (
  originalBaseUrl: string,
  originalTargetConfig: TargetConfig
) => (
  originalFetchBases: DeepReadonly<{ url: string; role: UrlRole }[]>,
  originalExtras: ExtraTypes
) => Promise<{
  removedPermanentUrls: DeepReadonly<
    {
      url: string;
      location: LinkLocation;
    }[]
  >;
  nonForwardCompatibleJsonLinks: DeepReadonly<
    {
      url: string;
      location: LinkLocation;
    }[]
  >;
  feedGuidsChanged: DeepReadonly<
    {
      url: string;
      feedUrl: string;
      originalGuid: string;
      newGuid: string;
    }[]
  >;
}>;
export {};