website-validator
Version:
Comprehensive website validation
222 lines • 9.42 kB
JavaScript
import fs from "node:fs/promises";
import path from "node:path";
import postcss from "postcss";
import os from "node:os";
import { strict as assert } from "node:assert";
import { withFileCache } from "with-file-cache";
import crypto from "node:crypto";
import { JSDOM } from "jsdom";
import { execFile } from "node:child_process";
import util from "node:util";
import vnu from "vnu-jar";
import * as epubcheck from "epubcheck-static";
import sharp from "sharp";
import { getDocument } from "pdfjs-dist";
/**
 * Computes the SHA-256 digest of the given input.
 *
 * @param {string | Buffer | Uint8Array} x - data to hash
 * @returns {string} lowercase hex encoding of the digest
 */
export const sha = (x) => {
    const hash = crypto.createHash("sha256");
    hash.update(x);
    return hash.digest("hex");
};
/**
 * File-cache decorator shared by the validators in this module.
 *
 * The base key folds together everything that should invalidate all cached
 * results at once: the contents of the lock file(s) and the output of
 * `java --version`.
 */
export const addFileCache = withFileCache({
    baseKey: async () => {
        // Hash of every lock file, read relative to the current working directory.
        const hashLockFiles = async () => {
            const lockFiles = [
                "package-lock.json",
            ];
            const digests = await Promise.all(lockFiles.map(async (file) => sha(await fs.readFile(file))));
            return digests.join(";");
        };
        // Hash of whatever the local Java runtime prints for `--version`.
        const hashJavaVersion = async () => {
            const { stdout } = await util.promisify(execFile)("java", ["--version"]);
            return sha(stdout);
        };
        // Both parts are started before either is awaited, so they run in parallel.
        const parts = await Promise.all([hashLockFiles(), hashJavaVersion()]);
        let key = "";
        for (const part of parts) {
            key = sha(key + ";" + part);
        }
        return key;
    },
});
/**
 * Runs `fn` with a freshly created temporary directory and removes the
 * directory afterwards, whether `fn` resolves or rejects.
 *
 * The removal is awaited, so the directory is gone by the time the returned
 * promise settles and a failed removal surfaces as a rejection of that promise
 * instead of an unhandled rejection.
 *
 * @param {(dir: string) => any} fn - callback receiving the absolute directory path
 * @returns {Promise<any>} whatever `fn` returns
 */
export const withTempDir = async (fn) => {
    const dir = await fs.mkdtemp(await fs.realpath(os.tmpdir()) + path.sep);
    try {
        return await fn(dir);
    }
    finally {
        // `force` keeps the cleanup from failing when `fn` already removed the directory itself.
        await fs.rm(dir, { recursive: true, force: true });
    }
};
/**
 * Builds a human-readable location for a DOM element: a selector-like path
 * from the root element down to `element`, followed by " - " and the
 * element's outer HTML.
 *
 * The root is rendered as its lower-cased tag name; every other element as
 * `tag:nth-child(k)`, where k is its 1-based position among its element siblings.
 *
 * @param {Element} element - element to describe
 * @returns {string} `<path> - <outerHTML>`
 */
export const getElementLocation = (element) => {
    // Walk up to the root, collecting the chain root-first.
    const chain = [];
    let current = element;
    while (true) {
        chain.unshift(current);
        if (current.parentElement === null) {
            break;
        }
        current = current.parentElement;
    }
    // Number of element siblings that precede `el`.
    const countPrecedingSiblings = (el) => {
        let count = 0;
        let sibling = el.previousElementSibling;
        while (sibling) {
            count += 1;
            sibling = sibling.previousElementSibling;
        }
        return count;
    };
    const segments = chain.map((node) => {
        const tag = node.tagName.toLowerCase();
        if (node.parentElement === null) {
            return tag;
        }
        return `${tag}:nth-child(${countPrecedingSiblings(node) + 1})`;
    });
    return `${segments.join(" > ")} - ${element.outerHTML}`;
};
/**
 * Collects every `url(...)` reference that appears in a declaration value of
 * the given stylesheet, skipping `data:` URLs.
 *
 * Both quoting styles (`url("a")`, `url('a')`) and the unquoted form
 * (`url(a)`) are returned without the surrounding quotes, and whitespace
 * between the parentheses and the value is dropped.
 *
 * @param {string} css - stylesheet source
 * @returns {Promise<Array<{url: string, parent: string | undefined, prop: string, position: string}>>}
 *   one entry per URL: `parent` is the enclosing rule's selector or `@name` of
 *   the enclosing at-rule, `position` is the " / "-joined chain of ancestors
 *   ending in the property name
 * @throws {Error} when a matched `url(...)` cannot be parsed
 */
export const extractAllUrlsFromCss = async (css) => {
    // TODO: also extract the optional format()
    // see: https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face/src
    const urlPattern = /url\((?<n>([^\)]|(?<=\\)\))*)\)/g;
    const dataUrlPrefixes = ["url(\"data:", "url('data:", "url(data:"];
    const result = [];
    // Label used for one ancestor node in `position` / `parent`.
    const describeNode = (node) => {
        if (node.type === "atrule") {
            return "@" + node.name;
        }
        if (node.type === "rule") {
            return node.selector;
        }
        return node.type;
    };
    // Strips the `url(` `)` wrapper plus optional surrounding whitespace and quotes.
    const unwrapUrl = (cssUrl, declValue) => {
        // `s` flag: the value may span several lines
        const res = cssUrl.match(/^url\((?<data>.*)\)$/s);
        assert(res, `could not parse css url: ${cssUrl} , decl.value: ${declValue}`);
        const inner = res.groups["data"].trim();
        const quote = inner[0];
        if ((quote === "\"" || quote === "'") && inner.endsWith(quote)) {
            if (inner.length === 1) {
                // a lone quote character: there is nothing to unwrap
                throw new Error(`Whops: unterminated quote in css url: ${cssUrl}`);
            }
            return inner.substring(1, inner.length - 1);
        }
        return inner;
    };
    const plugin = () => {
        return {
            postcssPlugin: "test",
            Declaration: (decl) => {
                const urls = decl.value ? decl.value.match(urlPattern) : null;
                if (!urls) {
                    return;
                }
                // Ancestor labels, outermost first; identical for every URL of this declaration.
                const ancestors = [];
                for (let node = decl.parent; node; node = node.parent) {
                    ancestors.unshift(describeNode(node));
                }
                const position = [...ancestors, decl.prop].join(" / ");
                const hasNamedParent = decl.parent !== undefined && (decl.parent.type === "atrule" || decl.parent.type === "rule");
                const parent = hasNamedParent ? describeNode(decl.parent) : undefined;
                for (const url of urls) {
                    if (dataUrlPrefixes.some((prefix) => url.startsWith(prefix))) {
                        continue;
                    }
                    const matchedUrl = unwrapUrl(url, decl.value);
                    if (matchedUrl.startsWith("data:")) {
                        // data URL that was preceded by whitespace inside the parentheses
                        continue;
                    }
                    result.push({ url: matchedUrl, parent, prop: decl.prop, position });
                }
            }
        };
    };
    plugin.postcss = true;
    await postcss([plugin]).process(css, { from: undefined });
    return result;
};
/**
 * Parses the HTML file at `page.path` and extracts the elements the
 * validator inspects: every element carrying an `id`, plus all `img`, `link`,
 * `meta`, `script`, `video` and `a` elements.
 *
 * Results are cached per `page.path` / `page.mtime`.
 *
 * @param {{path: string, mtime: any}} page - file to parse
 * @returns {Promise<{ids: Array<object>, tagCollections: Record<string, Array<object>>}>}
 */
export const getInterestingPageElements = addFileCache(async (page) => {
    const html = await fs.readFile(page.path, "utf8");
    const { document } = new JSDOM(html).window;
    const describeIdElement = (elem) => ({
        outerHTML: elem.outerHTML,
        id: elem.id,
        selector: getElementLocation(elem),
    });
    const describeTag = (tag) => {
        const attrs = Object.fromEntries(tag.getAttributeNames().map((name) => [name, tag.getAttribute(name)]));
        return {
            attrs,
            outerHTML: tag.outerHTML,
            selector: getElementLocation(tag),
            innerHTML: tag.innerHTML,
        };
    };
    const collect = (tagName) => Array.from(document.querySelectorAll(tagName), (tag) => describeTag(tag));
    const ids = Array.from(document.querySelectorAll("*[id]"), (elem) => describeIdElement(elem));
    const tagCollections = {};
    for (const tagName of ["img", "link", "meta", "script", "video", "a"]) {
        tagCollections[tagName] = collect(tagName);
    }
    return { ids, tagCollections };
}, { calcCacheKey: (page) => ["getInterestingPageElements_2", page.path, page.mtime] });
/**
 * Validates files with the Nu Html Checker (vnu), one Java invocation per
 * file type, and groups the reported messages by file path.
 *
 * @param {Array<{type: string, data: {path: string}}>} files - files to check;
 *   `type` is passed to the checker as the `--<type>` flag
 * @returns {Promise<Record<string, Array<object>>>} messages keyed by `data.path`;
 *   every input path is present, with an empty array when nothing was reported
 * @throws {Error} when the checker reports a "non-document-error", or reports
 *   a message for a file that was not part of `files`
 */
export const vnuValidates = async (files) => {
    const byType = {};
    for (const file of files) {
        if (byType[file.type]) {
            byType[file.type].push(file.data);
        }
        else {
            byType[file.type] = [file.data];
        }
    }
    const messagesPerType = await Promise.all(Object.entries(byType).map(async ([type, datas]) => {
        // TODO: streaming result
        const { stdout } = await util.promisify(execFile)("java", ["-jar", vnu, `--${type}`, "--exit-zero-always", "--stdout", "--format", "json", ...datas.map(({ path }) => path)], { maxBuffer: 100 * 1024 * 1024 });
        const out = JSON.parse(stdout);
        if (out.messages.some(({ type }) => type === "non-document-error")) {
            throw new Error(JSON.stringify(out.messages, undefined, 4));
        }
        return out.messages;
    }));
    // The checker reports file: URLs, in which characters such as spaces may be
    // percent-encoded; fall back to the decoded form when the raw one is unknown.
    const decodePath = (rawPath) => {
        try {
            return decodeURIComponent(rawPath);
        }
        catch (e) {
            // not valid percent-encoding: there is nothing better than the raw text
            return rawPath;
        }
    };
    const result = Object.fromEntries(files.map(({ data }) => [data.path, []]));
    for (const message of messagesPerType.flat(1)) {
        assert(message.url.startsWith("file:"));
        const rawPath = message.url.substring("file:".length);
        const bucket = result[rawPath] || result[decodePath(rawPath)];
        if (!bucket) {
            throw new Error(`Result path is not in files path: ${rawPath}`);
        }
        bucket.push(message);
    }
    return result;
};
/**
 * Runs epubcheck on the EPUB at `data.path` and returns the messages from its
 * JSON report. Results are cached per `data.path` / `data.mtime`.
 *
 * @param {{path: string, mtime: any}} data - EPUB file to check
 * @returns {Promise<Array<object>>} the `messages` array of the epubcheck report
 * @throws the read/parse error when no usable report was produced; if the
 *   epubcheck process itself failed, that failure is logged first
 */
export const validateEpub = addFileCache(async (data) => {
    return withTempDir(async (dir) => {
        const outPath = path.join(dir, "out");
        // epubcheck will exit with 1 if it found errors, so a rejected run is
        // not fatal by itself: remember it and decide once the report was read
        let execError;
        try {
            await util.promisify(execFile)("java", ["-jar", epubcheck.path, "--json", outPath, data.path]);
        }
        catch (e) {
            execError = e;
        }
        try {
            const result = await fs.readFile(outPath, "utf8");
            return JSON.parse(result).messages;
        }
        catch (e) {
            if (execError !== undefined) {
                // no report and a failed run: the process error is the likely root cause
                console.error(execError);
            }
            console.error(e);
            throw e;
        }
    });
}, { calcCacheKey: (data) => ["epubcheck_validate_1", data.path, data.mtime] });
/**
 * Checks that the PDF at `data.path` can be opened and that the text content
 * of its first page can be extracted. Results are cached per `data.path` /
 * `data.mtime`.
 *
 * @param {{path: string, mtime: any}} data - PDF file to check
 * @returns {Promise<string[]>} empty when the checks pass, otherwise the
 *   message of the error that stopped them
 */
export const validatePdf = addFileCache(async (data) => {
    const contents = await fs.readFile(data.path);
    const errors = [];
    try {
        // Constructing from the Buffer copies its bytes into a plain Uint8Array.
        const loadingTask = getDocument({ data: new Uint8Array(contents), stopAtErrors: true });
        const doc = await loadingTask.promise;
        const firstPage = await doc.getPage(1);
        await firstPage.getTextContent();
    }
    catch (err) {
        errors.push(err.message);
    }
    return errors;
}, { calcCacheKey: (data) => ["validatepdf_1", data.path, data.mtime] });
/**
 * Reads the image at `data.path` and reports its pixel dimensions as found in
 * the image metadata. Results are cached per `data.path` / `data.mtime`.
 *
 * @param {{path: string, mtime: any}} data - image file to inspect
 * @returns {Promise<{width: number | undefined, height: number | undefined}>}
 */
export const getImageDimensions = addFileCache(async (data) => {
    const contents = await fs.readFile(data.path);
    const { width, height } = await sharp(contents).metadata();
    return { width, height };
}, { calcCacheKey: (data) => ["getImageDimensions_1", data.path, data.mtime] });
//# sourceMappingURL=utils.js.map