UNPKG

website-validator

Version:
222 lines 9.42 kB
import fs from "node:fs/promises";
import path from "node:path";
import postcss from "postcss";
import os from "node:os";
import { strict as assert } from "node:assert";
import { withFileCache } from "with-file-cache";
import crypto from "node:crypto";
import { JSDOM } from "jsdom";
import { execFile } from "node:child_process";
import util from "node:util";
import vnu from "vnu-jar";
import * as epubcheck from "epubcheck-static";
import sharp from "sharp";
import { getDocument } from "pdfjs-dist";

// Promise-returning execFile, shared by every helper that shells out to Java.
const execFileAsync = util.promisify(execFile);

/**
 * Hex-encoded SHA-256 digest of the input.
 *
 * @param {string | Buffer | Uint8Array} x - data passed to `Hash.update`
 * @returns {string} lowercase hex digest
 */
export const sha = (x) => crypto.createHash("sha256").update(x).digest("hex");

/**
 * File-cache decorator created by `withFileCache`.
 *
 * The base key is derived from the hash of `package-lock.json` (read relative
 * to the current working directory) and the hash of the `java --version`
 * output, folded together with `sha`.
 */
export const addFileCache = withFileCache({
  baseKey: async () => {
    const hashLockfiles = async () => {
      const files = ["package-lock.json"];
      const hashes = await Promise.all(files.map(async (file) => sha(await fs.readFile(file))));
      return hashes.join(";");
    };
    const hashJavaVersion = async () => {
      const { stdout } = await execFileAsync("java", ["--version"]);
      return sha(stdout);
    };
    const parts = await Promise.all([hashLockfiles(), hashJavaVersion()]);
    return parts.reduce((memo, val) => sha(`${memo};${val}`), "");
  },
});

/**
 * Creates a fresh temporary directory, runs `fn` with its path and removes the
 * directory afterwards, whether `fn` resolved or rejected.
 *
 * @template T
 * @param {(dir: string) => Promise<T> | T} fn - callback receiving the directory path
 * @returns {Promise<T>} whatever `fn` returned
 */
export const withTempDir = async (fn) => {
  const dir = await fs.mkdtemp(await fs.realpath(os.tmpdir()) + path.sep);
  try {
    return await fn(dir);
  } finally {
    // Awaited so the cleanup has finished before the caller continues and a
    // failure is not left as an unhandled rejection. `force` keeps the cleanup
    // quiet if `fn` already removed the directory itself.
    await fs.rm(dir, { recursive: true, force: true });
  }
};

/**
 * Builds a human-readable location for a DOM element: a `tag:nth-child(n)`
 * selector chain from the root element down to `element`, followed by
 * ` - ` and the element's outer HTML.
 *
 * @param {Element} element
 * @returns {string}
 */
export const getElementLocation = (element) => {
  // Root-first list of the element and all of its ancestors.
  const ancestry = [];
  for (let node = element; node !== null; node = node.parentElement) {
    ancestry.unshift(node);
  }
  const countPrecedingSiblings = (el) => {
    let count = 0;
    for (let sibling = el.previousElementSibling; sibling; sibling = sibling.previousElementSibling) {
      count += 1;
    }
    return count;
  };
  const selector = ancestry
    .map((node) => {
      const tag = node.tagName.toLowerCase();
      // The root element has no parent, so it gets no :nth-child() suffix.
      return node.parentElement === null
        ? tag
        : `${tag}:nth-child(${countPrecedingSiblings(node) + 1})`;
    })
    .join(" > ");
  return `${selector} - ${element.outerHTML}`;
};

/**
 * Collects every non-`data:` `url(...)` reference found in the declarations of
 * a stylesheet.
 *
 * Each entry contains the unquoted URL, the enclosing rule selector or
 * `@at-rule` name (`parent`, `undefined` for other parent types), the
 * declaration property (`prop`) and a ` / `-joined path from the root down to
 * the property (`position`).
 *
 * @param {string} css - stylesheet source
 * @returns {Promise<Array<{url: string, parent: string | undefined, prop: string, position: string}>>}
 * @throws {Error} when postcss cannot parse the stylesheet or a matched
 *   `url(...)` cannot be taken apart
 */
export const extractAllUrlsFromCss = async (css) => {
  const result = [];
  // TODO: also extract the optional format()
  // see: https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face/src
  const urlPattern = /url\((?<n>([^\)]|(?<=\\)\))*)\)/g;
  // Matches url(data:...), url("data:...") and url('data:...').
  const dataUrlPattern = /^url\(\s*["']?data:/;

  const describeNode = (node) => {
    if (node.type === "atrule") {
      return `@${node.name}`;
    }
    if (node.type === "rule") {
      return node.selector;
    }
    return node.type;
  };
  // Root-first description of a node and all of its ancestors.
  const pathTo = (node) => {
    const segments = [];
    for (let current = node; current; current = current.parent) {
      segments.unshift(describeNode(current));
    }
    return segments;
  };
  const describeParent = (node) => {
    if (node === undefined) {
      return undefined;
    }
    if (node.type === "atrule") {
      return `@${node.name}`;
    }
    if (node.type === "rule") {
      return node.selector;
    }
    return undefined;
  };
  // Strips the optional whitespace and the optional pair of single or double
  // quotes that CSS allows around the argument of url().
  const unquote = (raw) => {
    const inner = raw.trim();
    for (const quote of ["\"", "'"]) {
      if (inner.startsWith(quote) && inner.endsWith(quote)) {
        if (inner.length === 1) {
          throw new Error("Whops");
        }
        return inner.substring(1, inner.length - 1);
      }
    }
    return inner;
  };

  const plugin = () => ({
    postcssPlugin: "test",
    Declaration: (decl) => {
      const matches = decl.value ? decl.value.match(urlPattern) ?? [] : [];
      const references = matches.filter((match) => !dataUrlPattern.test(match));
      if (references.length === 0) {
        return;
      }
      const position = [...pathTo(decl.parent), decl.prop].join(" / ");
      const parent = describeParent(decl.parent);
      for (const url of references) {
        const res = url.match(/^url\((?<data>.*)\)$/);
        assert(res, `could not parse css url: ${url} , decl.value: ${decl.value}`);
        result.push({ url: unquote(res.groups.data), parent, prop: decl.prop, position });
      }
    },
  });
  plugin.postcss = true;
  await postcss([plugin]).process(css, { from: undefined });
  return result;
};

/**
 * Parses the HTML file at `page.path` and returns the elements that carry an
 * `id` plus all `img`, `link`, `meta`, `script`, `video` and `a` elements,
 * each reduced to plain data (attributes, HTML, location string).
 *
 * The result is cached under a key built from `page.path` and `page.mtime`.
 *
 * @param {{path: string, mtime: unknown}} page
 */
export const getInterestingPageElements = addFileCache(async (page) => {
  const { document } = new JSDOM(await fs.readFile(page.path, "utf8")).window;
  const ids = [...document.querySelectorAll("*[id]")].map((elem) => ({
    outerHTML: elem.outerHTML,
    id: elem.id,
    selector: getElementLocation(elem),
  }));
  const elementsWithTagName = (tagName) =>
    [...document.querySelectorAll(tagName)].map((tag) => ({
      attrs: Object.fromEntries(tag.getAttributeNames().map((name) => [name, tag.getAttribute(name)])),
      outerHTML: tag.outerHTML,
      selector: getElementLocation(tag),
      innerHTML: tag.innerHTML,
    }));
  return {
    ids,
    tagCollections: {
      img: elementsWithTagName("img"),
      link: elementsWithTagName("link"),
      meta: elementsWithTagName("meta"),
      script: elementsWithTagName("script"),
      video: elementsWithTagName("video"),
      a: elementsWithTagName("a"),
    },
  };
}, { calcCacheKey: (page) => ["getInterestingPageElements_2", page.path, page.mtime] });

/**
 * Runs the vnu jar once per file type and groups the reported messages by the
 * path of the file they belong to.
 *
 * @param {Array<{type: string, data: {path: string}}>} files - `type` is
 *   passed to the checker as a `--<type>` flag
 * @returns {Promise<Record<string, object[]>>} map from `data.path` to the
 *   messages for that file (an empty array when there are none)
 * @throws {Error} when the checker reports a `non-document-error`, or a
 *   message refers to a path that is not among `files`
 */
export const vnuValidates = async (files) => {
  const byType = {};
  for (const file of files) {
    if (byType[file.type]) {
      byType[file.type].push(file.data);
    } else {
      byType[file.type] = [file.data];
    }
  }

  const messagesPerType = await Promise.all(Object.entries(byType).map(async ([type, datas]) => {
    // TODO: streaming result
    const { stdout } = await execFileAsync(
      "java",
      ["-jar", vnu, `--${type}`, "--exit-zero-always", "--stdout", "--format", "json", ...datas.map((data) => data.path)],
      { maxBuffer: 100 * 1024 * 1024 },
    );
    const out = JSON.parse(stdout);
    if (out.messages.some((message) => message.type === "non-document-error")) {
      throw new Error(JSON.stringify(out.messages, undefined, 4));
    }
    return out.messages;
  }));

  // Every input file starts with an empty list so files without messages are
  // still present in the result.
  const byPath = Object.fromEntries(files.map(({ data }) => [data.path, []]));
  for (const message of messagesPerType.flat(1)) {
    assert(message.url.startsWith("file:"));
    const absolutePath = message.url.substring("file:".length);
    if (!byPath[absolutePath]) {
      throw new Error("Result path is not in files path");
    }
    byPath[absolutePath].push(message);
  }
  return byPath;
};

/**
 * Runs epubcheck on `data.path` and returns the `messages` array of its JSON
 * report. The result is cached under a key built from `data.path` and
 * `data.mtime`.
 *
 * @param {{path: string, mtime: unknown}} data
 * @throws when the JSON report cannot be read or parsed
 */
export const validateEpub = addFileCache(async (data) => {
  return withTempDir(async (dir) => {
    const outPath = path.join(dir, "out");
    let checkerError;
    try {
      await execFileAsync("java", ["-jar", epubcheck.path, "--json", outPath, data.path]);
    } catch (e) {
      // epubcheck will exit with 1 if it found errors, so a rejection here is
      // not fatal by itself: the report is still read below and an error is
      // only raised if that fails.
      checkerError = e;
    }
    try {
      const report = await fs.readFile(outPath, "utf8");
      return JSON.parse(report).messages;
    } catch (e) {
      if (checkerError !== undefined) {
        // Without a report, the checker's own failure is the useful diagnostic.
        console.error(checkerError);
      }
      console.error(e);
      throw e;
    }
  });
}, { calcCacheKey: (data) => ["epubcheck_validate_1", data.path, data.mtime] });

/**
 * Opens the PDF at `data.path` with pdf.js and extracts the text of the first
 * page. The result is cached under a key built from `data.path` and
 * `data.mtime`.
 *
 * @param {{path: string, mtime: unknown}} data
 * @returns {Promise<string[]>} an empty array on success, otherwise a
 *   one-element array with the error message
 */
export const validatePdf = addFileCache(async (data) => {
  const pdf = await fs.readFile(data.path);
  try {
    const document = await getDocument({
      // Copies the Buffer's bytes into a plain Uint8Array. Extra offset/length
      // arguments would be ignored here because the source is itself a typed
      // array, so they are omitted.
      data: new Uint8Array(pdf),
      stopAtErrors: true,
    }).promise;
    const page = await document.getPage(1);
    await page.getTextContent();
  } catch (e) {
    return [e.message];
  }
  return [];
}, { calcCacheKey: (data) => ["validatepdf_1", data.path, data.mtime] });

/**
 * Reads the image at `data.path` and returns the `width` and `height` reported
 * by sharp's metadata. The result is cached under a key built from
 * `data.path` and `data.mtime`.
 *
 * @param {{path: string, mtime: unknown}} data
 * @returns {Promise<{width: number | undefined, height: number | undefined}>}
 */
export const getImageDimensions = addFileCache(async (data) => {
  const { width, height } = await sharp(await fs.readFile(data.path)).metadata();
  return { width, height };
}, { calcCacheKey: (data) => ["getImageDimensions_1", data.path, data.mtime] });
//# sourceMappingURL=utils.js.map