lighthouse
Version:
Automated auditing, performance metrics, and best practices for the web.
423 lines (367 loc) • 14.3 kB
JavaScript
/**
* @license
* Copyright 2017 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import * as statistics from './statistics.js';
/** @typedef {import('../types/lhr/audit-details').default.SnippetValue} SnippetValue */
/** Horizontal ellipsis character (U+2026). */
const ELLIPSIS = '\u2026';

/** Non-breaking space character (U+00A0). */
const NBSP = '\xa0';

/** Minimum score of the `pass` rating. */
const PASS_THRESHOLD = 0.9;

/** Rating buckets; `minScore` is present only on the buckets that define a lower bound. */
const RATINGS = {
  PASS: {
    label: 'pass',
    minScore: PASS_THRESHOLD,
  },
  AVERAGE: {
    label: 'average',
    minScore: 0.5,
  },
  FAIL: {
    label: 'fail',
  },
  ERROR: {
    label: 'error',
  },
};

// The 25 most used "TLD plus one" domains (aka public suffixes) from HTTP Archive.
// @see https://github.com/GoogleChrome/lighthouse/pull/5065#discussion_r191926212
// The canonical list is https://publicsuffix.org/learn/ but only a subset is used
// here to conserve bytes.
const listOfTlds = [
  'com',
  'co',
  'gov',
  'edu',
  'ac',
  'org',
  'go',
  'gob',
  'or',
  'net',
  'in',
  'ne',
  'nic',
  'gouv',
  'web',
  'spb',
  'blog',
  'jus',
  'kiev',
  'mil',
  'wi',
  'qc',
  'ca',
  'bel',
  'on',
];
/**
 * Static, stateless helpers for reading LHR fields, splitting markdown,
 * shortening strings and URLs for display, filtering snippet lines and
 * computing log-normal scores.
 */
class Util {
  /** Rating buckets (label plus optional minimum score). */
  static get RATINGS() {
    return RATINGS;
  }

  /** Minimum score of the `pass` rating. */
  static get PASS_THRESHOLD() {
    return PASS_THRESHOLD;
  }

  /** Format string for millisecond values: `%10d`, a non-breaking space, then `ms`. */
  static get MS_DISPLAY_VALUE() {
    return `%10d${NBSP}ms`;
  }

  /**
   * If LHR is older than 10.0 it will not have the `finalDisplayedUrl` property.
   * Old LHRs should have the `finalUrl` property which will work fine for the report.
   *
   * @param {LH.Result} lhr
   * @throws {Error} When neither `finalDisplayedUrl` nor `finalUrl` is set.
   */
  static getFinalDisplayedUrl(lhr) {
    if (lhr.finalDisplayedUrl) return lhr.finalDisplayedUrl;
    if (lhr.finalUrl) return lhr.finalUrl;
    throw new Error('Could not determine final displayed URL');
  }

  /**
   * If LHR is older than 10.0 it will not have the `mainDocumentUrl` property.
   * Old LHRs should have the `finalUrl` property which is the same as `mainDocumentUrl`.
   *
   * @param {LH.Result} lhr
   */
  static getMainDocumentUrl(lhr) {
    return lhr.mainDocumentUrl || lhr.finalUrl;
  }

  /**
   * Returns the full-page screenshot from its top-level location, or from the
   * `full-page-screenshot` audit details where LHRs prior to 10.0 kept it.
   *
   * @param {LH.Result} lhr
   * @return {LH.Result.FullPageScreenshot=}
   */
  static getFullPageScreenshot(lhr) {
    if (lhr.fullPageScreenshot) {
      return lhr.fullPageScreenshot;
    }

    // Prior to 10.0.
    const details = /** @type {LH.Result.FullPageScreenshot=} */ (
      lhr.audits['full-page-screenshot']?.details);
    return details;
  }

  /**
   * Given the entity classification dataset and a URL, identify the entity.
   * The first entity with an origin that `url` starts with wins; when none
   * matches (or no dataset is given) the pseudo root domain string is returned.
   *
   * @param {string} url
   * @param {LH.Result.Entities=} entities
   * @return {LH.Result.LhrEntity|string}
   */
  static getEntityFromUrl(url, entities) {
    // If it's a pre-v10 LHR, we don't have entities, so match against the root-ish domain.
    if (!entities) {
      return Util.getPseudoRootDomain(url);
    }

    const entity = entities.find(e => e.origins.find(origin => url.startsWith(origin)));
    // This fallback case would be unexpected, but leaving for safety.
    return entity || Util.getPseudoRootDomain(url);
  }

  /**
   * Split a string by markdown code spans (enclosed in `backticks`), splitting
   * into segments that were enclosed in backticks (marked as `isCode === true`)
   * and those that were outside the backticks (`isCode === false`).
   *
   * @param {string} text
   * @return {Array<{isCode: true, text: string}|{isCode: false, text: string}>}
   */
  static splitMarkdownCodeSpans(text) {
    /** @type {Array<{isCode: true, text: string}|{isCode: false, text: string}>} */
    const segments = [];

    // Splitting on a regex with one capture group yields alternating
    // plain-text (even index) and code (odd index) entries.
    const parts = text.split(/`(.*?)`/g);
    for (let i = 0; i < parts.length; i++) {
      const part = parts[i];
      // Empty strings are an artifact of splitting, not meaningful.
      if (!part) continue;

      const isCode = i % 2 !== 0;
      segments.push({
        isCode,
        text: part,
      });
    }

    return segments;
  }

  /**
   * Split a string on markdown links (e.g. [some link](https://...)) into
   * segments of plain text that weren't part of a link (marked as
   * `isLink === false`), and segments with text content and a URL that did make
   * up a link (marked as `isLink === true`). Only http(s) links are recognised.
   *
   * @param {string} text
   * @return {Array<{isLink: true, text: string, linkHref: string}|{isLink: false, text: string}>}
   */
  static splitMarkdownLink(text) {
    /** @type {Array<{isLink: true, text: string, linkHref: string}|{isLink: false, text: string}>} */
    const segments = [];

    const parts = text.split(/\[([^\]]+?)\]\((https?:\/\/.*?)\)/g);
    // Each match contributes three entries: the text before the link plus the
    // two capture groups (link text and link href).
    for (let i = 0; i < parts.length; i += 3) {
      const [preambleText, linkText, linkHref] = parts.slice(i, i + 3);

      // Skip empty text as it's an artifact of splitting, not meaningful.
      if (preambleText) {
        segments.push({
          isLink: false,
          text: preambleText,
        });
      }

      // Append link if there is one.
      if (linkText && linkHref) {
        segments.push({
          isLink: true,
          text: linkText,
          linkHref,
        });
      }
    }

    return segments;
  }

  /**
   * Shortens `string` so that it holds at most `characterLimit` grapheme
   * clusters, the last ones being `ellipseSuffix` (measured by its `.length`).
   * The string is returned untouched when it already fits.
   *
   * @param {string} string
   * @param {number} characterLimit
   * @param {string} ellipseSuffix
   * @return {string}
   */
  static truncate(string, characterLimit, ellipseSuffix = '…') {
    // Early return: with no more UTF-16 code units than the limit there cannot
    // be more graphemes than the limit either.
    if (string.length <= characterLimit) {
      return string;
    }

    const segmenter = new Intl.Segmenter(undefined, {granularity: 'grapheme'});
    const iterator = segmenter.segment(string)[Symbol.iterator]();

    // Walk to the grapheme at which the suffix would have to start.
    let lastSegmentIndex = 0;
    for (let i = 0; i <= characterLimit - ellipseSuffix.length; i++) {
      const result = iterator.next();
      if (result.done) {
        return string;
      }

      lastSegmentIndex = result.value.index;
    }

    // If the remainder is no longer than the suffix, truncating gains nothing.
    for (let i = 0; i < ellipseSuffix.length; i++) {
      if (iterator.next().done) {
        return string;
      }
    }

    return string.slice(0, lastSegmentIndex) + ellipseSuffix;
  }

  /**
   * Builds a short display name for a URL: the trailing path parts, optionally
   * with host and query string, with hash-like and over-long pieces elided.
   * `about:` and `data:` URLs start from their full `href` instead.
   *
   * @param {URL} parsedUrl
   * @param {{numPathParts?: number, preserveQuery?: boolean, preserveHost?: boolean}=} options
   *   `numPathParts` (default 2) is how many trailing path parts to keep,
   *   `preserveQuery` (default true) appends the query string and
   *   `preserveHost` (default false) prefixes the host.
   * @return {string}
   */
  static getURLDisplayName(parsedUrl, options) {
    // Destructuring defaults apply only to `undefined`, like the explicit checks they replace.
    const {numPathParts = 2, preserveQuery = true, preserveHost = false} = options || {};

    let name;

    if (parsedUrl.protocol === 'about:' || parsedUrl.protocol === 'data:') {
      // Handle 'about:*' and 'data:*' URLs specially since they have no path.
      name = parsedUrl.href;
    } else {
      name = parsedUrl.pathname;
      const parts = name.split('/').filter(part => part.length);
      if (numPathParts && parts.length > numPathParts) {
        name = ELLIPSIS + parts.slice(-1 * numPathParts).join('/');
      }

      if (preserveHost) {
        name = `${parsedUrl.host}/${name.replace(/^\//, '')}`;
      }
      if (preserveQuery) {
        name = `${name}${parsedUrl.search}`;
      }
    }

    const MAX_LENGTH = 64;
    if (parsedUrl.protocol !== 'data:') {
      // Even non-data uris can be 10k characters long.
      name = name.slice(0, 200);
      // Always elide hexadecimal hash
      name = name.replace(/([a-f0-9]{7})[a-f0-9]{13}[a-f0-9]*/g, `$1${ELLIPSIS}`);
      // Also elide other hash-like mixed-case strings
      name = name.replace(/([a-zA-Z0-9-_]{9})(?=.*[a-z])(?=.*[A-Z])(?=.*[0-9])[a-zA-Z0-9-_]{10,}/g,
        `$1${ELLIPSIS}`);
      // Also elide long number sequences
      name = name.replace(/(\d{3})\d{6,}/g, `$1${ELLIPSIS}`);
      // Merge any adjacent ellipses
      name = name.replace(/\u2026+/g, ELLIPSIS);

      // Elide query params first
      if (name.length > MAX_LENGTH && name.includes('?')) {
        // Try to leave the first query parameter intact
        name = name.replace(/\?([^=]*)(=)?.*/, `?$1$2${ELLIPSIS}`);

        // Remove it all if it's still too long
        if (name.length > MAX_LENGTH) {
          name = name.replace(/\?.*/, `?${ELLIPSIS}`);
        }
      }
    }

    // Elide too long names next
    if (name.length > MAX_LENGTH) {
      const dotIndex = name.lastIndexOf('.');
      // Length of the would-be file extension, dot included.
      const extensionLength = name.length - dotIndex;
      // Only keep the extension when it leaves room for the ellipsis. Otherwise
      // the slice end would go negative and the result would exceed MAX_LENGTH.
      if (dotIndex >= 0 && extensionLength <= MAX_LENGTH - 1) {
        name = name.slice(0, MAX_LENGTH - 1 - extensionLength) +
          // Show file extension
          `${ELLIPSIS}${name.slice(dotIndex)}`;
      } else {
        name = name.slice(0, MAX_LENGTH - 1) + ELLIPSIS;
      }
    }

    return name;
  }

  /**
   * Returns the origin portion of a Chrome extension URL.
   *
   * @param {string} url
   * @return {string}
   */
  static getChromeExtensionOrigin(url) {
    const parsedUrl = new URL(url);
    return `${parsedUrl.protocol}//${parsedUrl.host}`;
  }

  /**
   * Split a URL into a file, hostname and origin for easy display.
   *
   * @param {string} url
   * @return {{file: string, hostname: string, origin: string}}
   */
  static parseURL(url) {
    const parsedUrl = new URL(url);
    return {
      file: Util.getURLDisplayName(parsedUrl),
      hostname: parsedUrl.hostname,
      // Node's URL parsing behavior is different than Chrome and returns 'null'
      // for chrome-extension:// URLs. See https://github.com/nodejs/node/issues/21955.
      origin: parsedUrl.protocol === 'chrome-extension:' ?
        Util.getChromeExtensionOrigin(url) : parsedUrl.origin,
    };
  }

  /**
   * Returns `value` itself when it already is a `URL`, otherwise parses it.
   *
   * @param {string|URL} value
   * @return {!URL}
   */
  static createOrReturnURL(value) {
    if (value instanceof URL) {
      return value;
    }

    return new URL(value);
  }

  /**
   * Gets the tld of a domain: the last label, or the last two labels when the
   * second-to-last one is in `listOfTlds` (e.g. `.co.uk`).
   * This function is used only while rendering pre-10.0 LHRs.
   *
   * @param {string} hostname
   * @return {string} tld
   */
  static getPseudoTld(hostname) {
    const tlds = hostname.split('.').slice(-2);

    if (!listOfTlds.includes(tlds[0])) {
      return `.${tlds[tlds.length - 1]}`;
    }

    return `.${tlds.join('.')}`;
  }

  /**
   * Returns a primary domain for provided hostname (e.g. www.example.com -> example.com).
   * As it doesn't consult the Public Suffix List, it can sometimes lose detail.
   * See the `listOfTlds` comment above for more.
   * This function is used only while rendering pre-10.0 LHRs. See UrlUtils.getRootDomain
   * for the current method that makes use of PSL.
   *
   * @param {string|URL} url hostname or URL object
   * @return {string}
   */
  static getPseudoRootDomain(url) {
    const hostname = Util.createOrReturnURL(url).hostname;
    const tld = Util.getPseudoTld(hostname);

    // tld is `.com` or `.co.uk`; because of the leading dot, splitting yields one
    // entry more than the tld has labels (.com => 2, .co.uk => 3), which is
    // exactly the number of labels in "root domain + tld".
    const splitTld = tld.split('.');

    // get TLD + root domain
    return hostname.split('.').slice(-splitTld.length).join('.');
  }

  /**
   * Returns only lines that are near a message, or the first few lines if there are
   * no line messages. `lineMessages` is not modified.
   *
   * @param {SnippetValue['lines']} lines
   * @param {SnippetValue['lineMessages']} lineMessages
   * @param {number} surroundingLineCount Number of lines to include before and after
   * the message. If this is e.g. 2 this function might return 5 lines.
   */
  static filterRelevantLines(lines, lineMessages, surroundingLineCount) {
    if (lineMessages.length === 0) {
      // no lines with messages, just return the first bunch of lines
      return lines.slice(0, surroundingLineCount * 2 + 1);
    }

    const minGapSize = 3;
    const lineNumbersToKeep = new Set();

    // Sort messages so we can check lineNumbersToKeep to see how big the gap to
    // the previous line is. Sort a copy so the caller's array keeps its order.
    const sortedMessages = [...lineMessages].sort(
      (a, b) => (a.lineNumber || 0) - (b.lineNumber || 0));

    for (const {lineNumber} of sortedMessages) {
      let firstSurroundingLineNumber = lineNumber - surroundingLineCount;
      let lastSurroundingLineNumber = lineNumber + surroundingLineCount;

      while (firstSurroundingLineNumber < 1) {
        // make sure we still show (surroundingLineCount * 2 + 1) lines in total
        firstSurroundingLineNumber++;
        lastSurroundingLineNumber++;
      }

      // If only a few lines would be omitted normally then we prefer to include
      // extra lines to avoid the tiny gap
      if (lineNumbersToKeep.has(firstSurroundingLineNumber - minGapSize - 1)) {
        firstSurroundingLineNumber -= minGapSize;
      }

      for (let i = firstSurroundingLineNumber; i <= lastSurroundingLineNumber; i++) {
        lineNumbersToKeep.add(i);
      }
    }

    return lines.filter(line => lineNumbersToKeep.has(line.lineNumber));
  }

  /**
   * Computes a score between 0 and 1 based on the measured `value`. Score is determined by
   * considering a log-normal distribution governed by two control points (the 10th
   * percentile value and the median value) and represents the percentage of sites that are
   * greater than `value`.
   *
   * Score characteristics:
   * - within [0, 1]
   * - rounded to two digits
   * - value must meet or beat a controlPoint value to meet or exceed its percentile score:
   *   - value > median will give a score < 0.5; value ≤ median will give a score ≥ 0.5.
   *   - value > p10 will give a score < 0.9; value ≤ p10 will give a score ≥ 0.9.
   * - values < p10 will get a slight boost so a score of 1 is achievable by a
   *   `value` other than those close to 0. Scores of > ~0.99524 end up rounded to 1.
   *
   * @param {{median: number, p10: number}} controlPoints
   * @param {number} value
   * @return {number}
   */
  static computeLogNormalScore(controlPoints, value) {
    let percentile = statistics.getLogNormalScore(controlPoints, value);
    // Add a boost to scores of 90+, linearly ramping from 0 at 0.9 to half a
    // point (0.005) at 1. Expands scores in (0.9, 1] to (0.9, 1.005], so more top
    // scores will be a perfect 1 after the two-digit `Math.floor()` rounding below.
    if (percentile > 0.9) { // getLogNormalScore ensures `percentile` can't exceed 1.
      percentile += 0.05 * (percentile - 0.9);
    }
    return Math.floor(percentile * 100) / 100;
  }
}
export {
Util,
};