/*
 * eslint-plugin-complete
 * An ESLint plugin that contains useful rules.
 */
import { isEnumBlockLabel, isSpecialComment } from "./comments.js";
import { getAdjustedList, reachedNewList } from "./list.js";
import { hasURL } from "./utils.js";
/**
 * Matches a full URL: an `http`, `https`, or `ftp` scheme, followed by `://`, followed by a run of
 * characters that are each either non-whitespace or a newline. It is used by `splitOnSpecialText`
 * to strip URLs out of each line before sentence parsing.
 *
 * From:
 * https://stackoverflow.com/questions/23571013/how-to-remove-url-from-a-string-completely-in-javascript
 */
const FULL_URL_REGEX = /(?:https?|ftp):\/\/[\S\n]+/g;
/**
 * Matches one sentence per match while keeping the terminating punctuation. A match ends either
 * at a run of `Sentence_Terminal` characters (plus any quotes, backticks, closing punctuation, or
 * final punctuation that directly follow it) or at the end of the input. The leading `(?=[^])`
 * lookahead requires at least one remaining character, so no empty match is produced at the end
 * of the input.
 *
 * Flags: `g` (find every sentence), `u` (required for the `\p{...}` Unicode property escapes), and
 * `y` (each match must begin exactly where the previous one ended).
 *
 * From:
 * https://stackoverflow.com/questions/11761563/javascript-regexp-for-splitting-text-into-sentences-and-keeping-the-delimiter
 */
const SENTENCE_REGEX = /(?=[^])(?:\P{Sentence_Terminal}|\p{Sentence_Terminal}(?!['"`\p{Close_Punctuation}\p{Final_Punctuation}\s]))*(?:\p{Sentence_Terminal}+['"`\p{Close_Punctuation}\p{Final_Punctuation}]*|$)/guy;
// Marker that `splitOnSpecialText` inserts wherever a sentence must not continue. The processed
// text is split on this marker to produce the final text blocks.
const SENTENCE_SEPARATOR_IDENTIFIER = "___sentence_separator_identifier___";
// Placeholder that `splitOnSpecialText` substitutes for single-line code snippets (text wrapped in
// backticks).
const IN_LINE_CODE_IDENTIFIER = "___in_line_code_identifier___";
// Marker that `splitOnSpecialText` prepends to list elements. `getIncompleteSentenceKind` ignores
// any sentence that starts with it.
const LIST_ELEMENT_IDENTIFIER = "___list_element_identifier___";
/**
 * Finds every sentence in the provided text that is considered incomplete.
 *
 * The text is first divided into blocks by `splitOnSpecialText`. Each block is flattened onto a
 * single line, split into sentences, and every sentence is classified by
 * `getIncompleteSentenceKind`.
 *
 * @param {string} text The text to inspect.
 * @returns {Array<{ sentence: string, messageId: string }>} One entry per incomplete sentence, in
 * the order in which the sentences appear.
 */
export function getIncompleteSentences(text) {
  return splitOnSpecialText(text).flatMap((block) => {
    // Text that "spills over" onto the next line is handled by turning every newline into a space.
    const flattened = block.replaceAll("\n", " ").trim();

    // Handling all edge cases for "e.g." or "i.e." is very difficult, since a period after them is
    // sometimes correct and sometimes not. Thus, any block that contains them is ignored.
    const hasAbbreviation = flattened.includes("e.g.") || flattened.includes("i.e.");

    // Markdown links are whitelisted.
    const endsInMarkdownLink = flattened.endsWith("](");

    if (hasAbbreviation || endsInMarkdownLink) {
      return [];
    }

    const sentences = getSentences(flattened);
    const isLoneSentence = sentences.length === 1;

    return sentences
      .map((sentence) => ({
        sentence,
        messageId: getIncompleteSentenceKind(sentence, isLoneSentence),
      }))
      .filter(({ messageId }) => messageId !== undefined);
  });
}
/**
 * Before parsing a multi-line string to get the sentences, we first need to mutate the input to
 * handle some problematic situations.
 *
 * Code blocks, JSDoc tags, Markdown headers, table pipes, blank-line block boundaries, URLs, list
 * markers, and some common abbreviations are removed or replaced. Wherever a sentence must not
 * continue across a boundary, `SENTENCE_SEPARATOR_IDENTIFIER` is inserted, and the processed text
 * is finally split on that marker.
 *
 * @param {string} text The raw text to pre-process.
 * @returns {string[]} The resulting text blocks, excluding blocks that `isEnumBlockLabel` reports
 * as enum block labels.
 */
function splitOnSpecialText(text) {
  // Below, we avoid replacing certain things to an empty string because that can potentially cause
  // subsequent text to be considered to be part of the previous sentence.
  let processed = text;

  // Remove multi-line code blocks. The quantifier is lazy so that every fenced block is removed on
  // its own. A greedy quantifier would match from the first fence to the very last fence, which
  // silently discards any prose located between two code blocks.
  processed = processed.replaceAll(/```[\S\s]*?```/gm, SENTENCE_SEPARATOR_IDENTIFIER);

  // Remove example tag blocks. An example tag might be followed by another tag, so first look for
  // that situation. Then, handle the situation where the example tag is the final tag.
  processed = processed.replaceAll(
    // We use `[\s\S]` instead of `.` because the latter does not match a new line.
    /@example[\S\s]*?@/gm,
    `${SENTENCE_SEPARATOR_IDENTIFIER}@`,
  );
  processed = processed.replaceAll(/@example[\S\s]*/gm, "");

  // Remove see tag blocks. A see tag might be followed by another tag, so first look for that
  // situation. Then, handle the situation where the see tag is the final tag. (This is copy-pasted
  // from the code that handles example tags above.)
  processed = processed.replaceAll(
    // We use `[\s\S]` instead of `.` because the latter does not match a new line.
    /@see[\S\s]*?@/gm,
    `${SENTENCE_SEPARATOR_IDENTIFIER}@`,
  );
  processed = processed.replaceAll(/@see[\S\s]*/gm, "");

  // Replace the link tags with the link text. Note that if we replace them with a sentence
  // separator instead, then the following sentence would fail: Get the name of a peripheral wrapped
  // with {@link peripheral.wrap}.
  // https://regex101.com/r/0u8hQG/1
  // https://jsdoc.app/tags-inline-link.html
  processed = processed.replaceAll(
    /\[([^\]]*)]{@link [^ |}]+}|{@link ([^ |}]+)[ |]?}|{@link [^ |}]+[ |]([^}]+)}/gm,
    "$1$2$3",
  );

  // Remove Markdown headers.
  processed = processed.replaceAll(/^\n\s*#.*\n\n/gm, SENTENCE_SEPARATOR_IDENTIFIER);
  if (processed.trimStart().startsWith("#")) {
    // Also handle if the first line is a Markdown header.
    processed = processed.replace(/^\s*#.*\n\n/m, SENTENCE_SEPARATOR_IDENTIFIER);
  }

  // Remove pipes (which indicate a Markdown table).
  processed = processed.replaceAll("|", SENTENCE_SEPARATOR_IDENTIFIER);

  // Handle "blocks" indicated by a double newline. We don't want sentences to be parsed/combined
  // past blocks, so we manually insert a sentence separator.
  processed = processed.replaceAll("\n\n", `\n${SENTENCE_SEPARATOR_IDENTIFIER}\n`);

  // Handle quoted question marks.
  // e.g. This text contains "???" in the middle.
  processed = processed.replaceAll(/'\?+'/g, "");
  processed = processed.replaceAll(/"\?+"/g, "");

  const lines = processed.split("\n");
  const newLines = [];

  // The list (if any) that the current line belongs to. It carries over between lines so that
  // continued lists and sub-lists can be recognized.
  let insideList;

  for (const [i, originalLine] of lines.entries()) {
    let line = originalLine;

    // Ignore "@type" JSDoc tags, since they contain a code type instead of English text.
    // https://jsdoc.app/tags-type.html
    line = line.replace(/^\s*@type .+$/, SENTENCE_SEPARATOR_IDENTIFIER);

    // Remove any JSDoc tags. (But leave the descriptions following the tags, if any.) "@param" tags
    // are followed by variable names, which will not be part of the sentence.
    line = line.replace(/^\s*@param \w+ /, SENTENCE_SEPARATOR_IDENTIFIER);

    // This is "\S+" instead of "\w+" because we need to match things like "@ts-expect-error".
    line = line.replace(/^\s*@\S+/, SENTENCE_SEPARATOR_IDENTIFIER);

    // Replace any single-line code snippets with custom text. The custom text begins with an
    // underscore, which means that it will count towards the sentence starting with a capital
    // letter. (This is only relevant if the code block is the first word in the sentence.)
    // NOTE(review): this pattern is greedy, so a line with several snippets is collapsed from the
    // first backtick to the last one. Confirm that this is intended before changing it.
    line = line.replaceAll(/`.+`/g, IN_LINE_CODE_IDENTIFIER);

    // Remove any URLs present in the string, as the periods will count as sentence terminators.
    // e.g. "This is my URL: https://stackoverflow."
    line = line.replaceAll(FULL_URL_REGEX, "");

    // Remove the periods from some common abbreviations so that they do not mess up the sentence
    // parsing. (The whitespace that follows the abbreviation is removed as well.)
    line = line.replaceAll(/\bDr\.\s+/g, "Dr");
    line = line.replaceAll(/\bJr\.\s+/g, "Jr");
    line = line.replaceAll(/\bMr\.\s+/g, "Mr");
    line = line.replaceAll(/\bMrs\.\s+/g, "Mrs");
    line = line.replaceAll(/\bMs\.\s+/g, "Ms");
    line = line.replaceAll(/\bSr\.\s+/g, "Sr");
    line = line.replaceAll(/\bSt\.\s+/g, "St");
    line = line.replaceAll(/\betc\.\s+/g, "etc");

    // Replace list bullet headers, since they are never part of a sentence. We also need to mark
    // that this sentence is a list element for the purposes of ignoring any incomplete sentences.
    // Doing this allows short lists like:
    // - apple
    // - banana
    const previousLine = lines[i - 1];
    const previousLineWasBlank = previousLine === undefined || previousLine.trim() === "";
    const previousLineEndedInColon =
      previousLine !== undefined && previousLine.trimEnd().endsWith(":");
    const list = getAdjustedList(line, previousLineWasBlank, previousLineEndedInColon, insideList);
    if (reachedNewList(insideList, list)) {
      // Keep track that we have begun a list (or a new sub-list).
      insideList = list;
    }
    if (list !== undefined) {
      line = line.slice(list.markerSize);
      line = SENTENCE_SEPARATOR_IDENTIFIER + LIST_ELEMENT_IDENTIFIER + line;
    }

    // Split enum block labels.
    if (isEnumBlockLabel(line)) {
      line += SENTENCE_SEPARATOR_IDENTIFIER;
    }

    newLines.push(line);
  }

  const textBlocks = newLines.join("\n").split(SENTENCE_SEPARATOR_IDENTIFIER);
  return textBlocks.filter((textBlock) => !isEnumBlockLabel(textBlock));
}
/**
 * Splits text into sentences using `SENTENCE_REGEX`, keeping each sentence's terminating
 * punctuation.
 *
 * @param {string} text The text to split.
 * @returns {string[]} The matched sentences, or an empty array when nothing matches.
 */
export function getSentences(text) {
  // `String.prototype.match` yields `null` instead of an empty array when there are no matches.
  return text.match(SENTENCE_REGEX) ?? [];
}
/**
 * Classifies a single sentence.
 *
 * @param {string} sentence The sentence to classify.
 * @param {boolean} isLoneSentence Whether the sentence is the only one in its text block.
 * @returns {"doublePeriod" | "missingCapital" | "missingPeriod" | undefined} The kind of problem
 * found, or `undefined` when the sentence is acceptable or whitelisted.
 */
function getIncompleteSentenceKind(sentence, isLoneSentence) {
  // Peel away any parentheses (and whitespace) that wrap the sentence, repeating until the text
  // stops changing.
  let text = sentence;
  for (;;) {
    const stripped = text.trim().replace(/^\(*/, "").replace(/\)*$/, "").trim();
    if (stripped === text) {
      break;
    }
    text = stripped;
  }

  // Ignore / whitelist some specific things.

  // Blank text.
  if (text === "") {
    return undefined;
  }

  // Sentences that do not contain any letters.
  if (!/[A-Za-z]/.test(text)) {
    return undefined;
  }

  // Sentences with an arrow, like: "Alice --> Bob"
  if (text.includes("-->")) {
    return undefined;
  }

  // Placeholder text.
  if (text === "n/a") {
    return undefined;
  }

  // Special comments.
  if (isSpecialComment(text)) {
    return undefined;
  }

  // Dates.
  if (isDate(text)) {
    return undefined;
  }

  // URLs.
  if (hasURL(text)) {
    return undefined;
  }

  // Single JSDoc tags.
  if (/^@\w+$/.test(text)) {
    return undefined;
  }

  // Lists.
  if (text.startsWith(LIST_ELEMENT_IDENTIFIER)) {
    return undefined;
  }

  // Code blocks.
  if (text.includes("```")) {
    return undefined;
  }

  // Sentences that end with a number in parenthesis (which indicates some kind of expression).
  // This must check the original text, since the parentheses were stripped from `text` above.
  if (/ \(\d+\)$/.test(sentence.trimEnd())) {
    return undefined;
  }

  // A double period is reported first. Three periods in a row (an ellipsis) are allowed.
  const endsWithTwoPeriods = text.length >= 3 && text.endsWith("..");
  if (endsWithTwoPeriods && text.at(-3) !== ".") {
    return "doublePeriod";
  }

  // A lone sentence made of only one, two, or three words is always accepted.
  if (isLoneSentence) {
    const shortPhrasePatterns = [/^\S+$/, /^\S+ \S+$/, /^\S+ \S+ \S+$/];
    if (shortPhrasePatterns.some((pattern) => pattern.test(text))) {
      return undefined;
    }
  }

  const startsWithLowercaseLetter = /^[a-z]/.test(text);
  if (startsWithLowercaseLetter && !isCapitalizedWordException(text)) {
    return "missingCapital";
  }

  const acceptableEndings = [
    // Normal end-of-line punctuation.
    ".",
    "!",
    "?",
    // A period inside of a single quote or double quote, since it is implied that this is a fully
    // quoted sentence.
    '."',
    '!"',
    '?"',
    ".'",
    "!'",
    "?'",
    // A colon, since it is implied that there is an example of something on the subsequent block.
    ":",
  ];
  const hasAcceptableEnding = acceptableEndings.some((ending) => text.endsWith(ending));

  // Any ending is allowed if there is a colon in the middle of the sentence, since it is implied
  // that this is an example of something.
  if (hasAcceptableEnding || text.includes(": ")) {
    return undefined;
  }

  return "missingPeriod";
}
// Words that legitimately begin with a lowercase letter.
const CAPITALIZED_WORD_EXCEPTIONS = ["iPad", "iPhone", "iPod"];

/**
 * Reports whether the text begins with one of the words in `CAPITALIZED_WORD_EXCEPTIONS`.
 *
 * @param {string} text The text to check.
 * @returns {boolean} `true` if the text starts with an exception word, `false` otherwise.
 */
function isCapitalizedWordException(text) {
  for (const exceptionWord of CAPITALIZED_WORD_EXCEPTIONS) {
    if (text.startsWith(exceptionWord)) {
      return true;
    }
  }

  return false;
}
// The English month names that are accepted as the first word of a date.
const MONTHS_SET = new Set([
  "January",
  "February",
  "March",
  "April",
  "May",
  "June",
  "July",
  "August",
  "September",
  "October",
  "November",
  "December",
]);

// The suffixes that are accepted directly after the day number.
const ORDINALS_SET = new Set(["st", "nd", "rd", "th"]);

/**
 * Reports whether the (trimmed) text is a date in one of two shapes: a month name followed by a
 * day with an ordinal suffix (like "January 1st"), or the same followed by a comma and a number
 * (like "January 1st, 2000").
 *
 * @param {string} text The text to check.
 * @returns {boolean} `true` if the text is a date in one of the supported shapes.
 */
function isDate(text) {
  const candidate = text.trim();

  const datePatterns = [
    // Month and day only.
    /^(?<month>\w+) \d+(?<ordinal>\w+)$/,
    // Month and day, followed by a comma and a number.
    /^(?<month>\w+) \d+(?<ordinal>\w+), \d+$/,
  ];

  return datePatterns.some((pattern) => {
    const groups = pattern.exec(candidate)?.groups;
    if (groups === undefined) {
      return false;
    }

    // A group that did not participate is `undefined`, which is in neither set.
    return MONTHS_SET.has(groups.month) && ORDINALS_SET.has(groups.ordinal);
  });
}