// retext-capitalization
// Version:
// A retext plugin to encourage the use of sentence case
// 303 lines (261 loc) • 10 kB
// JavaScript
import { visit } from "unist-util-visit";
import { is, convert } from "unist-util-is";
import { pointStart, pointEnd } from "unist-util-position";
import { matchCasing } from "match-casing";
import { search } from "nlcst-search";
import { toString } from "nlcst-to-string";
import { quotation } from "quotation";
import { schema } from "./schema.js";
import exceptions from "./exceptions.js";
// Every phrase key defined in schema.js; this is the phrase list handed to
// `search` by the plugin below.
const list = Object.keys(schema);
// Prefix of the origin string ("<source>:<rule id>") given to each message.
const source = "retext-capitalization";
// Style-guide page copied onto every reported message as its `url` field.
const url =
"https://docs.newrelic.com/docs/style-guide/capitalization/product-capability-feature-usage/#when-to-use-title-case";
/**
 * retext plugin that reports capitalization which breaks sentence case.
 *
 * The returned transformer makes two passes over the nlcst tree:
 *
 * 1. Every phrase listed in `schema` is located with `search` and reported
 *    when its casing differs from the casing the schema expects.
 * 2. Every remaining word that starts with an upper-case letter is reported,
 *    with its lower-case form as the suggestion, unless it is the first word
 *    of its sentence, contains a digit, is listed in `exceptions`, is tagged
 *    as a proper noun, or is already covered by an existing message.
 *
 * Word nodes must carry `data.partOfSpeech`. The tagger is presumably
 * retext-pos (it is the one named in the comments below), so it has to run
 * before this plugin.
 *
 * @returns {Function} Transformer `(tree, file) => void` that adds messages
 *   to `file`.
 */
export default function retextCapitalization() {
  /*!
   * Upper-cases the first character of a string and leaves the rest as is
   * @param {String} text - the string to capitalize
   * @return {String} The capitalized string
   */
  const capitalizeFirst = (text) =>
    text.charAt(0).toUpperCase() + text.slice(1);

  /*!
   * Checks if the string starts with an upper-case letter
   * @param {String} text - the string to test
   * @return {Boolean} Whether the first character is an upper-case letter
   */
  const startsWithUppercaseLetter = (text) => {
    const first = text.charAt(0);
    // Characters without case (apostrophes, underscores, CJK, ...) are equal
    // to their own upper-case form, so they must also differ from their
    // lower-case form to count as capitalized. Otherwise a word such as
    // `'em` would be reported with itself as the replacement.
    return first === first.toUpperCase() && first !== first.toLowerCase();
  };

  const containsNumber = (text) => /\d/.test(text);

  // Because sometimes the retext-pos plugin gets it wrong
  // See all part of speech tags: https://github.com/dariusk/pos-js
  const properNounCorrections = {
    Data: "NN",
  };

  // Lower-cased once so the per-word exception check is a set lookup.
  const lowerCasedExceptions = new Set(
    exceptions.map((exception) => exception.toLowerCase())
  );

  return (tree, file) => {
    const sourceString = file.value;

    /*!
     * Reads the text of a word straight from the source document
     * @param {Object} word - the current word object
     * @return {String} The source text covered by the word
     */
    const wordAsString = (word) =>
      sourceString.substring(
        word.position.start.offset,
        word.position.end.offset
      );

    /*!
     * Checks if the provided capitalized word is already a part of a suggestion
     * @param {Object} word - the current word object
     * @return {Boolean} Whether it already exists as a suggestion
     */
    const isAlreadySuggested = (word) =>
      file.messages.some(
        (message) =>
          word.position.start.offset >= message.position.start.offset &&
          word.position.end.offset <= message.position.end.offset
      );

    /*!
     * Checks if the provided capitalized word is a proper noun
     * @param {Object} word - the current word object
     * @return {Boolean} Whether it is a proper noun
     */
    const isProperNoun = (word) =>
      word.data.partOfSpeech === "NNP" || word.data.partOfSpeech === "NNPS";

    /*!
     * Adds a "Replace ... with ..." message to the file
     * @param {String} actual - the text found in the document
     * @param {Array} expected - the suggested replacements
     * @param {Object} firstNode - the node where the reported range starts
     * @param {Object} lastNode - the node where the reported range ends
     * @param {String} ruleId - the rule id appended to `source`
     */
    const report = (actual, expected, firstNode, lastNode, ruleId) => {
      Object.assign(
        file.message(
          "Replace " +
            quotation(actual, "`") +
            " with " +
            quotation(expected, "`"),
          {
            start: pointStart(firstNode),
            end: pointEnd(lastNode),
          },
          [source, ruleId].join(":")
        ),
        { actual, expected, url }
      );
    };

    // Check the string against the contents of schema.js
    search(tree, list, (match, index, parent, phrase) => {
      const actual = toString(match);
      const schemaEntry = schema[phrase];
      // A schema entry is either one replacement or a list of them; the rest
      // of this callback always works with a list.
      const expected = Array.isArray(schemaEntry) ? schemaEntry : [schemaEntry];
      const matchIsFromBeginningOfSentence =
        match[0].position.start.offset === parent.position.start.offset;

      // The search function performs a fuzzy search on the string in
      // question, ignoring casing, apostrophes, etc. Skip the match when the
      // text already is one of the expected spellings.
      if (expected.includes(actual)) {
        return;
      }

      // Also skip it when it only differs by the capital letter that the
      // start of a sentence requires.
      if (
        matchIsFromBeginningOfSentence &&
        expected.some((replacement) => capitalizeFirst(replacement) === actual)
      ) {
        return;
      }

      /*!
       * Checks if the match, read together with the words before it, is one
       * of the given phrases that must keep their casing
       * @param {String} actualToCheck - the matched text this check applies to
       * @param {Number} numberOfWordsBefore - how many preceding words to include
       * @param {String|Array} matchesToCheck - the phrase(s) to compare with
       * @return {Boolean} Whether the match in context equals one of the phrases
       */
      const checkForFalsePositive = (
        actualToCheck,
        numberOfWordsBefore,
        matchesToCheck
      ) => {
        const phrases =
          typeof matchesToCheck === "string" ? [matchesToCheck] : matchesToCheck;

        if (!Array.isArray(phrases)) {
          console.error(
            `"checkForFalsePositive" expects the argument "matchesToCheck" to be an array or string`
          );
          return false;
        }

        if (actualToCheck !== actual) {
          return false;
        }

        const matchIndex = parent.children.indexOf(match[0]);

        if (matchIndex === -1) {
          return false;
        }

        // Each preceding word is counted together with the node that
        // separates it from the next one, hence the factor of two. The start
        // is clamped so a match near the beginning of the sentence yields a
        // shorter context instead of handing `undefined` to `toString`,
        // which throws.
        const contextStart = Math.max(0, matchIndex - numberOfWordsBefore * 2);
        // The whole match is appended (not only its first node) so phrases
        // that span several nodes can be compared as well.
        const actualWithContext = toString(
          parent.children.slice(contextStart, matchIndex).concat(match)
        );

        return phrases.includes(actualWithContext);
      };

      // check for false positives
      if (
        checkForFalsePositive("Observability", 1, "Instant Observability") ||
        checkForFalsePositive("I/O", 1, "Relic I/O")
      ) {
        return;
      }

      // Because we need to make sure that we don't recommend uncapitalizing
      // the first letter of words at the beginning of a sentence.
      const replacements = matchIsFromBeginningOfSentence
        ? expected.map((replacement) => capitalizeFirst(replacement))
        : expected;

      report(
        actual,
        replacements,
        match[0],
        match[match.length - 1],
        phrase.replace(/\s+/g, "-").toLowerCase()
      );
    });

    // Create a suggestion if all of the following are true for the word:
    // 1. Is not a proper noun
    // 2. Is not included in schema.js
    // 3. Is not the first word of a sentence
    // 4. Is not in the list of exceptions
    // 5. Is not a number
    visit(tree, "SentenceNode", (sentence) => {
      // Because we also don't want to return a suggestion/report
      // for numbers.
      const words = sentence.children.filter(
        (child) =>
          child.type === "WordNode" && !containsNumber(wordAsString(child))
      );

      if (words.length === 0) {
        return;
      }

      const firstWordOffset = words[0].position.start.offset;

      const capitalizedWords = words.filter((word) => {
        const text = wordAsString(word);

        return (
          // Because we don't want to check words that have already been
          // suggested/flagged by the previous search function
          !isAlreadySuggested(word) &&
          !lowerCasedExceptions.has(text.toLowerCase()) &&
          startsWithUppercaseLetter(text)
        );
      });

      // Overwrite the tags of words the tagger is known to get wrong.
      capitalizedWords.forEach((word) => {
        const text = wordAsString(word);

        if (Object.prototype.hasOwnProperty.call(properNounCorrections, text)) {
          word.data.partOfSpeech = properNounCorrections[text];
        }
      });

      // The list is completed before anything is reported so the messages
      // added below cannot influence the filtering above.
      const improperlyCapitalizedWords = capitalizedWords.filter(
        (word) =>
          word.position.start.offset !== firstWordOffset && !isProperNoun(word)
      );

      improperlyCapitalizedWords.forEach((word) => {
        const actual = wordAsString(word);

        report(
          actual,
          [actual.toLowerCase()],
          word,
          word,
          `${actual.toLowerCase()}-${word.data.partOfSpeech}`
        );
      });
    });
  };
}