UNPKG

@vrcd-community/zhlint

Version:

A linting tool for the Chinese language.

376 lines (375 loc) 12.1 kB
import { ValidationTarget } from '../report.js';
import { checkCharType } from './char.js';
import { BRACKET_NOT_CLOSED, BRACKET_NOT_OPEN, QUOTATION_NOT_CLOSED, QUOTATION_NOT_OPEN } from './messages.js';
import { CharType, SHORTHAND_CHARS, SHORTHAND_PAIR_SET, BRACKET_CHAR_SET, QUOTATION_CHAR_SET, MarkSideType, MarkType, HyperTokenType, GroupTokenType, isBracketType, isQuotationType } from './types.js';

/**
 * Handle one punctuation character: brackets open/close marks, quotations
 * open/close groups, and any other punctuation becomes a single token.
 *
 * @param {number} i - index of `char` in the string being parsed
 * @param {string} char - the punctuation character
 * @param {*} type - char type of `char` (as classified by the caller)
 * @param {object} status - the mutable parser status (see `initNewStatus`)
 */
export const handlePunctuation = (i, char, type, status) => {
  // end the last unfinished token
  finalizeLastToken(status, i);

  // check the current token type
  // - start of a mark: start an unfinished mark
  // - end of a mark: end the current unfinished mark
  // - neutral quotation: start/end a group by pairing the last unfinished group
  // - left quotation: start a new unfinished group
  // - right quotation: end the current unfinished group
  // - other punctuation: add and end the current token
  if (isBracketType(type)) {
    if (BRACKET_CHAR_SET.left.indexOf(char) >= 0) {
      // push (save) the current unfinished mark if have
      initNewMark(status, i, char);
      // generate a new token and mark it as a mark punctuation by left
      // and finish the token
      addBracketToken(status, i, char, MarkSideType.LEFT);
    } else if (BRACKET_CHAR_SET.right.indexOf(char) >= 0) {
      if (!status.lastMark || !status.lastMark.startValue) {
        // a right bracket without any open mark to pair with
        addUnmatchedToken(status, i, char);
        addError(status, i, BRACKET_NOT_OPEN);
      } else {
        // generate token as a punctuation
        addBracketToken(status, i, char, MarkSideType.RIGHT);
        // end the last unfinished mark
        // and pop the previous one if exists
        finalizeCurrentMark(status, i, char);
      }
    }
    return;
  }

  if (isQuotationType(type)) {
    if (QUOTATION_CHAR_SET.neutral.indexOf(char) >= 0) {
      // - end the last unfinished group
      // - start a new group
      if (status.lastGroup && char === status.lastGroup.startValue) {
        finalizeCurrentGroup(status, i, char);
      } else {
        initNewGroup(status, i, char);
      }
    } else if (QUOTATION_CHAR_SET.left.indexOf(char) >= 0) {
      initNewGroup(status, i, char);
    } else if (QUOTATION_CHAR_SET.right.indexOf(char) >= 0) {
      if (!status.lastGroup || !status.lastGroup.startValue) {
        // a right quotation without any open group to pair with
        addUnmatchedToken(status, i, char);
        addError(status, i, QUOTATION_NOT_OPEN);
      } else {
        finalizeCurrentGroup(status, i, char);
      }
    }
    return;
  }

  addSinglePunctuationToken(status, i, char, type);
};

/**
 * Handle one letter character: extend the unfinished token when its type
 * matches, otherwise finish it and start a new content token.
 *
 * @param {number} i - index of `char` in the string being parsed
 * @param {string} char - the letter character
 * @param {*} type - char type of `char`
 * @param {object} status - the mutable parser status
 */
export const handleLetter = (i, char, type, status) => {
  // check if type changed and last token unfinished
  // - create new token in the current group
  // - append into current unfinished token
  if (status.lastToken) {
    if (status.lastToken.type !== type) {
      finalizeLastToken(status, i);
      initNewContent(status, i, char, type);
    } else {
      appendValue(status, char);
    }
  } else {
    initNewContent(status, i, char, type);
  }
};

// status

/**
 * Create a fresh parser status for `str`.
 *
 * The root group is an array carrying group properties; it is used both as
 * `tokens` and as the initial `lastGroup`. `hyperMarks` is shallow-copied
 * into `marks`.
 *
 * @param {string} str - the string to be parsed
 * @param {Array} hyperMarks - marks known before parsing starts
 * @returns {object} the new status
 */
export const initNewStatus = (str, hyperMarks) => {
  const tokens = [];
  Object.assign(tokens, {
    type: GroupTokenType.GROUP,
    index: 0,
    spaceAfter: '',
    startIndex: 0,
    endIndex: str.length - 1,
    startValue: '',
    endValue: '',
    innerSpaceBefore: ''
  });
  const status = {
    lastToken: undefined,
    lastGroup: tokens,
    lastMark: undefined,
    tokens,
    marks: [...hyperMarks],
    groups: [],
    markStack: [],
    groupStack: [],
    errors: []
  };
  return status;
};

// finalize token

/**
 * Close the unfinished token (if any): fix its length from `index`, append
 * it to the current group (if any) and clear `status.lastToken`.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - the index at which the unfinished token ends
 */
export const finalizeLastToken = (status, index) => {
  if (status.lastToken) {
    // the lastToken.index is not the current index anymore
    status.lastToken.length = index - status.lastToken.index;
    if (status.lastGroup) {
      status.lastGroup.push(status.lastToken);
    }
    status.lastToken = undefined;
  }
};

/**
 * Append an already complete `token` to the current group (if any) and
 * clear `status.lastToken`.
 *
 * @param {object} status - the mutable parser status
 * @param {object} token - the token to append
 */
export const finalizeCurrentToken = (status, token) => {
  if (status.lastGroup) {
    status.lastGroup.push(token);
  }
  status.lastToken = undefined;
};

// hyper marks

/**
 * Map a mark type to the token type used for its tokens.
 * Returns `undefined` for a mark type not listed below.
 */
const markTypeToTokenType = (type) => {
  switch (type) {
    case MarkType.HYPER:
      return HyperTokenType.HYPER_MARK;
    case MarkType.BRACKETS:
      return HyperTokenType.BRACKET_MARK;
    case MarkType.RAW:
      // technically never since MarkType.RAW should go to addRawContent()
      return HyperTokenType.INDETERMINATED;
  }
};

/**
 * Append a token for one side of a hyper mark to the current group.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - index of the token in the parsed string
 * @param {object} mark - the mark this token belongs to
 * @param {string} value - the text of the token
 * @param {*} markSide - which side of the mark this token is
 */
export const addHyperToken = (status, index, mark, value, markSide) => {
  const token = {
    type: markTypeToTokenType(mark.type),
    index,
    length: value.length,
    value: value,
    spaceAfter: '', // to be finalized
    mark: mark,
    markSide
  };
  finalizeCurrentToken(status, token);
};

/**
 * Append a raw content token; its type is decided by
 * `getHyperContentType(value)`.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - index of the token in the parsed string
 * @param {string} value - the raw text
 */
export const addRawContent = (status, index, value) => {
  const token = {
    type: getHyperContentType(value),
    index,
    length: value.length,
    value: value,
    spaceAfter: '' // to be finalized
  };
  finalizeCurrentToken(status, token);
};

// bracket marks

/**
 * Start a new unfinished mark. Any currently unfinished mark is saved on
 * `status.markStack` first. The new mark is recorded in `status.marks` and
 * becomes `status.lastMark`.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - start index of the mark
 * @param {string} char - start value of the mark
 * @param {*} [type=MarkType.BRACKETS] - the mark type
 */
export const initNewMark = (status, index, char, type = MarkType.BRACKETS) => {
  if (status.lastMark) {
    status.markStack.push(status.lastMark);
    status.lastMark = undefined;
  }
  const mark = {
    type,
    startIndex: index,
    startValue: char,
    endIndex: -1, // to be finalized
    endValue: '' // to be finalized
  };
  status.marks.push(mark);
  status.lastMark = mark;
};

/**
 * Append a one-character bracket token bound to the current `lastMark`.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - index of the bracket
 * @param {string} char - the bracket character
 * @param {*} markSide - which side of the mark this bracket is
 */
export const addBracketToken = (status, index, char, markSide) => {
  const token = {
    type: HyperTokenType.BRACKET_MARK,
    index,
    length: 1,
    value: char,
    spaceAfter: '', // to be finalized
    mark: status.lastMark,
    markSide
  };
  finalizeCurrentToken(status, token);
};

/**
 * Close the current mark (no-op when there is none) and restore the
 * previously saved mark from `status.markStack`, if any.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - end index of the mark
 * @param {string} char - end value of the mark
 */
export const finalizeCurrentMark = (status, index, char) => {
  if (!status.lastMark) {
    return;
  }
  status.lastMark.endIndex = index;
  status.lastMark.endValue = char;
  if (status.markStack.length > 0) {
    status.lastMark = status.markStack.pop();
  } else {
    status.lastMark = undefined;
  }
};

// normal punctuation

/** Append a one-character punctuation token of the given `type`. */
const addSinglePunctuationToken = (status, index, char, type) => {
  const token = {
    type,
    index,
    length: 1,
    value: char,
    spaceAfter: '' // to be finalized
  };
  finalizeCurrentToken(status, token);
};

/** Append a one-character token typed as `HyperTokenType.UNMATCHED`. */
const addUnmatchedToken = (status, i, char) => {
  const token = {
    type: HyperTokenType.UNMATCHED,
    index: i,
    length: 1,
    value: char,
    spaceAfter: ''
  };
  finalizeCurrentToken(status, token);
};

// group

/**
 * Start a new unfinished group. The current group (if any) is saved on
 * `status.groupStack`; the new group is appended to its parent, recorded in
 * `status.groups` and becomes `status.lastGroup`.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - start index of the group
 * @param {string} char - start value of the group
 */
export const initNewGroup = (status, index, char) => {
  if (status.lastGroup) {
    status.groupStack.push(status.lastGroup);
  }
  const lastGroup = [];
  Object.assign(lastGroup, {
    type: GroupTokenType.GROUP,
    index,
    spaceAfter: '', // to be finalized
    startIndex: index,
    startValue: char,
    endIndex: -1, // to be finalized
    endValue: '', // to be finalized
    innerSpaceBefore: '' // to be finalized
  });
  // TODO: previous group in stack
  // The stack is empty when there was no current group to save (e.g. after
  // `finalizeCurrentGroup` popped past the root). Fall back to the root
  // `tokens` instead of dereferencing `undefined`.
  const parentGroup =
    status.groupStack[status.groupStack.length - 1] || status.tokens;
  if (parentGroup) {
    parentGroup.push(lastGroup);
  }
  status.lastGroup = lastGroup;
  status.groups.push(lastGroup);
};

/**
 * Close the current group (if any) and restore the previously saved group
 * from `status.groupStack`; `status.lastGroup` becomes `undefined` when the
 * stack is empty.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - end index of the group
 * @param {string} char - end value of the group
 */
export const finalizeCurrentGroup = (status, index, char) => {
  if (status.lastGroup) {
    // index, length, value
    status.lastGroup.endIndex = index;
    status.lastGroup.endValue = char;
  }
  if (status.groupStack.length > 0) {
    status.lastGroup = status.groupStack.pop();
  } else {
    status.lastGroup = undefined;
  }
};

// general content

/**
 * Start a new unfinished content token holding `char`.
 *
 * @param {object} status - the mutable parser status
 * @param {number} index - index of `char`
 * @param {string} char - first character of the token
 * @param {*} type - the token type
 */
export const initNewContent = (status, index, char, type) => {
  status.lastToken = {
    type,
    index,
    length: 1, // to be finalized
    value: char, // to be finalized
    spaceAfter: '' // to be finalized
  };
};

/**
 * Append `char` to the unfinished token, if there is one.
 *
 * @param {object} status - the mutable parser status
 * @param {string} char - the character to append
 */
export const appendValue = (status, char) => {
  if (status.lastToken) {
    status.lastToken.value += char;
    status.lastToken.length++;
  }
};

// others

/**
 * Get the length of connecting spaces from a certain index
 *
 * @param {string} str - the string to inspect
 * @param {number} start - the index to start counting from
 * @returns {number} 0 when `str[start]` is not a space, otherwise the
 * number of consecutive space chars starting at `start`
 */
export const getConnectingSpaceLength = (str, start) => {
  // not even a space
  if (checkCharType(str[start]) !== CharType.SPACE) {
    return 0;
  }
  // find the next non-space char
  for (let i = start + 1; i < str.length; i++) {
    const char = str[i];
    const type = checkCharType(char);
    if (type !== CharType.SPACE) {
      return i - start;
    }
  }
  // space till the end
  return str.length - start;
};

/**
 * Get the last item of the current group, or `undefined` when there is no
 * current group (or the group is empty).
 *
 * @param {object} status - the parser status
 */
export const getPreviousToken = (status) => {
  if (status.lastGroup) {
    return status.lastGroup[status.lastGroup.length - 1];
  }
};

/**
 * Index marks by position: every mark is keyed by its `startIndex`, and
 * every non-RAW mark is additionally keyed by its `endIndex`.
 *
 * @param {Array} hyperMarks - the marks to index
 * @returns {object} map from index to mark
 */
export const getHyperMarkMap = (hyperMarks) => {
  const hyperMarkMap = {};
  hyperMarks.forEach((mark) => {
    hyperMarkMap[mark.startIndex] = mark;
    if (mark.type !== MarkType.RAW) {
      hyperMarkMap[mark.endIndex] = mark;
    }
  });
  return hyperMarkMap;
};

/**
 * Decide whether `char` at `index` is a shorthand char rather than a
 * quotation.
 *
 * It is when all of these hold: `char` is in `SHORTHAND_CHARS`; the
 * unfinished token is a western letter token; a next char exists and is a
 * western letter or a space; and the current group (if any) was not opened
 * by the char paired with `char` in `SHORTHAND_PAIR_SET`.
 *
 * @param {string} str - the string being parsed
 * @param {object} status - the parser status
 * @param {number} index - index of `char` in `str`
 * @param {string} char - the character to check
 * @returns {boolean}
 */
export const isShorthand = (str, status, index, char) => {
  if (SHORTHAND_CHARS.indexOf(char) < 0) {
    return false;
  }
  if (!status.lastToken || status.lastToken.type !== CharType.WESTERN_LETTER) {
    return false;
  }
  if (str.length <= index + 1) {
    return false;
  }
  const nextChar = str[index + 1];
  const nextType = checkCharType(nextChar);
  if (nextType === CharType.WESTERN_LETTER || nextType === CharType.SPACE) {
    if (!status.lastGroup) {
      return true;
    }
    if (status.lastGroup.startValue !== SHORTHAND_PAIR_SET[char]) {
      return true;
    }
  }
  return false;
};

/**
 * Classify raw content as hyper content or code content by its shape.
 *
 * @param {string} content - the raw text
 * @returns {*} `HyperTokenType.HYPER_CONTENT` or `HyperTokenType.CODE_CONTENT`
 */
export const getHyperContentType = (content) => {
  if (content.match(/\n/)) {
    // Usually it's hexo custom containers.
    return HyperTokenType.HYPER_CONTENT;
  }
  if (content.match(/^<code.*>.*<\/code.*>$/)) {
    // Usually it's <code>...</code>.
    return HyperTokenType.CODE_CONTENT;
  }
  if (content.match(/^<.+>$/)) {
    // Usually it's other HTML tags.
    return HyperTokenType.HYPER_CONTENT;
  }
  // Usually it's `...`.
  return HyperTokenType.CODE_CONTENT;
};

// error handling

/** Record an error with `message` at `index` in `status.errors`. */
const addError = (status, index, message) => {
  status.errors.push({
    name: '',
    index,
    length: 0,
    message,
    target: ValidationTarget.VALUE
  });
};

/**
 * Record errors for every bracket mark and quotation group that is still
 * open in `status`.
 *
 * @param {object} status - the mutable parser status
 */
export const handleErrors = (status) => {
  // record an error if the last mark not fully resolved
  const lastMark = status.lastMark;
  if (lastMark && lastMark.type === MarkType.BRACKETS && !lastMark.endValue) {
    addError(status, lastMark.startIndex, BRACKET_NOT_CLOSED);
  }

  // record an error if `markStack` not fully resolved
  if (status.markStack.length > 0) {
    status.markStack.forEach((mark) => {
      if (mark !== lastMark) {
        addError(status, mark.startIndex, BRACKET_NOT_CLOSED);
      }
    });
  }

  // record an error if the last group not fully resolved
  const lastGroup = status.lastGroup;
  if (lastGroup && lastGroup.startValue && !lastGroup.endValue) {
    addError(status, lastGroup.startIndex, QUOTATION_NOT_CLOSED);
  }

  // record an error if `groupStack` not fully resolved
  // (the root group has an empty `startValue`, so it is skipped)
  if (status.groupStack.length > 0) {
    status.groupStack.forEach((group) => {
      if (group !== lastGroup && group.startValue && !group.endValue) {
        addError(status, group.startIndex, QUOTATION_NOT_CLOSED);
      }
    });
  }
};