aliaset
Version:
twind monorepo
98 lines (90 loc) • 3.3 kB
text/typescript
// For now we are using a simple parser adapted from htm (https://github.com/developit/htm/blob/master/src/build.mjs)
// If we find any issues we can switch to something more sophisticated like
// - https://github.com/acrazing/html5parser
// - https://github.com/fb55/htmlparser2
const MODE_SLASH = 0
const MODE_TEXT = 1
const MODE_WHITESPACE = 2
const MODE_TAGNAME = 3
const MODE_COMMENT = 4
const MODE_ATTRIBUTE = 5
export function parseHTML(
markup: string,
onClass: (startIndex: number, endIndex: number, quote: string) => false | unknown,
): void {
let mode = MODE_TEXT
let startIndex = 0
let quote = ''
let attributeName = ''
const commit = (currentIndex: number): void => {
if (mode == MODE_ATTRIBUTE && attributeName == 'class') {
if (onClass(startIndex, currentIndex, quote) === false) {
markup = ''
}
}
}
for (let position = 0; position < markup.length; position++) {
const char = markup[position]
if (mode == MODE_TEXT) {
if (char == '<') {
mode = markup.substr(position + 1, 3) == '!--' ? MODE_COMMENT : MODE_TAGNAME
}
} else if (mode == MODE_COMMENT) {
// Ignore everything until the last three characters are '-', '-' and '>'
if (char == '>' && markup.slice(position - 2, position) == '--') {
mode = MODE_TEXT
}
} else if (quote) {
if (char == quote && markup[position - 1] != '\\') {
commit(position)
mode = MODE_WHITESPACE
quote = ''
}
} else if (char == '"' || char == "'") {
quote = char
startIndex += 1
} else if (char == '>') {
commit(position)
mode = MODE_TEXT
} else if (!mode) {
// MODE_SLASH
// Ignore everything until the tag ends
} else if (char == '=') {
attributeName = markup.slice(startIndex, position)
mode = MODE_ATTRIBUTE
startIndex = position + 1
} else if (char == '/' && (mode < MODE_ATTRIBUTE || markup[position + 1] == '>')) {
commit(position)
mode = MODE_SLASH
} else if (/\s/.test(char)) {
// <a class=font-bold>
commit(position)
mode = MODE_WHITESPACE
startIndex = position + 1
}
}
}
export function fixClassList(value: string, quote: string): string {
// const value = markup.slice(startIndex, endIndex)
// Lets handle some special react cases:
// * arbitrary values for `content-`
// <span class="before:content-['asas']"></span>
// <span class="before:content-["asas"]"></span>
//
// * self-referenced groups
// <span class="flex(& col)"></span>
//
// If a class name contains `'`, `"`, or `&` those will be replaced with HTML entities
// To fix this we replace those for depending on the actual symbol that is being used
// As an alternative we could always escape class names direcly in twind like react does
// but this works for now
return (
quote == `"`
? // `'` -> ' ' '
value.replace(/(=|\[)(?:'|'|')|(?:'|'|')(])/g, `$1'$2`)
: quote == `'`
? // `"` -> " " "
value.replace(/(=|\[)(?:"|"|")|(?:"|"|")(])/g, `$1"$2`)
: value
).replace(/(&|&|&)/g, '&')
}