llmxml
Version:
Convert between markdown and LLM-friendly pseudo-XML
105 lines (90 loc) • 3.12 kB
text/typescript
import type { TagNode, TextNode } from '../../types/ast';
import { describe, it, expect } from 'vitest';
import { parse } from '../llm';
/**
 * Behavioural tests for the `parse` entry point of the LLM pseudo-XML parser.
 *
 * The cases exercise fenced code blocks nested inside tags, attribute
 * handling, the capitalised-tag-name rule, and the errors reported for
 * malformed input.
 */
describe('LLM Parser', () => {
  /** Joins the given rows with newlines to build a multi-line parser input. */
  const fromLines = (...rows: string[]): string => rows.join('\n');

  /** True when the node, viewed as a text node, carries the `code` text type. */
  const isCodeNode = (node: unknown): boolean =>
    (node as TextNode).textType === 'code';

  /**
   * Builds a predicate that matches text nodes without a `textType` whose
   * `value` contains the given fragment.
   */
  const plainTextContaining =
    (fragment: string) =>
    (node: unknown): boolean => {
      const text = node as TextNode;
      return !text.textType && text.value.includes(fragment);
    };

  it('should parse code blocks', () => {
    const source = fromLines(
      '<Section>',
      '```js',
      'const x = 1;',
      '```',
      '</Section>'
    );

    const { ast: nodes } = parse(source);
    expect(nodes).toHaveLength(1);

    // The fenced block is expected among the children of the single root tag.
    const root = nodes[0] as TagNode;
    const fenced = root.children?.find(isCodeNode);
    expect(fenced).toBeDefined();
    expect((fenced as TextNode).language).toBe('js');
  });

  it('should preserve whitespace around code blocks', () => {
    const source = fromLines(
      '<Section>',
      'Text before',
      '```js',
      'const x = 1;',
      '```',
      'Text after',
      '</Section>'
    );

    const { ast: nodes } = parse(source);
    expect(nodes).toHaveLength(1);

    const root = nodes[0] as TagNode;
    const kids = root.children ?? [];

    // The code node and both surrounding plain-text fragments must be present.
    expect(kids.some(isCodeNode)).toBe(true);
    expect(kids.some(plainTextContaining('Text before'))).toBe(true);
    expect(kids.some(plainTextContaining('Text after'))).toBe(true);
  });

  it('requires capital letter for tag names', () => {
    const { errors: reported } = parse('<section></section>');

    expect(reported).toHaveLength(1);
    expect(reported[0].message).toBe('Tag names must start with a capital letter');
  });

  it('should parse tag attributes', () => {
    const source = '<Section title="Test" hlevel="2">Content</Section>';
    const expectedAttributes = { title: 'Test', hlevel: '2' };

    const { ast: nodes } = parse(source);
    const root = nodes[0] as TagNode;

    expect(root.attributes).toMatchObject(expectedAttributes);
  });

  it('should use tag name as title if not specified', () => {
    const source = '<Section hlevel="2">Content</Section>';
    // No explicit title attribute: the tag name is expected to stand in for it.
    const expectedAttributes = { title: 'Section', hlevel: '2' };

    const { ast: nodes } = parse(source);
    const root = nodes[0] as TagNode;

    expect(root.attributes).toMatchObject(expectedAttributes);
  });

  describe('error handling', () => {
    /** Parses the input and returns only the reported errors. */
    const errorsOf = (source: string) => parse(source).errors;

    it('detects mismatched tags', () => {
      const reported = errorsOf('<Section>Content</Other>');

      expect(reported).toHaveLength(1);
      expect(reported[0].message).toContain('Mismatched tags');
    });

    it('handles unclosed tags', () => {
      const reported = errorsOf('<Section>Content');

      // An unclosed tag is expected to surface with the same message as a mismatch.
      expect(reported).toHaveLength(1);
      expect(reported[0].message).toContain('Mismatched tags');
    });

    it('provides location information', () => {
      const reported = errorsOf(fromLines('<Section>', '<Other>', '</Section>'));

      expect(reported).toHaveLength(1);

      // `<Other>` sits on the second line of the input.
      const { location } = reported[0];
      expect(location).toBeDefined();
      expect(location.start.line).toBe(2);
    });

    it('handles malformed code blocks', () => {
      // The opening fence is never closed before the tag ends.
      const reported = errorsOf(
        fromLines('<Section>', '```js', 'const x = 1;', '</Section>')
      );

      expect(reported).toHaveLength(1);
      expect(reported[0].message).toBe('Unclosed code block');
    });
  });
});