document-outline-extractor
Version:
Extract structured outlines from documents with optional AI enhancement
102 lines (85 loc) • 2.94 kB
text/typescript
import { OutlineExtractor } from '../extractor';
import { extractHeadings, buildOutlineTree, computeOutlineScore } from '../utils';
describe('OutlineExtractor', () => {
// Suite for extractHeadings: turning raw markdown text into a flat list of
// heading records ({ level, title, lineNumber }).
describe('extractHeadings', () => {
  it('should extract markdown headings correctly', () => {
    // The fixture begins with a newline, and the test expects the first
    // heading to be reported at lineNumber 2.
    const markdown = `
# Main Title
Some content
## Subtitle
### Sub-subtitle
More content
## Another Subtitle
`;
    const expectedFirst = {
      level: 1,
      title: 'Main Title',
      lineNumber: 2,
    };

    const found = extractHeadings(markdown);

    // Four heading lines appear in the fixture (one #, two ##, one ###).
    expect(found).toHaveLength(4);
    expect(found[0]).toEqual(expectedFirst);
  });

  it('should handle empty content', () => {
    // An empty string must yield an empty result.
    expect(extractHeadings('')).toHaveLength(0);
  });
});
// Suite for buildOutlineTree: nesting a flat heading list into a tree whose
// nodes expose a `children` array.
describe('buildOutlineTree', () => {
  it('should build hierarchical tree correctly', () => {
    // Small factory so the fixture reads as a table of (level, title, line).
    const heading = (level, title, lineNumber) => ({ level, title, lineNumber });

    const flatHeadings = [
      heading(1, 'Title 1', 1),
      heading(2, 'Title 1.1', 2),
      heading(2, 'Title 1.2', 3),
      heading(1, 'Title 2', 4),
    ];

    const roots = buildOutlineTree(flatHeadings);

    // Two level-1 headings are expected as roots; the first owns both
    // level-2 headings and the second has no children.
    expect(roots).toHaveLength(2);
    expect(roots[0].children).toHaveLength(2);
    expect(roots[1].children).toHaveLength(0);
  });
});
// Suite for computeOutlineScore: only the range of the returned score is
// checked here, not its exact value.
describe('computeOutlineScore', () => {
  it('should compute quality score', () => {
    const heading = (level, title, lineNumber) => ({ level, title, lineNumber });

    const sampleHeadings = [
      heading(1, 'Title 1', 1),
      heading(2, 'Title 1.1', 3),
      heading(2, 'Title 1.2', 5),
      heading(3, 'Title 1.2.1', 7),
    ];

    // NOTE(review): the second argument is presumably the document's total
    // line count; confirm against computeOutlineScore's signature.
    const result = computeOutlineScore(sampleHeadings, 100);

    // The score must fall in the half-open interval (0, 1].
    expect(result).toBeGreaterThan(0);
    expect(result).toBeLessThanOrEqual(1);
  });
});
// End-to-end suite: drives OutlineExtractor#extract on markdown input and
// inspects the returned outline.
describe('OutlineExtractor integration', () => {
  it('should extract outline without OpenAI', async () => {
    const source = `
# Introduction
This is the introduction.
## Background
Some background information.
## Objectives
Main objectives of the document.
# Main Content
The main content goes here.
## Section 1
First section content.
## Section 2
Second section content.
`;
    // Constructed with no arguments; per the test name this exercises the
    // path that does not use OpenAI.
    const subject = new OutlineExtractor();

    const outline = await subject.extract(source);

    expect(outline).toBeTruthy();
    // Both top-level section titles must appear in the produced outline.
    for (const sectionTitle of ['Introduction', 'Main Content']) {
      expect(outline).toContain(sectionTitle);
    }
  });

  it('should respect format options', async () => {
    const source = '# Title\n## Subtitle';
    const subject = new OutlineExtractor();

    // JSON format: the output only has to be parseable.
    const asJson = await subject.extract(source, { format: 'json' });
    const parseJsonOutput = () => JSON.parse(asJson);
    expect(parseJsonOutput).not.toThrow();

    // Markdown format: the output has to contain at least one '#'.
    const asMarkdown = await subject.extract(source, { format: 'markdown' });
    expect(asMarkdown).toContain('#');
  });
});
});