@signalwire/docusaurus-plugin-llms-txt
Version:
Generate Markdown versions of Docusaurus HTML pages and an llms.txt index file
48 lines (47 loc) • 1.27 kB
JavaScript
/**
* Title extraction strategies
* Multiple strategies for extracting document titles from HTML
*/
import { DEFAULT_DOCUMENT_TITLE, HTML_SELECTORS } from '../constants';
import { selectText } from '../utils/html';
/**
 * Title strategy: use the text that `selectText` yields for the H1 selector.
 *
 * Any falsy result (for example an empty string or `undefined`) is
 * normalized to `null` so callers only need to check for a single
 * "no title" value.
 * @internal
 */
const extractFirstH1 = (tree) => {
  const headingText = selectText(tree, HTML_SELECTORS.H1);
  if (!headingText) {
    return null;
  }
  return headingText;
};
/**
 * Title strategy: derive the title from the text that `selectText` yields
 * for the TITLE selector.
 *
 * When the text contains a `|` separator (e.g. "Page | Site"), only the
 * part before the first `|` is used. If that part is empty, the whole
 * trimmed text is returned instead.
 *
 * Returns `null` when there is no title text or it is whitespace-only, so
 * this strategy never hands back an empty string.
 * @internal
 */
const extractDocumentTitle = (tree) => {
  const rawTitle = selectText(tree, HTML_SELECTORS.TITLE);
  if (!rawTitle) {
    return null;
  }
  // Trim before inspecting the segments: otherwise leading whitespace in
  // front of the separator ("  | Site") looks like a non-empty page title
  // and collapses to "" once trimmed.
  const fullTitle = rawTitle.trim();
  if (!fullTitle) {
    return null;
  }
  const [leadingSegment] = fullTitle.split('|');
  const pageTitle = leadingSegment.trim();
  // An empty leading segment means the text starts with the separator;
  // fall back to the full text rather than returning nothing useful.
  return pageTitle || fullTitle;
};
/**
 * Strategies used by `extractTitle` when the caller supplies none.
 * Order matters: earlier entries take precedence over later ones.
 * @internal
 */
const defaultTitleExtractors = [
  // Tried first.
  extractFirstH1,
  // Tried only when the strategy above yields nothing.
  extractDocumentTitle,
];
/**
 * Resolve a title by running each strategy in order against `tree`.
 *
 * The first truthy result wins and later strategies are not invoked.
 * When every strategy returns a falsy value (or there are none),
 * `DEFAULT_DOCUMENT_TITLE` is returned.
 *
 * @param tree - Value handed unchanged to every strategy.
 * @param extractors - Iterable of strategy functions; defaults to
 *   `defaultTitleExtractors`.
 * @returns The first truthy strategy result, or `DEFAULT_DOCUMENT_TITLE`.
 * @internal
 */
export function extractTitle(tree, extractors = defaultTitleExtractors) {
  for (const strategy of extractors) {
    const candidate = strategy(tree);
    if (!candidate) {
      continue;
    }
    return candidate;
  }
  return DEFAULT_DOCUMENT_TITLE;
}