UNPKG

@signalwire/docusaurus-plugin-llms-txt

Version:

Generate Markdown versions of Docusaurus HTML pages and an llms.txt index file

48 lines (47 loc) 1.27 kB
/**
 * Title extraction strategies.
 *
 * Each strategy takes a parsed HTML tree and returns a candidate title, or a
 * falsy value when it has nothing to offer. `extractTitle` runs the strategies
 * in order and uses the first truthy result.
 */
import { DEFAULT_DOCUMENT_TITLE, HTML_SELECTORS } from '../constants';
import { selectText } from '../utils/html';

/**
 * Strategy: use the text that `selectText` yields for the H1 selector.
 *
 * @param {*} tree - HTML tree handed straight to `selectText`.
 * @returns {*} The selected text when truthy, otherwise `null`.
 * @internal
 */
const extractFirstH1 = (tree) => selectText(tree, HTML_SELECTORS.H1) || null;

/**
 * Strategy: use the text that `selectText` yields for the TITLE selector.
 *
 * When the title contains a `|` separator and the part before the first `|`
 * is non-empty, only that leading part is used; otherwise the whole title is
 * used. The chosen text is trimmed, so the result may be an empty string.
 *
 * @param {*} tree - HTML tree handed straight to `selectText`.
 * @returns {string | null} Trimmed title text, or `null` when no title text
 *   was selected.
 * @internal
 */
const extractDocumentTitle = (tree) => {
  const rawTitle = selectText(tree, HTML_SELECTORS.TITLE);
  if (!rawTitle) {
    return null;
  }

  const segments = rawTitle.split('|');
  // A separator only counts when there is real text in front of it; a title
  // that starts with '|' is kept whole.
  const hasLeadingSegment = segments.length > 1 && segments[0];
  return (hasLeadingSegment ? segments[0] : rawTitle).trim();
};

/**
 * Default strategy order: first H1, then the document title tag.
 * @internal
 */
const defaultTitleExtractors = [extractFirstH1, extractDocumentTitle];

/**
 * Resolve a title by trying each extractor in turn.
 *
 * @param {*} tree - HTML tree passed unchanged to every extractor.
 * @param {Iterable<Function>} [extractors=defaultTitleExtractors] - Strategies
 *   to try, in priority order.
 * @returns {*} The first truthy extractor result, or `DEFAULT_DOCUMENT_TITLE`
 *   when every extractor returns a falsy value.
 * @internal
 */
export function extractTitle(tree, extractors = defaultTitleExtractors) {
  for (const strategy of extractors) {
    const candidate = strategy(tree);
    if (!candidate) {
      continue;
    }
    return candidate;
  }
  return DEFAULT_DOCUMENT_TITLE;
}