@mintlify/scraping
Version:
Scrape documentation frameworks to Mintlify docs
143 lines (126 loc) • 4.21 kB
text/typescript
import { Tab } from '@mintlify/models';
import type { Root as HastRoot, Element } from 'hast';
import { visit, EXIT, CONTINUE } from 'unist-util-visit';
import { framework } from '../utils/detectFramework.js';
import { findTitle, getTitleFromLink } from '../utils/title.js';
/**
 * Collects the top-level navigation ("tab") links from a scraped page.
 *
 * Only the `readme`, `docusaurus` and `gitbook` vendors (as reported by the
 * shared `framework` detection state) are handled. The function first locates
 * the vendor-specific navigation container in the tree, then gathers anchor
 * elements from inside it.
 *
 * @param rootNode - HAST root of the page being scraped.
 * @param url - URL of the page. In the gitbook branch it is the base used to
 *   resolve relative hrefs and the origin that links are compared against.
 * @returns `undefined` when the vendor is not handled or no navigation
 *   container is found; otherwise the collected tabs (possibly empty).
 */
export function retrieveTabLinks(rootNode: HastRoot, url: URL): Array<Tab> | undefined {
  if (
    framework.vendor !== 'readme' &&
    framework.vendor !== 'docusaurus' &&
    framework.vendor !== 'gitbook'
  ) {
    return undefined;
  }

  // `element` is only assigned inside the visitor callback. The explicit
  // assertion on the initializer stops the compiler from narrowing the
  // variable to `undefined` for the rest of the function.
  let element: Element | undefined = undefined as Element | undefined;

  // Step 1: find the vendor-specific navigation container and stop at the
  // first match.
  visit(rootNode, 'element', function (node) {
    if (framework.vendor === 'readme') {
      if (
        node.tagName === 'header' &&
        node.properties.className &&
        Array.isArray(node.properties.className) &&
        node.properties.className.includes('rm-Header')
      ) {
        element = node;
        return EXIT;
      }
    }
    if (framework.vendor === 'docusaurus') {
      if (
        node.tagName === 'nav' &&
        node.properties.className &&
        Array.isArray(node.properties.className) &&
        node.properties.className.includes('navbar')
      ) {
        element = node;
        return EXIT;
      }
    }
    if (framework.vendor === 'gitbook') {
      if (
        node.tagName === 'nav' &&
        node.properties.id === 'sections' &&
        node.properties.ariaLabel === 'Sections'
      ) {
        element = node;
        return EXIT;
      }
    }
  });

  if (!element) return undefined;

  const links: Array<Tab> = [];

  // Step 2: walk the container and collect anchors according to the vendor's
  // markup conventions.
  visit(element, 'element', function (node) {
    switch (framework.vendor) {
      case 'readme':
        // Links are taken from any <nav> or from the `rm-Header-right` <div>.
        if (
          node.tagName !== 'nav' &&
          !(
            node.tagName === 'div' &&
            node.properties.className &&
            Array.isArray(node.properties.className) &&
            node.properties.className.includes('rm-Header-right')
          )
        )
          return CONTINUE;
        visit(node, 'element', function (subNode) {
          // Hrefs starting with "http" are skipped.
          if (
            subNode.tagName !== 'a' ||
            !subNode.properties.href ||
            typeof subNode.properties.href !== 'string' ||
            subNode.properties.href.startsWith('http')
          )
            return CONTINUE;
          const title = findTitle(subNode);
          links.push({
            name: title || getTitleFromLink(subNode.properties.href),
            url: subNode.properties.href,
          });
        });
        break;
      case 'docusaurus':
        if (node.tagName !== 'nav') return CONTINUE;
        visit(node, 'element', function (subNode, _, parent) {
          // Only anchors whose direct parent has the exact class list
          // ['navbar__items'] are kept. That requirement already excludes
          // any parent that also carries `navbar__items--right`.
          if (
            subNode.tagName !== 'a' ||
            !subNode.properties.href ||
            typeof subNode.properties.href !== 'string' ||
            subNode.properties.href.startsWith('http') ||
            !parent ||
            parent.type !== 'element' ||
            !Array.isArray(parent.properties.className) ||
            parent.properties.className.length !== 1 ||
            parent.properties.className[0] !== 'navbar__items'
          )
            return CONTINUE;
          const title = findTitle(subNode);
          links.push({
            name: title || getTitleFromLink(subNode.properties.href),
            url: subNode.properties.href,
          });
        });
        break;
      case 'gitbook':
        if (node.tagName !== 'nav') return CONTINUE;
        visit(node, 'element', function (subNode, _, parent) {
          if (
            subNode.tagName !== 'a' ||
            !subNode.properties.href ||
            typeof subNode.properties.href !== 'string' ||
            !parent ||
            parent.type !== 'element'
          )
            return CONTINUE;
          const href = subNode.properties.href;

          // Resolve against the page URL: `new URL(href)` without a base
          // throws on relative hrefs such as "/docs". Absolute hrefs ignore
          // the base, so their handling is unchanged.
          let link: URL;
          try {
            link = new URL(href, url);
          } catch {
            // Still unparseable after resolution; skip this anchor rather
            // than aborting the whole extraction.
            return CONTINUE;
          }

          const title = findTitle(subNode);
          links.push({
            name: title || getTitleFromLink(href),
            // Same-origin links are stored as a path; others keep the full URL.
            url: link.origin === url.origin ? link.pathname : link.toString(),
          });
        });
        break;
      default:
        break;
    }
  });

  return links;
}