/*
 * @mintlify/scraping
 * Version: (unspecified)
 * Scrape documentation frameworks to Mintlify docs
 * 170 lines (151 loc) • 5.19 kB
 * text/typescript
 */
import type { Element, ElementContent } from 'hast';
import { visit, EXIT, CONTINUE } from 'unist-util-visit';
import { assertIsDefined } from '../assert.js';
import type { HastNode, HastNodeIndex, HastNodeParent } from '../types/hast.js';
import { turnChildrenIntoMdx } from '../utils/children.js';
import { findImg } from '../utils/img.js';
import { findTitle } from '../utils/title.js';
/**
 * Converts a GitBook card element into a `Card` element.
 *
 * A node qualifies when it is an `<a>` or `<div>` whose space-joined class
 * list starts with the GitBook card class prefix. The first element in the
 * subtree whose class list is exactly the text-wrapper class string is
 * detached from its parent and handed to `findTitle`; the remaining children
 * are converted with `turnChildrenIntoMdx` and become the card's children.
 *
 * @param node - Candidate element to inspect.
 * @param _ - Index of the node in its parent (unused).
 * @param __ - Parent of the node (unused).
 * @returns The new `Card` element, or `undefined` when `node` is not a GitBook card.
 */
export function gitBookScrapeCard(
  node: HastNode,
  _: HastNodeIndex,
  __: HastNodeParent
): Element | undefined {
  const cardClassPrefix =
    'group grid shadow-1xs shadow-dark/[0.02] rounded-md straight-corners:rounded-none dark:shadow-transparent';
  const textWrapperClass = 'w-full space-y-2 lg:space-y-3 leading-normal';

  // Guard clauses: bail out as soon as the node stops looking like a card.
  if (node.tagName !== 'a' && node.tagName !== 'div') return undefined;
  const cardClasses = node.properties.className;
  if (!cardClasses || !Array.isArray(cardClasses)) return undefined;
  if (!cardClasses.join(' ').startsWith(cardClassPrefix)) return undefined;

  let textWrapper: Element | undefined = undefined;
  visit(node, 'element', (candidate, position, owner) => {
    const candidateClasses = candidate.properties.className;
    const isTextWrapper =
      Array.isArray(candidateClasses) && candidateClasses.join(' ') === textWrapperClass;
    if (!isTextWrapper) return CONTINUE;

    textWrapper = candidate;
    // Detach the wrapper so it is not part of the children converted below.
    if (owner && typeof position === 'number') {
      owner.children.splice(position, 1);
    }
    return EXIT;
  });

  const title = findTitle(textWrapper);
  const imgSrc = findImg(node);

  const card: Element = {
    type: 'element',
    tagName: 'Card',
    properties: { title },
    children: turnChildrenIntoMdx(node.children) as Array<ElementContent>,
  };

  // Optional properties are only set when a truthy value is available.
  if (node.properties.href) {
    card.properties.href = node.properties.href;
  }
  if (imgSrc) {
    card.properties.img = imgSrc;
  }
  return card;
}
/**
 * Converts a ReadMe card-like element into a `Card` element.
 *
 * A node qualifies when it is a `<div>` or `<a>` whose class list contains one
 * of `Tile`, `card`, `Card`, `docs-card`, `next-steps__step`, or whose
 * space-joined class string contains `_card` or `-card`.
 *
 * The link target is resolved in this order:
 *   1. the node's own `href`;
 *   2. the node's own string `onclick` (the text between the first pair of
 *      single quotes, prefixed with `./`);
 *   3. the first element in the subtree (document order) carrying an `href`
 *      or a string `onclick`, resolved the same way.
 *
 * @param node - Candidate element to inspect.
 * @param _ - Index of the node in its parent (unused).
 * @param parent - Parent of the node; passed to `assertIsDefined` once the
 *   node has been recognised as a card.
 * @returns The new `Card` element, or `undefined` when `node` is not a card.
 */
export function readmeScrapeCard(
  node: HastNode,
  _: HastNodeIndex,
  parent: HastNodeParent
): Element | undefined {
  if (node.tagName !== 'div' && node.tagName !== 'a') return undefined;

  const classNames = node.properties.className;
  if (!classNames || !Array.isArray(classNames)) return undefined;

  const joinedClassNames = classNames.join(' ');
  const looksLikeCard =
    ['Tile', 'card', 'Card', 'docs-card', 'next-steps__step'].some((token) =>
      classNames.includes(token)
    ) ||
    joinedClassNames.includes('_card') ||
    joinedClassNames.includes('-card');
  if (!looksLikeCard) return undefined;

  // Extracts the first single-quoted segment of an inline handler such as
  // "location.href='path'" and turns it into a relative link.
  const hrefFromOnclick = (handler: string): string | undefined => {
    const target = handler.split("'")[1];
    return target ? `./${target}` : undefined;
  };

  const title = findTitle(node);

  let href: string | undefined = undefined;
  const ownOnclick = node.properties.onclick;
  if (node.properties.href) {
    href = node.properties.href as string;
  } else if (ownOnclick && typeof ownOnclick === 'string') {
    href = hrefFromOnclick(ownOnclick);
  } else {
    visit(node, 'element', function (subNode) {
      if (subNode.properties.href) {
        href = subNode.properties.href as string;
        return EXIT;
      }
      // Read the descendant's own onclick. The previous code tested and parsed
      // the root node's onclick here, so a descendant's handler was never used.
      const subOnclick = subNode.properties.onclick;
      if (subOnclick && typeof subOnclick === 'string') {
        href = hrefFromOnclick(subOnclick);
        return EXIT;
      }
    });
  }

  assertIsDefined(parent);

  const newNode: Element = {
    type: 'element',
    tagName: 'Card',
    properties: {
      title,
      href,
    },
    children: turnChildrenIntoMdx(node.children as Array<Element>) as Array<ElementContent>,
  };
  return newNode;
}
/**
 * Converts a Docusaurus card-like element into a `Card` element.
 *
 * A node qualifies when it is a `<div>` or `<a>` whose class list contains one
 * of `Tile`, `card`, `Card`, `docs-card`, or whose space-joined class string
 * contains `_card` or `-card`.
 *
 * The link target is resolved in this order:
 *   1. the node's own `href`;
 *   2. the node's own string `onclick` (the text between the first pair of
 *      single quotes, prefixed with `./`);
 *   3. the first element in the subtree (document order) carrying an `href`
 *      or a string `onclick`, resolved the same way.
 *
 * @param node - Candidate element to inspect.
 * @param _ - Index of the node in its parent (unused).
 * @param parent - Parent of the node; passed to `assertIsDefined` once the
 *   node has been recognised as a card.
 * @returns The new `Card` element, or `undefined` when `node` is not a card.
 */
export function docusaurusScrapeCard(
  node: HastNode,
  _: HastNodeIndex,
  parent: HastNodeParent
): Element | undefined {
  if (node.tagName !== 'div' && node.tagName !== 'a') return undefined;

  const classNames = node.properties.className;
  if (!classNames || !Array.isArray(classNames)) return undefined;

  const joinedClassNames = classNames.join(' ');
  const looksLikeCard =
    ['Tile', 'card', 'Card', 'docs-card'].some((token) => classNames.includes(token)) ||
    joinedClassNames.includes('_card') ||
    joinedClassNames.includes('-card');
  if (!looksLikeCard) return undefined;

  // Extracts the first single-quoted segment of an inline handler such as
  // "location.href='path'" and turns it into a relative link.
  const hrefFromOnclick = (handler: string): string | undefined => {
    const target = handler.split("'")[1];
    return target ? `./${target}` : undefined;
  };

  const title = findTitle(node);

  let href: string | undefined = undefined;
  const ownOnclick = node.properties.onclick;
  if (node.properties.href) {
    href = node.properties.href as string;
  } else if (ownOnclick && typeof ownOnclick === 'string') {
    href = hrefFromOnclick(ownOnclick);
  } else {
    visit(node, 'element', function (subNode) {
      if (subNode.properties.href) {
        href = subNode.properties.href as string;
        return EXIT;
      }
      // Read the descendant's own onclick. The previous code tested and parsed
      // the root node's onclick here, so a descendant's handler was never used.
      const subOnclick = subNode.properties.onclick;
      if (subOnclick && typeof subOnclick === 'string') {
        href = hrefFromOnclick(subOnclick);
        return EXIT;
      }
    });
  }

  assertIsDefined(parent);

  const newNode: Element = {
    type: 'element',
    tagName: 'Card',
    properties: {
      title,
      href,
    },
    children: turnChildrenIntoMdx(node.children as Array<Element>) as Array<ElementContent>,
  };
  return newNode;
}