@mintlify/scraping
Version:
Scrape documentation frameworks to Mintlify docs
107 lines (92 loc) • 3 kB
text/typescript
import { existsSync, mkdirSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { SUPPORTED_MEDIA_EXTENSIONS } from '../constants.js';
import type { Result } from '../types/result.js';
import { getErrorMessage } from './errors.js';
import { getFileExtension } from './extension.js';
import { write } from './file.js';
import { fetchImage } from './network.js';
/**
 * Saves the image referenced by `src` underneath `rootPath` and reports where
 * it ended up.
 *
 * Inline `data:image/` URIs are not written to disk; they are echoed back as
 * both elements of the tuple.
 *
 * @param src - Image source as found in the scraped page.
 * @param rootPath - Directory the image file is written beneath.
 * @returns On success, `[src, path]` where `path` is the written file path
 *   with the first occurrence of `process.cwd()` removed. On failure, the
 *   message of the thrown `Error`, or a generic message when something other
 *   than an `Error` was thrown. This function itself does not reject.
 */
export async function downloadImage(
  src: string,
  rootPath: string
): Promise<Result<[string, string]>> {
  // Embedded images need no download; pass them through untouched.
  if (src.startsWith('data:image/')) {
    return { success: true, data: [src, src] };
  }

  try {
    const writtenPath = await writeImageToFile(src, rootPath);
    // Drop the working-directory portion so the reported path is project-relative.
    const projectPath = writtenPath.replace(process.cwd(), '');
    return { success: true, data: [src, projectPath] };
  } catch (error) {
    const message =
      error instanceof Error
        ? error.message
        : `${src} - an unknown error occurred downloading this image`;
    return { success: false, message };
  }
}
/**
 * Fetches `src` and writes it to a file under `rootPath`, unless a file
 * already exists at the target path.
 *
 * The target path is `rootPath` joined with the name produced by
 * `removeMetadataFromImageSrc(src)`.
 *
 * @param src - Image source to fetch.
 * @param rootPath - Directory the file is placed beneath.
 * @returns The path of the file (pre-existing or newly written).
 * @throws Error when the name fails `isValidImageSrc`, when the parent
 *   directory cannot be created, or when fetching/writing fails.
 */
async function writeImageToFile(src: string, rootPath: string): Promise<string> {
  const relativeName = removeMetadataFromImageSrc(src);
  const targetPath = join(rootPath, relativeName);
  // Error messages show only the tail of long names to stay readable.
  const displayName = relativeName.length > 36 ? `...${relativeName.slice(-36)}` : relativeName;

  if (!isValidImageSrc(relativeName)) {
    throw new Error(`${displayName} - file extension not supported`);
  }

  // A file already at the target path is reused; nothing is fetched.
  if (existsSync(targetPath)) {
    return targetPath;
  }

  try {
    mkdirSync(dirname(targetPath), { recursive: true });
  } catch {
    throw new Error(`${targetPath} - failed to create directory`);
  }

  try {
    write(targetPath, await fetchImage(src));
  } catch (error) {
    // The helper's text is appended with no separator; presumably it supplies
    // its own leading punctuation — confirm in ./errors.js.
    throw new Error(
      `${displayName} - failed to download file from source${getErrorMessage(error)}`
    );
  }
  return targetPath;
}
/**
 * Decides whether `src` may be downloaded as media.
 *
 * An empty `src` is rejected. Otherwise the value is accepted when
 * `getFileExtension` yields nothing (falsy) or yields an extension listed in
 * `SUPPORTED_MEDIA_EXTENSIONS`.
 *
 * @param src - Image source or filename to check.
 * @returns `true` when the source is acceptable, `false` otherwise.
 */
export function isValidImageSrc(src: string) {
  if (!src) {
    return false;
  }
  const extension = getFileExtension(src);
  // Sources with no detectable extension are let through; only a known-bad
  // extension causes rejection.
  return !extension || SUPPORTED_MEDIA_EXTENSIONS.includes(extension);
}
/**
 * Cuts `src` immediately after the first occurrence of `.${ext}`, discarding
 * anything that follows it (query strings, tokens, size hints, ...).
 *
 * @param src - Image source that may carry trailing metadata.
 * @param ext - File extension without the leading dot, e.g. `png`.
 * @returns The prefix of `src` ending in `.${ext}`. When `src` does not
 *   contain `.${ext}` there is nothing to cut, so `src` is returned unchanged.
 */
export function getFilenameBeforeMetadata(src: string, ext: string): string {
  const dottedExt = `.${ext}`;
  const extStart = src.indexOf(dottedExt);
  // Without this guard the -1 from indexOf would be added to the extension
  // length and produce a meaningless few-character prefix of `src`.
  if (extStart === -1) {
    return src;
  }
  return src.slice(0, extStart + dottedExt.length);
}
/**
 * Derives a relative filename from an image `src`, stripping query strings,
 * fragments and characters that are awkward in file paths.
 *
 * Two strategies are used:
 * - When `src` contains `gitbook/image` and one of the supported extensions,
 *   the source is cut right after that extension and the first four
 *   `%2F`-separated segments are dropped (presumably the proxy prefix and
 *   host of the embedded URL — confirm against real GitBook URLs). The
 *   result keeps its `%2F` separators and may be empty.
 * - Otherwise the URL pathname (for sources starting with `http`) or the raw
 *   source is used: fragment and query are removed, repeated slashes are
 *   collapsed, the text is percent-decoded, and runs of underscores plus a
 *   set of special characters are replaced by `-`. An empty result becomes
 *   `image`.
 *
 * A string that starts with `http` but is not a parseable URL, and malformed
 * percent-encoding, no longer raise; the raw text is used instead.
 *
 * @param src - Image source as found in the scraped page.
 * @returns The derived filename.
 */
export function removeMetadataFromImageSrc(src: string): string {
  let gitbookName = '';
  if (src.includes('gitbook/image')) {
    // No early exit: if several extensions occur in `src`, the last matching
    // entry of the extension list decides where the cut happens.
    for (const ext of SUPPORTED_MEDIA_EXTENSIONS) {
      if (src.includes(`.${ext}`)) {
        gitbookName = getFilenameBeforeMetadata(src, ext);
      }
    }
  }
  if (gitbookName) {
    return gitbookName.split('%2F').slice(4).join('%2F');
  }

  let path = src;
  if (path.startsWith('http')) {
    try {
      path = new URL(path).pathname;
    } catch {
      // "http" was only the start of a non-URL string (e.g. "httpd-logo.png");
      // keep treating the value as a plain path.
    }
  }

  const withoutQuery = path
    .split('#')[0]!
    .split('?')[0]!
    .replace(/\/{2,}/g, '/');

  let decoded: string;
  try {
    decoded = decodeURIComponent(withoutQuery);
  } catch {
    // A stray "%" (as in "100%.png") is not valid percent-encoding. Fall back
    // to the raw text; the replacement below turns the "%" into "-".
    decoded = withoutQuery;
  }

  return decoded.replace(/(?:_{2,}|[\s%#&{}\\<>*?$!'":@+`|=])/g, '-') || 'image';
}