linguist-js
Version:
Analyse the programming languages used in a folder or from raw content, using the same rules that GitHub Linguist does.
41 lines (40 loc) • 1.56 kB
JavaScript
import YAML from 'js-yaml';
import Cache from 'node-cache';
import FS from 'node:fs';
import Path from 'node:path';
import { fileURLToPath } from 'node:url';
// Cache (node-cache, default options) used to hold downloaded data files, keyed by file name.
const cache = new Cache({});
// Directory of this module, derived from its file URL; base for resolving local data files.
const dirname = Path.dirname(fileURLToPath(import.meta.url));
/**
 * Load a data file from the github-linguist repository, caching the result.
 *
 * Falls back to the local copy of the file when the request fails, the server
 * answers with a non-2xx status, or the response body is empty.
 *
 * @param {string} file - File name relative to `lib/linguist/` in github/linguist.
 * @returns {Promise<string>} The file content; for `generated.rb` this is a
 *   YAML dump of the extracted regular expressions rather than the raw file.
 */
async function loadWebFile(file) {
  // Return cache if it exists
  const cachedContent = cache.get(file);
  if (cachedContent) {
    return cachedContent;
  }
  const dataUrl = `https://raw.githubusercontent.com/github/linguist/HEAD/lib/linguist/${file}`;
  // Load file content, falling back to the local file if the request fails
  let fileContent;
  try {
    const response = await fetch(dataUrl);
    // fetch() only rejects on network errors; an HTTP error page (e.g. a 404
    // body) must not be mistaken for, or cached as, the file content.
    if (response.ok) {
      fileContent = await response.text();
    }
  } catch {
    // Deliberate best effort: any request failure falls through to the local file.
  }
  if (!fileContent) {
    return await loadLocalFile(file);
  }
  // Clean up lengthy files *before* caching, so that a cache hit returns the
  // same cleaned-up form as the first call did.
  const content = file === 'generated.rb'
    ? YAML.dump(parseGeneratedDataFile(fileContent))
    : fileContent;
  cache.set(file, content);
  return content;
}
/**
 * Read a data file from the `ext` directory located three levels above this
 * module's directory and return its content as a string.
 */
async function loadLocalFile(file) {
  const localPath = Path.resolve(dirname, '../../../ext', file);
  const raw = await FS.promises.readFile(localPath);
  return raw.toString();
}
/**
 * Strip `generated.rb` content down to the part that is used: the bodies of
 * the regular expressions passed to `name.match(/.../)`, in order of appearance.
 * Returns an empty array when there are none.
 */
export function parseGeneratedDataFile(fileContent) {
  // Text between `name.match(/` and the next unescaped `/`
  const patternBody = /(?<=name\.match\(\/).+?(?=(?<!\\)\/)/gm;
  const found = [];
  for (const hit of fileContent.matchAll(patternBody)) {
    found.push(hit[0]);
  }
  return found;
}
/**
 * Load a data file from github-linguist.
 * When `offline` is truthy the local copy is read; otherwise the file is
 * loaded via the web loader.
 */
export function loadFile(file, offline = false) {
  if (offline) {
    return loadLocalFile(file);
  }
  return loadWebFile(file);
}