@yemreak/yt-dlp
Version:
Downloading videos and subtitles using yt-dlp, with utilities for extracting text from subtitles
111 lines • 4.03 kB
JavaScript
import { chmodSync } from "fs";
import fetch from "node-fetch";
import { platform } from "os";
import { downloadFile, execAsync } from "./utils.js";
/**
 * Downloads the yt-dlp binary from the first entry of the GitHub releases list.
 * - Picks the `yt-dlp.exe` asset on Windows and the `yt-dlp` asset elsewhere
 * - chmod 755 the file if it's not on Windows
 *
 * @param {string} [outdir="."] - Directory the binary is written into.
 * @returns {Promise<string>} Path of the downloaded binary (`<outdir>/<asset name>`).
 * @throws {Error} If the releases request fails, the response holds no
 *   releases, or the first release has no asset for this platform.
 */
export async function downloadLatestRelease(outdir = ".") {
    const url = `https://api.github.com/repos/yt-dlp/yt-dlp/releases`;
    const response = await fetch(url);
    // A failed request (e.g. rate limiting) does not yield a release array;
    // fail here with a clear message instead of an opaque TypeError below.
    if (!response.ok) {
        throw new Error(`Failed to fetch yt-dlp releases: ${response.status} ${response.statusText}`);
    }
    const data = await response.json();
    if (!Array.isArray(data) || data.length === 0) {
        throw new Error("Failed to find the latest release");
    }
    const os = platform();
    const fileName = os === "win32" ? "yt-dlp.exe" : "yt-dlp";
    // NOTE(review): assumes the API lists the newest release first — confirm.
    const latestRelease = data[0].assets?.find(({ name }) => name === fileName);
    if (!latestRelease) {
        throw new Error("Failed to find the latest release");
    }
    const filepath = `${outdir}/${fileName}`;
    await downloadFile(latestRelease.browser_download_url, filepath);
    // The downloaded file needs the executable bit on non-Windows platforms.
    if (os !== "win32") {
        chmodSync(filepath, "755");
    }
    return filepath;
}
/**
 * Builds a yt-dlp command line from `options` and runs it via `execAsync`.
 *
 * Fields read from `options`:
 * - `binaryPath`: path of the yt-dlp executable (placed first, unquoted)
 * - `url`: target URL (double-quoted)
 * - `outputPath`: passed as `--output` (double-quoted) when set
 * - `cookies`: passed as `--cookies` (double-quoted) when set
 * - `dumpJson`: adds `--dump-json` when truthy
 * - `format`: passed as `--format`; defaults to `b` when not set
 * - `subtitle`: when set, adds `--write-auto-sub` (if `subtitle.auto`) or
 *   `--write-sub`, an optional `--sub-lang "<subtitle.lang>"`, and
 *   `--skip-download --convert-subs srt`
 *
 * NOTE(review): values are interpolated into one command string; presumably
 * `execAsync` runs it through a shell — confirm, and escape untrusted input
 * (embedded double quotes are not escaped here).
 *
 * @param options - Command options as described above.
 * @returns Whatever `execAsync` returns for the assembled command.
 */
export function execYtDlp(options) {
    const args = [`"${options.url}"`];
    if (options.outputPath) {
        args.push("--output", `"${options.outputPath}"`);
    }
    if (options.cookies) {
        args.push("--cookies", `"${options.cookies}"`);
    }
    if (options.dumpJson) {
        args.push("--dump-json");
    }
    args.push("--format", options.format || "b");
    if (options.subtitle) {
        args.push(options.subtitle.auto ? "--write-auto-sub" : "--write-sub");
        // Use the language string itself; interpolating the subtitle object
        // would produce "[object Object]".
        if (options.subtitle.lang) {
            args.push("--sub-lang", `"${options.subtitle.lang}"`);
        }
        args.push("--skip-download", "--convert-subs", "srt");
    }
    const command = `${options.binaryPath} ${args.join(" ")}`;
    return execAsync(command);
}
/**
 * Collects the file paths that yt-dlp reported in its output.
 *
 * Paths from "has already been downloaded" lines come first, followed by
 * paths from "Destination:" lines.
 *
 * @param stdout - Text printed by yt-dlp.
 * @returns The captured paths.
 * @throws {Error} If no path could be found in the output.
 */
export function parseFilenamesFromOutput(stdout) {
    const patterns = [
        /\[download\] (.+) has already been downloaded/g,
        /Destination: (.+)/g,
    ];
    const filenames = patterns.flatMap((pattern) =>
        Array.from(stdout.matchAll(pattern), (found) => found[1]),
    );
    if (filenames.length > 0) {
        return filenames;
    }
    throw new Error(`Failed to parse filename from output: ${stdout}`);
}
/**
 * Extracts text from subtitle data.
 *
 * Every segment's `utf8` value is collected in event order and joined with
 * single spaces; runs of line breaks are replaced by a space. Events without
 * a `segs` array contribute nothing instead of throwing.
 *
 * @param subtitleData - The subtitle data containing events with text segments.
 * @returns The concatenated text from all subtitles.
 */
export function extractTextFromJson3Subtitle(subtitleData) {
    return subtitleData.events
        // An event may carry no segments at all; treat it as empty.
        .flatMap(event => (event.segs ?? []).map(segment => segment.utf8))
        .join(" ")
        .replace(/[\r\n]+/g, " ");
}
/**
 * Extracts text from VTT subtitle data.
 *
 * Lines are trimmed, then these are dropped:
 * - blank lines
 * - lines starting with `WEBVTT` or `NOTE`
 * - cue timing lines (any line containing `-->`)
 * The remaining lines are joined with single spaces.
 *
 * @param subtitleData - The subtitle data in VTT format.
 * @returns The concatenated text from all subtitles.
 */
export function extractTextFromVttSubtitle(subtitleData) {
    return subtitleData
        // Split on CRLF, CR or LF so no stray line terminator survives.
        .split(/\r\n|\r|\n/)
        .map(line => line.trim())
        .filter(line => line !== ""
            && !line.startsWith("WEBVTT")
            && !line.startsWith("NOTE")
            && !line.includes("-->"))
        .join(" ");
}
/**
 * Cleans and extracts text from SRT subtitle data.
 * - Omits cue counters (a digits-only line directly followed by a timing line)
 *   and timing lines; other text that starts with a digit is kept.
 * - Consolidates multiple spaces into one.
 * - Converts dashes to spaces.
 * - Removes duplicate consecutive lines.
 * @param subtitleData Raw SRT subtitle content.
 * @returns A clean, continuous string of subtitle text.
 */
export function extractTextFromSrtSubtitle(subtitleData) {
    // Start of a cue timing line, e.g. "00:00:01,000 -->".
    const timingLine = /^\d+:\d{2}:\d{2}[,.]\d+\s*-->/;
    const lines = subtitleData.split("\n").map(line => line.trim());
    const texts = [];
    let previousLine = "";
    lines.forEach((line, index) => {
        if (timingLine.test(line)) {
            return;
        }
        // A digits-only line is a cue counter only when a timing line follows;
        // otherwise it is subtitle text and must be kept.
        if (/^\d+$/.test(line) && timingLine.test(lines[index + 1] ?? "")) {
            return;
        }
        const cleaned = line
            .replace(/-/g, " ")
            .replace(/\s+/g, " ")
            .trim();
        if (cleaned && cleaned !== previousLine) {
            texts.push(cleaned);
            previousLine = cleaned;
        }
    });
    return texts.join(" ").trim();
}
/**
 * Tells whether `error` carries the code `"conflict_error"`.
 *
 * @param error - Any value; `null` and `undefined` are accepted.
 * @returns `true` only when `error.code` is exactly `"conflict_error"`.
 */
export function isConflictError(error) {
    if (error === null || error === undefined) {
        return false;
    }
    const { code } = error;
    return code === "conflict_error";
}
//# sourceMappingURL=helpers.js.map