@jackdbd/eleventy-plugin-text-to-speech
Version:
Eleventy plugin that uses text-to-speech to generate audio assets for your website, then injects audio players in your HTML.
178 lines • 7.14 kB
JavaScript
import defDebug from 'debug';
import { JSDOM } from 'jsdom';
import { calculate, compare } from 'specificity';
import { z } from 'zod';
import { transform_name } from '@jackdbd/zod-schemas/eleventy';
import { DEBUG_PREFIX, DEFAULT_REGEX, DEFAULT_TRANSFORM_NAME, ERR_PREFIX, OK_PREFIX } from '../constants.js';
import { insertAudioPlayerMatchingXPathExpression } from '../dom/mutations.js';
import { logErrors } from '../log.js';
import { rule } from '../schemas/rule.js';
import { textToAudioAsset } from '../text-to-audio-asset.js';
import { validatedDataOrThrow } from '../validation.js';
import { aggregateRules } from '../aggregate-rules.js';
export const config_schema = z
.object({
rules: z.array(rule).min(1),
transformName: transform_name.default(DEFAULT_TRANSFORM_NAME)
})
.describe(`11ty transform ${DEFAULT_TRANSFORM_NAME} config`);
export const injectAudioTagsUnbounded = async (config, content, outputPath) => {
const cfg = validatedDataOrThrow(config, config_schema);
const transformName = cfg.transformName || DEFAULT_TRANSFORM_NAME;
const debug = defDebug(`${DEBUG_PREFIX}:${transformName}`);
debug(`11ty transform ${transformName} invoked on ${outputPath}`);
const matches = cfg.rules
.map((r, i) => {
// If we skip validation with zod, regex might be undefined. So we assign
// the default value now.
const regex = r.regex || DEFAULT_REGEX;
return regex.test(outputPath)
? { idx: i, regex, matched: true }
: { idx: i, regex, matched: false };
})
.filter((d) => d.matched);
if (matches.length === 0) {
debug(`${outputPath} does NOT match any regex pattern, so the transform ${transformName} will NOT transform it`);
return content;
}
else {
debug(`${matches.length} regex pattern/s match ${outputPath} %O`, {
regexes: matches.map((m) => m.regex)
});
}
const dom = new JSDOM(content);
// Step 1: find all CSS selector matches and XPath expression matches, across
// ALL rules, to find ALL texts of this document that should be synthesized
// into speech.
const res = aggregateRules({ dom, rules: cfg.rules, matches });
if (res.error) {
throw new Error(`${ERR_PREFIX} ${res.error.message}`);
}
const rec = res.value;
const configs = Object.entries(rec).map(([hash, v]) => {
return {
hash,
text: v.text,
'CSS selectors': v.cssSelectors.length,
'XPath expressions': v.xPathExpressions.length,
'synthesis configurations': v.synthesis_configs.length,
'hosting configurations': v.hosting_configs.length
};
});
debug(`${configs.length} audio player/s will be injected in ${outputPath} %O`, configs);
// Step 2: wait for ALL audio assets of ONE text to be generated, so we have
// all hrefs for the audio player corresponding to that text.
const promises = Object.values(rec).map(async (value) => {
const { audioInnerHTML, cssSelectors, hosting_configs, synthesis_configs, text, xPathExpressions } = value;
const assetsPromises = synthesis_configs.map((synthesis, i) => {
return textToAudioAsset({
text,
synthesis,
hosting: hosting_configs[i]
});
});
const results = await Promise.all(assetsPromises);
let cssSelector = undefined;
if (cssSelectors.length === 1) {
cssSelector = cssSelectors[0];
}
else if (cssSelectors.length > 0) {
cssSelector = cssSelectors
.map((selector) => {
return { selector, specificity: calculate(selector) };
})
.sort((a, b) => {
return compare(a.specificity, b.specificity);
})[0].selector;
}
else {
cssSelector = undefined;
}
// Is there a way to establish a priority among the XPath expressions? I mean,
// between //p[contains(., "Hello")] and //p[contains(., "Hello World")], I
// would argue the latter is more specific and should take precedence.
const xPathExpression = xPathExpressions.length > 0 ? xPathExpressions[0] : undefined;
const hm = {
audioInnerHTML,
text,
hrefs: [],
errors: [],
cssSelector,
xPathExpression
};
results.forEach((res) => {
if (res.error) {
hm.errors.push(res.error);
}
else {
hm.hrefs.push(res.value.href);
}
});
return hm;
});
const maps = await Promise.all(promises);
const counter = {};
const doc = dom.window.document;
// Step 3: for each text, inject ONE audio player for that text, with ALL the
// hrefs corresponding to the audio sources available for that text.
const errors = [];
const successes = [];
maps.forEach((hm) => {
const audioInnerHTML = hm.audioInnerHTML;
const expression = hm.xPathExpression;
const hrefs = hm.hrefs;
hm.errors.forEach((err) => errors.push(err));
// Picking the first matching XPath expression even when there is a matching
// CSS selector is a somewhat arbitrary decision. I might revisit this
// decision in the future.
if (expression) {
const idx = counter[expression] || 0;
const res = insertAudioPlayerMatchingXPathExpression({
audioInnerHTML,
expression,
dom,
hrefs,
idx
});
if (idx) {
counter[expression]++;
}
else {
counter[expression] = 1;
}
if (res.error) {
errors.push(res.error);
}
}
if (!hm.xPathExpression && hm.cssSelector) {
const elements = doc.querySelectorAll(hm.cssSelector);
if (elements.length === 0) {
return {
error: new Error(`no elements found matching CSS selector ${hm.cssSelector}`)
};
}
const idx = counter[hm.cssSelector];
let elem;
if (idx) {
elem = elements[idx];
counter[hm.cssSelector]++;
}
else {
elem = elements[0];
counter[hm.cssSelector] = 1;
}
const position = 'afterend';
const innerHTML = `<audio controls>${audioInnerHTML(hrefs)}</audio>`;
elem.insertAdjacentHTML(position, innerHTML);
}
if (hm.errors.length === 0) {
successes.push(`inserted all audio players in ${outputPath}`);
}
});
logErrors(errors, outputPath);
if (successes.length > 0) {
console.log(`${OK_PREFIX} inserted audio players in ${outputPath}`);
}
return dom.serialize();
};
//# sourceMappingURL=transforms.js.map