/*
 * extwee
 * Version:
 * A story compiler tool using Twine-compatible formats
 * 523 lines (451 loc) • 16.5 kB
 * JavaScript
 */
import { Story } from '../Story.js';
import Passage from '../Passage.js';
import { decode } from 'html-entities';
/**
 * Lightweight HTML parser for web builds - specifically for Twine 2 HTML parsing.
 * This replaces node-html-parser to reduce bundle size.
 *
 * The browser's native DOMParser is used when it exists. Otherwise a regex
 * fallback is used that only understands `<tw-storydata>`, `<tw-passagedata>`
 * and `<style>` elements.
 *
 * Elements are returned as plain objects `{ attributes, innerHTML, rawText }`.
 */
class LightweightTwine2Parser {
  /**
   * @param {string} html - HTML source to search for Twine 2 elements.
   */
  constructor(html) {
    this.html = html;
    this.doc = null;
    this.usingDOMParser = false;

    if (typeof DOMParser !== 'undefined') {
      try {
        const doc = new DOMParser().parseFromString(html, 'text/html');
        // DOMParser reports some failures as an error document instead of throwing.
        const parserError = doc.querySelector('parsererror');
        if (parserError) {
          console.warn('DOMParser encountered an error, falling back to regex parsing:', parserError.textContent);
        } else {
          this.doc = doc;
          this.usingDOMParser = true;
        }
      } catch (error) {
        console.warn('DOMParser failed, falling back to regex parsing:', error.message);
      }
    }

    // No DOMParser, or it failed: use the regex-backed stand-in.
    if (!this.usingDOMParser) {
      this.doc = this.createSimpleDOM(html);
    }
  }

  /**
   * Find every element with the given tag name.
   *
   * @param {string} tagName - Tag name to look up. The regex fallback supports
   *   only 'tw-storydata', 'tw-passagedata' and 'style'.
   * @returns {Array<{attributes: object, innerHTML: string, rawText: string}>}
   *   Matching elements; empty when nothing matches or the tag is unsupported.
   */
  getElementsByTagName(tagName) {
    if (this.usingDOMParser && this.doc && this.doc.getElementsByTagName) {
      return Array.from(this.doc.getElementsByTagName(tagName), (element) => {
        const attributes = {};
        if (element.attributes) {
          // The DOM has already decoded character references in attribute values.
          for (const attr of Array.from(element.attributes)) {
            attributes[attr.name] = attr.value;
          }
        }
        return {
          attributes,
          innerHTML: element.innerHTML || '',
          // NOTE(review): textContent is already entity-decoded, so a caller that
          // decodes rawText again (parse() calls decode() on it) decodes twice in
          // this path - confirm whether undecoded text is wanted here.
          rawText: element.textContent || element.innerText || ''
        };
      });
    }
    return this.extractElements(tagName);
  }

  /**
   * Regex-based lookup shared by the fallback paths.
   *
   * @param {string} tagName - Tag name to look up.
   * @returns {Array<object>} Matching elements, or [] for unsupported tags.
   */
  extractElements(tagName) {
    switch (tagName) {
      case 'tw-storydata':
        return this.extractStoryDataElements();
      case 'tw-passagedata':
        return this.extractPassageDataElements();
      case 'style':
        return this.extractStyleElements();
      default:
        return [];
    }
  }

  /**
   * Run a global element regex over the source and build element objects.
   *
   * @param {RegExp} elementRegex - Global regex whose first capture group is the
   *   element's inner HTML.
   * @param {function(string): string} [toRawText] - Derives rawText from the
   *   inner HTML; defaults to returning it unchanged.
   * @returns {Array<{attributes: object, innerHTML: string, rawText: string}>}
   */
  collectElements(elementRegex, toRawText = (inner) => inner) {
    return Array.from(String(this.html).matchAll(elementRegex), ([elementHtml, innerHTML]) => ({
      attributes: this.parseAttributes(elementHtml),
      innerHTML,
      rawText: toRawText(innerHTML)
    }));
  }

  /**
   * @returns {Array<object>} All `<tw-storydata>` elements found by regex.
   */
  extractStoryDataElements() {
    return this.collectElements(/<tw-storydata[^>]*>([\s\S]*?)<\/tw-storydata>/gi);
  }

  /**
   * @returns {Array<object>} All `<tw-passagedata>` elements found by regex;
   *   rawText has nested tags stripped and is trimmed.
   */
  extractPassageDataElements() {
    return this.collectElements(
      /<tw-passagedata[^>]*>([\s\S]*?)<\/tw-passagedata>/gi,
      (inner) => this.extractTextContent(inner)
    );
  }

  /**
   * @returns {Array<object>} All `<style>` elements found by regex.
   */
  extractStyleElements() {
    return this.collectElements(/<style[^>]*>([\s\S]*?)<\/style>/gi);
  }

  /**
   * Parse the attributes of an element's opening tag.
   *
   * Handles double-quoted, single-quoted, unquoted and boolean attributes in
   * one pass, so text inside a quoted value is never mistaken for another
   * attribute. Names are lower-cased and the first occurrence of a name wins.
   *
   * @param {string} elementHtml - Element HTML starting with its opening tag.
   * @returns {object} Map of attribute name to decoded string value, or `true`
   *   for attributes written without a value.
   */
  parseAttributes(elementHtml) {
    const attributes = {};
    // Only look at the opening tag so nested elements cannot contribute attributes.
    const openingTagMatch = elementHtml.match(/^<[^>]*>/);
    if (!openingTagMatch) return attributes;

    // Drop "<tagname" and the closing ">" (or "/>"), leaving just the attribute list.
    const attributeSource = openingTagMatch[0]
      .replace(/^<[^\s/>]+/, '')
      .replace(/\/?>$/, '');

    // name | name="value" | name='value' | name=value
    const attributeRegex = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (const [, rawName, doubleQuoted, singleQuoted, unquoted] of attributeSource.matchAll(attributeRegex)) {
      const name = rawName.toLowerCase();
      if (Object.prototype.hasOwnProperty.call(attributes, name)) {
        continue;
      }
      const value = doubleQuoted ?? singleQuoted ?? unquoted;
      attributes[name] = value === undefined ? true : this.decodeBasicEntities(value);
    }
    return attributes;
  }

  /**
   * Decode the basic HTML character references for `&`, `<`, `>`, `"` and `'`.
   *
   * Decoding happens in a single pass, so `&amp;lt;` becomes `&lt;` and is not
   * decoded a second time.
   *
   * @param {string} text - Text that may contain character references.
   * @returns {string} Text with the basic references replaced.
   */
  decodeBasicEntities(text) {
    const entities = {
      amp: '&',
      lt: '<',
      gt: '>',
      quot: '"',
      apos: "'",
      '#39': "'",
      '#x27': "'"
    };
    return text.replace(/&(amp|lt|gt|quot|apos|#39|#[xX]27);/g, (entity, name) => entities[name.toLowerCase()]);
  }

  /**
   * Strip nested tags from an element's inner HTML and trim the result.
   *
   * Character references are left encoded on purpose: parse() runs decode() on
   * rawText, and decoding here as well would decode the passage text twice.
   *
   * @param {string} html - Inner HTML of an element.
   * @returns {string} Tag-free, trimmed, still entity-encoded text.
   */
  extractTextContent(html) {
    return html.replace(/<[^>]*>/g, '').trim();
  }

  /**
   * Build the minimal DOM-like object used when DOMParser is unavailable.
   *
   * @param {string} _html - Unused; extraction reads `this.html`.
   * @returns {{getElementsByTagName: function(string): Array<object>}}
   */
  // eslint-disable-next-line no-unused-vars
  createSimpleDOM(_html) {
    return {
      getElementsByTagName: (tagName) => this.extractElements(tagName)
    };
  }
}
/**
 * Web-optimized Twine 2 HTML parser with reduced dependencies.
 * Builds a Story object from Twine 2 HTML using lightweight DOM parsing.
 *
 * See: Twine 2 HTML Output Specification
 * (https://github.com/iftechfoundation/twine-specs/blob/master/twine-2-htmloutput-spec.md)
 *
 * Only the first `<tw-storydata>` element is read.
 *
 * Produces warnings for:
 * - Missing or empty name attribute on `<tw-storydata>` element.
 * - Missing or empty IFID attribute on `<tw-storydata>` element.
 * - Malformed IFID attribute on `<tw-storydata>` element.
 * - A `<tw-passagedata>` element without a name (the passage is skipped).
 * @function parse
 * @param {string} content - Twine 2 HTML content to parse.
 * @returns {Story} Story object based on Twine 2 HTML content.
 * @throws {TypeError} Content is not a string.
 * @throws {TypeError} Not Twine 2 HTML content (no `<tw-storydata>` element).
 * @throws {Error} Passages are required to have PID!
 */
function parse(content) {
  const story = new Story();

  if (typeof content !== 'string') {
    throw new TypeError('TypeError: Content is not a string!');
  }

  // Own-property test that does not rely on the object's prototype.
  const has = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  const dom = new LightweightTwine2Parser(content);

  const storyDataElements = dom.getElementsByTagName('tw-storydata');
  if (storyDataElements.length === 0) {
    // Without a `<tw-storydata>` element this cannot be a Twine 2 story.
    throw new TypeError('TypeError: Not Twine 2 HTML content!');
  }
  const storyAttrs = storyDataElements[0].attributes;

  // name (required): keep the Story default unless a non-blank string is given.
  if (!has(storyAttrs, 'name')) {
    console.warn('Warning: The name attribute is missing from tw-storydata!');
  } else if (typeof storyAttrs.name === 'string' && storyAttrs.name.trim().length > 0) {
    story.name = storyAttrs.name.trim();
  } else {
    console.warn('Warning: The name attribute is empty or invalid on tw-storydata!');
  }

  // ifid (required): same rule as name.
  if (!has(storyAttrs, 'ifid')) {
    console.warn('Warning: The ifid attribute is missing from tw-storydata!');
  } else if (typeof storyAttrs.ifid === 'string' && storyAttrs.ifid.trim().length > 0) {
    story.IFID = storyAttrs.ifid.trim();
  } else {
    console.warn('Warning: The ifid attribute is empty or invalid on tw-storydata!');
  }

  // Warn (but continue) when a present IFID is not an upper-case UUID.
  const ifidPattern = /^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$/;
  if (story.IFID && story.IFID.match(ifidPattern) === null) {
    console.warn('Warning: The IFID is not in valid UUIDv4 formatting on tw-storydata!');
  }

  // Optional attributes copied verbatim: [attribute name, Story property].
  const directCopies = [
    ['creator', 'creator'],
    ['creator-version', 'creatorVersion'],
    ['format', 'format'],
    ['format-version', 'formatVersion']
  ];
  for (const [attrName, storyProp] of directCopies) {
    if (has(storyAttrs, attrName)) {
      story[storyProp] = storyAttrs[attrName];
    }
  }

  // startnode (optional): PID of the starting passage, resolved to a name below.
  const startNode = has(storyAttrs, 'startnode') ? storyAttrs.startnode : null;

  // zoom (optional): stored as a Number.
  if (has(storyAttrs, 'zoom')) {
    story.zoom = Number(storyAttrs.zoom);
  }

  // options (optional): comma-separated `key:value` pairs merged into metadata.
  if (has(storyAttrs, 'options')) {
    const options = storyAttrs.options;
    if (options.length > 0) {
      for (const pair of options.split(',')) {
        const [key, value] = pair.split(':');
        if (key && value) {
          story.metadata[key.trim()] = value.trim();
        }
      }
    }
  }

  // hidden (optional): stored as-is in metadata.
  if (has(storyAttrs, 'hidden')) {
    story.metadata.hidden = storyAttrs.hidden;
  }

  // Tag colors are read from `tw-story-tag-NAME { color: VALUE }` rules in styles.
  for (const styleElement of dom.getElementsByTagName('style')) {
    const css = styleElement.innerHTML || styleElement.rawText || '';
    for (const [, tagName, color] of css.matchAll(/tw-story-tag-(.+?)\s*{\s*color:\s*(.+?)\s*}/g)) {
      story.tagColors[tagName] = color;
    }
  }

  for (const passage of dom.getElementsByTagName('tw-passagedata')) {
    const attr = passage.attributes;
    // Passage text with HTML entities decoded.
    const text = decode(passage.rawText);

    // name (required): warn and skip the passage when absent.
    if (!has(attr, 'name')) {
      console.warn('Warning: Cannot parse passage data without name!');
      continue;
    }

    // pid (required): a named passage without one aborts parsing.
    if (!has(attr, 'pid')) {
      throw new Error('Error: Passages are required to have PID!');
    }

    // tags (optional): space-separated; blanks and a literal `""` mean no tags.
    const tags = has(attr, 'tags') && attr.tags.length > 0 && attr.tags !== '""'
      ? attr.tags.split(' ').filter(tag => tag !== '')
      : [];

    // position / size (optional): copied into passage metadata when present.
    const metadata = {};
    for (const key of ['position', 'size']) {
      const value = has(attr, key) ? attr[key] : null;
      if (value !== null) {
        metadata[key] = value;
      }
    }

    // The passage whose PID equals startnode becomes the story start.
    if (startNode === attr.pid) {
      story.start = attr.name;
    }

    story.addPassage(new Passage(attr.name, text, tags, metadata));
  }

  return story;
}
export { parse };