screenplay-js
Version:
A modern TypeScript Fountain screenplay parser. Convert Final Draft (.fdx) files to Fountain, and then parse Fountain markdown to HTML.
199 lines (165 loc) • 6.08 kB
text/typescript
import { IToken } from '../interfaces';
import { sections } from './sections';
class ScriptTokenizer {
  /**
   * Normalises raw screenplay text before it is split into lines and tokenized.
   *
   * Applies, in order, the `boneyard`, `standardizer`, `cleaner` and
   * `whitespacer` patterns from `sections`, then empties every line that
   * contains nothing but whitespace.
   *
   * @param script - Raw screenplay text.
   * @returns The normalised text.
   */
  clean(script: string): string {
    return script
      .replace(sections.boneyard, '\n$1\n')
      .replace(sections.standardizer, '\n')
      .replace(sections.cleaner, '')
      .replace(sections.whitespacer, '')
      // PR: Treat whitespace-only lines as empty
      // https://github.com/nathanhoad/fountain-js/pull/2
      .replace(/^\s+$/gm, '');
  }

  /**
   * Breaks a screenplay into a flat list of token objects.
   *
   * The text is cleaned, split with `sections.splitter`, and each resulting
   * chunk is matched against the `sections` patterns in a fixed order (title
   * page, scene heading, centered, transition, dialogue, section, synopsis,
   * note, boneyard, page break, line break, lyrics). The first pattern that
   * matches decides the token(s) emitted; anything left over becomes an
   * `action` token.
   *
   * Scene headings carry a `scene_number`: the explicit number from the
   * heading when one is present, otherwise a running counter that starts at 0.
   * An explicit number with a numeric prefix re-seeds the counter, so the
   * headings that follow continue from it.
   *
   * @param script - Text of the screenplay.
   * @returns The tokens in document order.
   */
  tokenize(script: string): any[] {
    const scriptLines: string[] = this.clean(script).split(sections.splitter);
    const tokens: IToken[] = [];
    let sceneCounter = 0;
    // Regex results stay loosely typed, as in the original implementation,
    // because the shape of `sections` is declared outside this class.
    let match: any;

    for (const line of scriptLines) {
      // title page
      if (sections.title_page.test(line)) {
        const entries: string[] = line.replace(sections.title_page, '\n$1').split(sections.splitter);
        for (const entry of entries) {
          const parts: string[] = entry.replace(sections.cleaner, '').split(/\:\n*/);
          // Everything after the key is the value. Re-joining with ': ' keeps
          // values that contain colons intact, and an entry without any colon
          // yields an empty value instead of throwing.
          const value = parts.slice(1).map((p: string) => p.trim()).join(': ').trim();
          // Replace every space in the key, not only the first one.
          tokens.push({ type: parts[0].trim().toLowerCase().replace(/ /g, '_'), text: value });
        }
        continue;
      }

      // scene headings
      match = line.match(sections.scene_heading);
      if (match) {
        let text: string = match[1] || match[2];
        // Headings whose first space sits exactly two characters before the end
        // are dropped without emitting a token.
        // NOTE(review): this may have been meant to detect a two-space suffix;
        // confirm the string literal against the upstream source.
        if (text.indexOf(' ') !== text.length - 2) {
          const numberMatch: any = text.match(sections.scene_number);
          let explicitNumber: any;
          if (numberMatch) {
            explicitNumber = numberMatch[2];
            text = text.replace(sections.scene_number, '');
          }

          // `any` because an explicit number is emitted exactly as written
          // (a string such as "12" or "4A"), while implicit numbers are numeric.
          let sceneNumber: any = sceneCounter;
          if (explicitNumber) {
            sceneNumber = explicitNumber;
            // Keep the running counter numeric. Assigning the string here would
            // turn the increment below into string concatenation ("5" -> "51").
            const numericPart = parseInt(explicitNumber, 10);
            if (!isNaN(numericPart)) {
              sceneCounter = numericPart;
            }
          }

          tokens.push({ type: 'scene_heading', text: text, scene_number: sceneNumber });
          sceneCounter += 1;
        }
        continue;
      }

      // centered
      match = line.match(sections.centered);
      if (match) {
        tokens.push({ type: 'centered', text: match[0].replace(/>|</g, '') });
        continue;
      }

      // transitions
      match = line.match(sections.transition);
      if (match) {
        tokens.push({ type: 'transition', text: match[1] || match[2] });
        continue;
      }

      // dialogue blocks - characters, parentheticals and dialogue.
      // When the character-name check fails, the line falls through to the
      // remaining patterns below.
      match = line.match(sections.dialogue);
      if (match && match[1].indexOf(' ') !== match[1].length - 2) {
        // PR: Fixed the bug where parentheticals are after the dialogue
        // https://github.com/nathanhoad/fountain-js/pull/7
        const parts: string[] = match[3].split(/(\(.+\))(?:\n+)/);
        const isDualDialogue = !!match[2];

        if (isDualDialogue) {
          // Open the dual-dialogue wrapper right before the previous dialogue
          // block so both speakers end up inside it.
          let previousBlockStart = -1;
          for (let idx = tokens.length - 1; idx >= 0; idx--) {
            if (tokens[idx].type === 'dialogue_begin') {
              previousBlockStart = idx;
              break;
            }
          }
          // With no previous dialogue block, open the wrapper at the current
          // position rather than swallowing every token emitted so far.
          const insertAt = previousBlockStart === -1 ? tokens.length : previousBlockStart;
          tokens.splice(insertAt, 0, { type: 'dual_dialogue_begin' });
        }

        tokens.push({ type: 'dialogue_begin' });
        tokens.push({ type: 'character', text: match[1].trim() });
        for (const part of parts) {
          const text = part.trim();
          if (text.length > 0) {
            tokens.push({ type: sections.parenthetical.test(text) ? 'parenthetical' : 'dialogue', text: text });
          }
        }
        tokens.push({ type: 'dialogue_end' });
        if (isDualDialogue) {
          tokens.push({ type: 'dual_dialogue_end' });
        }
        continue;
      }

      // section
      match = line.match(sections.section);
      if (match) {
        tokens.push({ type: 'section', text: match[2], depth: match[1].length });
        continue;
      }

      // synopsis
      match = line.match(sections.synopsis);
      if (match) {
        tokens.push({ type: 'synopsis', text: match[1] });
        continue;
      }

      // notes
      match = line.match(sections.note);
      if (match) {
        tokens.push({ type: 'note', text: match[1] });
        continue;
      }

      // boneyard - a match starting with '/' opens the block, anything else closes it
      match = line.match(sections.boneyard);
      if (match) {
        tokens.push({ type: match[0][0] === '/' ? 'boneyard_begin' : 'boneyard_end' });
        continue;
      }

      // page breaks
      if (sections.page_break.test(line)) {
        tokens.push({ type: 'page_break' });
        continue;
      }

      // line breaks
      if (sections.line_break.test(line)) {
        tokens.push({ type: 'line_break' });
        continue;
      }

      // lyrics
      if (sections.lyrics.test(line)) {
        tokens.push({ type: 'lyrics', text: line });
        continue;
      }

      // anything unrecognised is action
      tokens.push({ type: 'action', text: line });
    }

    return tokens;
  }
}
/** Shared tokenizer instance exposed as the module's named export. */
export const tokenizer = new ScriptTokenizer();