everywherebible
Version:
A Bible app for slow connections
335 lines (289 loc) • 8.93 kB
JavaScript
// @flow
/** Returns the final element of `list`, or `undefined` when `list` is empty. */
const last = list => {
  const finalIndex = list.length - 1;
  return list[finalIndex];
};
// Event describing one HTML tag found by the tokenizer. The leading `+` marks
// each property as covariant (read-only) in Flow.
type TagEvent = {
// Discriminator distinguishing tag events from text events.
+type: 'tag',
// Element name as it appears in the markup.
+name: string,
// False for a closing tag (`</x>`), true otherwise.
+isStart: boolean,
// Attribute name -> attribute value.
+attributes: {[string]: string},
// The tag's markup text; this is what gets joined into the final output.
+value: string,
};
// Event describing a run of text between tags. The leading `+` marks each
// property as covariant (read-only) in Flow.
type TextEvent = {
// Discriminator distinguishing text events from tag events.
+type: 'text',
// The text content; this is what gets joined into the final output.
+value: string,
};
// Any event in the stream: either a tag or a run of text.
type ParseEvent = TagEvent | TextEvent;
// A parse event that additionally carries `stack`, an array of tag events
// (withTagStack fills it with a copy of its open-tag stack at emit time).
type ParseEventWithStack = ParseEvent & {+stack: Array<TagEvent>};
export function* tagsAndText(html: string): Generator<ParseEvent, void, void> {
const context: Array<any> = [{type: 'document', start: 0}];
let index = -1;
let metadata: any = {};
while (index++ < html.length) {
const ctx: any = last(context);
if (ctx.type === 'document') {
if (html[index] === '<') {
if (ctx.start !== index)
yield {
type: 'text',
value: html.substring(ctx.start, index),
};
context.push({type: 'tag', start: index});
metadata = {isStart: html[index + 1] !== '/', attributes: {}};
if (html[index + 1] === '/')
index++;
context.push({type: 'tagname', start: index + 1});
}
} else if (ctx.type === 'tag') {
if (html[index] === '>') {
if (ctx.type === 'tagname')
metadata.name = html.substring(metadata.isStart? 1 : 2, index);
yield Object.assign({
type: 'tag',
value: html.substring(ctx.start, index + 1),
}, metadata);
context.pop();
const lastCtx: any = last(context);
lastCtx.start = index + 1;
} else if (html[index] !== ' ' && html[index] !== '/') {
context.push({type: 'attribute', start: index});
context.push({type: 'attributeName', start: index});
}
} else if (ctx.type === 'tagname') {
if (html[index] === ' ' || html[index] === '>') {
metadata.name = html.substring(ctx.start, index);
context.pop();
if (html[index] === '>')
index--;
}
} else if (ctx.type === 'attributeName') {
if (html[index] === '=') {
const attributeName = html.substring(ctx.start, index);
context.pop();
const lastCtx: any = last(context);
lastCtx.name = attributeName;
if (html[index + 1] === '"') {
lastCtx.isQuoted = true;
index++;
}
lastCtx.start = index + 1;
}
} else if (ctx.type === 'attribute') {
const isAttribteEnd = ctx.isQuoted?
html[index] === '"' && html[index - 1] !== '\\' :
html[index] === ' ' || html[index] === '>';
if (isAttribteEnd) {
metadata.attributes[ctx.name] = html.substring(ctx.start, index);
context.pop();
if (!ctx.isQuoted && html[index] === '>')
index--;
}
}
}
const ctx: any = last(context);
if (ctx.type === 'document' && ctx.start < index - 1)
yield {type: 'text', value: html.substring(ctx.start, index)};
}
export function* stripTags(tagName: string, tagsAndText: Iterable<ParseEvent>):
Generator<ParseEvent, void, void> {
let inObjectTag = false;
for (let item of tagsAndText) {
if (item.type === 'tag' &&
item.isStart &&
item.name === tagName) {
inObjectTag = true;
} else if (item.type === 'tag' &&
!item.isStart &&
item.name === tagName) {
inObjectTag = false;
} else if (!inObjectTag) {
yield item;
}
}
}
export function* addSpansAroundVerses(tagsAndText: Iterable<ParseEvent>):
Generator<ParseEvent, void, void> {
let inVerse = false;
for (let item of tagsAndText) {
if (inVerse &&
item.type === 'tag' &&
item.name === 'p' &&
!item.isStart) {
yield {
type: 'tag',
isStart: false,
value: '</span>',
name: 'span',
attributes: {},
};
inVerse = false;
} else if (item.type === 'tag' &&
/^(chapter|verse)-num(\s.*)?$/.test(item.attributes.class)) {
if (inVerse) {
yield {
type: 'tag',
isStart: false,
value: '</span>',
name: 'span',
attributes: {},
};
}
inVerse = true;
const id = item.attributes.id.replace(/^v/, 'vt');
yield {
type: 'tag',
isStart: true,
value: `<span class="verse" id="${id}">`,
name: 'span',
attributes: {class: 'verse', id},
};
}
yield item;
}
}
export function* withTagStack(tagsAndText: Iterable<ParseEvent>):
Generator<ParseEventWithStack, void, void> {
const stack: Array<TagEvent> = [];
for (let item of tagsAndText) {
if (item.type === 'tag')
yield {
type: 'tag',
name: item.name,
isStart: item.isStart,
attributes: item.attributes,
value: item.value,
stack: stack.slice(0),
};
else
yield {...item, stack: stack.slice(0)};
if (item.type === 'tag')
if (item.isStart)
stack.push(item);
else
stack.pop();
}
}
export function* addDropCapsClassToFirstLetter(
tagsAndTextWithStack: Iterable<any>): Generator<ParseEvent, void, void> {
let sawFirstReferenceNumber = false;
let sawClosingSpanOrB = false;
let addedClass = false;
for (let item of tagsAndTextWithStack) {
if (addedClass) {
yield item;
continue;
}
const {type, name, isStart, value, attributes, stack} = item;
if (!sawFirstReferenceNumber &&
type === 'tag' &&
/^(chapter|verse)-num$/.test(attributes.class)) {
sawFirstReferenceNumber = true;
yield item;
continue;
}
if (sawFirstReferenceNumber &&
!sawClosingSpanOrB &&
type === 'tag' &&
(name === 'span' || name === 'b') &&
!isStart) {
sawClosingSpanOrB = true;
yield item;
continue;
}
if (sawClosingSpanOrB &&
(last(stack) &&
last(stack).attributes &&
(last(stack).attributes.class === 'verse' ||
last(stack).attributes.class === 'woc')) &&
type === 'text') {
const parts = value.match(/^([\s"]*)(&#?[\w]+;)*([\w“])(.*)/);
if (!parts) {
yield item;
continue;
}
const [, leadingWhitespace, leadingEntities, firstLetter, rest] = parts;
if (leadingWhitespace)
yield {type: 'text', value: leadingWhitespace};
yield {
type: 'tag',
isStart: true,
value: `<span class="first-letter">`,
name: 'span',
attributes: {class: 'first-letter'},
}
if (leadingEntities)
yield {type: 'text', value: leadingEntities};
else
yield {type: 'text', value: firstLetter};
yield {
type: 'tag',
isStart: false,
value: '</span>',
name: 'span',
attributes: {},
};
if (rest)
if (leadingEntities)
yield {type: 'text', value: firstLetter + rest};
else
yield {type: 'text', value: rest};
addedClass = true;
continue;
}
yield item;
}
}
export function* withSurroundingEvents(tagsAndText: Iterable<any>):
Generator<any, void, void> {
const window = [null];
for (let item of tagsAndText) {
if (window.length === 3)
yield Object.assign({before: window[0], after: window[2]}, window[1]);
window.push(item);
if (window.length > 3)
window.shift();
}
yield Object.assign({before: window[0], after: window[2]}, window[1]);
yield Object.assign({before: window[1], after: null}, window[2]);
}
export function* stripCopyright(
tagsAndTextWithSurroundingEvents: Iterable<any>):
Generator<any, void, void> {
let inTag = false;
for (let item of tagsAndTextWithSurroundingEvents) {
if (item.type === 'text' &&
item.value[item.value.length - 1] === '(' &&
item.after.attributes &&
item.after.attributes.class === 'copyright') {
yield {
type: 'text',
value: item.value.substring(0, item.value.length - 1),
};
} else if (item.type === 'tag' &&
item.attributes.class === 'copyright') {
// Don't yield this item
inTag = true;
} else if (inTag) {
if (item.type === 'tag' && !item.isStart)
inTag = false;
} else if (item.type === 'text' &&
item.value[0] === ')' &&
last(item.before.stack).attributes &&
last(item.before.stack).attributes.class === 'copyright') {
const value = item.value.substring(1);
if (value)
yield {type: 'text', value};
} else {
yield item;
}
}
}
export default (text: string): string => Array.from(
stripCopyright(
withSurroundingEvents(
addDropCapsClassToFirstLetter(
withTagStack(
addSpansAroundVerses(
stripTags('script',
stripTags('object',
tagsAndText(text)))))))))
.map(i => i.value)
.join('');