/*
 * bible-reference-formatter
 * Version:
 * Utility that converts Bible references from OSIS to human-readable formats and Paratext.
 * 667 lines (630 loc) • 25.4 kB
 * JavaScript
 */
;
// Build a brand-new object holding the default formatting options. Every call creates new object and array instances, so mutating one result never affects a later call.
function getDefaults() {
  // Books that are treated as having a single chapter.
  const singleChapterBooks = ["Obad", "Phlm", "2John", "3John", "Jude", "PrAzar", "SgThree", "Sus", "Bel", "EpJer", "PrMan", "Ps151", "AddPs"];
  const defaults = {
    // Templates for book, chapter, and verse parts (`$b`, `$c`, and `$v` are substituted when formatting).
    "b": "$b",
    "c": "$c",
    "v": "$v",
    // Joiners: range, sequence, generic separator, and chapter-to-verse separator.
    "-": "-",
    ",": ", ",
    ".": " ",
    "c.v": ":",
    // Replacement words for `$chapters` and `$verses`: [singular, plural].
    "$chapters": ["ch", "chs"],
    "$verses": ["v", "vv"],
    // `bv`, `b`, or `bcv`.
    "singleChapterFormat": "bv",
    "singleChapterBooks": singleChapterBooks,
    // `bc` or `b`.
    "Ps151Format": "bc",
    "maxPs": 150
  };
  return defaults;
}
function OsisToReadable() {
// Some subset of "Matt.1.2-Mark.3.4"
const osisFormat = /^[1-5A-Za-z]{2,}(?:\.\d{1,3}(?:\.\d{1,3})?)?(?:-[1-5A-Za-z]{2,}(?:\.\d{1,3}(?:\.\d{1,3})?)?)?$/;
let books = {};
let options = getDefaults();
// Turn a comma-separated OSIS string into human-readable text, optionally relative to a starting OSIS context. The goal is the shortest understandable string: `Matt.1-Matt.2` might become `Matthew 1-2`.
// Throws a string if `osisString` is not a string; any exception from normalizing or tokenizing an individual OSIS propagates.
function toReadable(osisString, osisContext) {
  if (typeof osisString !== "string") {
    throw "osisToReadable: first argument must be a string.";
  }
  // One mutable context object is shared by every reference in the sequence.
  const context = setContext(osisContext);
  const tokens = [];
  osisString.split(",").forEach(function (rawOsis, index) {
    // A separator token goes between references, never before the first or after the last.
    if (index > 0) {
      tokens.push({
        osis: ",",
        type: ",",
        parts: [],
        laters: []
      });
    }
    // `normalizeOsis()` throws if the reference is malformed.
    tokens.push(osisToToken(normalizeOsis(rawOsis), context));
  });
  return tokensToReadable(tokens);
}
// Render a sequence of tokens as one string.
function tokensToReadable(tokens) {
  // Annotation adds look-ahead data (such as `laters` and `bookSequence`) that formatting relies on.
  const queue = annotateTokens(tokens);
  const pieces = [];
  while (queue.length !== 0) {
    const current = queue.shift();
    // A token that starts a run of whole books gets a chance to collapse the run; `formatBookSequence()` may consume further items from `queue`.
    const piece = current.bookSequence === undefined ? tokenToReadable(current) : formatBookSequence(current, queue);
    pieces.push(piece);
  }
  return pieces.join("");
}
// Given a run of whole books such as `1John,2John`, look for a combined name in `books` (for example, `1 and 2 John`). The longest run is tried first, dropping the last book after each miss. `token.bookSequence` is shortened in place as candidates are discarded.
function formatBookSequence(token, tokens) {
  const candidates = token.bookSequence;
  // The caller only sends tokens that have a `bookSequence`; the guard keeps this function safe on its own.
  if (candidates !== undefined) {
    // The run includes the current token, so a length of 1 means there is nothing left to combine.
    for (; candidates.length > 1; candidates.pop()) {
      const names = books[candidates.join(",")];
      if (names !== undefined && typeof names[0] === "string") {
        // Drop the upcoming tokens that the combined name already covers.
        removeBookSequenceTokens(candidates.length, tokens);
        return names[0];
      }
    }
  }
  // No combined name exists, so format the token on its own.
  return tokenToReadable(token);
}
// Discard the upcoming tokens that belong to a book sequence that has already been rendered. For `["1John", "2John", "3John"]`, `1John` is the current token and is already off the queue, so `3 - 1` books remain, each preceded by a separator token.
function removeBookSequenceTokens(numberToRemove, tokens) {
  for (let remaining = numberToRemove; remaining > 1; remaining--) {
    // First the `,` separator token.
    tokens.shift();
    // Then the `b` token it introduces.
    tokens.shift();
  }
}
// Render one token as a string.
function tokenToReadable(token) {
  // A book-only range may have its own name in `books` (`1John-2John` might become `1-2 John`).
  if (typeof token.bookRange === "string") {
    const rangeNames = books[token.bookRange];
    if (rangeNames !== undefined && typeof rangeNames[0] === "string") {
      return rangeNames[0];
    }
  }
  // Otherwise format every part and glue the results together.
  return token.parts.map(function (part) {
    return formatPart(token, part);
  }).join("");
}
// Render one entry of `token.parts`.
function formatPart(token, part) {
  const kind = part.type;
  // Anything that is not a book, chapter, or verse is a joiner (`.`, `-`, `,`); pick the most specific option key that exists for its `subType`.
  if (kind !== "b" && kind !== "c" && kind !== "v") {
    return formatVariable(getBestOption(kind, part.subType), part, token);
  }
  // Chapters and verses may have a `subType` (for example `^v` when a verse starts the string); if `options` has an entry for it, its formatted value goes in front.
  let lead = "";
  if (kind !== "b" && part.subType !== "" && options[part.subType] !== undefined) {
    lead = formatVariable(part.subType, part, token);
  }
  // Books never receive a prefix.
  return lead + formatVariable(kind, part, token);
}
// Find the most specific key in `options` for a joiner. `option` looks like `bc-bc` (text, `splitChar`, text). Candidates are tried from most to least specific: for `bc-bc` the order is `bc-bc`, `bc-b`, `c-bc`, `c-b`, `-bc`, `-b`. Only string-valued options count. If nothing matches, the bare `splitChar` is returned.
function getBestOption(splitChar, option) {
  const [before, after] = option.split(splitChar);
  // Outer loop: drop leading characters of the left side one at a time (down to nothing).
  for (let start = 0; start <= before.length; start++) {
    const head = before.slice(start);
    // Inner loop: drop trailing characters of the right side, keeping at least one, so characters nearest `splitChar` survive longest.
    for (let end = after.length; end > 0; end--) {
      const candidate = `${ head }${ splitChar }${ after.slice(0, end) }`;
      if (typeof options[candidate] === "string") {
        return candidate;
      }
    }
  }
  return splitChar;
}
// Expand the `$`-variables in `options[optionsKey]` for the given part: `$chapters`, `$verses`, `$b`, `$c`, and `$v`. A template with no `$` is returned unchanged.
function formatVariable(optionsKey, part, token) {
  const template = options[optionsKey];
  if (template.indexOf("$") === -1) {
    return template;
  }
  // `$chapters`: a per-book entry in `books` (key `OSIS.$chapters`) wins over `options.$chapters`. The plural is used only when it exists and more than one chapter is involved.
  const chapterWord = function () {
    const override = books[`${ part.b }.$chapters`];
    const words = override === undefined ? options.$chapters : override;
    const usePlural = words.length > 1 && multipleChaptersPosition(part, token) > 0;
    return usePlural ? words[1] : words[0];
  };
  // `$verses`: always comes from `options`; the plural is used when several verses are involved.
  const verseWord = function () {
    const usePlural = options.$verses.length > 1 && hasMultipleVerses(part, token) === true;
    return usePlural ? options.$verses[1] : options.$verses[0];
  };
  // `$b`: `books[osis]` holds [0] singular, [1] plural, [2] book on its own. Use the preferred variant, falling back to the last one available.
  const bookName = function () {
    const names = books[part.b];
    const last = names.length - 1;
    if (last === 0) {
      return names[0];
    }
    const wanted = multipleChaptersPosition(part, token);
    return names[wanted > last ? last : wanted];
  };
  // Missing chapter/verse values become empty strings rather than the text `undefined`.
  const chapterText = typeof part.c === "string" ? part.c : "";
  const verseText = typeof part.v === "string" ? part.v : "";
  // The longer names are replaced before `$c` and `$v` so that those shorter patterns cannot match inside them. The callbacks only run when their pattern is present.
  return template
    .replace(/\$chapters/g, chapterWord)
    .replace(/\$verses/g, verseWord)
    .replace(/\$b/g, bookName)
    .replace(/\$c/g, chapterText)
    .replace(/\$v/g, verseText);
}
// Decide which variant in `books[osis]` suits this part: 0 = singular, 1 = plural (several chapters are involved), 2 = the book on its own. For example, this allows `chapters 1-2` rather than `chapter 1-2`.
function multipleChaptersPosition(part, token) {
  // A whole multi-chapter book: prefer the book-on-its-own variant.
  if (token.type === "b" && !isSingleChapterBook(part.b)) {
    return 2;
  }
  // Build a string of every type that follows: the current part (if it is a chapter), the rest of this token's parts, and then later tokens up to the next book.
  const ownType = part.type === "c" ? "c" : "";
  const upcoming = `${ ownType }${ part.laters.join("") },${ token.laters.join(",") }`;
  // A range that ends in a chapter always spans several chapters.
  if (upcoming.indexOf("-c") >= 0) {
    return 1;
  }
  // Unusual case: a Psalms range that runs into another book (`Ps.149-Prov.1` or `Ps.150-Prov.1`). It is plural only if the starting chapter is below `options.maxPs`.
  if (part.b === "Ps" && upcoming.indexOf("-b") >= 0) {
    // Only the first chapter part matters.
    const firstChapter = token.parts.find(function (candidate) {
      return candidate.type === "c";
    });
    if (firstChapter !== undefined && parseInt(firstChapter.c, 10) < options.maxPs) {
      return 1;
    }
    return 0;
  }
  // Two or more `c` characters split the string into three or more pieces.
  return upcoming.split("c").length > 2 ? 1 : 0;
}
// Report whether more than one verse is involved in the current chapter, so that `verse 1` and `verses 1-2` can be worded differently.
function hasMultipleVerses(part, token) {
  // Types that follow: the current part (if it is a verse), the rest of this token's parts, and then later tokens.
  const ownType = part.type === "v" ? "v" : "";
  const upcoming = ownType + part.laters.join("") + "," + token.laters.join(",");
  // Everything from the next chapter onward is irrelevant.
  const untilNextChapter = upcoming.split("c")[0];
  // Splitting on `v` yields one more piece than there are verses.
  const verseCount = untilNextChapter.split("v").length - 1;
  // Any range, or at least two verses, means multiple verses.
  return untilNextChapter.indexOf("-") >= 0 || verseCount > 1;
}
// Attach the look-ahead data that formatting needs to every token. The input array is emptied as it is processed; the annotated tokens come back in a new array, in the same order.
function annotateTokens(tokens) {
  const annotated = [];
  // Only separator tokens read `previous`, and a separator is never first, so the initial value is never actually used.
  let previous = tokens[0];
  for (let position = 0; tokens.length > 0; position++) {
    const current = tokens.shift();
    if (current.type === ",") {
      annotateSequenceToken(current, previous, tokens);
    } else {
      annotateToken(current, tokens, position);
    }
    // `tokens` now holds only what comes after `current`.
    annotateTokenLaters(current, tokens);
    annotateTokenParts(current.parts);
    annotated.push(current);
    previous = current;
  }
  return annotated;
}
// Mark the first part of the very first token when it is not a book. That only happens when a start context let the book be omitted, so the string opens with a chapter or verse.
function annotateToken(token, tokens, i) {
  if (i !== 0) {
    return;
  }
  const firstPart = token.parts[0];
  if (firstPart.type === "b") {
    return;
  }
  // The types before any range: `c`, `v`, or `cv`.
  const leadingTypes = token.type.split("-")[0];
  // `b1` flags a single-chapter book in case it should be handled differently.
  const marker = isSingleChapterBook(firstPart.b) ? "b1" : "";
  // Chapters and verses normally have an empty `subType`; this one becomes, for example, `^v` or `b1^v`.
  firstPart.subType = `${ marker }^${ leadingTypes }`;
}
// Give a separator (`,`) token its single part, describing what sits on either side of it.
function annotateSequenceToken(token, prevToken, tokens) {
  // The book/chapter/verse in effect just before the separator.
  const { b, c, v } = prevToken.parts[prevToken.parts.length - 1];
  // When a neighbor is a range, only its side nearest the separator matters: `bcv-cv,bc-v` yields `cv,bc`.
  const typeBefore = prevToken.type.split("-").pop();
  const typeAfter = tokens[0].type.split("-")[0];
  token.parts = [{
    type: ",",
    // Lets different neighbor types be joined differently.
    subType: `${ typeBefore },${ typeAfter }`,
    b,
    c,
    v,
    laters: []
  }];
}
// Record in `token.laters` the types of the tokens that follow, up to the next book. If this token is a whole book that starts a run of whole books, also record that run in `token.bookSequence`.
function annotateTokenLaters(token, tokens) {
  const startsBookRun = token.type === "b";
  const bookRun = startsBookRun ? [token.parts[0].b] : [];
  // While in a run of whole books, scanning continues past the point where `laters` stops growing.
  let inBookRun = startsBookRun;
  let reachedNextBook = false;
  for (const upcoming of tokens) {
    const upcomingType = upcoming.type;
    // Skipping separators here avoids leading and trailing `,` entries.
    if (upcomingType === ",") {
      continue;
    }
    if (inBookRun) {
      if (upcomingType === "b") {
        bookRun.push(upcoming.parts[0].b);
      } else {
        inBookRun = false;
      }
    }
    // `laters` is complete; this pass is only still running to extend the book run.
    if (reachedNextBook) {
      continue;
    }
    const bookAt = upcomingType.indexOf("b");
    // No book in this type, so all of it belongs to the current book.
    if (bookAt < 0) {
      token.laters.push(upcomingType);
      continue;
    }
    // Keep only what precedes the book, and only if there is something there.
    if (bookAt > 0) {
      token.laters.push(upcomingType.slice(0, bookAt));
    }
    if (!inBookRun) {
      break;
    }
    reachedNextBook = true;
  }
  // A run of one is just the current token.
  if (bookRun.length > 1) {
    token.bookSequence = bookRun;
  }
}
// Give every part a `laters` array listing the types of the parts that follow it within the same token, leaving out spacing parts (`.`).
function annotateTokenParts(parts) {
  const types = parts.map(function (part) {
    return part.type;
  });
  parts.forEach(function (part, index) {
    // `slice` starts after the current part and produces a separate array for each part.
    part.laters = types.slice(index + 1).filter(function (type) {
      return type !== ".";
    });
  });
}
// Build one token from one OSIS reference, which may be a single reference or a `start-end` range.
function osisToToken(osis, context) {
  const pieces = osis.split("-");
  const first = osisWithContext(pieces[0], context);
  // Not a range: the start token is already complete.
  if (pieces.length < 2) {
    return first;
  }
  // The end is tokenized after the start, so it sees the context the start left behind.
  const second = osisWithContext(pieces[1], context);
  const joinedType = `${ first.type }-${ second.type }`;
  // The range part is built from the start token as it stands, then appended to the start token's own `parts` array.
  first.parts.push(makeRangePart(first, second));
  const rangeToken = {
    osis,
    type: joinedType,
    // Start parts, the range part, then the end parts.
    parts: first.parts.concat(second.parts),
    laters: []
  };
  // Book-only ranges may be rendered specially (`1John-2John` might be `1-2 John`).
  if (joinedType === "b-b") {
    rangeToken.bookRange = first.parts[0].b + "-" + second.parts[0].b;
  }
  return rangeToken;
}
// Build the `-` part that joins the start and end of a range.
function makeRangePart(startToken, endToken) {
  // The range inherits its position from the last part of the start token: the current context, not the future one.
  const lastStartPart = startToken.parts[startToken.parts.length - 1];
  const rangePart = {
    type: "-",
    // Records what kinds of references are being joined.
    subType: `${ startToken.type }-${ endToken.type }`,
    b: lastStartPart.b,
    laters: []
  };
  // Chapter and verse are copied only when the last start part has them.
  ["c", "v"].forEach(function (key) {
    if (lastStartPart[key] !== undefined) {
      rangePart[key] = lastStartPart[key];
    }
  });
  return rangePart;
}
// Tokenize one non-range OSIS reference relative to `context`, leaving out whatever the context already implies: with a context of `Matt.1`, the reference `Matt.2` needs no book. `context` is updated to reflect the reference.
// Throws a string if the book is not present in `books`.
function osisWithContext(osis, context) {
  // `c` and `v` may be undefined; `v` only exists when `c` does.
  const [b, c, v] = osis.split(".");
  if (books[b] === undefined) {
    throw `Unknown OSIS book: "${ b }" (${ osis })"`;
  }
  const token = {
    osis,
    type: "",
    parts: [],
    laters: []
  };
  const isSingleChapter = isSingleChapterBook(b);
  // A verse in a single-chapter book drops its chapter when the format is `bv` or `b`: `Phlm.1.1` becomes `Philemon 1`.
  const formatDropsChapter = options.singleChapterFormat === "b" || options.singleChapterFormat === "bv";
  const omitChapter = isSingleChapter && typeof v === "string" && formatDropsChapter;
  // Each helper appends to `token` in place and returns `false` when the reference ends at that level.
  const hasChapter = osisBookWithContext(b, c, v, isSingleChapter, omitChapter, context, token) !== false;
  if (!hasChapter) {
    return token;
  }
  const hasVerse = osisChapterWithContext(b, c, v, omitChapter, context, token) !== false;
  if (hasVerse) {
    token.type += "v";
    token.parts.push({
      type: "v",
      subType: "",
      b,
      c,
      v,
      laters: []
    });
    context.v = parseInt(v, 10);
  }
  return token;
}
// Handle the book level of a reference. Appends a book part (and, when a chapter follows, a `.` joiner) to `out` if the book must be shown, and resets `context` when it does. Returns `true` when there is a chapter still to process and `false` when the reference ends at the book.
function osisBookWithContext(b, c, v, isSingleChapter, omitChapter, context, out) {
  // Mutate the existing context object so that the caller's reference stays valid.
  const startBook = function () {
    context.b = b;
    context.c = 0;
    context.v = 0;
  };
  const bookPart = function () {
    return {
      type: "b",
      subType: "",
      b,
      laters: []
    };
  };
  // Unusual: with the `b` format, a verse-less reference to a single-chapter book (as in `Matt-Phlm.1`) is treated as the whole book.
  if (v === undefined && isSingleChapter === true && options.singleChapterFormat === "b") {
    startBook();
    out.type = "b";
    out.parts.push(bookPart());
    return false;
  }
  // Same book as the context and a chapter is present: the book can be left out.
  if (b === context.b && c !== undefined) {
    return true;
  }
  // Either a new book (`Gen.1,Exod`) or a bare book (`Gen.1,Gen`).
  startBook();
  out.parts.push(bookPart());
  out.type = "b";
  if (c === undefined) {
    return false;
  }
  // Choose the joiner that goes between the book and what follows it.
  let joinerSubType;
  if (omitChapter === true) {
    // The chapter is omitted, so the book is followed directly by a verse.
    joinerSubType = "b.v";
  } else if (isSingleChapter === true) {
    // `b1` means a single-chapter book.
    joinerSubType = "b1.c";
  } else {
    joinerSubType = "b.c";
  }
  out.parts.push({
    type: ".",
    subType: joinerSubType,
    b,
    laters: []
  });
  return true;
}
// Handle the chapter level of a reference whose book already matches the context. Appends a chapter part (and, when a verse follows, a `c.v` joiner) to `out` if the chapter must be shown. Returns `true` when there is a verse still to process and `false` when the reference ends at the chapter.
function osisChapterWithContext(b, c, v, omitChapter, context, out) {
  const chapterNumber = parseInt(c, 10);
  // Same chapter as the context and a verse is present: the chapter can be left out.
  if (chapterNumber === context.c && v !== undefined) {
    return true;
  }
  // The verse is reset here because the book-level step only resets it when the book changes.
  context.c = chapterNumber;
  context.v = 0;
  // In a single-chapter book the chapter may be hidden; that only happens when a verse exists, so there is always more to process.
  if (omitChapter !== false) {
    return true;
  }
  out.parts.push({
    type: "c",
    subType: "",
    b,
    c,
    laters: []
  });
  out.type += "c";
  if (v === undefined) {
    return false;
  }
  // A verse follows, so add the chapter-verse joiner.
  out.parts.push({
    type: ".",
    subType: "c.v",
    b,
    laters: []
  });
  return true;
}
// Report whether `b` is listed in `options.singleChapterBooks`. Nothing else about the book is consulted.
function isSingleChapterBook(b) {
  return options.singleChapterBooks.some(function (listed) {
    return listed === b;
  });
}
// Check that `osis` has the shape of a single OSIS reference or range, throwing a string if it does not, and apply the Ps151 rewrite when that option is on.
function normalizeOsis(osis) {
  if (!osisFormat.test(osis)) {
    throw `Invalid osis format: '${ osis }'`;
  }
  // With the `bc` format, Ps151 is treated as just another Psalm: the book (`Ps151` or `AddPs`) and its chapter, if any, collapse to `Ps.151`, so `Ps151.1.2` becomes `Ps.151.2`.
  return options.Ps151Format === "bc" ? osis.replace(/(?:Ps151|AddPs)(?:\.\d+\b)?/g, "Ps.151") : osis;
}
// Build the `{b, c, v}` context object from an optional OSIS string. With no string, the context is empty (`""`, 0, 0). For a range, the end of the range is used.
// Throws a string if the OSIS is malformed (sequences are rejected by `normalizeOsis()`) or names a book that is not present in `books`.
function setContext(osis) {
  // Exactly these three keys are ever present.
  const context = {
    b: "",
    c: 0,
    v: 0
  };
  if (osis === null || osis === undefined) {
    return context;
  }
  const normalized = normalizeOsis(osis);
  // A second piece after splitting on `-` means this is a range; only its end matters.
  const rangeEnd = normalized.split("-")[1];
  if (rangeEnd !== undefined) {
    return setContext(rangeEnd);
  }
  const [b, c, v] = normalized.split(".");
  if (books[b] === undefined) {
    throw `Unknown OSIS book provided for "context": "${ b }" (${ normalized })"`;
  }
  context.b = b;
  // The chapter and verse are optional, and a verse only exists when a chapter does.
  if (typeof c === "string") {
    context.c = parseInt(c, 10);
    if (typeof v === "string") {
      context.v = parseInt(v, 10);
    }
  }
  return context;
}
// Replace the active options with the defaults plus any user-supplied overrides. Each call is independent: overrides from earlier calls are discarded, so `userOptions` must contain every key that should differ from the defaults. Calling it with no argument (or `null`) simply restores the defaults.
// A key that exists in the defaults must keep the default's type, where arrays, `null`, and plain objects are told apart; otherwise a string is thrown. Keys that are not in the defaults are accepted as-is.
// If validation fails, none of the overrides from that call are applied and `options` holds the plain defaults.
function setOptions(userOptions) {
  // Every call starts from the defaults.
  options = getDefaults();
  // Nothing to merge.
  if (userOptions == null) {
    return;
  }
  // `typeof` reports "object" for arrays, plain objects, and `null` alike. Naming those cases explicitly prevents, for example, `null` from being accepted for `singleChapterBooks` and then failing much later during formatting.
  const describeType = function (value) {
    if (Array.isArray(value)) {
      return "array";
    }
    if (value === null) {
      return "null";
    }
    return typeof value;
  };
  // Validate into a scratch copy so that a bad value cannot leave `options` half-updated.
  const merged = getDefaults();
  const userKeys = Object.keys(userOptions);
  for (let i = 0, max = userKeys.length; i < max; i++) {
    const key = userKeys[i];
    const defaultType = describeType(options[key]);
    // Unknown keys (no default) are allowed through; known keys must match the default's type.
    if (defaultType !== "undefined" && describeType(userOptions[key]) !== defaultType) {
      throw `Invalid type for options["${ key }"]. It should be "${ defaultType }".`;
    }
    merged[key] = userOptions[key];
  }
  options = merged;
}
// Replace the set of known books. `userBooks` maps each key to an array of one to three strings; for an OSIS book key (e.g., `Matt`) formatting reads them as [0] the name to use, [1] the name for plural cases, and [2] the name for the book on its own. For example: `{"Ps": ["Psalm", "Psalms"]}`.
// Other keys are looked up too: `OSIS.$chapters` (e.g., `Ps.$chapters`) supplies the [singular, plural] words that replace `$chapters` for that book, and a comma-joined sequence (`1John,2John`) or a book range (`1John-2John`) supplies a combined name in position [0].
// Throws a string if a value is not an array or has fewer than one or more than three items. Entries processed before the failing key remain set.
function setBooks(userBooks) {
  books = {};
  for (const key of Object.keys(userBooks)) {
    const names = userBooks[key];
    if (!Array.isArray(names)) {
      throw `books["${ key }"] should be an array: ${ Object.prototype.toString.call(names) }.`;
    }
    const count = names.length;
    if (count < 1 || count > 3) {
      throw `books["${ key }"] should have exactly 1, 2, or 3 items. `;
    }
    // The caller's array is stored as-is, not copied.
    books[key] = names;
  }
}
return {
toReadable,
setOptions,
setBooks
};
}
/* global module */
module.exports = OsisToReadable;