swordjs
Version:
swordjs - access modules from crosswire.org/sword in JS
70 lines (67 loc) • 3.33 kB
text/coffeescript
bcv_parser::regexps.space = "[\\s\\xa0]"
bcv_parser::regexps.escaped_passage = ///
(?:^ | $PRE_PASSAGE_ALLOWED_CHARACTERS ) # Beginning of string or not in the middle of a word or immediately following another book. Only count a book if it's part of a sequence: `Matt5John3` is OK, but not `1Matt5John3`
(
# Start inverted book/chapter (cb)
(?:
(?: ch (?: apters? | a?pts?\.? | a?p?s?\.? )? \s*
\d+ \s* (?: [\u2013\u2014\-] | through | thru | to) \s* \d+ \s*
(?: from | of | in ) (?: \s+ the \s+ book \s+ of )?\s* )
| (?: ch (?: apters? | a?pts?\.? | a?p?s?\.? )? \s*
\d+ \s*
(?: from | of | in ) (?: \s+ the \s+ book \s+ of )?\s* )
| (?: \d+ (?: th | nd | st ) \s*
ch (?: apter | a?pt\.? | a?p?\.? )? \s* #no plurals here since it's a single chapter
(?: from | of | in ) (?: \s+ the \s+ book \s+ of )? \s* )
)? # End inverted book/chapter (cb)
\x1f(\d+)(?:/\d+)?\x1f #book
(?:
/\d+\x1f #special Psalm chapters
| $VALID_CHARACTERS
| $TITLE (?! [a-z] ) #could be followed by a number
| $PASSAGE_COMPONENTS
| $AB (?! \w ) #a-e allows 1:1a
| $ #or the end of the string
)+
)
///gi
# These are the only valid ways to end a potential passage match. The closing parenthesis allows for fully capturing parentheses surrounding translations (ESV**)**. The last one, `[\d\x1f]` needs not to be +; otherwise `Gen5ff` becomes `\x1f0\x1f5ff`, and `adjust_regexp_end` matches the `\x1f5` and incorrectly dangles the ff.
bcv_parser::regexps.match_end_split = ///
\d \W* $TITLE
| \d \W* $FF (?: [\s\xa0*]* \.)?
| \d [\s\xa0*]* $AB (?! \w )
| \x1e (?: [\s\xa0*]* [)\]\uff09] )? #ff09 is a full-width closing parenthesis
| [\d\x1f]
///gi
bcv_parser::regexps.control = /[\x1e\x1f]/g
bcv_parser::regexps.pre_book = "$PRE_BOOK_ALLOWED_CHARACTERS"
bcv_parser::regexps.first = "$FIRST\\.?#{bcv_parser::regexps.space}*"
bcv_parser::regexps.second = "$SECOND\\.?#{bcv_parser::regexps.space}*"
bcv_parser::regexps.third = "$THIRD\\.?#{bcv_parser::regexps.space}*"
bcv_parser::regexps.range_and = "(?:[&\u2013\u2014-]|$AND|$TO)"
bcv_parser::regexps.range_only = "(?:[\u2013\u2014-]|$TO)"
# Each book regexp should return two parenthesized objects: an optional preliminary character and the book itself.
bcv_parser::regexps.get_books = (include_apocrypha, case_sensitive) ->
books = [
osis: ["Ps"]
apocrypha: true
extra: "2"
regexp: ///(\b)( # Don't match a preceding \d like usual because we only want to match a valid OSIS, which will never have a preceding digit.
Ps151
# Always follwed by ".1"; the regular Psalms parser can handle `Ps151` on its own.
)(?=\.1)///g # Case-sensitive because we only want to match a valid OSIS.
,
$BOOK_REGEXPS
]
# Short-circuit the look if we know we want all the books.
return books if include_apocrypha is true and case_sensitive is "none"
# Filter out books in the Apocrypha if we don't want them. `Array.map` isn't supported below IE9.
out = []
for book in books
continue if include_apocrypha is false and book.apocrypha? and book.apocrypha is true
if case_sensitive is "books"
book.regexp = new RegExp book.regexp.source, "g"
out.push book
out
# Default to not using the Apocrypha
bcv_parser::regexps.books = bcv_parser::regexps.get_books false, "none"