hyperpubee
Version:
Self-publishing over the decentralised internet
420 lines (336 loc) • 11.7 kB
JavaScript
const PubeeBuilder = require('./pubee-builder')
const {
LineCreator,
PoemCreator,
WorkCreator,
TitleCreator,
ContentCreator,
VerseCreator,
EmbeddingCreator,
ChapterCreator,
ChapterTitleCreator,
ParagraphCreator,
ProseCreator,
CollectionCreator,
LinkCreator
} = require('./part-creation')
const {
HYPERHASH_REGEX_STR,
KEYNAME_REGEX_STR,
HYPERHASH_REGEX,
METADATA_AUTHOR
} = require('../core/constants')
const { Embedding } = require('../core/embedding')
const { NotAnIntegerError, EmbeddingValidationError } = require('../exceptions')
const { getIndexedKeyFromComponents } = require('../core/utils')
const { Link } = require('../core/link')
// Matches a single line break in any of the '\r\n', '\r' or '\n' forms
const NEWLINE_REGEX = /\r\n|\r|\n/g
// A patch is written as '[[' ... ']]' directly after the location of an embedding.
// Note that the patch is matched very loosely (lazily, up to the first ']]'):
// any errors there are expected to be detected when validating the embedding
// Note: will fail on patches containing the string ']]'
const PATCH_REGEX_STR = String.raw`\[\[.*?\]\]`
// A location is one or more 'keyname/' segments followed by digits
// DEVNOTE: '(?:' is a non-capturing group
// (not captured since we don't care about the sub-parts of a location)
const LOCATION_REGEX_STR = String.raw`(?:${KEYNAME_REGEX_STR}/)+\d+`
// An embedding has the form '<hash/location>' or '<hash/location[[patch]]>'
// Capture groups, in order: 1) the hash 2) the location 3) the optional patch
const EMBEDDING_REGEX = new RegExp(
String.raw`<(${HYPERHASH_REGEX_STR})/(${LOCATION_REGEX_STR})(${PATCH_REGEX_STR})?>`,
'g'
)
// A link is a complete string of the form '[hash]' or '[hash/location]'
// (anchored at both ends); exposes the named groups 'hash' and 'location'
const LINK_REGEX = new RegExp(
String.raw`^\[(?<hash>${HYPERHASH_REGEX_STR})(/(?<location>${LOCATION_REGEX_STR}))?\]$`
)
// Matches the newline directly before a '##' marker. The '##' itself sits in a
// lookahead, so it is not consumed and stays part of the chapter when splitting
const CHAPTER_START_REGEX = /\n(?=##)/
// The kinds of work this parser distinguishes.
// The values are unique symbols, so they can only be compared by identity;
// the object is frozen so that no members can be added, changed or removed.
const WorkTypeEnum = Object.freeze({
POETRY: Symbol('HyperPubee poetry'),
PROSE: Symbol('HyperPubee prose'),
COLLECTION: Symbol('HyperPubee collection')
})
/**
 * Normalises raw text before parsing:
 * - leading and trailing whitespace of the whole text is removed
 * - every kind of line break becomes '\n'
 * - lines containing only whitespace become empty lines
 * Whitespace inside non-blank lines is left untouched.
 *
 * @param {string} text - the raw text
 * @returns {string} the cleaned text
 */
function trimAndCleanText (text) {
  // DEVNOTE: String.prototype.trim accepts no arguments,
  // it always strips all surrounding whitespace (line breaks included)
  return text
    .trim()
    .split(NEWLINE_REGEX)
    .map((line) => (line.trim() === '' ? '' : line))
    .join('\n')
}
/**
 * Extracts the optional title of a work.
 *
 * A title is a first line starting with exactly one '#', followed by
 * exactly one blank line. A text starting with '##' has no title: '##' marks
 * the start of a chapter, so such text is returned unchanged.
 *
 * Note: Assumes the text is clean (as after running trimAndCleanText)
 *
 * @param {string} workTxt - the cleaned text of the work
 * @param {Array} creators - a TitleCreator is appended to this array
 * when a title is found
 * @returns {string} the text which remains after the title and its blank line
 * (the full text if there is no title)
 * @throws {Error} when the title is not followed by exactly one blank line
 */
function parseTitle (workTxt, creators) {
  const hasTitle = workTxt[0] === '#' && workTxt[1] !== '#'
  if (!hasTitle) {
    return workTxt
  }

  const firstNewlineI = workTxt.indexOf('\n')
  // Without any newline the title line spans the entire text
  // (using the -1 of indexOf directly would cut off its last character)
  const endOfFirstLineI = firstNewlineI === -1 ? workTxt.length : firstNewlineI
  const title = workTxt.slice(1, endOfFirstLineI).trim()

  // Throws an error when no or more than 1 blank line is found
  // after the perceived title, to avoid unexpected behaviour
  const hasBlankLine = workTxt[endOfFirstLineI + 1] === '\n'
  const hasSecondBlankLine = workTxt[endOfFirstLineI + 2] === '\n'
  if (!hasBlankLine || hasSecondBlankLine) {
    throw new Error(
      `Expected exactly one blank line after the title (extracted title: '${title}')`
    )
  }

  creators.push(new TitleCreator([new ContentCreator(title)]))
  return workTxt.slice(endOfFirstLineI + 2)
}
/**
 * Converts a '/'-separated location into the key returned by
 * getIndexedKeyFromComponents.
 *
 * @param {string} location - the location, as written in the text
 * @returns the result of getIndexedKeyFromComponents for the location's parts
 * @throws {Error} when getIndexedKeyFromComponents reports a NotAnIntegerError;
 * any other error is rethrown unchanged
 */
function getLocationInHyperpubeeFormat (location) {
  const components = location.split('/')
  try {
    return getIndexedKeyFromComponents(components)
  } catch (err) {
    // Only the not-an-integer case is translated to a friendlier message
    if (!(err instanceof NotAnIntegerError)) {
      throw err
    }
    throw new Error(
      `Invalid location: expected an integer at the end (${location})`
    )
  }
}
/**
 * Converts the raw text of a patch into its parsed form.
 *
 * @param {string|undefined} rawPatch - the patch as written in the text
 * (JSON), or undefined when there is no patch
 * @returns the JSON-parsed patch, or an empty array when rawPatch is undefined
 * @throws {EmbeddingValidationError} when the raw patch is not valid JSON
 */
function getPatchInHyperpubeeFormat (rawPatch) {
  if (rawPatch === undefined) {
    // No patch specified
    return []
  }

  try {
    return JSON.parse(rawPatch)
  } catch (parseError) {
    throw new EmbeddingValidationError(`Patch has invalid structure: '${rawPatch}'`)
  }
}
/**
 * Parses a base text unit (a line in poetry, a paragraph in prose)
 * into creators for its plain content and its embeddings.
 *
 * String.prototype.split splices the capture groups of the separator into
 * its result. EMBEDDING_REGEX is relied upon to have exactly 3 capture
 * groups, so the split unit always has the layout
 *   [text, hash, location, patch, text, hash, location, patch, ..., text]
 * where a patch entry is undefined if that embedding has no patch.
 * The parts are therefore told apart by their position, never by their
 * content: plain text which happens to look like a hash cannot be mistaken
 * for an embedding.
 *
 * @param {string} unit - the text of the line or paragraph
 * @returns {Array} ContentCreator and EmbeddingCreator instances, in order of
 * occurrence (empty stretches of plain text are skipped)
 * @throws errors raised by getLocationInHyperpubeeFormat,
 * getPatchInHyperpubeeFormat or the Embedding constructor are not caught here
 */
function parseTextUnit (unit) {
  // The hash, the location within the hash, and the patch
  const NR_GROUPS_PER_EMBEDDING = 3
  const unitSplitByEmbeddings = unit.split(EMBEDDING_REGEX)
  const subCreators = []

  for (
    let textI = 0;
    textI < unitSplitByEmbeddings.length;
    textI += NR_GROUPS_PER_EMBEDDING + 1
  ) {
    // Normal (non-embedding) content
    const text = unitSplitByEmbeddings[textI]
    if (text.length > 0) {
      subCreators.push(new ContentCreator(text))
    }

    // Every text part except the last one is followed by an embedding
    const isFollowedByEmbedding =
      textI + NR_GROUPS_PER_EMBEDDING < unitSplitByEmbeddings.length
    if (isFollowedByEmbedding) {
      const referencedHash = unitSplitByEmbeddings[textI + 1]
      const rawLoc = unitSplitByEmbeddings[textI + 2]
      const rawPatch = unitSplitByEmbeddings[textI + 3] // Can be undefined

      const referencedLocation = getLocationInHyperpubeeFormat(rawLoc)
      const patch = getPatchInHyperpubeeFormat(rawPatch)
      const embedding = new Embedding({
        referencedHash,
        referencedLocation,
        patch
      })
      subCreators.push(new EmbeddingCreator(embedding))
    }
  }

  return subCreators
}
/**
 * Parses a verse: each of its '\n'-separated lines becomes a LineCreator
 * wrapping the creators which parseTextUnit returns for that line.
 *
 * @param {string} verseAsText - the text of the verse
 * @returns {VerseCreator} a creator containing one LineCreator per line
 */
function parseVerse (verseAsText) {
  const lineCreators = verseAsText
    .split('\n')
    .map((line) => new LineCreator(parseTextUnit(line)))

  return new VerseCreator(lineCreators)
}
/**
 * Splits a text into its chapters, each starting with '##'.
 * A text without any chapter is wrapped in a single artificial, untitled
 * chapter, so the result always consists of texts starting with '##'.
 *
 * @param {string} text - the body of the work
 * @returns {string[]} the chapter bodies
 * @throws {Error} when the text consists of exactly one explicit chapter, or
 * when it has several chapters but does not start with one
 */
function splitInChapterBodies (text) {
  const chapterBodies = text.split(CHAPTER_START_REGEX)
  const startsWithChapter = chapterBodies[0].startsWith('##')

  if (chapterBodies.length > 1) {
    if (!startsWithChapter) {
      throw new Error(
        'If a text consists of chapters, it should not contain text before the first chapter ("##")'
      )
    }
    return chapterBodies
  }

  // (note: poems with one chapter are not assumed to exist)
  if (startsWithChapter) {
    throw new Error(
      `Text with only 1 chapter--do not use chapters then\n${chapterBodies}`
    )
  }

  // No chapters found => insert an artificial one
  chapterBodies[0] = `##\n\n${chapterBodies[0]}`
  return chapterBodies
}
/**
 * Extracts the optional title from the first line of a chapter
 * ('## the title'), and checks that the line is followed by exactly one
 * blank line.
 *
 * @param {string} chapterBody - the text of the chapter, starting with '##'
 * @param {Array} chapterCreators - a ChapterTitleCreator is appended to this
 * array when the chapter has a (non-empty) title
 * @returns {string} the text of the chapter after the first line and its
 * blank line
 * @throws {Error} when the first line is not followed by exactly one
 * blank line
 */
function parseChapterTitle (chapterBody, chapterCreators) {
  const chapterLines = chapterBody.split('\n')
  const chapterTitle = chapterLines[0].slice(2).trim()
  if (chapterTitle !== '') {
    // Empty string => no title
    const content = new ContentCreator(chapterTitle)
    chapterCreators.push(new ChapterTitleCreator([content]))
  }

  // Throws an error when no or more than 1 blank line is found
  // after the perceived title, to avoid unexpected behaviour
  const hasBlankLine = chapterLines[1] === ''
  // DEVNOTE: the lines were obtained by splitting on '\n', so they never
  // contain a '\n' themselves: a second blank line shows up as an empty line.
  // An empty line which is the very last one does not count, since it only
  // means the chapter ends right after the (first) blank line
  const hasSecondBlankLine = chapterLines.length > 3 && chapterLines[2] === ''
  if (!hasBlankLine || hasSecondBlankLine) {
    throw Error(
      `Expected exactly one blank line after the start of a new chapter)\n${chapterBody}`
    )
  }

  return chapterLines.slice(2).join('\n')
}
/**
 * Parses a chapter into the creators of its parts: its title (if any),
 * followed by one creator per verse (poetry) or per paragraph (prose).
 * Verses and paragraphs are separated by a blank line.
 *
 * @param {string} chapterBody - the text of the chapter, starting with '##'
 * @param {boolean} isPoem - whether the chapter is poetry (otherwise prose)
 * @returns {Array} the creators of the chapter's parts
 */
function parseChapter (chapterBody, isPoem) {
  const subCreators = []
  const bodyWithoutTitle = parseChapterTitle(chapterBody, subCreators)

  const toCreator = isPoem
    ? (verse) => parseVerse(verse)
    : (paragraph) => new ParagraphCreator(parseTextUnit(paragraph))

  // The trim removes any potential dangling new lines after the last
  // verse or paragraph of the chapter
  for (const constituent of bodyWithoutTitle.trim().split('\n\n')) {
    subCreators.push(toCreator(constituent))
  }

  return subCreators
}
/**
 * Parses every chapter with parseChapter.
 *
 * @param chapterBodies - the texts of the chapters, each starting with '##'
 * @param {boolean} isPoem - whether the work is poetry (otherwise prose)
 * @returns {Array} per chapter, the result of parseChapter
 * @throws {Error} when a chapter does not start with '##'
 * (which indicates a bug in the code that split the text into chapters)
 */
function parseChapters (chapterBodies, isPoem) {
  return Array.from(chapterBodies, (chapterBody) => {
    if (!chapterBody.startsWith('##')) {
      const explanation = "Logical bug in parse code: chapter expected to start with '##'"
      throw new Error(
        [explanation, chapterBody].join('--Chapter body (starting at next line): \n')
      )
    }
    return parseChapter(chapterBody, isPoem)
  })
}
/**
 * Creates the PoemCreator or ProseCreator of a work out of its parsed chapters.
 * The parts of a sole chapter are added directly (copied); otherwise each
 * chapter is wrapped in a ChapterCreator.
 *
 * @param {Array} parsedChapters - per chapter, the creators of its parts
 * @param {boolean} isPoem - whether the work is poetry (otherwise prose)
 * @returns {PoemCreator|ProseCreator} the creator of the work's body
 */
function makePoemOrProseCreator (parsedChapters, isPoem) {
  const hasSingleChapter = parsedChapters.length === 1

  // 1 chapter => work with the bare verses or paragraphs
  const components = hasSingleChapter
    ? [...parsedChapters[0]]
    : parsedChapters.map((parsedChapter) => new ChapterCreator(parsedChapter))

  const BodyCreator = isPoem ? PoemCreator : ProseCreator
  return new BodyCreator(components)
}
/**
 * Parses the body of a poem or a prose work, and appends the resulting
 * creator to allCreators.
 *
 * @param {Object} params
 * @param {string} params.body - the text of the work, without its title
 * @param {boolean} params.isPoem - whether the work is poetry (otherwise prose)
 * @param {Array} params.allCreators - the creator of the body is appended
 * to this array
 */
function parseWorkBody ({ body, isPoem, allCreators }) {
  const parsedChapters = parseChapters(splitInChapterBodies(body), isPoem)
  allCreators.push(makePoemOrProseCreator(parsedChapters, isPoem))
}
/**
 * Builds a pubee: a PubeeBuilder is created for the bee, the work creator
 * adds itself to it, the metadata is added, and the result is built.
 *
 * @param {Object} params
 * @param params.bee - passed to the PubeeBuilder constructor
 * @param params.workCreator - object whose addToPubeeBuilder method is called
 * with the builder
 * @param params.metadata - passed to the builder's addMetadata method
 * @returns {Promise} resolves to the result of the builder's build method
 */
async function createWork ({ bee, workCreator, metadata }) {
  const builder = new PubeeBuilder(bee)

  // The order matters: the work first, then its metadata
  await workCreator.addToPubeeBuilder(builder)
  await builder.addMetadata(metadata)

  return builder.build()
}
/**
 * Determines the type of a work from the marker at the start of its text
 * ('[prose]', '[poetry]' or '[collection]').
 *
 * @param {string} text - the complete text of the work
 * @returns {Array} a pair: the member of WorkTypeEnum, and the text
 * (trimmed of surrounding whitespace) with the marker removed
 * @throws {Error} when the trimmed text does not start with a marker
 */
function extractWorkType (text) {
  const trimmedText = text.trim()
  const markers = [
    ['[prose]', WorkTypeEnum.PROSE],
    ['[poetry]', WorkTypeEnum.POETRY],
    ['[collection]', WorkTypeEnum.COLLECTION]
  ]

  for (const [marker, workType] of markers) {
    if (trimmedText.startsWith(marker)) {
      return [workType, trimmedText.slice(marker.length)]
    }
  }

  throw new Error(
    "A text should start with either '[poetry]', '[prose]' or '[collection]' to indicate its type"
  )
}
/**
 * Creates the metadata object of a work.
 *
 * @param author - the author; left out of the metadata when null or undefined
 * @returns {Object} the metadata, with the author stored under the
 * METADATA_AUTHOR key if one was given
 */
function createMetadata (author) {
  // DEVNOTE: '== null' matches both null and undefined
  return author == null ? {} : { [METADATA_AUTHOR]: author }
}
/**
 * Parses a line consisting of a single link ('[hash]' or '[hash/location]').
 *
 * @param {string} line - the line; surrounding whitespace is ignored
 * @returns {Link} the link; its linkedLocation is undefined when the line
 * specifies no location
 * @throws {Error} when the trimmed line does not match LINK_REGEX
 */
function parseLineWithLink (line) {
  const cleanedLine = line.trim()
  const match = cleanedLine.match(LINK_REGEX)
  if (match === null) {
    throw new Error(`Invalid link: '${cleanedLine}'`)
  }

  const { hash, location: rawLocation } = match.groups
  const linkedLocation = rawLocation === undefined
    ? undefined
    : getLocationInHyperpubeeFormat(rawLocation)

  return new Link({ linkedHash: hash, linkedLocation })
}
/**
 * Parses the body of a collection, in which every line is a link,
 * and appends the resulting CollectionCreator to allCreators.
 *
 * @param {string} body - the text of the collection, without its title
 * @param {Array} allCreators - the CollectionCreator is appended to this array
 * @throws errors of parseLineWithLink (raised for lines which are not a
 * valid link) are not caught here
 */
function parseCollectionBody (body, allCreators) {
  const linkCreators = body
    .split(NEWLINE_REGEX)
    .map((line) => new LinkCreator(parseLineWithLink(line)))

  allCreators.push(new CollectionCreator(linkCreators))
}
/**
 * Parses the text of a work (poetry, prose or a collection of links)
 * and creates a pubee out of it.
 *
 * @param {Object} params
 * @param {string} params.text - the complete text of the work, starting with
 * the marker of its type
 * @param params.bee - passed on to createWork
 * @param [params.author] - the author, stored in the metadata when given
 * @returns {Promise} resolves to the pubee returned by createWork
 */
async function parseWork ({ text, bee, author }) {
  const metadata = createMetadata(author)
  const [workType, workText] = extractWorkType(text)

  const allCreators = []
  const body = parseTitle(trimAndCleanText(workText), allCreators)

  if (workType === WorkTypeEnum.COLLECTION) {
    parseCollectionBody(body, allCreators)
  } else {
    // Anything which is not a collection is either poetry or prose
    parseWorkBody({
      body,
      isPoem: workType === WorkTypeEnum.POETRY,
      allCreators
    })
  }

  return createWork({
    bee,
    workCreator: new WorkCreator(allCreators),
    metadata
  })
}
module.exports = { parseWork }