UNPKG

biblatex-csl-converter

Version:

a set of converters: biblatex => json, CSL => json, json => biblatex, json => CSL

1,330 lines (1,244 loc) 43.4 kB
import {
    BibFieldTypes,
    BibTypes,
    GroupObject,
    NodeArray,
    EntryObject,
    NameDictObject,
    RangeArray,
    LangidOptions,
} from "../const"
import {
    TeXSpecialChars,
    BiblatexAliasTypes,
    BiblatexFieldAliasTypes,
    BiblatexAliasOptions,
    DefaultCrossRefInheritance,
    TypeInheritance,
} from "./const"
import { BibLatexNameParser } from "./name-parser"
import { BibLatexLiteralParser } from "./literal-parser"
import { GroupParser } from "./group-parser"
import { splitTeXString } from "./tools"
import { edtfParse } from "../edtf-parser"

/** Parses files in BibTeX/BibLaTeX format
 */

/* Based on original work by Henrik Muehe (c) 2010,
 * licensed under the MIT license,
 * https://code.google.com/archive/p/bibtex-js/
 */

export interface ConfigObject {
    /**
     * - processUnknown (object [specifying content type for specific unknown]):
     *
     * Processes fields with names that are unknown, adding them to an `unknown_fields`
     * object to each entry.
     *
     * example:
     * > a = new BibLatexParser(..., {processUnknown: true})
     * > a.output
     * {
     *   "0:": {
     *     ...
     *     unknown_fields: {
     *       ...
     *     }
     *   }
     * }
     *
     * > a = new BibLatexParser(..., {processUnknown: {commentator: 'l_name'}})
     * > a.output
     * {
     *   "0:": {
     *     ...
     *     unknown_fields: {
     *       commentator: [
     *         {
     *           given: ...,
     *           family: ...
     *         }
     *       ]
     *       ...
     *     }
     *   }
     * }
     */
    processUnknown?: boolean | Record<string, string>
    /**
     * Processes fields with names that are known, but are not expected for the given bibtype,
     * adding them to an `unexpected_fields` object to each entry.
     */
    processUnexpected?: boolean
    /** Keep URI fields even when they fail the URI syntax check. */
    processInvalidURIs?: boolean
    /** Collect the text of `@comment` blocks into the `comments` result. */
    processComments?: boolean
    /**
     * Add the source location as a `location` object on each entry
     *
     * example:
     * > a = new BibLatexParser(..., {includeLocation: true})
     * > a.output
     * {
     *   "0:": {
     *     ...
     *     location: {
     *       start: 1,
     *       end: 42
     *     }
     *   }
     * }
     */
    includeLocation?: boolean
    /**
     * Add the source text as a `raw_text` property on each entry
     *
     * example:
     * > a = new BibLatexParser(..., {includeRawText: true})
     * > a.output
     * {
     *   "0:": {
     *     ...
     *     raw_text: '@article{...}'
     *   }
     * }
     */
    includeRawText?: boolean
    /** Inheritance rules used for `crossref`; defaults to `DefaultCrossRefInheritance`. */
    crossRefInheritance?: TypeInheritance[]
    includeUnusedNocase?: boolean
}

interface ErrorObject {
    type: string
    expected?: string
    found?: string
    line?: number
    key?: string
    entry?: string
    field?: string
    field_name?: string
    alias_of?: string
    alias_of_value?: unknown
    value?: string[] | string
    variable?: string
    type_name?: string
}

interface MatchOptionsObject {
    skipWhitespace: string | boolean
}

export interface BiblatexParseResult {
    entries: { [key: number]: EntryObject }
    errors: ErrorObject[]
    warnings: ErrorObject[]
    comments: string[]
    strings: Record<string, string>
    jabref: {
        groups: GroupObject[] | false
        meta: Record<string, string>
    }
}

type Month =
    | "JAN"
    | "FEB"
    | "MAR"
    | "APR"
    | "MAY"
    | "JUN"
    | "JUL"
    | "AUG"
    | "SEP"
    | "OCT"
    | "NOV"
    | "DEC"

const hasbackslash = /\\/

/**
 * Characters that terminate a key / bare value. "\r" is included so that
 * files with CRLF line endings do not end up with a trailing carriage return
 * inside keys or unquoted values (e.g. `year = 2010\r\n}`).
 */
const KEY_DELIMITERS = ["(", ")", ",", "{", "}", " ", "=", "\t", "\n", "\r"]

/** `langid` values (as written in the source file) that count as English. */
const ENGLISH_LANGID_INPUTS = [
    "english",
    "american",
    "british",
    "usenglish",
    "ukenglish",
    "canadian",
    "australian",
    "newzealand",
]

/** Internal langid keys that count as English when derived from `language`. */
const ENGLISH_LANGIDS = [
    "usenglish",
    "ukenglish",
    "caenglish",
    "auenglish",
    "nzenglish",
]

/* Copyright (c) 2010-2013 Diego Perini, MIT licensed
   https://gist.github.com/dperini/729294
 */
const URI_PATTERN =
    /^(?:(?:(?:https?|ftp):)?\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})).?)(?::\d{2,5})?(?:[/?#]\S*)?$/i

export interface BibDB {
    [key: number]: EntryObject
}

/**
 * Recursive-descent parser for BibTeX/BibLaTeX source text.
 *
 * Typical use is `new BibLatexParser(input, config).parse()`, which returns
 * the parsed entries together with the errors and warnings collected on the way.
 */
export class BibLatexParser {
    input: string
    config: ConfigObject
    /** Current read offset into `input`. */
    pos: number
    startPosition = -1
    endPosition = -1
    entries: EntryObject[]
    /** Lower-cased name of the field currently being parsed. */
    currentKey: string | false
    currentEntry?: EntryObject
    /** Lower-cased directive name of the item being parsed (e.g. "article"). */
    currentType: string
    /** Unprocessed field values of the entry being parsed. */
    currentRawFields?: Record<string, unknown>
    bibDB: BibDB
    errors: ErrorObject[]
    warnings: ErrorObject[]
    months: Record<Month, string>
    /** `@string` definitions, keyed by upper-cased name. */
    strings: Record<string, string>
    comments: string[]
    groupParser: GroupParser
    groups: GroupObject[] | false
    jabrefMeta: Record<string, string>
    jabref?: {
        groups: GroupObject[] | false
        meta: number
    }
    /** Maps an entry key to the key of the entry it cross-references. */
    crossrefs: Record<string, string>

    constructor(input: string, config: ConfigObject = {}) {
        this.input = input
        this.config = config
        this.pos = 0
        this.entries = []
        this.bibDB = {}
        this.currentKey = false
        this.currentType = ""
        this.errors = []
        this.warnings = []
        this.comments = []
        this.strings = {}
        // These variables are expected to be defined by some bibtex sources.
        this.months = {
            JAN: "01",
            FEB: "02",
            MAR: "03",
            APR: "04",
            MAY: "05",
            JUN: "06",
            JUL: "07",
            AUG: "08",
            SEP: "09",
            OCT: "10",
            NOV: "11",
            DEC: "12",
        }
        this.groupParser = new GroupParser(this.entries)
        this.groups = false
        this.jabrefMeta = {}
        this.crossrefs = {}
    }

    /** Returns true if `s` is a space, tab, carriage return or newline. */
    isWhitespace(s: string): boolean {
        return s === " " || s === "\r" || s === "\t" || s === "\n"
    }

    /** 1-based line number of the current read position. */
    private currentLine(): number {
        return this.input.slice(0, this.pos).split("\n").length
    }

    /** Records an error, annotated with the current line number. */
    error(data: ErrorObject): void {
        this.errors.push(Object.assign({}, data, { line: this.currentLine() }))
    }

    /** Records a warning, annotated with the current line number. */
    warning(data: ErrorObject): void {
        this.warnings.push(
            Object.assign({}, data, { line: this.currentLine() })
        )
    }

    /**
     * Consumes the token `s` at the current position. If the input does not
     * contain `s` there, a `token_mismatch` error is recorded and the
     * position is left unchanged.
     *
     * @param s - The literal token to consume.
     * @param options - `skipWhitespace` may be `true` (skip before and
     *   after), `"leading"`, `"trailing"`, or any other value (no skipping).
     */
    match(
        s: string,
        options: MatchOptionsObject = { skipWhitespace: true }
    ): void {
        const mode = options.skipWhitespace
        if (mode === true || mode === "leading") {
            this.skipWhitespace()
        }
        const found = this.input.substring(this.pos, this.pos + s.length)
        if (found === s) {
            this.pos += s.length
        } else {
            this.error({
                type: "token_mismatch",
                expected: s,
                found,
            })
        }
        if (mode === true || mode === "trailing") {
            this.skipWhitespace()
        }
    }

    /**
     * Skips whitespace, then reports whether `s` comes next. The token
     * itself is not consumed.
     */
    tryMatch(s: string): boolean {
        this.skipWhitespace()
        return this.input.substring(this.pos, this.pos + s.length) === s
    }

    /**
     * Advances past whitespace and `%` line comments.
     *
     * The comment scan is bounded by the input length, so a comment that
     * runs to the end of the input without a trailing newline terminates.
     */
    skipWhitespace(): void {
        const length = this.input.length
        while (this.pos < length) {
            const char = this.input[this.pos]
            if (this.isWhitespace(char)) {
                this.pos++
            } else if (char === "%") {
                // Skip to the end of the line; the newline itself is
                // consumed as whitespace on the next iteration.
                while (this.pos < length && this.input[this.pos] !== "\n") {
                    this.pos++
                }
            } else {
                break
            }
        }
    }

    /**
     * Advances to the next `@`.
     *
     * @returns `false` if the end of the input was reached instead.
     */
    skipToNext(): boolean {
        while (this.input.length > this.pos && this.input[this.pos] != "@") {
            this.pos++
        }
        return this.input.length != this.pos
    }

    /**
     * Reads a `{...}` delimited value, honouring nested braces and keeping
     * backslash escapes intact. Records `unexpected_eof` if the closing
     * brace is missing.
     */
    valueBraces(): string {
        let bracecount = 0
        this.match("{", { skipWhitespace: "leading" })
        let string = ""
        while (this.pos < this.input.length) {
            switch (this.input[this.pos]) {
                case "\\":
                    // Copy the escape and the escaped character verbatim.
                    string += this.input.substring(this.pos, this.pos + 2)
                    this.pos++
                    break
                case "}":
                    if (bracecount === 0) {
                        this.match("}")
                        return string
                    }
                    string += "}"
                    bracecount--
                    break
                case "{":
                    string += "{"
                    bracecount++
                    break
                default:
                    string += this.input[this.pos]
                    break
            }
            this.pos++
        }
        this.errors.push({ type: "unexpected_eof" })
        return string
    }

    /**
     * Reads a `"..."` delimited value, keeping backslash escapes intact.
     * Records `unexpected_eof` if the closing quote is missing.
     */
    valueQuotes(): string {
        this.match('"', { skipWhitespace: "leading" })
        let string = ""
        while (this.pos < this.input.length) {
            switch (this.input[this.pos]) {
                case "\\":
                    string += this.input.substring(this.pos, this.pos + 2)
                    this.pos++
                    break
                case '"':
                    this.match('"')
                    return string
                default:
                    string += this.input[this.pos]
                    break
            }
            this.pos++
        }
        this.errors.push({ type: "unexpected_eof" })
        return string
    }

    /**
     * Reads one value: a braced value, a quoted value, or a bare token. A
     * bare token is resolved as an `@string` variable, a month name or a
     * number. Anything else yields an `undefined_variable` warning and the
     * token wrapped in U+0870 markers.
     */
    singleValue(): string {
        if (this.tryMatch("{")) {
            return this.valueBraces()
        }
        if (this.tryMatch('"')) {
            return this.valueQuotes()
        }
        const k = this.key()
        const kUp = k.toUpperCase()
        // Own-property check so that a variable defined as the empty string
        // is still recognised as defined.
        if (Object.prototype.hasOwnProperty.call(this.strings, kUp)) {
            return this.strings[kUp]
        }
        if (kUp in this.months) {
            return this.months[kUp as Month]
        }
        if (/^[0-9]+$/.test(k)) {
            return k
        }
        const warning: ErrorObject = {
            type: "undefined_variable",
            variable: k,
        }
        if (this.currentEntry) {
            warning.entry = this.currentEntry["entry_key"]
        }
        if (this.currentKey) {
            warning.key = this.currentKey
        }
        this.warning(warning)
        // Using \u0870 as a delimiter for variables as they cannot be
        // used in regular latex code.
        return `\u0870${k}\u0870`
    }

    /**
     * Reads a full value, i.e. one or more single values joined by `#`.
     *
     * @param asis - When false (default), runs of tabs/spaces are collapsed
     *   to a single space and the result is trimmed.
     */
    value(asis = false): string {
        const values: string[] = [this.singleValue()]
        while (this.tryMatch("#")) {
            this.match("#")
            values.push(this.singleValue())
        }
        const joined = values.join("")
        return asis ? joined : joined.replace(/[\t ]+/g, " ").trim()
    }

    /**
     * Reads a key (entry key, field name or bare token) up to the next
     * delimiter character.
     *
     * @param optional - When true, the key is only accepted if it is
     *   followed (after whitespace) by a comma. Otherwise the position is
     *   reset and "" is returned.
     * @returns The key, or "" if none was found. Reaching the end of the
     *   input records a `runaway_key` error.
     */
    key(optional = false): string {
        const start = this.pos
        while (true) {
            if (this.pos == this.input.length) {
                this.error({ type: "runaway_key" })
                break
            }
            if (KEY_DELIMITERS.includes(this.input[this.pos])) {
                const key = this.input.substring(start, this.pos)
                if (optional && this.input[this.pos] != ",") {
                    this.skipWhitespace()
                    if (this.input[this.pos] != ",") {
                        this.pos = start
                        return ""
                    }
                }
                return key
            }
            this.pos++
        }
        return ""
    }

    /**
     * Reads a `name = value` pair.
     *
     * @param asis - Passed on to `value()`.
     * @returns `[lowercased name, value]`, or `false` after recording a
     *   `cut_off_citation` or `missing_equal_sign` error.
     */
    keyEqualsValue(asis = false): [string, string] | false {
        const key = this.key()
        if (!key.length) {
            const error: ErrorObject = {
                type: "cut_off_citation",
            }
            if (this.currentEntry) {
                error.entry = this.currentEntry["entry_key"]
                // The citation is not full, we remove the existing parts.
                this.currentEntry["incomplete"] = true
            }
            this.error(error)
            return false
        }
        const fieldName = key.toLowerCase()
        this.currentKey = fieldName
        if (this.tryMatch("=")) {
            this.match("=")
            const val = this.value(asis)
            return [fieldName, val]
        }
        const error: ErrorObject = {
            type: "missing_equal_sign",
            key: fieldName,
        }
        if (this.currentEntry) {
            error.entry = this.currentEntry["entry_key"]
        }
        this.error(error)
        return false
    }

    /**
     * Reads the comma separated field list of an entry into
     * `currentRawFields`. If not even the first field can be read, the entry
     * most recently pushed to `entries` is removed again.
     */
    keyValueList(): void {
        let kv = this.keyEqualsValue()
        if (!kv || !this.currentRawFields) {
            // Entry has no fields, so we delete it.
            // It was the last one pushed, so we remove the last one
            this.entries.pop()
            return
        }
        const rawFields = this.currentRawFields
        rawFields[kv[0]] = kv[1]
        while (this.tryMatch(",")) {
            this.match(",")
            //fixes problems with commas at the end of a list
            if (this.tryMatch("}") || this.tryMatch(")")) {
                break
            }
            kv = this.keyEqualsValue()
            if (!kv) {
                const error: ErrorObject = {
                    type: "key_value_error",
                }
                if (this.currentEntry) {
                    error.entry = this.currentEntry["entry_key"]
                }
                this.error(error)
                break
            }
            rawFields[kv[0]] = kv[1]
        }
    }

    /**
     * Converts `currentRawFields` into typed values on `currentEntry`.
     *
     * Steps: remember and remove `crossref`; derive `date` from `date` or
     * `year`/`month`; decide whether titles are English (case protection);
     * then convert every remaining raw field according to its field type,
     * routing unknown/unexpected fields per the config.
     */
    processFields(): void {
        const entry = this.currentEntry
        const rawFields = this.currentRawFields
        if (!entry || !rawFields) {
            return
        }
        const fields = entry["fields"]
        const entryKey = entry["entry_key"]

        if ("crossref" in rawFields) {
            this.crossrefs[entryKey] = rawFields.crossref as string
            delete rawFields.crossref
        }

        let date: string | undefined
        if (rawFields.date) {
            // date string has precedence
            date = rawFields.date as string
        } else if (rawFields.year) {
            // Extract just the year if month is invalid
            if (rawFields.month) {
                let month = rawFields.month as string
                if (
                    isNaN(parseInt(month)) &&
                    month.toUpperCase() in this.months
                ) {
                    month = this.months[month.toUpperCase() as Month]
                }
                month = month.replace(/~|–|—|\./g, "-")
                // Validate month format: MM or MM-DD
                // MM: 01-12
                // DD: 01-31 (simplified, not checking specific months)
                if (
                    /^(0?[1-9]|1[0-2])(?:-(0?[1-9]|[12]\d|3[01]))?$/.test(month)
                ) {
                    date = `${rawFields.year}-${month}`
                } else {
                    // Use just the year if month is invalid
                    date = `${rawFields.year}`
                    // Add warning about invalid month
                    this.warnings.push({
                        type: "invalid_month",
                        field_name: "month",
                        value: String(rawFields.month),
                        entry: entryKey,
                    })
                }
            } else {
                date = `${rawFields.year}`
            }
        }

        if (date) {
            const dateObj = edtfParse(date)
            if (dateObj.valid) {
                fields["date"] = dateObj.cleanedString
                delete rawFields.year
                delete rawFields.month
            } else if (rawFields.date) {
                this.errors.push({
                    type: "invalid_date",
                    field_name: "date",
                    value: rawFields.date as string,
                    entry: entryKey,
                })
            } else if (rawFields.year) {
                // Always try to use year even if month was invalid
                const yearObj = edtfParse(rawFields.year as string)
                if (yearObj.valid) {
                    fields["date"] = yearObj.cleanedString
                    delete rawFields.year
                    // Add warning about invalid month
                    this.warnings.push({
                        type: "unknown_date",
                        field_name: "month",
                        value: String(rawFields.month),
                        entry: entryKey,
                    })
                } else {
                    // Try to find a valid year in the string
                    const yearMatches = Array.from(
                        String(rawFields.year).matchAll(/\[?(\d{4})\]?/g)
                    )
                    // Handle non-bracketed dates
                    // If there are two years, take the non-bracketed one
                    // If there is only one year, return it
                    let mainYearMatch = yearMatches.find(
                        (yearMatch) => !/[[\]]/.test(yearMatch[0])
                    )
                    if (mainYearMatch) {
                        // Handle bracketed dates (original publication dates)
                        const bracketedYearMatch = yearMatches.find(
                            (yearMatch) => /[[\]]/.test(yearMatch[0])
                        )
                        if (bracketedYearMatch) {
                            fields["origdate"] = bracketedYearMatch[1]
                        }
                    } else if (yearMatches.length) {
                        mainYearMatch = yearMatches[0]
                    }
                    if (mainYearMatch) {
                        fields["date"] = mainYearMatch[1]
                        delete rawFields.year
                    } else {
                        // Add warning about invalid year
                        this.warnings.push({
                            type: "unknown_date",
                            field_name: "year",
                            value: String(rawFields.year),
                            entry: entryKey,
                        })
                    }
                }
            }
        }

        // Check for English language. If the citation is in English language,
        // titles may use case preservation.
        let langEnglish = true // By default we assume everything to be written in English.
        if (rawFields.langid && (rawFields.langid as string).length) {
            const langString = (rawFields.langid as string)
                .toLowerCase()
                .trim()
            if (!ENGLISH_LANGID_INPUTS.includes(langString)) {
                langEnglish = false
            }
        } else if (rawFields.language) {
            // langid and language. The two mean different things, see discussion https://forums.zotero.org/discussion/33960/biblatex-import-export-csl-language-biblatex-langid
            // but in bibtex, language is often used for what is essentially langid.
            // If there is no langid, but a language, and the language happens to be
            // a known langid, set the langid to be equal to the language.
            const langid = this._reformKey(
                rawFields.language as string,
                "langid"
            )
            if (langid.length) {
                fields["langid"] = langid
                if (
                    typeof langid === "string" &&
                    !ENGLISH_LANGIDS.includes(langid)
                ) {
                    langEnglish = false
                }
            }
        }

        const fieldTypeNames = Object.keys(BibFieldTypes)
        const bType =
            BibTypes[entry["bib_type"] as keyof typeof BibTypes]

        iterateFields: for (const bKey in rawFields) {
            if (
                bKey === "date" ||
                (["year", "month"].includes(bKey) &&
                    !this.config.processUnknown)
            ) {
                // Handled above
                continue iterateFields
            }

            // Replace alias fields with their main term.
            let aliasKey: string | undefined
            if (bKey in BiblatexFieldAliasTypes) {
                aliasKey =
                    BiblatexFieldAliasTypes[
                        bKey as keyof typeof BiblatexFieldAliasTypes
                    ]
            }
            if (aliasKey && rawFields[aliasKey]) {
                // Both the alias and the field it stands for are present;
                // the main field wins.
                this.warning({
                    type: "alias_creates_duplicate_field",
                    field: bKey,
                    alias_of: aliasKey,
                    value: rawFields[bKey] as string | string[] | undefined,
                    alias_of_value: rawFields[aliasKey],
                    entry: entryKey,
                })
                continue iterateFields
            }
            const biblatexName = aliasKey || bKey
            let fKey =
                fieldTypeNames.find((ft) => {
                    return (
                        BibFieldTypes[ft as keyof typeof BibFieldTypes]
                            .biblatex === biblatexName
                    )
                }) || ""

            let oFields: Record<string, unknown>, fType: string

            if (!fKey.length) {
                this.warning({
                    type: "unknown_field",
                    field_name: bKey,
                    entry: entryKey,
                })
                if (!this.config.processUnknown) {
                    continue iterateFields
                }
                if (!entry["unknown_fields"]) {
                    entry["unknown_fields"] = {}
                }
                oFields = entry["unknown_fields"]
                fType =
                    this.config.processUnknown &&
                    typeof this.config.processUnknown === "object" &&
                    this.config.processUnknown[bKey]
                        ? this.config.processUnknown[bKey]
                        : "f_literal"
                fKey = bKey
            } else if (
                bType["required"].includes(fKey) ||
                bType["optional"].includes(fKey) ||
                bType["eitheror"].includes(fKey)
            ) {
                oFields = fields
                fType =
                    BibFieldTypes[fKey as keyof typeof BibFieldTypes]["type"]
            } else if (fKey === "entrysubtype" && bType["biblatex-subtype"]) {
                // The subtype is already encoded in the bib type itself.
                continue iterateFields
            } else {
                this.warning({
                    type: "unexpected_field",
                    field_name: bKey,
                    entry: entryKey,
                })
                if (!this.config.processUnexpected) {
                    continue iterateFields
                }
                if (!entry["unexpected_fields"]) {
                    entry["unexpected_fields"] = {}
                }
                oFields = entry["unexpected_fields"]
                fType =
                    BibFieldTypes[fKey as keyof typeof BibFieldTypes]["type"]
            }

            const fValue = rawFields[bKey]
            switch (fType) {
                case "f_date": {
                    const parsedDate = edtfParse(fValue as string)
                    if (parsedDate.valid) {
                        oFields[fKey] = parsedDate.cleanedString
                    } else {
                        this.error({
                            type: "unknown_date",
                            entry: entryKey,
                            field_name: fKey,
                            value: fValue as string | string[] | undefined,
                        })
                    }
                    break
                }
                case "f_integer":
                    oFields[fKey] = this._reformLiteral(fValue as string)
                    break
                case "f_key": {
                    const keyValue = this._reformKey(fValue as string, fKey)
                    if (keyValue.length) {
                        oFields[fKey] = keyValue
                    }
                    break
                }
                case "f_literal":
                case "f_long_literal":
                    oFields[fKey] = this._reformLiteral(fValue as string)
                    break
                case "l_range":
                    oFields[fKey] = this._reformRange(fValue as string)
                    break
                case "f_title":
                    oFields[fKey] = this._reformLiteral(
                        fValue as string,
                        langEnglish
                    )
                    break
                case "f_uri":
                    if (
                        this.config.processInvalidURIs ||
                        this._checkURI(fValue as string)
                    ) {
                        oFields[fKey] = this._reformURI(fValue as string)
                    } else {
                        this.error({
                            type: "unknown_uri",
                            field_name: fKey,
                            value: fValue as string | string[] | undefined,
                            entry: entryKey,
                        })
                    }
                    break
                case "f_verbatim":
                    oFields[fKey] = fValue
                    break
                case "l_key":
                    oFields[fKey] = splitTeXString(fValue as string).map(
                        (keyField) => this._reformKey(keyField, fKey)
                    )
                    break
                case "l_tag":
                    oFields[fKey] = (fValue as string)
                        .split(/[,;]/)
                        .map((string) => string.trim())
                    break
                case "l_literal":
                    oFields[fKey] = splitTeXString(fValue as string).map(
                        (item) => this._reformLiteral(item.trim())
                    )
                    break
                case "l_name":
                    oFields[fKey] = this._reformNameList(fValue as string)
                    break
                default:
                    // Something must be wrong in the code.
                    console.warn(`Unrecognized type: ${fType}!`)
            }
        }
    }

    /**
     * Normalises a key-type field value.
     *
     * @returns The matching option key if the field defines options; "" if
     *   the value is not among object-style options, or if the field is
     *   strict and nothing matched (an `unknown_key` warning is recorded in
     *   the strict case); otherwise the value parsed as a literal.
     */
    _reformKey(keyString: string, fKey: string): string | NodeArray {
        let keyValue = keyString.trim().toLowerCase()
        const fieldType = BibFieldTypes[fKey as keyof typeof BibFieldTypes]
        if (!fieldType) {
            // Field names that are not in BibFieldTypes (unknown fields that
            // the config maps to a key type) have no options to check.
            return this._reformLiteral(keyString)
        }
        const aliasOptions = (
            BiblatexAliasOptions as Record<string, Record<string, string>>
        )[fKey]
        if (aliasOptions && aliasOptions[keyValue]) {
            keyValue = aliasOptions[keyValue]
        }
        if ("options" in fieldType) {
            if (Array.isArray(fieldType["options"])) {
                if (fieldType["options"].includes(keyValue)) {
                    return keyValue
                }
            } else {
                const optionValue = Object.keys(fieldType["options"]!).find(
                    (key) => {
                        return (
                            (fieldType.options as LangidOptions)[key][
                                "biblatex"
                            ] === keyValue
                        )
                    }
                )
                return optionValue ? optionValue : ""
            }
        }
        if ("strict" in fieldType && fieldType.strict) {
            const warning: ErrorObject = {
                type: "unknown_key",
                field_name: fKey,
                value: keyString,
            }
            if (this.currentEntry) {
                warning.entry = this.currentEntry["entry_key"]
            }
            this.warning(warning)
            return ""
        }
        return this._reformLiteral(keyString)
    }

    /** Tests `uriString` against the URI validation pattern. */
    _checkURI(uriString: string): boolean {
        return URI_PATTERN.test(uriString)
    }

    /** Removes all backslashes from a URI. */
    _reformURI(uriString: string): string {
        return uriString.replace(/\\/g, "")
    }

    /**
     * Splits a name list with `splitTeXString` and parses each name. Names
     * for which the name parser yields no output are dropped.
     */
    _reformNameList(nameString: string): NameDictObject[] {
        const result: NameDictObject[] = []
        for (const person of splitTeXString(nameString)) {
            const name = new BibLatexNameParser(person, this.config).output
            if (name && typeof name === "object") {
                result.push(name)
            }
        }
        return result
    }

    /**
     * Parses a comma separated list of ranges. Each range is split at the
     * first `--` (or, failing that, the first `-`) into a two element array;
     * values without a dash become a one element array.
     */
    _reformRange(rangeString: string): RangeArray[] {
        return rangeString.split(",").map((string) => {
            let parts = string.split("--")
            if (parts.length > 1) {
                return [
                    this._reformLiteral(parts.shift()!.trim()),
                    this._reformLiteral(parts.join("--").trim()),
                ]
            }
            parts = string.split("-")
            if (parts.length > 1) {
                return [
                    this._reformLiteral(parts.shift()!.trim()),
                    this._reformLiteral(parts.join("-").trim()),
                ]
            }
            return [this._reformLiteral(string.trim())]
        })
    }

    /**
     * Parses a literal value into a node array.
     *
     * @param cpMode - Passed through to the literal parser; `processFields`
     *   sets it for titles of entries considered English.
     */
    _reformLiteral(theValue: string, cpMode = false): NodeArray {
        const parser = new BibLatexLiteralParser(theValue, this.config, cpMode)
        return parser.output
    }

    /**
     * Maps `currentType` (plus an optional `entrysubtype`) to an internal
     * bib type. Unknown types yield an `unknown_type` warning and "misc".
     */
    bibType(): string {
        let biblatexType = this.currentType
        let biblatexSubtype = this.currentRawFields?.entrysubtype || false
        if (biblatexType in BiblatexAliasTypes) {
            const aliasType: string[] = (
                BiblatexAliasTypes as Record<string, string[]>
            )[biblatexType]
            biblatexType = aliasType[0]
            if (aliasType.length > 1) {
                biblatexSubtype = aliasType[1]
            }
        }

        let bibType
        if (
            biblatexType in BibTypes &&
            (!biblatexSubtype ||
                BibTypes[biblatexType]["biblatex-subtype"] === biblatexSubtype)
        ) {
            bibType = biblatexType
        } else {
            bibType = Object.keys(BibTypes).find((bType) => {
                return (
                    BibTypes[bType]["biblatex"] === biblatexType &&
                    (!biblatexSubtype ||
                        BibTypes[bType]["biblatex-subtype"] ===
                            biblatexSubtype)
                )
            })
        }

        if (typeof bibType === "undefined") {
            this.warning({
                type: "unknown_type",
                type_name: biblatexType,
            })
            bibType = "misc"
        }

        return bibType
    }

    /**
     * Parses one bibliography entry (everything between the opening and
     * closing delimiter) and appends it to `entries`.
     */
    createNewEntry(): void {
        const currentEntry: EntryObject = {
            bib_type: "",
            entry_key: this.key(true),
            fields: {},
        }
        // Make the new entry current *before* its fields are parsed, so that
        // errors and warnings raised while reading them (and the
        // `incomplete` flag) refer to this entry and not the previous one.
        this.currentEntry = currentEntry
        this.currentRawFields = {}
        this.entries.push(currentEntry)
        if (currentEntry["entry_key"].length) {
            this.match(",")
        }
        this.keyValueList()
        this.endPosition = this.pos
        currentEntry["bib_type"] = this.bibType()
        if (this.config.includeLocation) {
            currentEntry["location"] = {
                start: this.startPosition,
                end: this.endPosition,
            }
        }
        if (this.config.includeRawText) {
            // +1 to include the closing delimiter, which has not been
            // consumed yet.
            currentEntry["raw_text"] = this.input.substring(
                this.startPosition,
                this.endPosition + 1
            )
        }
        this.processFields()
    }

    /**
     * Consumes `@name` and stores the lower-cased name in `currentType`.
     *
     * @returns `"@" + name`, or `null` if no name follows the `@`.
     */
    directive(): string | null {
        this.match("@")
        this.currentType = this.key()
        if (!this.currentType.length) return null
        this.currentType = this.currentType.toLowerCase()
        return "@" + this.currentType
    }

    /** Parses the body of an `@string` and stores the variable. */
    string(): void {
        const kv = this.keyEqualsValue(true)
        if (kv) {
            this.strings[kv[0].toUpperCase()] = kv[1]
        }
    }

    /** Parses (and discards) the body of an `@preamble`. */
    preamble(): void {
        this.value()
    }

    /**
     * Replaces TeX special character sequences in the input with their
     * unicode equivalents and collapses runs of spaces.
     */
    replaceTeXChars(): void {
        let value = this.input
        for (const texChar of TeXSpecialChars) {
            // Nothing left to replace once no backslash remains.
            if (!hasbackslash.test(value)) break
            value = value.replace(texChar.tex, texChar.unicode)
        }
        // Delete multiple spaces
        this.input = value.replace(/ +(?= )/g, "")
    }

    /** Parses all directives in the input synchronously. */
    stepThroughBibtex(): void {
        while (this.skipToNext()) {
            this.parseNext()
        }
    }

    /**
     * Parses all directives in the input, continuing with the next
     * directive from a promise callback after each one.
     */
    stepThroughBibtexAsync(): Promise<null> {
        return this.skipToNext()
            ? new Promise((resolve) => resolve(this.parseNext())).then(() =>
                  this.stepThroughBibtexAsync()
              )
            : Promise.resolve(null)
    }

    /** Parses the directive starting at the current `@`. */
    parseNext(): void {
        let closer: string | null
        this.startPosition = this.pos
        const d = this.directive()
        if (!d) return

        if (this.tryMatch("{")) {
            this.match("{")
            closer = "}"
        } else if (this.tryMatch("(")) {
            // apparently, references can also be surrounded with round braces
            this.match("(")
            closer = ")"
        } else if (d === "@comment") {
            // braceless comments are a thing it appears
            closer = null
        } else {
            // Records the token_mismatch error for the missing brace.
            this.match("{")
            closer = "}"
        }

        if (d == "@string") {
            this.string()
        } else if (d == "@preamble") {
            this.preamble()
        } else if (d == "@comment") {
            this.parseComment(!closer)
        } else {
            this.createNewEntry()
        }

        if (closer) this.match(closer)
    }

    /**
     * Parses the body of an `@comment`.
     *
     * Braceless comments are skipped up to the end of the line. Braced
     * comments are passed to the group parser; if that yields no groups, the
     * comment is stored as JabRef metadata or, with `processComments`, as a
     * plain comment.
     */
    parseComment(braceless: boolean): void {
        const start = this.pos
        let braces = 1

        if (braceless) {
            while (
                this.input.length > this.pos &&
                this.input[this.pos] != "\n"
            ) {
                this.pos++
            }
        } else {
            while (this.input.length > this.pos && braces > 0) {
                switch (this.input[this.pos]) {
                    case "{":
                        braces += 1
                        break
                    case "}":
                        braces -= 1
                }
                this.pos++
            }
        }

        // no ending brace found
        if (braceless || braces !== 0) {
            return
        }

        // leave the ending brace for the main parser to pick up
        this.pos--
        let comment = this.input.substring(start, this.pos)
        this.groupParser.checkString(comment)
        if (this.groupParser.groups.length) {
            this.groups = this.groupParser.groups
        } else {
            comment = comment.trim()
            const m = comment.match(/^jabref-meta: ([a-zA-Z]+):(.*);$/)
            if (m && m[1] !== "groupsversion") {
                this.jabrefMeta[m[1]] = m[2].replace(/\\(.)/g, "$1")
            } else if (comment && this.config.processComments) {
                this.comments.push(comment)
            }
        }
    }

    /** Copies `entries` into `bibDB` using 1-based numeric keys. */
    createBibDB(): void {
        this.entries.forEach((entry, index) => {
            // Start index from 1 to create less issues with testing
            this.bibDB[index + 1] = entry
        })
    }

    /**
     * Removes the internal marker characters that are still present in the
     * database. Every occurrence is replaced; variable markers always come
     * in pairs, so replacing only the first one would leave markers behind.
     */
    cleanDB(): void {
        this.bibDB = JSON.parse(
            JSON.stringify(this.bibDB)
                .replace(/\u0871/g, "\\\\") // Backslashes placed outside of literal fields
                .replace(/\u0870/g, "") // variable start/end outside of literal fields
        )
    }

    /**
     * Merges the fields of entry `parentKey` into entry `key`. The entry's
     * own fields win over inherited (renamed) fields, which win over the
     * parent's fields. Does nothing if either entry cannot be found.
     */
    _resolveCrossRef(key: string, parentKey: string): void {
        const entry = this.entries.find((e) => e.entry_key === key)
        const parent = this.entries.find((e) => e.entry_key === parentKey)
        if (!entry || !parent) {
            return
        }

        const { fields: entryFields, bib_type } = entry
        const { fields: parentFields, bib_type: parentType } = parent

        const inheritedFields: Record<string, unknown> = {}
        const inheritance =
            this.config.crossRefInheritance ?? DefaultCrossRefInheritance
        for (const ti of inheritance) {
            if (
                ti.source.includes(parentType) &&
                ti.target.includes(bib_type)
            ) {
                for (const fi of ti.fields) {
                    const field = fi.target
                    const bt = BibTypes[bib_type]
                    if (
                        bt.required.includes(field) ||
                        bt.optional.includes(field) ||
                        bt.eitheror.includes(field)
                    ) {
                        inheritedFields[field] = parentFields[fi.source]
                    }
                }
            }
        }
        entry.fields = {
            ...parentFields,
            ...inheritedFields,
            ...entryFields,
        }
    }

    /**
     * Resolves all recorded crossrefs, parents before children.
     *
     * A crossref to an entry that does not exist records an
     * `unknown_crossref` error for that entry only; the remaining crossrefs
     * are still resolved. A circular reference records `circular_crossref`
     * and stops, as none of the remaining entries can be resolved.
     */
    _resoveAllCrossRefs(): void {
        const toResolve = new Set<string>(Object.keys(this.crossrefs))
        while (toResolve.size > 0) {
            // Pick an entry whose parent is not itself waiting for resolution.
            const key = [...toResolve.values()].find(
                (k) => !toResolve.has(this.crossrefs[k])
            )
            if (key === undefined) {
                const entry = toResolve.values().next().value
                // TODO: More precise error
                this.errors.push({ type: "circular_crossref", entry })
                return
            }
            toResolve.delete(key)
            const parent = this.crossrefs[key]
            if (!this.entries.some((e) => e.entry_key === parent)) {
                this.errors.push({
                    type: "unknown_crossref",
                    entry: key,
                    value: parent,
                })
                continue
            }
            this._resolveCrossRef(key, parent)
        }
    }

    /** Finalises the database and assembles the parse result. */
    parsed(): BiblatexParseResult {
        this.createBibDB()
        this._resoveAllCrossRefs()
        this.cleanDB()
        return {
            entries: this.bibDB,
            errors: this.errors,
            warnings: this.warnings,
            comments: this.comments,
            strings: this.strings,
            jabref: {
                groups: this.groups,
                meta: this.jabrefMeta,
            },
        }
    }

    /** Parses the input synchronously. */
    parse(): BiblatexParseResult {
        this.replaceTeXChars()
        this.stepThroughBibtex()
        return this.parsed()
    }

    /** Parses the input, returning a promise for the result. */
    async parseAsync(): Promise<BiblatexParseResult> {
        this.replaceTeXChars()
        await this.stepThroughBibtexAsync()
        return this.parsed()
    }
}

/** Parses BibTeX/BibLaTeX source text synchronously. */
export function parse(
    input: string,
    config: ConfigObject = {}
): BiblatexParseResult {
    return new BibLatexParser(input, config).parse()
}

/** Parses BibTeX/BibLaTeX source text, returning a promise for the result. */
export function parseAsync(
    input: string,
    config: ConfigObject = {}
): Promise<BiblatexParseResult> {
    return new BibLatexParser(input, config).parseAsync()
}