UNPKG

biblatex-csl-converter

Version:

Bibliography format converter: BibLaTeX, BibTeX, CSL-JSON, RIS, ENW, EndNote XML, Citavi, DOCX citations, ODT citations — parse, convert, and export with round-trip fidelity

257 lines (232 loc) 8.74 kB
import type { EntryObject, GroupObject, NodeObject } from "../const" type StringStartTuplet = [string, () => void] type WarningObject = { type: string group_type: string } export class GroupParser { groups: GroupObject[] groupType: string warnings: WarningObject[] entries: EntryObject[] stringStarts: StringStartTuplet[] pos: number fileDirectory: string input: string constructor(entries: EntryObject[]) { this.groups = [] this.groupType = "jabref4" this.warnings = [] this.entries = entries this.pos = 0 this.fileDirectory = "" this.input = "" this.stringStarts = [ [ "jabref-meta: databaseType:bibtex;", () => { this.groupType = "jabref4" }, ], [ "jabref-meta: databaseType:biblatex;", () => { this.groupType = "jabref4" }, ], [ "jabref-meta: groupsversion:3;", () => { this.groupType = "jabref3" }, ], ["jabref-meta: grouping:", () => this.readGroupInfo("jabref4.1")], ["jabref-meta: groupstree:", () => this.readGroupInfo("")], //@retorquere: There seems to be a string missing ["jabref-meta: fileDirectory:", () => this.readFileDirectory()], ] } checkString(input: string): void { this.input = input //let searchPos = 0 this.pos = 0 this.stringStarts.find((stringStart) => { const pos = input.indexOf(stringStart[0], this.pos) if (pos < 0) { return false } else { this.pos = pos + stringStart[0].length stringStart[1]() return true } }) } readGroupInfo(groupType: string): void { if (groupType) this.groupType = groupType switch (this.groupType) { case "jabref3": this.readJabref3() break case "jabref4": case "jabref4.1": this.readJabref4() break default: break } } readFileDirectory(): void { let fileDirectory = "", input = this.input ? this.input : "", pos = this.pos while (input.length > pos && input[pos] !== ";") { fileDirectory += input[pos] pos++ } this.fileDirectory = fileDirectory this.pos = pos } readJabref3(): void { /* The JabRef Groups format is... interesting. To parse it, you must: 1. Unwrap the lines (just remove the newlines) 2. Split the lines on ';' (but not on '\;') 3. Each line is a group which is formatted as follows: <level> <type>:<name>\;<intersect>\;<citekey1>\;<citekey2>\;.... Each level can interact with the level it is nested under; either no interaction (intersect = 0), intersection (intersect = 1) or union (intersect = 2). There are several group types: root-level (all references are implicitly available on the root level), ExplicitGroup (the citation keys are listed in the group line) or query-type groups. I have only implemented explicit groups. */ // skip any whitespace after the identifying string */ while ( this.input.length > this.pos && "\r\n ".indexOf(this.input[this.pos]) >= 0 ) { this.pos++ } // simplify parsing by taking the whole comment, throw away newlines, replace the escaped separators with tabs, and // then split on the remaining non-escaped separators // I use \u2004 to protect \; and \u2005 to protect \\\; (the escaped version of ';') when splitting lines at ; let lines = this.input .substring(this.pos) .replace(/[\r\n]/g, "") .replace(/\\\\\\;/g, "\u2005") .replace(/\\;/g, "\u2004") .split(";") lines = lines.map((line) => line.replace(/\u2005/g, ";")) const levels: { [key: number]: GroupObject } = { "0": { name: "", references: [], groups: [] } } for (const line of lines) { if (line === "") { continue } const match = line.match(/^([0-9])\s+([^:]+):(.*)/) if (!match) { return } const level = parseInt(match[1], 10) const type = match[2] const referenceMatch = match[3] const references = referenceMatch ? referenceMatch.split("\u2004").filter((key) => key) : [] const name = references.shift()! const intersection = references.shift() // 0 = independent, 1 = intersection, 2 = union // ignore root level, has no refs anyway in the comment if (level === 0) { continue } // remember this group as the current `level` level, so that any following `level + 1` levels can find it levels[level] = { name, groups: [], references } // and add it to its parent levels[level - 1].groups.push(levels[level]) // treat all groups as explicit if (type !== "ExplicitGroup") { this.warnings.push({ type: "unsupported_jabref_group", group_type: type, }) } switch (intersection) { case "0": // do nothing more break case "1": // intersect with parent. Hardly ever used. levels[level].references = levels[level].references.filter( (key) => levels[level - 1].references.includes(key), ) break case "2": // union with parent levels[level].references = [ ...new Set([ ...levels[level].references, ...levels[level - 1].references, ]), ] break } } this.groups = levels["0" as unknown as number].groups } clearGroups(groups: GroupObject[]): void { for (const group of groups) { group.references = [] this.clearGroups(group.groups || []) } } readJabref4(): void { this.readJabref3() if (this.groupType === "jabref4.1") { this.clearGroups(this.groups) } // this assumes the JabRef groups always come after the references this.entries.forEach((bib) => { if (!bib.unknown_fields?.groups || !bib.entry_key) { return } // this assumes ref.unknown_fields.groups is a single text chunk const groups = bib.unknown_fields.groups .reduce((string: string, node: NodeObject) => { if ("text" in node) { const text: string = node.text, // undo undescores to marks -- groups content is in verbatim-ish mode sub = (node.marks || []).find( (mark) => mark.type === "sub", ) ? "_" : "" string += sub + text } return string }, "") .trim() if (bib.unknown_fields) { delete bib.unknown_fields.groups } if (!groups.length) { return } groups.split(/\s*,\s*/).forEach((groupName) => { const group = this.find(groupName) if (group) { group.references.push(bib.entry_key) } }) }) } find(name: string, groups: GroupObject[] | undefined): GroupObject | false { groups = groups || this.groups if (!groups) { return false } for (let i = 0; i < groups.length; i++) { if (groups[i].name === name) return groups[i] const group = this.find(name, groups[i].groups) if (group) return group } return false } }