// biblatex-csl-converter

import {
    BibTypes,
    BibFieldTypes,
    NodeArray,
    NameDictObject,
    NodeObject,
} from "../../const"
import { edtfParse } from "../../edtf-parser"
import { BibDB } from "../../import/biblatex"
import { toSentenceCase } from "./sentence-caser"

/** Opening and closing markup emitted around text that carries a mark. */
export interface TagEntry {
    open: string
    close: string
}

/** Lookup from mark name to the markup used for it. */
export type Tags = Record<string, TagEntry>

// Rich-text markup written into CSL string values, keyed by mark type.
// `undefined` is used for unresolved variables, which are shown in brackets.
const TAGS: Tags = {
    strong: { open: "<b>", close: "</b>" },
    em: { open: "<i>", close: "</i>" },
    sub: { open: "<sub>", close: "</sub>" },
    sup: { open: "<sup>", close: "</sup>" },
    smallcaps: {
        open: '<span style="font-variant:small-caps;">',
        close: "</span>",
    },
    nocase: { open: '<span class="nocase">', close: "</span>" },
    enquote: { open: "“", close: "”" },
    url: { open: "", close: "" },
    undefined: { open: "[", close: "]" },
}

// Used for marks that have no entry in TAGS, so that they render as plain text.
const NO_TAG: TagEntry = { open: "", close: "" }

/** Returns the markup for a mark, or empty markup when the mark is unknown. */
function tagFor(mark: string): TagEntry {
    // Own-property check so that names such as "constructor" are not resolved
    // through the prototype chain.
    const tag = Object.prototype.hasOwnProperty.call(TAGS, mark)
        ? TAGS[mark]
        : undefined
    return tag || NO_TAG
}

/** Parses a date part (number or numeric string) as a base-10 integer. */
function toInteger(value: unknown): number {
    return parseInt(String(value), 10)
}

type ConfigObject = {
    escapeText?: boolean // Whether to HTML-escape text content
    useEntryKeys?: boolean // Whether to output using the entry keys
    language?: string // Language of the citation
}

type ErrorObject = {
    type: string
    variable: string
}

type CSLDateObject = {
    "date-parts"?: [number[]] | [number[], number[]]
    circa?: boolean
}

type CSLNameObject = {
    literal?: string
    given?: string
    family?: string
    suffix?: string
    "non-dropping-particle"?: string
    "dropping-particle"?: string
}

export interface CSLEntry {
    id?: string
    [key: string]: unknown
}

export type CSLOutput = Record<string, CSLEntry>

/**
 * Converts a BibDB to a DB of the CSL type.
 *
 * Problems that do not stop the conversion (currently: unresolved variables
 * inside text) are collected in `errors`.
 *
 * @param bibDB The bibliography database to convert.
 * @param pks Ids of the entries to export; `false` exports every entry.
 * @param config Export options.
 */
export class CSLExporter {
    bibDB: BibDB
    pks: string[]
    config: ConfigObject
    cslDB: Record<string, CSLEntry>
    errors: ErrorObject[]

    constructor(
        bibDB: BibDB,
        pks: string[] | false = false,
        config: ConfigObject = {}
    ) {
        this.bibDB = bibDB
        // A list of pk values of the bibliography items to be exported.
        // If none are selected, all keys are exported.
        this.pks = pks ? pks : Object.keys(bibDB)
        this.config = config
        this.cslDB = {}
        this.errors = []
    }

    /**
     * Converts every selected entry and stores the result in `cslDB`.
     *
     * @returns The CSL database, keyed by entry key (when `useEntryKeys` is
     * set and the entry has one) or by BibDB id.
     */
    parse(): CSLOutput {
        // Set lookup keeps the selection test constant-time per entry.
        const selected = new Set(this.pks)
        for (const bibId in this.bibDB) {
            if (!selected.has(bibId)) {
                continue
            }
            const id = this.config.useEntryKeys
                ? this.bibDB[bibId].entry_key || bibId
                : bibId
            const entry = this.getCSLEntry(bibId)
            entry.id = id
            this.cslDB[id] = entry
        }
        return this.cslDB
    }

    /**
     * Converts one BibDB entry to CSL format.
     *
     * @param id The id identifying the bibliography entry.
     * @returns The CSL entry (without `id`), or an empty object when the
     * entry is missing, has no fields or has an unknown type.
     */
    getCSLEntry(id: string): CSLEntry {
        const bib = this.bibDB[id as unknown as number]
        const fValues: CSLEntry = {}
        if (
            !bib ||
            !bib.fields ||
            !bib.bib_type ||
            !BibTypes[bib.bib_type]
        ) {
            return fValues
        }
        // If there's a language field, use it for deciding whether to apply
        // sentence casing. The language is kept local to this entry so that
        // it neither alters the caller's config nor leaks into later entries.
        // A langid that is not a plain string falls back to the configured
        // language.
        const langid: unknown = bib.fields.langid
        const language =
            typeof langid === "string" && langid.length
                ? langid
                : this.config.language

        for (const fKey in bib.fields) {
            if (
                bib.fields[fKey] === "" ||
                !(fKey in BibFieldTypes) ||
                !("csl" in BibFieldTypes[fKey])
            ) {
                continue
            }
            const fValue = bib.fields[fKey]
            const fType = BibFieldTypes[fKey]["type"]
            // The CSL variable name is either fixed or depends on the entry
            // type, with "*" as the catch-all.
            const csl = BibFieldTypes[fKey].csl!
            let key: string
            if (typeof csl === "string") {
                key = csl
            } else if (csl[bib.bib_type]) {
                key = csl[bib.bib_type]
            } else {
                key = csl["*"]
            }
            switch (fType) {
                case "f_date": {
                    const reformedDate = this._reformDate(fValue as string)
                    if (reformedDate) {
                        fValues[key] = reformedDate
                    }
                    break
                }
                case "f_integer":
                    fValues[key] = this._reformInteger(fValue)
                    break
                case "f_key":
                    fValues[key] = this._reformKey(fValue, fKey)
                    break
                case "f_literal":
                case "f_long_literal":
                    fValues[key] = this._reformText(fValue)
                    break
                case "l_range":
                    fValues[key] = this._reformRange(fValue)
                    break
                case "f_title":
                    fValues[key] = this._reformTitle(fValue, language)
                    break
                case "f_uri":
                case "f_verbatim":
                    fValues[key] = fValue
                    break
                case "l_key":
                    fValues[key] = (fValue as (string | NodeArray)[])
                        .map((item: string | NodeArray) =>
                            this._reformKey(item, fKey)
                        )
                        .join(" and ")
                    break
                case "l_literal":
                    fValues[key] = (fValue as NodeArray[])
                        .map((text: NodeArray) => this._reformText(text))
                        .join(", ")
                    break
                case "l_name":
                    fValues[key] = this._reformName(
                        fValue as NameDictObject[]
                    )
                    break
                case "l_tag":
                    fValues[key] = (fValue as string[]).join(", ")
                    break
                default:
                    console.warn(`Unrecognized field type: ${fType}!`)
            }
        }
        fValues["type"] = BibTypes[bib.bib_type].csl
        return fValues
    }

    /**
     * Converts a key field value.
     *
     * @param theValue A key string, or rich text for free-form values.
     * @param fKey Name of the field the value belongs to.
     * @returns The CSL value of the key. Keys of fields whose options are a
     * plain list, and keys that are not among the field's options, are
     * returned unchanged; rich text is rendered with `_reformText`.
     */
    _reformKey(theValue: string | unknown, fKey: string): string {
        if (typeof theValue !== "string") {
            return this._reformText(theValue)
        }
        const fieldType = BibFieldTypes[fKey]
        const options = fieldType ? fieldType["options"] : undefined
        if (!options || Array.isArray(options)) {
            return theValue
        }
        // Fall back to the raw key rather than throwing on an unknown option.
        const option = options[theValue]
        return option ? option["csl"] : theValue
    }

    /**
     * Renders a list of intervals as a comma-separated string, dropping
     * intervals that render empty. Non-array input yields "".
     */
    _reformRange(theValue: unknown): string {
        if (!Array.isArray(theValue)) {
            console.warn(`Wrong format for range`, theValue)
            return ""
        }
        return theValue
            .map((interval) => this._reformInterval(interval))
            .filter((interval) => interval.length)
            .join(",")
    }

    /**
     * Renders the parts of one interval joined by "-". Non-array input
     * yields "".
     */
    _reformInterval(theValue: unknown): string {
        if (!Array.isArray(theValue)) {
            console.warn(`Wrong format for interval`, theValue)
            return ""
        }
        return theValue.map((text) => this._reformText(text)).join("-")
    }

    /**
     * Renders an integer field.
     *
     * @returns A number when the rendered text is exactly the decimal form
     * of an integer, otherwise the rendered text itself.
     */
    _reformInteger(theValue: unknown): string | number {
        const theString = this._reformText(theValue)
        const theInt = parseInt(theString, 10)
        if (theString !== String(theInt)) {
            return theString
        }
        return theInt
    }

    /**
     * Escapes the characters `& < > ' "` as HTML entities. Non-string input
     * yields "".
     */
    _escapeText(theValue: unknown): string {
        if (typeof theValue !== "string") {
            console.warn(`Wrong format for escapeText`, theValue)
            return ""
        }
        // "&" goes first so that the entities produced by the later
        // replacements are not escaped a second time.
        return theValue
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/'/g, "&apos;")
            .replace(/"/g, "&quot;")
    }

    /**
     * Renders a title, applying sentence casing when the language is English
     * or not specified. Text marked "nocase" keeps its original casing.
     *
     * @param theValue The rich text nodes of the title.
     * @param language Language used to decide on sentence casing; defaults
     * to the configured language.
     * @returns The rendered title, or "" for non-array input.
     */
    _reformTitle(
        theValue: unknown,
        language: string | undefined = this.config.language
    ): string {
        if (!Array.isArray(theValue)) {
            console.warn(`Wrong format for reformTitle`, theValue)
            return ""
        }
        // Get the plain text to potentially apply sentence casing
        const plainText: string = theValue.reduce(
            (text: string, node: NodeObject) =>
                "text" in node ? text + node.text : text,
            ""
        )
        // Only apply sentence casing for English or unspecified language.
        // All English variants among the language keys end in "english".
        const isEnglishLanguage =
            !language || language.toLowerCase().endsWith("english")
        // The cased text is consumed node by node, in step with the original
        // text.
        let remaining = isEnglishLanguage
            ? toSentenceCase(plainText)
            : plainText
        return this._renderNodes(theValue, (text, marks) => {
            const cased = remaining.slice(0, text.length)
            remaining = remaining.slice(text.length)
            return marks.includes("nocase") ? text : cased
        })
    }

    /**
     * Renders rich text nodes to a string with inline markup.
     *
     * @returns The rendered text, or "" for non-array input.
     */
    _reformText(theValue: unknown): string {
        if (!Array.isArray(theValue)) {
            console.warn(`Wrong format for reformText`, theValue)
            return ""
        }
        return this._renderNodes(theValue, (text) => text)
    }

    /**
     * Shared renderer behind `_reformText` and `_reformTitle`: walks the
     * nodes, opening and closing markup as the marks change between
     * neighbouring nodes.
     *
     * @param nodes The rich text nodes to render.
     * @param textOf Maps a node's text (and its marks) to the text to emit.
     * @returns The rendered string.
     */
    _renderNodes(
        nodes: NodeObject[],
        textOf: (text: string, marks: string[]) => string
    ): string {
        let html = ""
        let lastMarks: string[] = []

        nodes.forEach((node: NodeObject) => {
            if (node.type === "variable") {
                // This is an undefined variable
                // This should usually not happen, as CSL doesn't know what to
                // do with these. We'll put them into an unsupported tag.
                html += `${TAGS.undefined.open}${node.attrs!.variable}${
                    TAGS.undefined.close
                }`
                this.errors.push({
                    type: "undefined_variable",
                    variable: node.attrs!.variable as string,
                })
                return
            }
            const newMarks: string[] = node.marks
                ? node.marks.map((mark) => mark.type)
                : []
            // Marks are nested in order, so only the leading run shared with
            // the previous node can stay open.
            let shared = 0
            while (
                shared < lastMarks.length &&
                shared < newMarks.length &&
                lastMarks[shared] === newMarks[shared]
            ) {
                shared++
            }
            // close all tags that are not present in current text node,
            // innermost first.
            for (let i = lastMarks.length - 1; i >= shared; i--) {
                html += tagFor(lastMarks[i]).close
            }
            // open all new tags that were not present in the last text node.
            for (let i = shared; i < newMarks.length; i++) {
                html += tagFor(newMarks[i]).open
            }
            if ("text" in node) {
                const text = textOf(node.text, newMarks)
                html += this.config.escapeText
                    ? this._escapeText(text)
                    : text
            }
            lastMarks = newMarks
        })
        // Close all still open tags
        for (let i = lastMarks.length - 1; i >= 0; i--) {
            html += tagFor(lastMarks[i]).close
        }
        return html
    }

    /**
     * Converts an EDTF date string to a CSL date.
     *
     * @returns The CSL date, or `false` when the string is not a valid date
     * or both ends of an interval are unknown.
     */
    _reformDate(dateStr: string): false | CSLDateObject {
        const dateObj = edtfParse(dateStr)
        if (!dateObj.valid) {
            return false
        }
        const reformedDate: CSLDateObject = {}
        if (
            dateObj.values.length > 1 &&
            Array.isArray(dateObj.values[0]) &&
            Array.isArray(dateObj.values[1])
        ) {
            if (
                dateObj.values[0][0] === 0 &&
                !String(dateObj.values[1]).replace(/9/g, "").length
            ) {
                // both from and to year are completely unknown.
                return false
            }
            const intervalFrom: Array<number | string> = dateObj.values[0],
                intervalTo: Array<number | string> = dateObj.values[1]
            // Only year, month and day are carried over.
            const intervalDateParts: [number[], number[]] = [
                intervalFrom.slice(0, 3).map((value) => toInteger(value)),
                intervalTo.slice(0, 3).map((value) => toInteger(value)),
            ]
            reformedDate["date-parts"] = intervalDateParts
        } else {
            const values: number[] = dateObj.values
                .slice(0, 3)
                .map((value) => toInteger(value))
            reformedDate["date-parts"] = [values]
            if (dateObj.type === "Interval") {
                // Open interval that we cannot represent, so we make it circa instead.
                reformedDate["circa"] = true
            }
        }
        if (dateObj.uncertain || dateObj.approximate) {
            reformedDate["circa"] = true
        }
        return reformedDate
    }

    /**
     * Converts a list of names to CSL names.
     *
     * Literal names become `literal`; literal names that render empty are
     * dropped. Other names are split into given/family/suffix, with the
     * prefix stored as non-dropping or dropping particle depending on
     * `useprefix`.
     */
    _reformName(theNames: NameDictObject[]): CSLNameObject[] {
        const names: CSLNameObject[] = []
        for (const name of theNames) {
            const reformedName: CSLNameObject = {}
            if (name.literal) {
                const literal = this._reformText(name.literal)
                if (!literal.length) {
                    continue
                }
                reformedName["literal"] = literal
            } else {
                reformedName["given"] = this._reformText(name.given!)
                reformedName["family"] = this._reformText(name.family!)
                if (name.suffix) {
                    reformedName["suffix"] = this._reformText(name.suffix)
                }
                if (name.prefix) {
                    const particle = this._reformText(name.prefix)
                    if (name.useprefix) {
                        reformedName["non-dropping-particle"] = particle
                    } else {
                        reformedName["dropping-particle"] = particle
                    }
                }
            }
            names.push(reformedName)
        }
        return names
    }
}