biblatex-csl-converter

Version:

Bibliography format converter: BibLaTeX, BibTeX, CSL-JSON, RIS, ENW, EndNote XML, Citavi, DOCX citations, ODT citations — parse, convert, and export with round-trip fidelity

github.com/fiduswriter/biblatex-csl-converter

fiduswriter/biblatex-csl-converter

1,440 lines (1,422 loc) • 76.1 kB

TypeScript

interface UnknownFieldsObject { groups?: NodeObject[]; [key: string]: NodeObject[] | RangeArray[] | NodeArray[] | Array<NodeArray | string> | NameDictObject[] | string | undefined; } interface MarkObject { type: string; } interface OtherNodeObject { type: string; marks?: MarkObject[]; attrs?: Record<string, unknown>; } interface TextNodeObject { type: "text"; text: string; marks?: MarkObject[]; attrs?: Record<string, unknown>; } type NodeObject = OtherNodeObject | TextNodeObject; type NodeArray = NodeObject[]; interface EntryLocation { start: number; end: number; } interface EntryObject { entry_key: string; incomplete?: boolean; bib_type: string; location?: EntryLocation; raw_text?: string; fields: Record<string, unknown>; unexpected_fields?: Record<string, unknown>; unknown_fields?: UnknownFieldsObject; } type NameDictObject = { literal?: NodeArray; family?: NodeArray; given?: NodeArray; prefix?: NodeArray; suffix?: NodeArray; useprefix?: boolean; }; type GroupObject = { name: string; references: string[]; groups: GroupObject[]; }; type RangeArray = [NodeArray, NodeArray] | [NodeArray]; interface LangidOptions$1 { [key: string]: { csl: string; biblatex: string; }; } interface BibFieldType { type: string; biblatex: string; csl?: string | Record<string, string>; options?: string[] | LangidOptions$1; strict?: boolean; } /** A list of field types of the Bibliography DB with lookup by field name. */ declare const BibFieldTypes: Record<string, BibFieldType>; interface BibType { order: number; biblatex: string; csl: string; required: string[]; eitheror: string[]; optional: string[]; "biblatex-subtype"?: string; } /** A list of all bib types and their fields. 
*/ declare const BibTypes: Record<string, BibType>; type SimpleDateArray = Array<string | number>; type DateArray = readonly (string | number | SimpleDateArray)[]; interface EDTFOutputObject { type: string; valid: boolean; values: DateArray; cleanedString: string; uncertain: boolean; approximate: boolean; } declare function edtfParse(dateString: string): EDTFOutputObject; interface TypeInheritance { source: string[]; target: string[]; fields: FieldInheritance[]; } interface FieldInheritance { source: string; target: string; } type StringStartTuplet = [string, () => void]; type WarningObject$1 = { type: string; group_type: string; }; declare class GroupParser { groups: GroupObject[]; groupType: string; warnings: WarningObject$1[]; entries: EntryObject[]; stringStarts: StringStartTuplet[]; pos: number; fileDirectory: string; input: string; constructor(entries: EntryObject[]); checkString(input: string): void; readGroupInfo(groupType: string): void; readFileDirectory(): void; readJabref3(): void; clearGroups(groups: GroupObject[]): void; readJabref4(): void; find(name: string, groups: GroupObject[] | undefined): GroupObject | false; } /** Parses files in BibTeX/BibLaTeX format */ interface ConfigObject$2 { /** * - processUnknown (object [specifying content type for specific unknown]): * * Processes fields with names that are unknown, adding them to an `unknown_fields` * object to each entry. * * example: * > a = new BibLatexParser(..., {processUnknown: true}) * > a.output * { * "0:": { * ... * unknown_fields: { * ... * } * } * } * * > a = new BibLatexParser(..., {processUnknown: {commentator: 'l_name'}}) * > a.output * { * "0:": { * ... * unknown_fields: { * commentator: [ * { * given: ..., * family: ... * } * ] * ... * } * } * } */ processUnknown?: boolean | Record<string, string>; /** * Processes fields with names that are known, but are not expected for the given bibtype, * adding them to an `unexpected_fields` object to each entry. 
*/ processUnexpected?: boolean; processInvalidURIs?: boolean; processComments?: boolean; /** * Add the source location as a `location` object on each entry * * example: * > a = new BibLatexParser(..., {includeLocation: true}) * > a.output * { * "0:": { * ... * location: { * start: 1, * end: 42 * } * } * } */ includeLocation?: boolean; /** * Add the source text as a `raw_text` property on each entry * * example: * > a = new BibLatexParser(..., {includeRawText: true}) * > a.output * { * "0:": { * ... * raw_text: '@article{...}' * } * } */ includeRawText?: boolean; crossRefInheritance?: TypeInheritance[]; includeUnusedNocase?: boolean; } interface ErrorObject$9 { type: string; expected?: string; found?: string; line?: number; key?: string; entry?: string; field?: string; field_name?: string; alias_of?: string; alias_of_value?: unknown; value?: string[] | string; variable?: string; type_name?: string; } interface MatchOptionsObject { skipWhitespace: string | boolean; } interface BiblatexParseResult { entries: { [key: number]: EntryObject; }; errors: ErrorObject$9[]; warnings: ErrorObject$9[]; comments: string[]; strings: Record<string, string>; jabref: { groups: GroupObject[] | false; meta: Record<string, string>; }; } interface BibDB { [key: number]: EntryObject; } declare class BibLatexParser { input: string; config: ConfigObject$2; pos: number; startPosition: number; endPosition: number; entries: EntryObject[]; currentKey: string | false; currentEntry?: EntryObject; currentType: string; currentRawFields?: Record<string, unknown>; bibDB: BibDB; errors: ErrorObject$9[]; warnings: ErrorObject$9[]; months: { JAN: string; FEB: string; MAR: string; APR: string; MAY: string; JUN: string; JUL: string; AUG: string; SEP: string; OCT: string; NOV: string; DEC: string; }; strings: Record<string, string>; comments: string[]; groupParser: GroupParser; groups: GroupObject[] | false; jabrefMeta: Record<string, string>; /** NOTE(review): `meta` is typed `number` here, whereas `jabrefMeta` and `BiblatexParseResult.jabref.meta` are `Record<string, string>` — confirm which type is intended. */ jabref?: { groups: GroupObject[] | false; meta: number; }; 
crossrefs: Record<string, string>; constructor(input: string, config?: ConfigObject$2); isWhitespace(s: string): boolean; error(data: ErrorObject$9): void; warning(data: ErrorObject$9): void; match(s: string, options?: MatchOptionsObject): void; tryMatch(s: string): boolean; skipWhitespace(): void; skipToNext(): boolean; valueBraces(): string; valueQuotes(): string; singleValue(): string; value(asis?: boolean): string; key(optional?: boolean): string; keyEqualsValue(asis?: boolean): [string, string] | false; keyValueList(): void; processFields(): void; _reformKey(keyString: string, fKey: string): string | NodeArray; _checkURI(uriString: string): boolean; _reformURI(uriString: string): string; _reformNameList(nameString: string): NameDictObject[]; _reformRange(rangeString: string): RangeArray[]; _reformLiteral(theValue: string, cpMode?: boolean): NodeArray; bibType(): string; createNewEntry(): void; directive(): string | null; string(): void; preamble(): void; replaceTeXChars(): void; stepThroughBibtex(): void; stepThroughBibtexAsync(): Promise<null>; parseNext(): void; parseComment(braceless: boolean): void; createBibDB(): void; cleanDB(): void; _resolveCrossRef(key: string, parentKey: string): void; _resoveAllCrossRefs(): void; parsed(): BiblatexParseResult; parse(): BiblatexParseResult; parseAsync(): Promise<BiblatexParseResult>; } declare function parse(input: string, config?: ConfigObject$2): BiblatexParseResult; declare function parseAsync(input: string, config?: ConfigObject$2): Promise<BiblatexParseResult>; type ConfigObject$1 = { traditionalNames?: boolean; exportUnexpectedFields?: boolean; }; type BibObject = { type: string; key: string; values?: Record<string, unknown>; }; type WarningObject = { type: string; variable: string; }; declare class BibLatexExporter { bibDB: BibDB; pks: string[]; config: ConfigObject$1; warnings: WarningObject[]; bibtexStr: string; bibtexArray: BibObject[]; constructor(bibDB: BibDB, pks?: string[] | false, config?: 
ConfigObject$1); parse(): string; _reformKey(theValue: string | unknown, fKey: string): string; _reformRange(theValue: unknown): string; _reformInterval(theValue: unknown): string; _reformName(theValue: unknown): string; _protectNamePart(namePart: string): string; _escapeTeX(theValue: unknown): string; _reformText(theValue: unknown): string; _getBibtexString(biblist: BibObject[]): string; } type ConfigObject = { escapeText?: boolean; useEntryKeys?: boolean; language?: string; }; type ErrorObject$8 = { type: string; variable: string; }; type CSLDateObject = { "date-parts"?: [number[]] | [number[], number[]]; circa?: boolean; }; type CSLNameObject = { literal?: string; given?: string; family?: string; suffix?: string; "non-dropping-particle"?: string; "dropping-particle"?: string; }; interface CSLEntry$1 { id?: string; [key: string]: unknown; } type CSLOutput = Record<string, CSLEntry$1>; declare class CSLExporter { bibDB: BibDB; pks: string[]; config: ConfigObject; cslDB: Record<string, CSLEntry$1>; errors: ErrorObject$8[]; constructor(bibDB: BibDB, pks?: string[] | false, config?: ConfigObject); parse(): CSLOutput; /** Converts one BibDB entry to CSL format. * @function getCSLEntry * @param id The id identifying the bibliography entry. */ getCSLEntry(id: string): CSLEntry$1; _reformKey(theValue: string | unknown, fKey: string): string; _reformRange(theValue: unknown): string; _reformInterval(theValue: unknown): string; _reformInteger(theValue: unknown): string | number; _escapeText(theValue: unknown): string; _reformTitle(theValue: unknown): string; _reformText(theValue: unknown): string; _reformDate(dateStr: string): false | CSLDateObject; _reformName(theNames: NameDictObject[]): CSLNameObject[]; } /** * Type definitions for the i18n module. 
* * Kept in a separate file so that `locales.ts` (which is auto-generated) can * import the `Locale` interface without creating a circular dependency with * `index.ts` (which in turn imports the locale objects from `locales.ts`). */ /** Human-readable labels for every supported BibLaTeX / CSL field key. */ type FieldTitles = Record<string, string>; /** * Help / hint text for selected fields. Only fields that benefit from extra * explanation have an entry here (date format, name-prefix convention, …). */ type FieldHelp = Record<string, string>; /** Human-readable labels for every supported reference-type key. */ type TypeTitles = Record<string, string>; /** * Per-reference-type overrides for field labels. * * Outer key = reference-type key (e.g. `"video"`). * Inner key = field key (e.g. `"author"`). * Value = label to use instead of the generic one in `fieldTitles`. */ type FieldTitlesByType = Record<string, Record<string, string>>; /** * Human-readable labels for every value valid in the BibLaTeX `langid` field, * including BibTeX-level aliases (e.g. `pinyin`, `american`, `english`). */ type LangidOptions = Record<string, string>; /** * Human-readable labels for option values used in fields other than `langid`: * `editortype`, `pagination`, `pubstate`, and the `type` sub-field. */ type OtherOptions = Record<string, string>; /** A complete locale object — one per language JSON file. */ interface Locale { fieldTitles: FieldTitles; fieldHelp: FieldHelp; typeTitles: TypeTitles; fieldTitlesByType: FieldTitlesByType; langidOptions: LangidOptions; otherOptions: OtherOptions; } /** * i18n module for biblatex-csl-converter * * Provides human-readable labels for reference types, field names, field help * text, and option values in multiple languages. 
* * ## Structure of a locale object * ```json * { * "fieldTitles": { "<fieldKey>": "<label>", … }, * "fieldHelp": { "<fieldKey>": "<help text>", … }, * "typeTitles": { "<typeKey>": "<label>", … }, * "fieldTitlesByType": { "<typeKey>": { "<fieldKey>": "<label>" }, … }, * "langidOptions": { "<langidKey>": "<label>", … }, * "otherOptions": { "<optionKey>": "<label>", … } * } * ``` * * ## Per-type field overrides (`fieldTitlesByType`) * Some field names carry a different human meaning depending on the reference * type. For example, the `author` field of a `video` entry is conventionally * labelled "Director(s)" rather than "Author(s)". Use `getFieldTitle()` which * checks `fieldTitlesByType[type][field]` first and falls back to * `fieldTitles[field]`. * * ## Option split * `langidOptions` covers every value valid for the BibLaTeX `langid` field * (including BibTeX-level aliases such as `pinyin`, `american`, `english`). * `otherOptions` covers editortype values, pagination values, pubstate values, * and `type` sub-field values (mathesis, phdthesis, techreport, …). * * ## Locale data * The locale data lives in `src/i18n/locales.ts`, which is auto-generated by: * npm run compile_i18n * * ## Supported languages * ar, bg, cs, de, en, es, fr, it, ja, ko, nl, pl, pt-BR, pt-PT, ru, sv, tr, zh */ /** * All built-in locales keyed by IETF language tag. * * Consumers that need a language not listed here can supply their own `Locale` * object — every public helper accepts a `Locale` directly. */ declare const locales: Readonly<Record<string, Locale>>; /** * Return the `Locale` for *lang*, falling back to English when not available. * * Lookup order: * 1. Exact tag (e.g. `"pt-BR"`) * 2. Base subtag (e.g. `"pt"` from `"pt-BR"`) * 3. 
English fallback * * @example * getLocale("de") // → locales.de * getLocale("pt-BR") // → locales["pt-BR"] * getLocale("xx") // → locales.en (fallback: no "xx" locale is built in) */ declare function getLocale(lang: string): Locale; /** * Return the human-readable label for *fieldKey* in the context of *typeKey*, * using *locale* for the translation. * * Checks `locale.fieldTitlesByType[typeKey][fieldKey]` first, then falls back * to `locale.fieldTitles[fieldKey]`, and finally to the raw key itself. * * @example * getFieldTitle(locales.en, "book", "author") // → "Author(s)" * getFieldTitle(locales.en, "video", "author") // → "Director(s)" * getFieldTitle(locales.de, "book", "author") // → "Autor(en)" */ declare function getFieldTitle(locale: Locale, typeKey: string, fieldKey: string): string; /** * Return the human-readable label for *typeKey* in *locale*, falling back to * the raw key if not found. * * @example * getTypeTitle(locales.fr, "article-journal") // → "Article de revue" */ declare function getTypeTitle(locale: Locale, typeKey: string): string; /** * Return the help/hint text for *fieldKey* in *locale*, or `undefined` when * no help text is defined for that field. * * @example * getFieldHelp(locales.en, "date") // → "In <em>Extended Date Time Format</em>…" * getFieldHelp(locales.en, "title") // → undefined */ declare function getFieldHelp(locale: Locale, fieldKey: string): string | undefined; /** * Return the human-readable label for a `langid` field value in *locale*, * falling back to the raw key if not found. * * @example * getLangidTitle(locales.de, "french") // → "Französisch" * getLangidTitle(locales.en, "brportuguese") // → "Brazilian Portuguese" */ declare function getLangidTitle(locale: Locale, langidKey: string): string; /** * Return the human-readable label for a non-language option value in *locale* * (i.e. an `editortype`, `pagination`, `pubstate`, or `type` sub-field value), * falling back to the raw key if not found. 
* * @example * getOtherOptionTitle(locales.de, "phdthesis") // → "Ph.D. These" * getOtherOptionTitle(locales.en, "inpreparation") // → "In preparation" */ declare function getOtherOptionTitle(locale: Locale, optionKey: string): string; /** * Citavi JSON format parser * * Supports the JSON format exported by Citavi (SwissAcademic.Citavi), * including the "WordPlaceholder" citation format used in docx exports * and direct project-export arrays. * * For the native XML project format (.ctv5 / .ctv6) see citavi-xml.ts. * * Field semantics are derived from the official per-type documentation at: * https://www1.citavi.com/sub/manual-citaviweb/en/fields_in_citavi.html * * Every Citavi reference type shares the same underlying JSON field names; * only the *meaning* and *display label* of each field changes per type. * This parser maps those semantic differences to the appropriate internal * BibLaTeX/CSL field names. */ interface CitaviPerson { FirstName?: string; LastName?: string; MiddleName?: string; /** Used for institutional/corporate names when no personal name is present */ Name?: string; Sex?: number; Id?: string; [key: string]: unknown; } interface CitaviPublisher { Name?: string; Place?: string; [key: string]: unknown; } interface CitaviPeriodical { Name?: string; StandardAbbreviation?: string; UserAbbreviation1?: string; /** Electronic ISSN */ Eissn?: string; /** Print ISSN */ Issn?: string; [key: string]: unknown; } interface CitaviLocation { Address?: { UriString?: string; OriginalString?: string; LinkedResourceType?: number; [key: string]: unknown; }; LocationType?: number; [key: string]: unknown; } interface CitaviKeyword { Name?: string; [key: string]: unknown; } interface CitaviReference { Id?: string; BibTeXKey?: string; CitationKey?: string; ReferenceType?: string; Title?: string; /** In the JSON the field is sometimes `Title1` (older exports) */ Title1?: string; Subtitle?: string; TitleSupplement?: string; ShortTitle?: string; ParallelTitle?: string; 
Authors?: CitaviPerson[]; Editors?: CitaviPerson[]; Translators?: CitaviPerson[]; Collaborators?: CitaviPerson[]; OthersInvolved?: CitaviPerson[]; Organizations?: CitaviPerson[]; Publishers?: CitaviPublisher[]; PlaceOfPublication?: string; Periodical?: CitaviPeriodical; Year?: string; YearResolved?: string; Date?: string; Date2?: string; AccessDate?: string; Volume?: string; Number?: string; Issue?: string; Edition?: string; NumberOfVolumes?: string; SeriesTitle?: string; OnlineAddress?: string; PageRange?: string; PageCount?: string; Isbn?: string; Issn?: string; Doi?: string; Abstract?: string; Keywords?: CitaviKeyword[]; Language?: string; LanguageCode?: string; Locations?: CitaviLocation[]; StorageMedium?: string; /** * Records where the bibliographic metadata was imported from * (e.g. `"CrossRef"`, `"PubMed"`). */ SourceOfBibliographicInformation?: string; SpecificField1?: string; SpecificField2?: string; SpecificField3?: string; SpecificField4?: string; SpecificField5?: string; SpecificField6?: string; SpecificField7?: string; ParentReference?: CitaviReference; [key: string]: unknown; } /** * A single entry in a Citavi `WordPlaceholder.Entries` array. * Each entry links a bibliographic reference to citation-specific decorations * (page locator, prefix/suffix, etc.). */ interface CitaviEntry { /** UUID identifying this placeholder entry instance */ Id?: string; /** UUID of the linked bibliographic reference */ ReferenceId?: string; /** Embedded bibliographic reference (present in WordPlaceholder format) */ Reference?: CitaviReference; /** * UUID of a Citavi knowledge item (quotation, thought, or summary) that * this citation entry is associated with. Present when the citation was * inserted from the Citavi knowledge panel rather than directly from the * reference list. */ AssociateWithKnowledgeItemId?: string; /** * Integer indicating the type of quotation/knowledge item this citation * represents within Citavi. Observed value: `1`. 
The full enum mapping * is not yet known. */ QuotationType?: number; /** * Text to prepend to the formatted citation (e.g. `"Vgl. "`, `"See "`). * Absent when no prefix is set. Citavi formats this according to the * active citation style (e.g. auto-capitalising the first word in footnotes). */ Prefix?: string; /** * Text to append to the formatted citation (e.g. `", etc."`). * Absent when no suffix is set. * Existence confirmed by the Citavi manual; not yet observed in real files. */ Suffix?: string; /** * Citation-specific page/locator range. Contains `OriginalString` with * the full range text plus typed `StartPage`/`EndPage` sub-objects. */ PageRange?: { /** Full range string, e.g. `"100-105"` or `"Col. 12-14"`. Absent when no pages are set. */ OriginalString?: string; /** * What the locator numbers represent; determines the prefix the * citation style renders (e.g. `p.`, `Col.`, `Nr.`, `§`). * * Known values (integer-to-type mapping inferred from the Citavi * manual's prose — exact values NOT confirmed by observed data): * 0 = Pages (default, confirmed observed) * 1 = Columns (inferred) * 2 = Section numbers (inferred) * 3 = Margin numbers (inferred) * 4 = Other / free-form (inferred) */ NumberingType?: number; /** * Whether Arabic or Roman numerals are used. * * Known values (inferred from the Citavi manual; not confirmed by * observed data beyond 0): * 0 = Arabic (default, confirmed observed) * 1 = Roman (inferred) */ NumeralSystem?: number; StartPage?: { OriginalString?: string; PrettyString?: string; /** Numeric value; absent when the page is not fully numeric */ Number?: number; IsFullyNumeric?: boolean; NumberingType?: number; NumeralSystem?: number; }; EndPage?: { OriginalString?: string; PrettyString?: string; Number?: number; IsFullyNumeric?: boolean; NumberingType?: number; NumeralSystem?: number; }; [key: string]: unknown; } | null; /** * Controls bibliography inclusion for this citation entry. 
* * Known string values (confirmed by Citavi manual; not yet observed in real files): * absent / default = reference appears in both citation and bibliography * `"/bibonly"` = reference appears only in the bibliography, not in-text * `"/nobib"` = reference appears only in-text, not in the bibliography */ BibliographyEntry?: string; /** * Overrides which citation-style rule set (formatting variant) is used for * this entry — e.g. to use the bibliography layout for a single in-text * citation. Serialised form not yet observed in real files. */ RuleSet?: unknown; /** * Selects among the citation style's optional formatting variants (1, 2, * or 3) for this entry. Commonly used to suppress or force "ibid."-style * short forms. Serialised form not yet observed in real files. */ FormatOption?: unknown; /** * Overrides where this citation is physically inserted (in-text vs. * footnote), independently of what the citation style normally dictates. * Serialised form not yet observed in real files. */ InsertAs?: unknown; /** * When `true`, the `NumberingType` for the locator is inherited from the * document default rather than set per-citation. */ UseNumberingTypeOfParentDocument?: boolean; /** * When `true`, the citation style's own default prefix overrides any * custom `Prefix` string on this entry. When `false` and `Prefix` is * absent, no prefix is added. */ UseStandardPrefix?: boolean; [key: string]: unknown; } /** * Top-level Citavi JSON payload. Three shapes are supported: * 1. WordPlaceholder – `{ Entries: [...] }` * 2. Project export – `{ References: [...] }` * 3. 
Plain array – `CitaviReference[]` */ type CitaviInput = { Entries?: CitaviEntry[]; References?: CitaviReference[]; [key: string]: unknown; } | CitaviReference[]; interface ErrorObject$7 { type: string; field?: string; field_name?: string; value?: unknown; entry?: string; } declare class CitaviParser { input: CitaviInput; entries: EntryObject[]; errors: ErrorObject$7[]; warnings: ErrorObject$7[]; /** Track processed Citavi Reference IDs to avoid duplicate imports */ private seenIds; private usedKeys; /** * Maps each Citavi Reference `Id` (UUID) to the final `entry_key` that was * assigned to it after normalisation. Populated during `parse()` so that * callers (e.g. `DocxCitationsParser`) can resolve a `ReferenceId` from a * `CitaviEntry` back to the actual key used in the returned `BibDB`. */ referenceIdToEntryKey: Map<string, string>; constructor(input: CitaviInput); parse(): Record<number, EntryObject>; /** * Walk the input structure and return a flat ordered list of all * CitaviReference objects, with parent references appearing before their * children so they receive lower bibDB indices. */ private collectReferences; private convertReference; private processTitle; private processNames; /** Append `names` to an existing name-list field, or create it. */ private addToNameField; private processDate; private processPublisher; private processPeriodical; private processNumbering; private processIdentifiers; private processLocations; private processKeywords; private processLanguage; private processSpecificFields; /** * Pull relevant fields out of a ParentReference into the child entry. * This handles Contribution-in-Book, MusicTrack-in-Album, etc. */ private processParentReference; private convertPersonList; private convertPerson; /** * Parse a Citavi date string into an ISO-8601 date string. * Handles ISO datetime (2007-12-01T00:00:00) and plain date (2007-12-01). * Returns null if the input cannot be parsed. 
*/ private parseISODate; /** * Citavi stores page ranges either as plain strings or in a small XML * dialect embedded in the JSON string: * * <sp><n>2</n>...<os>2</os>...</sp> * <ep><n>6</n>...<os>6</os>...</ep> * <os>2-6</os> * * We extract the *last* `<os>` value, which holds the full human-readable * range (e.g. "2-6"). If no XML is present we use the raw string directly. */ private parsePageRange; private convertRange; private buildEntryKey; /** * Citavi's ShortTitle field often contains an auto-generated prefix like * "Burton 2013 – Sweeney Todd" or "Manning (Ed.) 2016 – Food and supply…". * Strip everything up to and including the first en-dash / em-dash / hyphen * separator so we retain only the actual short title fragment. */ private cleanShortTitle; private convertRichText; } declare function parseCitavi(input: CitaviInput): Record<number, EntryObject>; /** * Citavi XML project format parser (.ctv5 / .ctv6) * * The XML format stores Persons, Keywords, Publishers, Periodicals and * SeriesTitles in separate lookup tables and links them to references via * "OnetoN" relation elements whose text content is: * * <referenceId>;<linkedId1>;<linkedId2>… * * This parser resolves all those links, reconstructs CitaviReference objects, * attaches parent references for Contribution-type entries, and then hands the * result to CitaviParser for the actual field-level conversion. * * Accepts any DOM Document object (browser Document, or one produced by a * library such as @xmldom/xmldom), or a plain XML string that will be parsed * with the global DOMParser when available (browser / Deno / Node 22+). * * Only DOM Level 2 APIs are used (getElementsByTagName, getAttribute, * childNodes, textContent) so the class is compatible with @xmldom/xmldom. 
*/ interface XmlErrorObject { type: string; entry?: string; value?: unknown; } declare class CitaviXmlParser { private doc; errors: XmlErrorObject[]; warnings: XmlErrorObject[]; constructor(input: Document | string); parse(): Record<number, EntryObject>; private buildPersonMap; private buildKeywordMap; private buildPublisherMap; private buildPeriodicalMap; private buildSeriesTitleMap; /** * Parse a OnetoN relation section into a Map<referenceId, linkedId[]>. * * Each <OnetoN> text node has the form: * <referenceId>;<linkedId1>;<linkedId2>… */ private buildOneToNMap; /** * Build a Map<childReferenceId, parentReferenceId> from ReferenceReferences. * * Each <OnetoN> text node has the form: * <parentId>;<childId1>;<childId2>… */ private buildChildToParentMap; /** * Return the first *direct* child element with the given tag name, or null. * Uses childNodes iteration rather than getElementsByTagName so that * deeply-nested elements with the same tag name are not mistakenly matched. */ private firstChildEl; /** * Return the trimmed text content of the first direct child element with * the given tag name, or null if no such child exists. */ private childText; private resolvePersons; } declare function parseCitaviXml(input: Document | string): Record<number, EntryObject>; interface CSLEntry { id?: string; type?: string; [key: string]: unknown; } interface ErrorObject$6 { type: string; field?: string; value?: unknown; entry?: string; } declare class CSLParser { input: Record<string, CSLEntry>; entries: EntryObject[]; errors: ErrorObject$6[]; warnings: ErrorObject$6[]; private usedKeys; /** * Maps each raw CSL `id` string to the final `entry_key` assigned after * normalisation. Populated during `parse()` so that callers (e.g. * `DocxCitationsParser`, `OdtCitationsParser`) can resolve a raw CSL id * back to the actual key used in the returned BibDB — even when the parser * synthesised a lastname+year key that bears no resemblance to the original. 
*/ rawIdToEntryKey: Map<string, string>; constructor(input: Record<string, CSLEntry>); parse(): Record<number, EntryObject>; private convertEntry; private getBibType; private convertField; private convertDate; private convertNames; private convertInteger; private convertKey; private convertRange; private convertKeyList; private convertLiteralList; private convertTags; private convertRichText; } declare function parseCSL(input: Record<string, CSLEntry>): Record<number, EntryObject>; /** * DOCX Citations importer * * Extracts bibliographic citations from the XML of a DOCX file's * word/document.xml (or any XML fragment containing the same markup). * * Supported citation manager formats and how each is handled: * * - Word native `CITATION key \l locale` inline field + * `customXml/item1.xml` sources (passed as * `sourcesXml` option). Delegated to * DocxNativeParser in docx-native.ts. * * - Zotero `ADDIN ZOTERO_ITEM CSL_CITATION {json}` inline * field. The embedded CSL-JSON `citationItems` * array is reshaped into a Record<string, CSLEntry> * and fed to CSLParser. * * - Mendeley Cite v3 Current add-in: `<w:sdt>` with the citation JSON * (current) base64-encoded in `w:tag w:val= * "MENDELEY_CITATION_v3_{base64}"`. Decoded payload * is CSL-JSON; handled identically to Zotero. * * - Mendeley Desktop Legacy add-in: `ADDIN CSL_CITATION {json}` inline * (legacy) field. Same CSL-JSON shape; handled identically * to Zotero. * * - EndNote `ADDIN EN.CITE <EndNote>…</EndNote>` — XML is * either entity-escaped inline or base64-encoded in * `<w:fldData>`. The `<record>` subtree is * converted to an EndNoteRecord object and passed to * EndNoteParser. * * - Citavi `<w:sdt>` wrapping `ADDIN CitaviPlaceholder * {base64}`. The base64-decoded JSON contains a * WordPlaceholder with `Entries[].ReferenceId` * UUIDs. Two sub-cases: * * A. If the payload already embeds `Reference` * objects (some Citavi export modes), they are * passed directly to CitaviParser. * * B. 
In older or incomplete formats, only UUIDs may * be present without embedded references. Such * citations cannot be fully resolved and will * generate warnings. * * Usage: * const parser = new DocxCitationsParser(documentXml, { * sourcesXml, // contents of customXml/item1.xml (Word-native) * }) * const result = parser.parse() * // result.entries → BibDB (Record<number, EntryObject>) * // result.errors → ErrorObject[] * // result.warnings → ErrorObject[] * * The `sourcesXml` option must be the contents of `customXml/item1.xml` from * the DOCX ZIP when Word-native citations are present. * * Citavi citations embed complete bibliographic data directly in each citation * field, so no external Citavi project file is required. */ interface DocxCitationsParseResult { entries: Record<number, EntryObject>; errors: ErrorObject$5[]; warnings: ErrorObject$5[]; } /** * Mutable accumulator passed to static extraction methods when processing * multiple document elements in a single pass. Its fields are mutated * in place as entries are discovered and keys are deduplicated. */ interface CitationAccumulator$1 { entries: EntryObject[]; errors: ErrorObject$5[]; warnings: ErrorObject$5[]; seenKeys: Set<string>; /** * Persistent map from raw CSL `id` strings to the normalised `entry_key` * values assigned by `CSLParser`. Accumulated across all citation elements * processed with the same accumulator so that duplicate items (already in * `seenKeys`) can still have their metadata resolved to the correct key. */ cslRawIdToEntryKey: Map<string, string>; } interface ErrorObject$5 { type: string; field?: string; value?: unknown; entry?: string; } /** * Per-entry citation metadata, keyed by `entry_key`. * * This captures the cite-specific decorations that surround a bibliographic * reference inside a single citation: page locators, textual prefixes / * suffixes, and author-rendering flags. 
It is returned alongside the * `entries` BibDB when `retrieveMetadata` is `true` on a static method call. * * Field availability by format: * * | Field | Zotero | Mendeley | EndNote | Citavi | * |------------------|--------|----------|----------------------|-------------------------------------| * | locator | ✅ | ✅ | ✅ (Pages) | ✅ (PageRange.OriginalString) | * | label | ✅ | ✅ | – | ✅ (derived from PageRange.NumberingType; mapping inferred from manual) | * | prefix | ✅ | ✅ | ✅ | ✅ | * | suffix | ✅ | ✅ | ✅ | ✅ (confirmed by manual; not yet seen in real files) | * | suppressAuthor | ✅ | ✅ | – | – | * | authorOnly | ✅ | ✅ | – | – | * | authorYear | – | – | ✅ (AuthorYear attr) | – | * | bibliographyEntry| – | – | – | ✅ (confirmed by manual; not yet seen in real files) | * | ruleSet | – | – | – | ✅ (confirmed by manual; serialised form unknown) | * | formatOption | – | – | – | ✅ (confirmed by manual; serialised form unknown) | * | insertAs | – | – | – | ✅ (confirmed by manual; serialised form unknown) | */ interface CitationItemMetadata$1 { /** The `entry_key` of the corresponding entry in the returned `entries` BibDB. */ entry_key: string; /** * Pinpoint location within the cited work (page number, chapter, etc.). * For CSL formats this is the raw `locator` string; for EndNote it is the * `<Pages>` element text; for Citavi it is `PageRange.OriginalString`. */ locator?: string; /** * CSL locator type label (e.g. `"page"`, `"chapter"`, `"section"`). * For CSL-based formats (Zotero, Mendeley) this is the raw `label` string * from the citation item. For Citavi it is derived from `PageRange.NumberingType`: * `0` (Pages) → `"page"`, `1` (Columns) → `"column"`, * `2` (Section numbers) → `"section"`, `3` (Margin numbers) → `"note"`, * `4` (Other / free-form) → `"custom"`. * The integer-to-label mapping for Citavi is inferred from the Citavi manual * and has not been confirmed against observed data beyond value `0`. 
*/ label?: string; /** Text to prepend to the formatted citation (e.g. `"see "`, `"cf. "`). */ prefix?: string; /** Text to append to the formatted citation (e.g. `", etc."`). */ suffix?: string; /** * When `true`, author names are suppressed in the formatted output, * leaving only the year (and locator) in parentheses: `(2020, p. 45)`. * Only populated for CSL-based formats (Zotero, Mendeley). */ suppressAuthor?: boolean; /** * When `true`, only the author name is rendered with nothing else: * `William T. Williams`. * Only populated for CSL-based formats (Zotero, Mendeley). */ authorOnly?: boolean; /** * When `true`, the author name is rendered outside the parentheses while * the year (and locator) remain inside: `William T. Williams (2020, p. 45)`. * This reflects the `AuthorYear="1"` attribute on EndNote's `<Cite>` element. * Only populated for EndNote citations. */ authorYear?: boolean; /** * Controls whether and where this reference appears in the bibliography. * Only populated for Citavi citations (from `Entries[].BibliographyEntry`). * * Known values: * `"/bibonly"` – reference appears only in the bibliography, not in-text * `"/nobib"` – reference appears only in-text, not in the bibliography * * When absent the reference appears in both (default behaviour). * Confirmed by the Citavi manual; not yet observed in real files. */ bibliographyEntry?: string; /** * Overrides which citation-style rule set (formatting variant) is used for * this entry. Only populated for Citavi citations (from `Entries[].RuleSet`). * Serialised form not yet observed in real files. */ ruleSet?: unknown; /** * Selects among the citation style's optional formatting variants for this * entry (values 1, 2, or 3). Only populated for Citavi citations (from * `Entries[].FormatOption`). Serialised form not yet observed in real files. */ formatOption?: unknown; /** * Overrides where the citation is physically inserted (in-text vs. footnote). 
* Only populated for Citavi citations (from `Entries[].InsertAs`). * Serialised form not yet observed in real files. */ insertAs?: unknown; } /** * Result of the static citation checks (`sdtCitation` / `fieldCitation`): * whether the element is a citation, its format, and optionally the extracted * entries, errors, warnings and metadata. */ interface CitationResult$1 { isCitation: boolean; format?: string; entries?: Record<number, EntryObject>; errors?: ErrorObject$5[]; warnings?: ErrorObject$5[]; /** * Per-entry citation metadata (locators, prefixes, suffixes, flags). * Only populated when `retrieveMetadata` is `true` on the static method call. */ metadata?: CitationItemMetadata$1[]; } /** Result of the static bibliography checks (`sdtBibliography` / `fieldBibliography`). */ interface BibliographyResult$1 { isBibliography: boolean; format?: string; } /** Options accepted by `DocxCitationsParser` and `parseDocxCitations`. */ interface DocxCitationsParserOptions { /** * Contents of `customXml/item1.xml` from the DOCX ZIP, using the MS * Office Bibliography XML namespace. Required to resolve Word-native * `CITATION` keys into full bibliographic data. */ sourcesXml?: string; } /** * Extracts citations from DOCX document XML. Offers a full-document * `parse()` on an instance as well as static per-element checks for single * SDT blocks and field instructions. */ declare class DocxCitationsParser { private documentXml; private options; entries: EntryObject[]; errors: ErrorObject$5[]; warnings: ErrorObject$5[]; /** Entry keys already added — prevents duplicates across multiple fields. */ private seenKeys; /** Persistent raw CSL id → normalised entry_key map for the instance parse. */ private cslRawIdToEntryKey; constructor(documentXml: string, options?: DocxCitationsParserOptions); /** * Check if an SDT block contains citation data (without full document parsing). * * @param sdtXml - XML string of a single <w:sdt>...</w:sdt> block * @param retrieve - NOTE(review): presumably, as documented for `fieldCitation`, * if true the full citation data is extracted and returned — confirm * @param retrieveMetadata - If true, `metadata` is populated on the result * @param acc - Optional shared accumulator used when several document elements * are processed in a single pass * @returns CitationResult indicating whether it's a citation and its format */ static sdtCitation(sdtXml: string, retrieve?: boolean, retrieveMetadata?: boolean, acc?: CitationAccumulator$1): CitationResult$1; /** * Check whether an SDT block is a bibliography rendering region. 
* * @param sdtXml - XML string of a single <w:sdt>...</w:sdt> block * @returns BibliographyResult indicating whether it's a bibliography */ static sdtBibliography(sdtXml: string): BibliographyResult$1; /** * Check or extract citation data from a field instruction. * * @param instrText - The concatenated instruction text from w:instrText elements * @param retrieve - If true, extract and return full citation data * @param retrieveMetadata - If true, `metadata` is populated on the result * @param extractWordNative - NOTE(review): presumably enables resolving Word-native * `CITATION` fields via `options.sourcesXml` — confirm against the implementation * @param fldData - Optional field data (for EndNote base64 payloads) * @param options - Optional parser options (e.g., sourcesXml for Word native) * @param acc - Optional shared accumulator used when several document elements * are processed in a single pass * @returns CitationResult with format and optionally entries/errors/warnings */ static fieldCitation(instrText: string, retrieve?: boolean, retrieveMetadata?: boolean, extractWordNative?: boolean, fldData?: string, options?: DocxCitationsParserOptions, acc?: CitationAccumulator$1): CitationResult$1; /** * Check whether a field instruction is a bibliography rendering region. * * @param instrText - The concatenated instruction text * @returns BibliographyResult indicating whether it's a bibliography */ static fieldBibliography(instrText: string): BibliographyResult$1; /** * Extract CSL citation JSON data. */ private static extractCslJsonData; /** * Extract EndNote citation data. */ private static extractEndNoteData; /** * Extract Citavi citation data from base64-encoded WordPlaceholder JSON. * * Citavi embeds complete bibliographic data directly in each citation via * `Reference` objects within the `Entries` array. This method checks for * embedded references and converts them via CitaviParser. If no embedded * references are found (only UUIDs), a warning is generated. */ private static extractCitaviData; /** * Extract Word native citation data. 
*/ private static extractWordNativeData; /** * Parse EndNote XML payload. */ private static parseEndNoteXml; /** * Parse EndNote record XML. */ private static parseEndNoteRecordXml; /** * Parse contributors XML. */ private static parseContributorsXml; /** * Parse dates XML. */ private static parseDatesXml; /** * Parse URLs XML. */ private static parseUrlsXml; /** * Strip style tags and decode XML entities. */ private static stripStyleTagsStatic; /** * Unescape XML entities. */ private static unescapeXmlEntitiesStatic; /** * Decode base64. */ private static decodeBase64Static; /** * Runs the full-document parse. * * @returns The entries, errors and warnings found in the document */ parse(): DocxCitationsParseResult; /** * NOTE(review): presumably the SDT-block counterpart of `parseFieldCodes` * (scans `<w:sdt>` blocks for citations) — implementation not visible in * this declaration file. */ private parseSdtBlocks; /** * Tokenises the document XML into field-code events, replays them with a * depth counter to handle nested fields (EN.CITE wraps EN.CITE.DATA), and * dispatches each completed instruction to the correct handler. * * Per the DOCX spec (and documented in CITATIONS_IN_DOCS.md), all * <w:instrText> elements between `begin` and `separate` must be * concatenated before the instruction is interpreted. */ private parseFieldCodes; /** * Delegates to DocxNativeParser, passing `seenKeys` so that sources * already imported via other field types are not duplicated, and so that * newly imported keys are recorded for future deduplication. 
*/ private parseSourcesXml; } /** * Function form of the DOCX citations parser. * * @param documentXml - XML of the DOCX document (e.g. `word/document.xml`) * @param options - Optional parser options (e.g. `sourcesXml` for Word-native citations) * @returns The entries, errors and warnings found in the document * * NOTE(review): presumably equivalent to * `new DocxCitationsParser(documentXml, options).parse()` — confirm against * the implementation. */ declare function parseDocxCitations(documentXml: string, options?: DocxCitationsParserOptions): DocxCitationsParseResult; /** * EndNote XML parser * Supports EndNote XML export format (both EndNote.dtd and RSXML.dtd variants) * as well as EndNote Cite While You Write format */ interface ErrorObject$4 { type: string; field?: string; value?: unknown; entry?: string; } /** Result of an EndNote parse: the converted entries plus errors and warnings. */ interface EndNoteParseResult { entries: Record<number, EntryObject>; errors: ErrorObject$4[]; warnings: ErrorObject$4[]; } interface EndNoteStyle { "#text"?: string; color?: string; face?: string; font?: string; size?: string; } /** A value that is either a plain string or an object carrying `#text` and/or `style` data. */ type EndNoteStyledValue = string | { "#text"?: string; style?: EndNoteStyle | EndNoteStyle[]; } | EndNoteStyle; interface EndNoteAuthor { "#text"?: string; style?: EndNoteStyle | EndNoteStyle[]; "corp-name"?: string; "first-name"?: string; initials?: string; "last-name"?: string; "middle-initial"?: string; role?: string; salutation?: string; suffix?: string; title?: string; } interface EndNoteDate { "#text"?: string; style?: EndNoteStyle | EndNoteStyle[]; day?: string; julian?: string; month?: string; year?: string; } interface EndNoteUrl { "#text"?: string; style?: EndNoteStyle | EndNoteStyle[]; "has-ut"?: "yes" | "no"; "ppv-app"?: string; "ppv-ref"?: "yes" | "no"; "ppv-ut"?: string; } /** One URL group: a single `url` or an array of them. */ interface EndNoteUrlGroup { url?: EndNoteUrl | EndNoteUrl[]; } /** The URL groups of a record, one optional group per URL kind. */ interface EndNoteUrls { "web-urls"?: EndNoteUrlGroup; "pdf-urls"?: EndNoteUrlGroup; "text-urls"?: EndNoteUrlGroup; "related-urls"?: EndNoteUrlGroup; "image-urls"?: EndNoteUrlGroup; } interface EndNoteRecord { database?: string | { "#text"?: string; name?: string; path?: string; }; "source-app"?: string | { "#text"?: string; name?: string; version?: string; }; "rec-number"?: string | number; "foreign-keys"?: { key?: { "#text"?: string; app?: string; "db-id"?: string; timestamp?: string; } | Array<{ "#text"?: string; app?: string; "db-id"?: string; timestamp?: string; }>; }; "ref-type"?: string | { 
"#text"?: string; name?: string; }; contributors?: { authors?: { author?: EndNoteAuthor | EndNoteAuthor[]; }; "secondary-authors"?: { author?: EndNoteAuthor | EndNoteAuthor[]; }; "tertiary-authors"?: { author?: EndNoteAuthor | EndNoteAuthor[]; }; "subsidiary-authors"?: { author?: EndNoteAuthor | EndNoteAuthor[]; }; "translated-authors"?: { author?: EndNoteAuthor | EndNoteAuthor[]; }; editors?: { editor?: EndNoteAuth