UNPKG

biblatex-csl-converter

Version:

Bibliography format converter: BibLaTeX, BibTeX, CSL-JSON, RIS, ENW, EndNote XML, Citavi, DOCX citations, ODT citations — parse, convert, and export with round-trip fidelity

1,535 lines (1,346 loc) 66.8 kB
/**
 * Citavi JSON format parser
 *
 * Supports the JSON format exported by Citavi (SwissAcademic.Citavi),
 * including the "WordPlaceholder" citation format used in docx exports
 * and direct project-export arrays.
 *
 * For the native XML project format (.ctv5 / .ctv6) see citavi-xml.ts.
 *
 * Field semantics are derived from the official per-type documentation at:
 * https://www1.citavi.com/sub/manual-citaviweb/en/fields_in_citavi.html
 *
 * Every Citavi reference type shares the same underlying JSON field names;
 * only the *meaning* and *display label* of each field changes per type.
 * This parser maps those semantic differences to the appropriate internal
 * BibLaTeX/CSL field names.
 */
import {
    BibTypes,
    type EntryObject,
    type NameDictObject,
    type NodeArray,
    type RangeArray,
    type TextNodeObject,
} from "../const"
import { lookupLangid, makeEntryKey } from "./tools"

/**
 * Lookup table: Citavi `ReferenceType` string → internal bib type key.
 *
 * The table is consulted with the raw `ReferenceType` value of a reference.
 * Keys are grouped by kind of material purely for readability; the grouping
 * has no effect on lookups.
 */
const CitaviTypeMap: Record<string, string> = {
    // Periodical articles
    JournalArticle: "article-journal",
    NewspaperArticle: "article-newspaper",
    SpecialIssue: "periodical",

    // Books and book-like items
    Book: "book",
    BookEdited: "collection",
    CollectedWorks: "collection",
    AudioBook: "book",

    // Contributions / chapters
    Contribution: "incollection",
    ContributionInLegalCommentary: "incollection",

    // Conference materials
    ConferenceProceedings: "proceedings",

    // Reports / gray literature
    UnpublishedWork: "report",
    PressRelease: "report",
    NewsAgencyReport: "report",

    // Theses / manuscripts
    Thesis: "thesis",
    Manuscript: "unpublished",

    // Online resources
    InternetDocument: "online",

    // Legal materials
    LegalCommentary: "book", // treated as a reference book
    ContributionInLegalCommentary_parent: "book", // parent type alias (unused externally)
    CourtDecision: "legal_case",
    StatuteOrRegulation: "legislation",

    // Standards and patents
    Standard: "standard",
    Patent: "patent",

    // Audio / visual
    Movie: "video",
    Broadcast: "video",
    AudioOrVideoDocument: "video",
    RadioPlay: "audio",
    MusicAlbum: "audio",
    MusicTrack: "audio",

    // Software
    ComputerProgram: "software",

    // Maps
    Map: "map",

    // Personal / interview
    PersonalCommunication: "personal_communication",
    InterviewMaterial: "interview",

    // Presentations / lectures
    Lecture: "misc",

    // Archive / file
    ArchiveMaterial: "misc",
    File: "misc",

    // Catch-all
    Unknown: "misc",
}

// ─── Per-type semantic overrides ─────────────────────────────────────────────
//
// Citavi reuses the same JSON field names for every reference type but assigns
// different display labels (and therefore meanings) per type. For example,
// `Authors` means "Director" for a Movie but "Cartographer" for a Map. We
// capture only the cases where the semantic mapping diverges from the default
// (Authors → author, Editors → editor, etc.) so we can map to the right
// internal role fields.
// // Possible target roles: "author" | "editor" | "translator" | // "editora" (secondary contributors) | "holder" (patent assignee) | // "institution" (organisations acting as author-like entity) // // null means "ignore this field for this type" interface RoleOverride { Authors?: string | null Editors?: string | null Collaborators?: string | null Organizations?: string | null } const TypeRoleOverrides: Record<string, RoleOverride> = { // Movie: Authors = Director, Editors = Studio/Producer, Collaborators = Lead actors Movie: { Authors: "author", // Director Editors: "editor", // Studio / Producer Collaborators: "editora", // Secondary contributors }, // Broadcast: same pattern as Movie Broadcast: { Authors: "author", // Director Editors: "editor", // Editor (added in Citavi 6) Collaborators: "editora", Organizations: "editora", // Station }, // AudioOrVideoDocument: Authors = Director, Collaborators = Editors (of the work) AudioOrVideoDocument: { Authors: "author", // Director Editors: "editor", Collaborators: "editora", }, // AudioBook: Collaborators = Speaker, Editors = Director (Citavi 6) AudioBook: { Authors: "author", Editors: "author", // Director Collaborators: "editora", // Speaker }, // RadioPlay: Editors = Director (Citavi 6), Organizations = Station RadioPlay: { Authors: "author", Editors: "author", // Director Collaborators: "editora", Organizations: "editora", }, // MusicTrack: Authors = Composer, Collaborators = Artist/Performer, Editors = Director MusicTrack: { Authors: "author", // Composer → author Editors: "author", // Director Collaborators: "editora", // Artist / Performer Organizations: "editora", // Orchestra }, // MusicAlbum: same as MusicTrack at the album level MusicAlbum: { Authors: "author", Editors: "author", // Director Collaborators: "editora", Organizations: "editora", }, // Patent: Authors = Inventor, Editors = Assignee → holder Patent: { Authors: "author", // Inventor Editors: "holder", // Assignee Collaborators: null, }, // 
Thesis: Collaborators = Academic advisor (→ editora), Organizations = Academic institution Thesis: { Authors: "author", Collaborators: "editora", // Academic advisor Organizations: "institution", }, // Lecture / Presentation: Authors = Speaker, Collaborators = Host Lecture: { Authors: "author", // Speaker Collaborators: "editora", // Host Organizations: "editora", }, // InterviewMaterial: Authors = Interviewer, Collaborators = Interviewee InterviewMaterial: { Authors: "author", // Interviewer Collaborators: "editora", // Interviewee }, // PersonalCommunication: Authors = Sender, Collaborators = Recipient PersonalCommunication: { Authors: "author", // Sender Collaborators: "editora", // Recipient }, // ConferenceProceedings: Editors = Publisher/Editor, no Authors ConferenceProceedings: { Authors: null, Editors: "editor", Collaborators: "editora", Organizations: "editora", }, // SpecialIssue: Editors = Guest editor, no Authors SpecialIssue: { Authors: null, Editors: "editor", Collaborators: null, }, // BookEdited / CollectedWorks: Editors = Publisher role → editor of volume BookEdited: { Authors: "author", Editors: "editor", Collaborators: "editora", }, CollectedWorks: { Authors: "author", Editors: "editor", Collaborators: "editora", }, // LegalCommentary: Authors = "Founded by" (original author), Editors = Author (current) LegalCommentary: { Authors: "bookauthor", // "Founded by" — original author of the commentary Editors: "author", // Current Author Collaborators: null, }, // ContributionInLegalCommentary: Authors = Editor (of the section), no Editors ContributionInLegalCommentary: { Authors: "author", // Editor of the commented section Editors: null, Collaborators: null, }, // CourtDecision: no personal authors; Organizations = Court CourtDecision: { Authors: null, Editors: null, Collaborators: null, Organizations: "institution", }, // StatuteOrRegulation: no personal authors; Organizations = Legislature/Authority StatuteOrRegulation: { Authors: null, Editors: 
null, Collaborators: null, Organizations: "institution", }, // Standard: no personal authors; Organizations = issuing body Standard: { Authors: null, Editors: null, Collaborators: null, Organizations: "institution", }, // ArchiveMaterial: Collaborators = Recipient (Citavi 6), Organizations = Archive ArchiveMaterial: { Authors: "author", Collaborators: "editora", // Recipient Organizations: "institution", // Archive }, // File: Collaborators = Addressee, Organizations = Organization File: { Authors: "author", Collaborators: "editora", // Addressee Organizations: "editora", }, // InternetDocument: Editors = Publisher (role), Organizations = Organization InternetDocument: { Authors: "author", Editors: "editor", Organizations: "editora", }, } // ─── Per-type semantic overrides for non-name fields ───────────────────────── // // Some Citavi fields carry different meanings per type. We capture overrides // for the structured fields that affect internal field mapping. interface FieldOverride { // Which internal field should Citavi's `Number` map to? Number?: string | null // Which internal field should `NumberOfVolumes` map to? NumberOfVolumes?: string | null // Which internal field should `Volume` map to? Volume?: string | null // Which internal field should `SeriesTitle` map to? SeriesTitle?: string | null // For types where Subtitle carries a special meaning, map it here instead // of appending it to the title. null = discard. 
SubtitleField?: string | null // TitleSupplement semantic override TitleSupplementField?: string | null // SpecificField1–7 overrides: map slot → internal field name (or null to skip) SpecificField1?: string | null SpecificField2?: string | null SpecificField3?: string | null SpecificField4?: string | null SpecificField5?: string | null SpecificField6?: string | null SpecificField7?: string | null } const TypeFieldOverrides: Record<string, FieldOverride> = { // JournalArticle // Number = issue number → issue // NumberOfVolumes = article number → number (eid) // Date2 = "Online since" (handled separately) // SpecificField1 = Database → note JournalArticle: { Number: "issue", NumberOfVolumes: "eid", SpecificField1: "note", // Database }, // NewspaperArticle // Number = Edition → number // Periodical = Newspaper → journaltitle (handled in processPeriodical) NewspaperArticle: { Number: "number", }, // SpecialIssue // Number = Number → number SpecialIssue: { Number: "number", }, // Book – nothing special beyond defaults // BookEdited / CollectedWorks BookEdited: { NumberOfVolumes: "volumes", }, CollectedWorks: { NumberOfVolumes: "volumes", }, // ConferenceProceedings // SpecificField1 = Location of conference → venue // SpecificField4 = Event date → eventdate // SpecificField7 = Conference name → eventtitle ConferenceProceedings: { NumberOfVolumes: "volumes", SpecificField1: "venue", SpecificField4: "eventdate", SpecificField7: "eventtitle", }, // Thesis // TitleSupplement = Type of thesis → type // Organizations = Academic institution → institution // SpecificField1 = Institute → note Thesis: { TitleSupplementField: "type", SpecificField1: "note", // Institute }, // Map // Number = Scale → note (no direct internal field for scale) Map: { Number: "note", // Scale }, // Patent // Number = Patent number → number // Volume = Bibliographic IPC → volume (IPC classification) // Subtitle = Type → titleaddon // TitleSupplement = Claims → abstract (closest match) // 
SpecificField1 = Issue country → location // SpecificField2 = Applicant → note // SpecificField3 = Application year → origdate (partial) // SpecificField4 = Application country → origlocation // SpecificField5 = Application number → eprint // SpecificField6 = Priority date → note2 (appended to note) // SpecificField7 = Patent family → note Patent: { Number: "number", SubtitleField: "titleaddon", // Subtitle = Type TitleSupplementField: "abstract", // Claims SpecificField1: "location", // Issue country SpecificField2: "note", // Applicant SpecificField3: "origdate", // Application year SpecificField4: "origlocation", // Application country SpecificField5: "eprint", // Application number }, // Movie // SpecificField1 = Country → location // SpecificField2 = Length → pagetotal (runtime proxy) // Date = Release date, Date2 = Broadcast date (handled separately) Movie: { SpecificField1: "location", // Country of production SpecificField2: "pagetotal", // Length/runtime }, // Broadcast (Radio or TV) // Volume = Episode // SeriesTitle = Series title Broadcast: { Volume: "number", // Episode number SeriesTitle: "series", SpecificField2: "pagetotal", // Length }, // RadioPlay // NumberOfVolumes = Episode (Citavi 6) // SeriesTitle = Series title RadioPlay: { NumberOfVolumes: "number", // Episode SeriesTitle: "series", }, // AudioBook // SpecificField2 = Length AudioBook: { SpecificField2: "pagetotal", }, // AudioOrVideoDocument // Volume = Volume (Citavi 6) // SeriesTitle = Series title AudioOrVideoDocument: { SeriesTitle: "series", }, // MusicTrack // Number = Number → number // ParentReference = parent album (handled in processParentReference) // SpecificField2 = Length MusicTrack: { Number: "number", SpecificField2: "pagetotal", }, // Software // SpecificField1 = License type → note // SpecificField2 = License number → eprint // SpecificField3 = Operating system → note (appended) // SpecificField4 = Version → version // TitleSupplement = Technical details → titleaddon 
// NOTE: this table is indexed with the raw Citavi `ReferenceType` string, and
    // CitaviTypeMap registers the software type under the key `ComputerProgram`.
    // The override must therefore be keyed `ComputerProgram`; under the key
    // `Software` alone it was never looked up for that type.
    ComputerProgram: {
        TitleSupplementField: "titleaddon",
        SpecificField4: "version",
    },
    // Previous key, kept so that any input carrying `ReferenceType: "Software"`
    // still receives the same field mapping as before.
    Software: {
        TitleSupplementField: "titleaddon",
        SpecificField4: "version",
    },

    // InternetDocument
    //   Editors = Publisher → editor (but actually the publisher role)
    //   SeriesTitle = Series title
    InternetDocument: {
        SeriesTitle: "series",
    },

    // Lecture / Presentation
    //   SeriesTitle = Conference or lecture series → series
    //   SpecificField2 = Length (Citavi 6)
    Lecture: {
        SeriesTitle: "series",
        SpecificField2: "pagetotal",
    },

    // Manuscript – nothing special

    // UnpublishedWork / Report
    //   Number = Number → number
    //   SeriesTitle = Series title → series
    UnpublishedWork: {
        Number: "number",
        SeriesTitle: "series",
    },

    // PressRelease
    PressRelease: {
        SpecificField4: "note", // Embargo
    },

    // NewsAgencyReport
    NewsAgencyReport: {
        SpecificField4: "note", // Embargo
    },

    // ArchiveMaterial
    //   Number = Call number → library
    //   Volume = Volume
    ArchiveMaterial: {
        Number: "library", // Call number
        SpecificField1: "note", // Archive location (Citavi 6)
    },

    // File
    //   Number = Case number → number
    //   NumberOfVolumes = File size → pagetotal
    //   TitleSupplement = File type → titleaddon
    File: {
        Number: "number",
        NumberOfVolumes: "pagetotal",
        TitleSupplementField: "titleaddon",
    },

    // CourtDecision
    //   Number = Issue number → issue
    //   Periodical = Source of bibliographic information → journaltitle
    //   SpecificField1 = Paragraph numbers → note
    //   SpecificField2 = Case number → number
    //   SpecificField3 = Type of decision → type
    //   SpecificField4 = ECLI → eprint
    CourtDecision: {
        Number: "issue",
        SpecificField1: "note", // Paragraph numbers
        SpecificField2: "number", // Case number
        SpecificField3: "type", // Type of decision
        SpecificField4: "eprint", // ECLI
    },

    // StatuteOrRegulation
    //   Number = Issue number → number
    //   Subtitle = Abbreviation → shorthand
    StatuteOrRegulation: {
        Number: "number",
        SubtitleField: "shorthand", // Abbreviation
    },

    // LegalCommentary
    //   SpecificField1 = Common abbreviation → shorthand
    //   SeriesTitle = Series → series
    LegalCommentary: {
        SeriesTitle: "series",
SpecificField1: "shorthand", }, // Standard // Number = Number → number // Volume = ICS Notation → volume // SpecificField2 = Standard type → type Standard: { Number: "number", SpecificField2: "type", }, // PersonalCommunication // TitleSupplement = Mode → howpublished PersonalCommunication: { TitleSupplementField: "howpublished", }, // InterviewMaterial // TitleSupplement = Title supplement // (no override needed for non-default fields) // ContributionInLegalCommentary // Volume = Additions → volume // Date2 = Condition → note ContributionInLegalCommentary: { SpecificField2: null, // not mapped }, } // ─── TypeScript interfaces for Citavi JSON ─────────────────────────────────── export interface CitaviPerson { FirstName?: string LastName?: string MiddleName?: string /** Used for institutional/corporate names when no personal name is present */ Name?: string Sex?: number Id?: string [key: string]: unknown } export interface CitaviPublisher { Name?: string Place?: string [key: string]: unknown } export interface CitaviPeriodical { Name?: string StandardAbbreviation?: string UserAbbreviation1?: string /** Electronic ISSN */ Eissn?: string /** Print ISSN */ Issn?: string [key: string]: unknown } export interface CitaviLocation { Address?: { UriString?: string OriginalString?: string LinkedResourceType?: number [key: string]: unknown } LocationType?: number [key: string]: unknown } export interface CitaviKeyword { Name?: string [key: string]: unknown } export interface CitaviReference { // Core identifiers Id?: string BibTeXKey?: string CitationKey?: string ReferenceType?: string // Titles Title?: string /** In the JSON the field is sometimes `Title1` (older exports) */ Title1?: string Subtitle?: string TitleSupplement?: string ShortTitle?: string ParallelTitle?: string // People Authors?: CitaviPerson[] Editors?: CitaviPerson[] Translators?: CitaviPerson[] Collaborators?: CitaviPerson[] OthersInvolved?: CitaviPerson[] Organizations?: CitaviPerson[] // Publisher / 
place Publishers?: CitaviPublisher[] PlaceOfPublication?: string // Periodical info Periodical?: CitaviPeriodical // Dates Year?: string YearResolved?: string Date?: string Date2?: string AccessDate?: string // Numbering Volume?: string Number?: string Issue?: string Edition?: string NumberOfVolumes?: string SeriesTitle?: string OnlineAddress?: string // Page information PageRange?: string PageCount?: string // Identifiers Isbn?: string Issn?: string Doi?: string // Other fields Abstract?: string Keywords?: CitaviKeyword[] Language?: string LanguageCode?: string Locations?: CitaviLocation[] StorageMedium?: string /** * Records where the bibliographic metadata was imported from * (e.g. `"CrossRef"`, `"PubMed"`). */ SourceOfBibliographicInformation?: string // Citavi SpecificField slots (meaning varies per ReferenceType) SpecificField1?: string SpecificField2?: string SpecificField3?: string SpecificField4?: string SpecificField5?: string SpecificField6?: string SpecificField7?: string // Nested / parent reference (for Contributions, MusicTracks, etc.) ParentReference?: CitaviReference [key: string]: unknown } /** * A single entry in a Citavi `WordPlaceholder.Entries` array. * Each entry links a bibliographic reference to citation-specific decorations * (page locator, prefix/suffix, etc.). */ export interface CitaviEntry { /** UUID identifying this placeholder entry instance */ Id?: string /** UUID of the linked bibliographic reference */ ReferenceId?: string /** Embedded bibliographic reference (present in WordPlaceholder format) */ Reference?: CitaviReference /** * UUID of a Citavi knowledge item (quotation, thought, or summary) that * this citation entry is associated with. Present when the citation was * inserted from the Citavi knowledge panel rather than directly from the * reference list. */ AssociateWithKnowledgeItemId?: string /** * Integer indicating the type of quotation/knowledge item this citation * represents within Citavi. Observed value: `1`. 
The full enum mapping * is not yet known. */ QuotationType?: number /** * Text to prepend to the formatted citation (e.g. `"Vgl. "`, `"See "`). * Absent when no prefix is set. Citavi formats this according to the * active citation style (e.g. auto-capitalising the first word in footnotes). */ Prefix?: string /** * Text to append to the formatted citation (e.g. `", etc."`). * Absent when no suffix is set. * Existence confirmed by the Citavi manual; not yet observed in real files. */ Suffix?: string /** * Citation-specific page/locator range. Contains `OriginalString` with * the full range text plus typed `StartPage`/`EndPage` sub-objects. */ PageRange?: { /** Full range string, e.g. `"100-105"` or `"Col. 12-14"`. Absent when no pages are set. */ OriginalString?: string /** * What the locator numbers represent; determines the prefix the * citation style renders (e.g. `p.`, `Col.`, `Nr.`, `§`). * * Known values (integer-to-type mapping inferred from the Citavi * manual's prose — exact values NOT confirmed by observed data): * 0 = Pages (default, confirmed observed) * 1 = Columns (inferred) * 2 = Section numbers (inferred) * 3 = Margin numbers (inferred) * 4 = Other / free-form (inferred) */ NumberingType?: number /** * Whether Arabic or Roman numerals are used. * * Known values (inferred from the Citavi manual; not confirmed by * observed data beyond 0): * 0 = Arabic (default, confirmed observed) * 1 = Roman (inferred) */ NumeralSystem?: number StartPage?: { OriginalString?: string PrettyString?: string /** Numeric value; absent when the page is not fully numeric */ Number?: number IsFullyNumeric?: boolean NumberingType?: number NumeralSystem?: number } EndPage?: { OriginalString?: string PrettyString?: string Number?: number IsFullyNumeric?: boolean NumberingType?: number NumeralSystem?: number } [key: string]: unknown } | null /** * Controls bibliography inclusion for this citation entry. 
* * Known string values (confirmed by Citavi manual; not yet observed in real files): * absent / default = reference appears in both citation and bibliography * `"/bibonly"` = reference appears only in the bibliography, not in-text * `"/nobib"` = reference appears only in-text, not in the bibliography */ BibliographyEntry?: string /** * Overrides which citation-style rule set (formatting variant) is used for * this entry — e.g. to use the bibliography layout for a single in-text * citation. Serialised form not yet observed in real files. */ RuleSet?: unknown /** * Selects among the citation style's optional formatting variants (1, 2, * or 3) for this entry. Commonly used to suppress or force "ibid."-style * short forms. Serialised form not yet observed in real files. */ FormatOption?: unknown /** * Overrides where this citation is physically inserted (in-text vs. * footnote), independently of what the citation style normally dictates. * Serialised form not yet observed in real files. */ InsertAs?: unknown /** * When `true`, the `NumberingType` for the locator is inherited from the * document default rather than set per-citation. */ UseNumberingTypeOfParentDocument?: boolean /** * When `true`, the citation style's own default prefix overrides any * custom `Prefix` string on this entry. When `false` and `Prefix` is * absent, no prefix is added. */ UseStandardPrefix?: boolean [key: string]: unknown } /** * Top-level Citavi JSON payload. Three shapes are supported: * 1. WordPlaceholder – `{ Entries: [...] }` * 2. Project export – `{ References: [...] }` * 3. 
Plain array – `CitaviReference[]`
 */
export type CitaviInput =
    | {
          Entries?: CitaviEntry[]
          References?: CitaviReference[]
          [key: string]: unknown
      }
    | CitaviReference[]

interface ErrorObject {
    type: string
    field?: string
    field_name?: string
    value?: unknown
    entry?: string
}

// ─── Parser ──────────────────────────────────────────────────────────────────

export class CitaviParser {
    input: CitaviInput
    entries: EntryObject[]
    errors: ErrorObject[]
    warnings: ErrorObject[]
    /** Track processed Citavi Reference IDs to avoid duplicate imports */
    private seenIds: Set<string> = new Set()
    private usedKeys: Set<string> = new Set()
    /**
     * Maps each Citavi Reference `Id` (UUID) to the final `entry_key` that was
     * assigned to it after normalisation.  Populated during `parse()` so that
     * callers (e.g. `DocxCitationsParser`) can resolve a `ReferenceId` from a
     * `CitaviEntry` back to the actual key used in the returned `BibDB`.
     */
    referenceIdToEntryKey: Map<string, string> = new Map()

    constructor(input: CitaviInput) {
        this.input = input
        this.entries = []
        this.errors = []
        this.warnings = []
    }

    /**
     * Convert every reference found in the input.
     *
     * References are visited in the order produced by `collectReferences()`.
     * A reference whose id (its `Id`, or its 1-based position when `Id` is
     * empty) was already seen is skipped with a `duplicate_entry` warning.
     *
     * @returns the converted entries keyed by consecutive 1-based indices.
     */
    parse(): Record<number, EntryObject> {
        this.collectReferences().forEach((ref, position) => {
            const ordinal = position + 1
            const id = ref.Id || String(ordinal)
            if (this.seenIds.has(id)) {
                this.warnings.push({
                    type: "duplicate_entry",
                    entry: id,
                })
                return
            }
            this.seenIds.add(id)
            const converted = this.convertReference(ref, ordinal)
            if (converted) {
                this.entries.push(converted)
            }
        })

        // Number the accumulated entries 1..n.
        const bibDB: Record<number, EntryObject> = {}
        let slot = 0
        for (const entry of this.entries) {
            slot += 1
            bibDB[slot] = entry
        }
        return bibDB
    }

    // ─── Input flattening ────────────────────────────────────────────────────

    /**
     * Walk the input structure and return a flat ordered list of all
     * CitaviReference objects, with parent references appearing before their
     * children so they receive lower bibDB indices.
*/
    private collectReferences(): CitaviReference[] {
        const refs: CitaviReference[] = []

        // Depth-first: a reference's ParentReference is pushed before the
        // reference itself. Null/undefined values are ignored.
        const addRef = (ref: CitaviReference | null | undefined) => {
            if (!ref) return
            // Collect parent first so it gets a lower index
            if (ref.ParentReference) {
                addRef(ref.ParentReference)
            }
            refs.push(ref)
        }

        // The input is external JSON; treat it as unknown until narrowed so
        // that `null`, primitives, or sparse arrays do not raise a TypeError.
        const input: unknown = this.input

        if (Array.isArray(input)) {
            for (const ref of input as CitaviReference[]) {
                addRef(ref)
            }
            return refs
        }

        if (input === null || typeof input !== "object") {
            // Nothing that could contain references.
            return refs
        }

        const obj = input as {
            Entries?: CitaviEntry[]
            References?: CitaviReference[]
        }
        if (Array.isArray(obj.Entries)) {
            // WordPlaceholder format (takes precedence over `References`)
            for (const entry of obj.Entries) {
                if (entry?.Reference) {
                    addRef(entry.Reference)
                }
            }
        } else if (Array.isArray(obj.References)) {
            // Project-export format
            for (const ref of obj.References) {
                addRef(ref)
            }
        }
        return refs
    }

    // ─── Reference conversion ────────────────────────────────────────────────

    private convertReference(
        ref: CitaviReference,
        index: number,
    ): EntryObject | false {
        const entryId = ref.Id || String(index)

        if (!ref.ReferenceType) {
            this.warnings.push({
                type: "missing_reference_type",
                entry: entryId,
            })
        }

        const refType = ref.ReferenceType || "Unknown"

        // Warn when refType has no entry in our mapping table (falls back to misc)
        if (refType !== "Unknown" && !CitaviTypeMap[refType]) {
            this.warnings.push({
                type: "unknown_type",
                value: refType,
                entry: entryId,
            })
        }

        const bibType = CitaviTypeMap[refType] || "misc"

        // Error when the type map itself points to an unregistered internal type
        // (this would be a bug in CitaviTypeMap)
        if (!BibTypes[bibType]) {
            this.errors.push({
                type: "unknown_type",
                value: refType,
                entry: entryId,
            })
            return false
        }

        const effectiveBibType = bibType

        const fields: Record<string, unknown> = {}
        const roleOverrides = TypeRoleOverrides[refType] || {}
        const fieldOverrides = TypeFieldOverrides[refType] || {}

        // ── Titles ──────────────────────────────────────────────────────────
        this.processTitle(ref, fields, refType, fieldOverrides, entryId)

        // ── People 
// ──────────────────────────────────────────────────────────
        this.processNames(ref, fields, roleOverrides)

        // ── Date ────────────────────────────────────────────────────────────
        this.processDate(ref, fields, refType, entryId)

        // ── Access date ─────────────────────────────────────────────────────
        // Only the part before a "T" separator is kept; the value is not
        // otherwise validated here.
        if (ref.AccessDate) {
            const iso = ref.AccessDate.split("T")[0]
            if (iso) fields.urldate = iso
        }

        // ── Publisher / Place ───────────────────────────────────────────────
        this.processPublisher(ref, fields)

        // ── Periodical ──────────────────────────────────────────────────────
        this.processPeriodical(ref, fields)

        // ── Volume / Issue / Number / Edition / Series ───────────────────────
        this.processNumbering(ref, fields, refType, fieldOverrides)

        // ── Pages ───────────────────────────────────────────────────────────
        if (ref.PageRange) {
            const parsed = this.parsePageRange(ref.PageRange)
            if (parsed) {
                fields.pages = parsed
            } else {
                this.warnings.push({
                    type: "unparsed_page_range",
                    field_name: "pages",
                    value: ref.PageRange,
                    entry: entryId,
                })
            }
        }

        // ── Page total / number of pages ────────────────────────────────────
        if (ref.PageCount) {
            fields.pagetotal = this.convertRichText(ref.PageCount)
        }

        // ── Identifiers ─────────────────────────────────────────────────────
        this.processIdentifiers(ref, fields, entryId)

        // ── Online address / URL ─────────────────────────────────────────────
        // OnlineAddress is a dedicated top-level field (shown in table view).
        // It is trimmed *before* deciding whether it is usable, so that a
        // blank or whitespace-only value neither produces an empty `url` nor
        // blocks the fallback to the Locations array.
        const onlineAddress =
            typeof ref.OnlineAddress === "string"
                ? ref.OnlineAddress.trim()
                : ""
        if (onlineAddress) {
            fields.url = onlineAddress
        } else {
            // Fall back to scanning the Locations array
            this.processLocations(ref, fields)
        }

        // ── Abstract ────────────────────────────────────────────────────────
        if (ref.Abstract) {
            fields.abstract = this.convertRichText(ref.Abstract)
        }

        // ── Keywords ────────────────────────────────────────────────────────
        this.processKeywords(ref, fields)

        // ── Language ────────────────────────────────────────────────────────
        this.processLanguage(ref, fields, entryId)

        // ── SpecificField slots ──────────────────────────────────────────────
        this.processSpecificFields(ref, fields, fieldOverrides, entryId)

        // ── Parent reference ─────────────────────────────────────────────────
        if (ref.ParentReference) {
            this.processParentReference(ref.ParentReference, fields)
        }

        // ── Entry key ───────────────────────────────────────────────────────
        const entryKey = this.buildEntryKey(ref, index, entryId)

        // Record the UUID → entry_key mapping so callers can resolve metadata.
        if (ref.Id) {
            this.referenceIdToEntryKey.set(ref.Id, entryKey)
        }

        return {
            entry_key: entryKey,
            bib_type: effectiveBibType,
            fields,
        }
    }

    // ─── Field processors ────────────────────────────────────────────────────

    private processTitle(
        ref: CitaviReference,
        fields: Record<string, unknown>,
        _refType: string,
        fo: FieldOverride,
        entryId: string,
    ) {
        // Older Citavi JSON exports use "Title1"; newer use "Title"
        const rawTitle = ref.Title || ref.Title1
        if (!rawTitle) {
            this.warnings.push({
                type: "missing_title",
                entry: entryId,
            })
            return
        }

        const subtitle = ref.Subtitle
        const fo_subtitle = fo.SubtitleField

        // Default behaviour: append Subtitle to Title with ": "
        // Override: map Subtitle to a different field (or null = discard)
        let mainTitle = rawTitle
        if (subtitle) {
            if (fo_subtitle === undefined) {
                // Default: colon-join
                mainTitle = `${rawTitle}: ${subtitle}`
            } else if (fo_subtitle !== null) {
                // Specific field target
                fields[fo_subtitle] = this.convertRichText(subtitle)
            }
            // fo_subtitle === null → discard subtitle
        }

        fields.title = this.convertRichText(mainTitle)

        // Short title — Citavi prepends "Author Year – " which we strip
        if (ref.ShortTitle) {
            const cleaned = this.cleanShortTitle(ref.ShortTitle)
            if (cleaned) fields.shorttitle = this.convertRichText(cleaned)
        }

        // TitleSupplement
        const supp = ref.TitleSupplement
        if (supp) {
            const suppTarget = fo.TitleSupplementField
            if (suppTarget === undefined) {
                // Default: titleaddon
                fields.titleaddon = this.convertRichText(supp)
            } else if (suppTarget !== null) {
fields[suppTarget] = this.convertRichText(supp)
            }
        }
    }

    /**
     * Copy the person/organisation lists of `ref` into name-list fields.
     *
     * Each Citavi slot has a default target role (Authors → "author",
     * Editors → "editor", Collaborators → "editora", Organizations → none).
     * `ro` may override the target per slot:
     *   - a string  → use that field name instead of the default,
     *   - `null`    → ignore the slot entirely for this reference type,
     *   - absent    → use the default.
     *
     * Slots are processed in the order Authors, Editors, Collaborators,
     * Organizations, Translators; when several slots resolve to the same
     * field their names are appended in that order.
     *
     * @param ref    the Citavi reference being converted
     * @param fields output field map, mutated in place
     * @param ro     role overrides for the reference's type (may be empty)
     */
    private processNames(
        ref: CitaviReference,
        fields: Record<string, unknown>,
        ro: RoleOverride,
    ) {
        // Resolve the target role for a slot. An explicit `null` override must
        // be honoured as "skip this slot"; only a missing (undefined) override
        // falls back to the default. Previously `null` also fell back to the
        // default, which silently disabled every "ignore" override.
        const role = (
            slot: keyof RoleOverride,
            defaultRole: string | null,
        ): string | null => {
            const override = ro[slot]
            return override === undefined ? defaultRole : override
        }

        const authorsRole = role("Authors", "author")
        const editorsRole = role("Editors", "editor")
        const collaboRole = role("Collaborators", "editora")
        const orgRole = role("Organizations", null)

        if (ref.Authors && ref.Authors.length > 0 && authorsRole) {
            this.addToNameField(
                fields,
                authorsRole,
                this.convertPersonList(ref.Authors),
            )
        }

        if (ref.Editors && ref.Editors.length > 0 && editorsRole) {
            this.addToNameField(
                fields,
                editorsRole,
                this.convertPersonList(ref.Editors),
            )
        }

        if (ref.Collaborators && ref.Collaborators.length > 0 && collaboRole) {
            this.addToNameField(
                fields,
                collaboRole,
                this.convertPersonList(ref.Collaborators),
            )
        }

        if (ref.Organizations && ref.Organizations.length > 0 && orgRole) {
            // Organizations are always institutional names
            this.addToNameField(
                fields,
                orgRole,
                this.convertPersonList(ref.Organizations, true),
            )
        }

        // Translators are always translators, regardless of type
        if (ref.Translators && ref.Translators.length > 0) {
            this.addToNameField(
                fields,
                "translator",
                this.convertPersonList(ref.Translators),
            )
        }
    }

    /** Append `names` to an existing name-list field, or create it.
*/
    private addToNameField(
        fields: Record<string, unknown>,
        fieldName: string,
        names: NameDictObject[],
    ) {
        // Nothing to add: leave the field untouched (and uncreated)
        if (names.length === 0) return

        const current = fields[fieldName]
        fields[fieldName] = current
            ? (current as NameDictObject[]).concat(names) // merge into existing list
            : names
    }

    /**
     * Derive `date` (and, for statutes, `origdate`) from the Citavi date
     * fields.
     *
     * For most types:
     *   Date  = primary date (release, publication, …)
     *   Date2 = secondary date (broadcast, online-since, revised, …)
     *
     * Special cases:
     *   Patent: Date = Application date, Date2 = Publication date, so Date2
     *           is preferred as the primary date.
     *   StatuteOrRegulation: Date2 = Revised, stored as `origdate`.
     *
     * A primary value that is neither parseable by `parseISODate` nor a bare
     * four-digit year produces an "unparsed_date" warning; a four-digit
     * `YearResolved` / `Year` is then used when available.
     *
     * @param ref - Citavi reference being converted.
     * @param fields - Output field map; mutated in place.
     * @param refType - Citavi reference type name.
     * @param entryId - Identifier attached to any warning that is pushed.
     */
    private processDate(
        ref: CitaviReference,
        fields: Record<string, unknown>,
        refType: string,
        entryId: string,
    ) {
        const isPatent = refType === "Patent"
        const primaryRaw = isPatent
            ? ref.Date2 || ref.Date
            : ref.Date || ref.Date2

        // Year to fall back on when no usable primary date is available.
        // Evaluated lazily, only on the paths that actually need it.
        const yearSource = ref.YearResolved || ref.Year
        const fourDigits = /^\d{4}$/
        const fallbackYear = (): string | null => {
            if (!yearSource) return null
            const trimmedYear = yearSource.trim()
            return fourDigits.test(trimmedYear) ? trimmedYear : null
        }

        if (!primaryRaw) {
            const year = fallbackYear()
            if (year !== null) fields.date = year
        } else {
            const iso = this.parseISODate(primaryRaw)
            if (iso) {
                fields.date = iso
            } else {
                const trimmedRaw = primaryRaw.trim()
                if (fourDigits.test(trimmedRaw)) {
                    fields.date = trimmedRaw
                } else {
                    // Unparseable primary date: always warn, then use the
                    // year fallback if there is one.
                    const year = fallbackYear()
                    this.warnings.push({
                        type: "unparsed_date",
                        field_name: "date",
                        value: primaryRaw,
                        entry: entryId,
                    })
                    if (year !== null) fields.date = year
                }
            }
        }

        // Secondary date handling. Only StatuteOrRegulation maps Date2
        // (Revised → origdate); for other types, e.g. JournalArticle's
        // "Online since", it is omitted silently.
        if (isPatent || !ref.Date2 || refType !== "StatuteOrRegulation") {
            return
        }
        const revised = this.parseISODate(ref.Date2)
        if (revised) fields.origdate = revised
    }

    private processPublisher(
ref: CitaviReference,
        fields: Record<string, unknown>,
    ) {
        // Collect publisher names from the Publishers array; multiple names
        // are joined with " / " into a single list item.
        if (ref.Publishers && ref.Publishers.length > 0) {
            const names = ref.Publishers.map((p) => p.Name || "").filter(
                Boolean,
            )
            if (names.length > 0) {
                fields.publisher = [this.convertRichText(names.join(" / "))]
            }
            // Some Publisher objects also carry a Place
            const places = ref.Publishers.map((p) => p.Place || "").filter(
                Boolean,
            )
            if (places.length > 0 && !ref.PlaceOfPublication) {
                fields.location = [this.convertRichText(places.join(" / "))]
            }
        }

        // Explicit PlaceOfPublication always wins over Places from Publishers
        if (ref.PlaceOfPublication) {
            fields.location = [this.convertRichText(ref.PlaceOfPublication)]
        }

        // For Thesis, Organizations = Academic institution → institution field
        // (handled already in processNames as "institution", but we also want
        // PlaceOfPublication → location which is "Location of institution" for Thesis)
    }

    /**
     * Map the reference's Periodical to `journaltitle` (from `Name`) and
     * `shortjournal` (from `StandardAbbreviation`, else `UserAbbreviation1`).
     * Does nothing when the reference has no Periodical.
     *
     * @param ref - Citavi reference being converted.
     * @param fields - Output field map; mutated in place.
     */
    private processPeriodical(
        ref: CitaviReference,
        fields: Record<string, unknown>,
    ) {
        if (!ref.Periodical) return

        const name = ref.Periodical.Name
        if (name) {
            fields.journaltitle = this.convertRichText(name)
        }

        const abbr =
            ref.Periodical.StandardAbbreviation ||
            ref.Periodical.UserAbbreviation1
        if (abbr) {
            fields.shortjournal = this.convertRichText(abbr)
        }
    }

    /**
     * Map Volume, Issue, Number, NumberOfVolumes, Edition and SeriesTitle to
     * output fields.
     *
     * Volume, Number, NumberOfVolumes and SeriesTitle honour a per-type
     * override in `fo`, using the same convention as the other override
     * slots in this class:
     *   undefined → default target ("volume", "number", "volumes", "series")
     *   null      → discard the value
     *   string    → write to that field instead
     *
     * @param ref - Citavi reference being converted.
     * @param fields - Output field map; mutated in place.
     * @param _refType - Unused here.
     * @param fo - Per-type field overrides.
     */
    private processNumbering(
        ref: CitaviReference,
        fields: Record<string, unknown>,
        _refType: string,
        fo: FieldOverride,
    ) {
        // Only an absent override selects the default. `??` must not be used
        // here: it would also replace an explicit `null` ("discard") with the
        // default and make the slot impossible to suppress.
        const targetFor = (
            override: string | null | undefined,
            fallback: string,
        ): string | null => (override === undefined ? fallback : override)

        // Volume
        const volTarget = targetFor(fo.Volume, "volume")
        if (ref.Volume && volTarget) {
            fields[volTarget] = this.convertRichText(ref.Volume)
        }

        // Issue (the Citavi `Issue` field — not the same as `Number`)
        if (ref.Issue) {
            fields.issue = this.convertRichText(ref.Issue)
        }

        // Number (meaning varies per type; default is "number")
        const numTarget = targetFor(fo.Number, "number")
        if (ref.Number && numTarget) {
            if (numTarget === "issue" && fields.issue) {
                // Already populated from the dedicated Issue field; skip
            } else {
                fields[numTarget] = this.convertRichText(ref.Number)
            }
        }

        // NumberOfVolumes (meaning varies per type; default is "volumes")
        const novTarget = targetFor(fo.NumberOfVolumes, "volumes")
        if (ref.NumberOfVolumes && novTarget) {
            fields[novTarget] = this.convertRichText(ref.NumberOfVolumes)
        }

        // Edition
        if (ref.Edition) {
            fields.edition = this.convertRichText(ref.Edition)
        }

        // SeriesTitle (meaning varies per type; default is "series")
        const serTarget = targetFor(fo.SeriesTitle, "series")
        if (ref.SeriesTitle && serTarget) {
            fields[serTarget] = this.convertRichText(ref.SeriesTitle)
        }
    }

    /**
     * Copy DOI, ISBN and ISSN into `doi`, `isbn` and `issn`.
     *
     * The DOI is trimmed and stored as a plain string; a DOI that still
     * contains a space is stored anyway but reported with a "suspect_doi"
     * warning.
     *
     * @param ref - Citavi reference being converted.
     * @param fields - Output field map; mutated in place.
     * @param entryId - Identifier attached to any warning that is pushed.
     */
    private processIdentifiers(
        ref: CitaviReference,
        fields: Record<string, unknown>,
        entryId: string,
    ) {
        if (ref.Doi) {
            const doi = ref.Doi.trim()
            // A DOI should never contain spaces; warn if it looks malformed
            if (doi.includes(" ")) {
                this.warnings.push({
                    type: "suspect_doi",
                    field_name: "doi",
                    value: doi,
                    entry: entryId,
                })
            }
            // doi is f_verbatim → store as a plain string, not a NodeArray
            fields.doi = doi
        }

        if (ref.Isbn) {
            fields.isbn = this.convertRichText(ref.Isbn)
        }

        if (ref.Issn) {
            fields.issn = this.convertRichText(ref.Issn)
        }
    }

    /**
     * Set `url` to the first location whose address (`UriString`, else
     * `OriginalString`) starts with http:// or https:// (case-insensitive).
     * Later locations are ignored once one matches.
     *
     * @param ref - Citavi reference being converted.
     * @param fields - Output field map; mutated in place.
     */
    private processLocations(
        ref: CitaviReference,
        fields: Record<string, unknown>,
    ) {
        if (!ref.Locations || ref.Locations.length === 0) return

        for (const loc of ref.Locations) {
            const uri = loc.Address?.UriString || loc.Address?.OriginalString
            if (uri && /^https?:\/\//i.test(uri)) {
                fields.url = uri
                return
            }
        }
    }

    /**
     * Store the non-empty keyword names as a string array in `keywords`.
     * The field is left unset when no keyword has a name.
     *
     * @param ref - Citavi reference being converted.
     * @param fields - Output field map; mutated in place.
     */
    private processKeywords(
        ref: CitaviReference,
        fields: Record<string, unknown>,
    ) {
        if (!ref.Keywords || ref.Keywords.length === 0) return

        const kws = ref.Keywords.map((k) => k.Name || "").filter(Boolean)
        if (kws.length > 0) {
            fields.keywords = kws
        }
    }

    private processLanguage(
        ref: CitaviReference,
        fields: Record<string, unknown>,
        entryId: string,
    ) {
        // Prefer
// LanguageCode (BCP-47) over Language (full name string)
        const code = ref.LanguageCode || ref.Language
        if (code) {
            const trimmed = code.trim()
            // NOTE(review): a whitespace-only LanguageCode is truthy, so it is
            // selected above and ends here with a warning even when Language
            // holds a usable value — confirm whether that is intended.
            if (!trimmed) {
                this.warnings.push({
                    type: "empty_language",
                    field_name: "langid",
                    entry: entryId,
                })
                return
            }
            // Map the raw code/name to one of the internal langid option keys
            // recognised by BibFieldTypes.langid (e.g. "german", "usenglish").
            const langidKey = lookupLangid(trimmed)
            if (langidKey) {
                fields.langid = langidKey
            } else {
                // No langid is written for values lookupLangid cannot map
                this.warnings.push({
                    type: "unknown_language",
                    field_name: "langid",
                    value: trimmed,
                    entry: entryId,
                })
            }
        }
    }

    /**
     * Route the SpecificField1…SpecificField7 slots to output fields
     * according to the per-type overrides in `fo`.
     *
     * For each slot that has a value:
     *   - target `null`      → the value is skipped on purpose
     *   - target `undefined` → an "unmapped_specific_field" warning is pushed
     *   - target string      → the value is written to that field; if the
     *     field already has content, only "note" is extended ("; "-joined),
     *     any other field keeps its existing content.
     *
     * @param ref - Citavi reference being converted.
     * @param fields - Output field map; mutated in place.
     * @param fo - Per-type field overrides keyed by slot name.
     * @param entryId - Identifier attached to any warning that is pushed.
     */
    private processSpecificFields(
        ref: CitaviReference,
        fields: Record<string, unknown>,
        fo: FieldOverride,
        entryId: string,
    ) {
        // Each pair is [key read from the reference, key read from the overrides]
        const slots: Array<[keyof CitaviReference, keyof FieldOverride]> = [
            ["SpecificField1", "SpecificField1"],
            ["SpecificField2", "SpecificField2"],
            ["SpecificField3", "SpecificField3"],
            ["SpecificField4", "SpecificField4"],
            ["SpecificField5", "SpecificField5"],
            ["SpecificField6", "SpecificField6"],
            ["SpecificField7", "SpecificField7"],
        ]

        for (const [refKey, foKey] of slots) {
            const value = ref[refKey] as string | undefined
            if (!value) continue

            const target = fo[foKey] as string | null | undefined
            if (target === null) continue // explicitly ignored

            if (target === undefined) {
                // The slot has a value but no mapping has been defined for this
                // reference type — the data would be silently lost, so warn.
                this.warnings.push({
                    type: "unmapped_specific_field",
                    field_name: String(refKey),
                    value,
                    entry: entryId,
                })
                continue
            }

            // Append rather than overwrite if the field already has content
            if (fields[target]) {
                // For note, append with semicolon
                if (target === "note") {
                    // NOTE(review): only the `text` of the first node of the
                    // existing note is carried over; any further nodes (and
                    // whatever else the first node holds) are not part of the
                    // rebuilt value — confirm this is acceptable.
                    const existing = fields[target] as NodeArray
                    const firstNode = existing[0] as TextNodeObject | undefined
                    const existingText = firstNode?.text ?? ""
                    fields[target] = this.convertRichText(
                        `${existingText}; ${value}`,
                    )
                }
                // For other fields, don't overwrite
            } else {
                fields[target] = this.convertRichText(value)
            }
        }

        // StorageMedium → howpublished for types that don't over