biblatex-csl-converter
Version:
Bibliography format converter: BibLaTeX, BibTeX, CSL-JSON, RIS, ENW, EndNote XML, Citavi, DOCX citations, ODT citations — parse, convert, and export with round-trip fidelity
1,440 lines (1,422 loc) • 76.1 kB
TypeScript
/**
 * Values of fields whose names are not known to the converter, keyed by
 * field name. Referenced by `EntryObject.unknown_fields`.
 */
interface UnknownFieldsObject {
/** Optional node list stored under the fixed `groups` key. */
groups?: NodeObject[];
/** Any other field name; the value is one of the listed parsed shapes, a plain string, or absent. */
[key: string]: NodeObject[] | RangeArray[] | NodeArray[] | Array<NodeArray | string> | NameDictObject[] | string | undefined;
}
/** A mark listed in a node's `marks` array; it carries only a `type` string. */
interface MarkObject {
type: string;
}
/**
 * Generic node shape: an arbitrary `type` string plus optional marks and
 * attributes. Forms `NodeObject` together with `TextNodeObject`.
 */
interface OtherNodeObject {
type: string;
marks?: MarkObject[];
/** Free-form attribute bag; values are untyped (`unknown`). */
attrs?: Record<string, unknown>;
}
/**
 * A text node: `type` is always the literal `"text"` and the content is held
 * in `text`. Marks and attributes are optional.
 */
interface TextNodeObject {
type: "text";
text: string;
marks?: MarkObject[];
/** Free-form attribute bag; values are untyped (`unknown`). */
attrs?: Record<string, unknown>;
}
/** Any node: either a `TextNodeObject` or an `OtherNodeObject`. */
type NodeObject = OtherNodeObject | TextNodeObject;
/** An ordered list of nodes; used as the value type of name parts, ranges, and literals. */
type NodeArray = NodeObject[];
/**
 * Source location of an entry, stored on `EntryObject.location`.
 * NOTE(review): presumably offsets into the parsed input string — confirm.
 */
interface EntryLocation {
start: number;
end: number;
}
/**
 * One bibliography entry in the internal format shared by the parsers and
 * exporters declared in this file (the value type of `BibDB`).
 */
interface EntryObject {
/** Key identifying the entry. */
entry_key: string;
incomplete?: boolean;
/** Name of the entry's bibliography type. */
bib_type: string;
/** Optional source location of the entry. */
location?: EntryLocation;
/** Optional source text of the entry. */
raw_text?: string;
/** Field values keyed by field name; value shapes are not typed here. */
fields: Record<string, unknown>;
/** Optional bag of unexpected fields keyed by field name. */
unexpected_fields?: Record<string, unknown>;
/** Optional bag of unknown fields; see `UnknownFieldsObject`. */
unknown_fields?: UnknownFieldsObject;
}
/**
 * A single name. Every part is optional and, apart from the boolean
 * `useprefix` flag, each part is a `NodeArray` rather than a plain string.
 */
type NameDictObject = {
literal?: NodeArray;
family?: NodeArray;
given?: NodeArray;
prefix?: NodeArray;
suffix?: NodeArray;
useprefix?: boolean;
};
/**
 * A named group. The type is recursive: each group contains its own list of
 * nested `groups`.
 */
type GroupObject = {
name: string;
/** NOTE(review): presumably the keys of the entries belonging to this group — confirm. */
references: string[];
/** Nested sub-groups. */
groups: GroupObject[];
};
/**
 * A range expressed as a tuple of one or two `NodeArray`s.
 * NOTE(review): the two-element form presumably holds start and end — confirm.
 */
type RangeArray = [NodeArray, NodeArray] | [NodeArray];
/**
 * Map from an option key to its `csl` and `biblatex` string values.
 * Accepted as one form of `BibFieldType.options`.
 */
interface LangidOptions$1 {
[key: string]: {
csl: string;
biblatex: string;
};
}
/** Description of one field type; the value type of `BibFieldTypes`. */
interface BibFieldType {
/** Name of the field's content type. */
type: string;
/** BibLaTeX-side name for the field. */
biblatex: string;
/** Optional CSL-side name: a single string or a string-to-string map. */
csl?: string | Record<string, string>;
/** Optional allowed values: a list of strings or a `LangidOptions$1` map. */
options?: string[] | LangidOptions$1;
strict?: boolean;
}
/** A list of field types of the Bibliography DB with lookup by field name. */
declare const BibFieldTypes: Record<string, BibFieldType>;
/** Description of one bibliography type; the value type of `BibTypes`. */
interface BibType {
order: number;
/** BibLaTeX-side name for the type. */
biblatex: string;
/** CSL-side name for the type. */
csl: string;
/** Field names listed as required for this type. */
required: string[];
/** Field names listed under "either/or" for this type. */
eitheror: string[];
/** Field names listed as optional for this type. */
optional: string[];
"biblatex-subtype"?: string;
}
/** A list of all bib types and their fields, with lookup by type name. */
declare const BibTypes: Record<string, BibType>;
/** A flat list of date components, each a string or a number. */
type SimpleDateArray = Array<string | number>;
/** A read-only list whose items are date components or nested `SimpleDateArray`s. */
type DateArray = readonly (string | number | SimpleDateArray)[];
/** Result object returned by `edtfParse()`. */
interface EDTFOutputObject {
type: string;
/** Validity flag for the parsed input. */
valid: boolean;
/** Parsed date components. */
values: DateArray;
cleanedString: string;
uncertain: boolean;
approximate: boolean;
}
/**
 * Parse a date string into an `EDTFOutputObject`.
 * NOTE(review): "EDTF" presumably stands for Extended Date Time Format — confirm.
 *
 * @param dateString The date string to parse.
 * @returns The parse result, including a `valid` flag.
 */
declare function edtfParse(dateString: string): EDTFOutputObject;
/**
 * One inheritance rule: lists of source and target names plus the
 * field-level mappings that apply between them. Used as the element type of
 * the parser option `crossRefInheritance`.
 * NOTE(review): `source`/`target` presumably hold bib type names — confirm.
 */
interface TypeInheritance {
source: string[];
target: string[];
fields: FieldInheritance[];
}
/** A single source-to-target name mapping inside a `TypeInheritance` rule. */
interface FieldInheritance {
source: string;
target: string;
}
/** Pair of a string and a zero-argument callback; element type of `GroupParser.stringStarts`. */
type StringStartTuplet = [string, () => void];
/** Warning record collected in `GroupParser.warnings`. */
type WarningObject$1 = {
type: string;
group_type: string;
};
/**
 * Declared API of the group parser. It is constructed from a list of entries
 * and exposes the resulting `groups` tree together with any `warnings`.
 * NOTE(review): the method names suggest it reads JabRef 3 and JabRef 4
 * group data — confirm against the implementation.
 */
declare class GroupParser {
/** Top-level groups. */
groups: GroupObject[];
groupType: string;
warnings: WarningObject$1[];
/** The entries passed to the constructor. */
entries: EntryObject[];
stringStarts: StringStartTuplet[];
pos: number;
fileDirectory: string;
input: string;
constructor(entries: EntryObject[]);
checkString(input: string): void;
readGroupInfo(groupType: string): void;
readFileDirectory(): void;
readJabref3(): void;
clearGroups(groups: GroupObject[]): void;
readJabref4(): void;
/** Returns a `GroupObject`, or `false` instead of one. */
find(name: string, groups: GroupObject[] | undefined): GroupObject | false;
}
/**
 * Configuration for the parser of files in BibTeX/BibLaTeX format.
 * Accepted by the `BibLatexParser` constructor and by `parse()` / `parseAsync()`.
 * Every option is optional.
 */
interface ConfigObject$2 {
/**
 * Processes fields with names that are unknown, adding them to an
 * `unknown_fields` object on each entry.
 *
 * Pass `true`, or an object that specifies the content type to use for
 * specific unknown fields (see the second example).
 *
 * example:
 * > a = new BibLatexParser(..., {processUnknown: true})
 * > a.output
 * {
 * "0:": {
 * ...
 * unknown_fields: {
 * ...
 * }
 * }
 * }
 *
 * > a = new BibLatexParser(..., {processUnknown: {commentator: 'l_name'}})
 * > a.output
 * {
 * "0:": {
 * ...
 * unknown_fields: {
 * commentator: [
 * {
 * given: ...,
 * family: ...
 * }
 * ]
 * ...
 * }
 * }
 * }
 */
processUnknown?: boolean | Record<string, string>;
/**
 * Processes fields with names that are known, but are not expected for the given bibtype,
 * adding them to an `unexpected_fields` object on each entry.
 */
processUnexpected?: boolean;
processInvalidURIs?: boolean;
processComments?: boolean;
/**
 * Include the source location as a `location` object on each entry.
 *
 * example:
 * > a = new BibLatexParser(..., {includeLocation: true})
 * > a.output
 * {
 * "0:": {
 * ...
 * location: {
 * start: 1,
 * end: 42
 * }
 * }
 * }
 */
includeLocation?: boolean;
/**
 * Include the source text as a `raw_text` property on each entry.
 *
 * example:
 * > a = new BibLatexParser(..., {includeRawText: true})
 * > a.output
 * {
 * "0:": {
 * ...
 * raw_text: '@article{...}'
 * }
 * }
 */
includeRawText?: boolean;
/** Optional list of `TypeInheritance` rules. */
crossRefInheritance?: TypeInheritance[];
includeUnusedNocase?: boolean;
}
/**
 * Error or warning record produced by `BibLatexParser`. Only `type` is
 * required; the remaining properties are optional details.
 */
interface ErrorObject$9 {
/** Identifier of the kind of error or warning. */
type: string;
expected?: string;
found?: string;
line?: number;
key?: string;
entry?: string;
field?: string;
field_name?: string;
alias_of?: string;
alias_of_value?: unknown;
value?: string[] | string;
variable?: string;
type_name?: string;
}
/** Options accepted by `BibLatexParser.match()`. */
interface MatchOptionsObject {
/** Whitespace-skipping setting; typed as either a string or a boolean. */
skipWhitespace: string | boolean;
}
/**
 * Result returned by `BibLatexParser.parse()` / `parseAsync()` and by the
 * standalone `parse()` / `parseAsync()` functions.
 */
interface BiblatexParseResult {
/** Parsed entries keyed by number. */
entries: {
[key: number]: EntryObject;
};
errors: ErrorObject$9[];
warnings: ErrorObject$9[];
comments: string[];
/** String-to-string map. */
strings: Record<string, string>;
jabref: {
/** The group tree, or `false` instead of one. */
groups: GroupObject[] | false;
/** String-to-string metadata map. */
meta: Record<string, string>;
};
}
/** Bibliography database: entries keyed by number. Input type of the exporters. */
interface BibDB {
[key: number]: EntryObject;
}
/**
 * Declared API of the BibTeX/BibLaTeX parser.
 *
 * Construct it with the input string and an optional `ConfigObject$2`, then
 * call `parse()` (synchronous) or `parseAsync()` to obtain a
 * `BiblatexParseResult`. Only signatures are visible here; the behaviour of
 * the individual helper methods is defined by the implementation.
 */
declare class BibLatexParser {
input: string;
config: ConfigObject$2;
pos: number;
startPosition: number;
endPosition: number;
entries: EntryObject[];
/** Current key, or `false` instead of one. */
currentKey: string | false;
currentEntry?: EntryObject;
currentType: string;
currentRawFields?: Record<string, unknown>;
bibDB: BibDB;
errors: ErrorObject$9[];
warnings: ErrorObject$9[];
/** String values keyed by the twelve upper-case three-letter month abbreviations. */
months: {
JAN: string;
FEB: string;
MAR: string;
APR: string;
MAY: string;
JUN: string;
JUL: string;
AUG: string;
SEP: string;
OCT: string;
NOV: string;
DEC: string;
};
strings: Record<string, string>;
comments: string[];
groupParser: GroupParser;
groups: GroupObject[] | false;
jabrefMeta: Record<string, string>;
// NOTE(review): `meta` is typed `number` here, whereas `jabrefMeta` above and
// `BiblatexParseResult.jabref.meta` are both `Record<string, string>` —
// confirm which type is intended.
jabref?: {
groups: GroupObject[] | false;
meta: number;
};
crossrefs: Record<string, string>;
constructor(input: string, config?: ConfigObject$2);
isWhitespace(s: string): boolean;
error(data: ErrorObject$9): void;
warning(data: ErrorObject$9): void;
match(s: string, options?: MatchOptionsObject): void;
tryMatch(s: string): boolean;
skipWhitespace(): void;
skipToNext(): boolean;
valueBraces(): string;
valueQuotes(): string;
singleValue(): string;
value(asis?: boolean): string;
key(optional?: boolean): string;
/** Returns a two-string tuple, or `false` instead of one. */
keyEqualsValue(asis?: boolean): [string, string] | false;
keyValueList(): void;
processFields(): void;
_reformKey(keyString: string, fKey: string): string | NodeArray;
_checkURI(uriString: string): boolean;
_reformURI(uriString: string): string;
_reformNameList(nameString: string): NameDictObject[];
_reformRange(rangeString: string): RangeArray[];
_reformLiteral(theValue: string, cpMode?: boolean): NodeArray;
bibType(): string;
createNewEntry(): void;
directive(): string | null;
string(): void;
preamble(): void;
replaceTeXChars(): void;
stepThroughBibtex(): void;
stepThroughBibtexAsync(): Promise<null>;
parseNext(): void;
parseComment(braceless: boolean): void;
createBibDB(): void;
cleanDB(): void;
_resolveCrossRef(key: string, parentKey: string): void;
// NOTE(review): the name is spelled `_resove…` (missing "l"); renaming it
// would change the declared API, so it is left as is.
_resoveAllCrossRefs(): void;
parsed(): BiblatexParseResult;
/** Synchronous entry point; returns the parse result. */
parse(): BiblatexParseResult;
/** Asynchronous entry point; resolves to the parse result. */
parseAsync(): Promise<BiblatexParseResult>;
}
/**
 * Parse a BibTeX/BibLaTeX input string.
 *
 * @param input The text to parse.
 * @param config Optional parser configuration.
 * @returns The parse result (entries, errors, warnings, comments, strings, JabRef data).
 */
declare function parse(input: string, config?: ConfigObject$2): BiblatexParseResult;
/**
 * Asynchronous counterpart of `parse()`: takes the same arguments and
 * returns a promise of the same result type.
 *
 * @param input The text to parse.
 * @param config Optional parser configuration.
 * @returns A promise resolving to the parse result.
 */
declare function parseAsync(input: string, config?: ConfigObject$2): Promise<BiblatexParseResult>;
/** Options accepted by the `BibLatexExporter` constructor; both flags are optional. */
type ConfigObject$1 = {
traditionalNames?: boolean;
exportUnexpectedFields?: boolean;
};
/** One entry as held in `BibLatexExporter.bibtexArray`: a type, a key, and optional values. */
type BibObject = {
type: string;
key: string;
/** Optional values keyed by name; value shapes are not typed here. */
values?: Record<string, unknown>;
};
/** Warning record collected in `BibLatexExporter.warnings`. */
type WarningObject = {
type: string;
variable: string;
};
/**
 * Declared API of the BibLaTeX exporter.
 *
 * Construct it with a `BibDB`, an optional list of keys (`pks`, or `false`)
 * and an optional `ConfigObject$1`; `parse()` returns a string.
 * NOTE(review): `pks` presumably selects which entries are exported and
 * `false`/omitted presumably means all entries — confirm.
 */
declare class BibLatexExporter {
bibDB: BibDB;
pks: string[];
config: ConfigObject$1;
warnings: WarningObject[];
bibtexStr: string;
bibtexArray: BibObject[];
constructor(bibDB: BibDB, pks?: string[] | false, config?: ConfigObject$1);
/** Entry point; returns the exported string. */
parse(): string;
_reformKey(theValue: string | unknown, fKey: string): string;
_reformRange(theValue: unknown): string;
_reformInterval(theValue: unknown): string;
_reformName(theValue: unknown): string;
_protectNamePart(namePart: string): string;
_escapeTeX(theValue: unknown): string;
_reformText(theValue: unknown): string;
_getBibtexString(biblist: BibObject[]): string;
}
/** Options accepted by the `CSLExporter` constructor; all are optional. */
type ConfigObject = {
escapeText?: boolean;
useEntryKeys?: boolean;
language?: string;
};
/** Error record collected in `CSLExporter.errors`. */
type ErrorObject$8 = {
type: string;
variable: string;
};
/** Date value as returned by `CSLExporter._reformDate()`. */
type CSLDateObject = {
/** One or two arrays of numbers (a single date, or a pair of dates). */
"date-parts"?: [number[]] | [number[], number[]];
circa?: boolean;
};
/**
 * Name value as returned by `CSLExporter._reformName()`. Unlike
 * `NameDictObject`, every part is a plain string.
 */
type CSLNameObject = {
literal?: string;
given?: string;
family?: string;
suffix?: string;
"non-dropping-particle"?: string;
"dropping-particle"?: string;
};
/** One CSL entry produced by `CSLExporter`: an optional `id` plus arbitrary further properties. */
interface CSLEntry$1 {
id?: string;
/** Any other property; values are untyped (`unknown`). */
[key: string]: unknown;
}
/** Return type of `CSLExporter.parse()`: CSL entries keyed by string. */
type CSLOutput = Record<string, CSLEntry$1>;
/**
 * Declared API of the CSL exporter.
 *
 * Construct it with a `BibDB`, an optional list of keys (`pks`, or `false`)
 * and an optional `ConfigObject`; `parse()` returns a `CSLOutput`.
 * NOTE(review): `pks` presumably selects which entries are exported and
 * `false`/omitted presumably means all entries — confirm.
 */
declare class CSLExporter {
bibDB: BibDB;
pks: string[];
config: ConfigObject;
cslDB: Record<string, CSLEntry$1>;
errors: ErrorObject$8[];
constructor(bibDB: BibDB, pks?: string[] | false, config?: ConfigObject);
/** Entry point; returns the exported CSL entries. */
parse(): CSLOutput;
/**
 * Converts one BibDB entry to CSL format.
 * @param id The id identifying the bibliography entry.
 * @returns The entry in CSL form.
 */
getCSLEntry(id: string): CSLEntry$1;
_reformKey(theValue: string | unknown, fKey: string): string;
_reformRange(theValue: unknown): string;
_reformInterval(theValue: unknown): string;
_reformInteger(theValue: unknown): string | number;
_escapeText(theValue: unknown): string;
_reformTitle(theValue: unknown): string;
_reformText(theValue: unknown): string;
/** Returns a `CSLDateObject`, or `false` instead of one. */
_reformDate(dateStr: string): false | CSLDateObject;
_reformName(theNames: NameDictObject[]): CSLNameObject[];
}
/**
* Type definitions for the i18n module.
*
* Kept in a separate file so that `locales.ts` (which is auto-generated) can
* import the `Locale` interface without creating a circular dependency with
* `index.ts` (which in turn imports the locale objects from `locales.ts`).
*/
/**
 * Human-readable labels for every supported BibLaTeX / CSL field key.
 * Keys are field keys; values are the labels.
 */
type FieldTitles = Record<string, string>;
/**
 * Help / hint text for selected fields, keyed by field key. Only fields that
 * benefit from extra explanation have an entry here (date format,
 * name-prefix convention, …), so a lookup may yield `undefined`.
 */
type FieldHelp = Record<string, string>;
/**
 * Human-readable labels for every supported reference-type key.
 * Keys are reference-type keys; values are the labels.
 */
type TypeTitles = Record<string, string>;
/**
 * Per-reference-type overrides for field labels.
 *
 * Outer key = reference-type key (e.g. `"video"`).
 * Inner key = field key (e.g. `"author"`).
 * Value = label to use instead of the generic one in `fieldTitles`.
 *
 * Only reference types and fields that need an override are expected to
 * appear; everything else uses the generic label.
 */
type FieldTitlesByType = Record<string, Record<string, string>>;
/**
 * Human-readable labels for every value valid in the BibLaTeX `langid` field,
 * including BibTeX-level aliases (e.g. `pinyin`, `american`, `english`).
 * Keys are `langid` values; values are the labels.
 */
type LangidOptions = Record<string, string>;
/**
 * Human-readable labels for option values used in fields other than `langid`:
 * `editortype`, `pagination`, `pubstate`, and the `type` sub-field.
 * Keys are option values; values are the labels.
 */
type OtherOptions = Record<string, string>;
/**
 * A complete locale object — one per language JSON file.
 * All six label maps are required.
 */
interface Locale {
/** Generic field labels. */
fieldTitles: FieldTitles;
/** Help text for selected fields. */
fieldHelp: FieldHelp;
/** Reference-type labels. */
typeTitles: TypeTitles;
/** Field-label overrides per reference type. */
fieldTitlesByType: FieldTitlesByType;
/** Labels for `langid` values. */
langidOptions: LangidOptions;
/** Labels for all other option values. */
otherOptions: OtherOptions;
}
/**
* i18n module for biblatex-csl-converter
*
* Provides human-readable labels for reference types, field names, field help
* text, and option values in multiple languages.
*
* ## Structure of a locale object
* ```json
* {
* "fieldTitles": { "<fieldKey>": "<label>", … },
* "fieldHelp": { "<fieldKey>": "<help text>", … },
* "typeTitles": { "<typeKey>": "<label>", … },
* "fieldTitlesByType": { "<typeKey>": { "<fieldKey>": "<label>" }, … },
* "langidOptions": { "<langidKey>": "<label>", … },
* "otherOptions": { "<optionKey>": "<label>", … }
* }
* ```
*
* ## Per-type field overrides (`fieldTitlesByType`)
* Some field names carry a different human meaning depending on the reference
* type. For example, the `author` field of a `video` entry is conventionally
* labelled "Director(s)" rather than "Author(s)". Use `getFieldTitle()` which
* checks `fieldTitlesByType[type][field]` first and falls back to
* `fieldTitles[field]`.
*
* ## Option split
* `langidOptions` covers every value valid for the BibLaTeX `langid` field
* (including BibTeX-level aliases such as `pinyin`, `american`, `english`).
* `otherOptions` covers editortype values, pagination values, pubstate values,
* and `type` sub-field values (mathesis, phdthesis, techreport, …).
*
* ## Locale data
* The locale data lives in `src/i18n/locales.ts`, which is auto-generated by:
* npm run compile_i18n
*
* ## Supported languages
* ar, bg, cs, de, en, es, fr, it, ja, ko, nl, pl, pt-BR, pt-PT, ru, sv, tr, zh
*/
/**
 * All built-in locales keyed by IETF language tag.
 *
 * The record is typed `Readonly`, so its top-level keys cannot be reassigned
 * through this binding.
 *
 * Consumers that need a language not listed here can supply their own `Locale`
 * object — every public helper accepts a `Locale` directly.
 */
declare const locales: Readonly<Record<string, Locale>>;
/**
 * Return the `Locale` for *lang*, falling back to English when not available.
 *
 * Lookup order:
 * 1. Exact tag (e.g. `"pt-BR"`)
 * 2. Base subtag (e.g. `"pt"` from `"pt-BR"`)
 * 3. English fallback
 *
 * @param lang IETF language tag to look up.
 * @returns The matching locale, or the English locale as a fallback.
 *
 * @example
 * getLocale("de") // → locales.de
 * getLocale("pt-BR") // → locales["pt-BR"]
 * getLocale("xx") // → locales.en (fallback: no built-in locale for this tag)
 */
declare function getLocale(lang: string): Locale;
/**
 * Return the human-readable label for *fieldKey* in the context of *typeKey*,
 * using *locale* for the translation.
 *
 * Checks `locale.fieldTitlesByType[typeKey][fieldKey]` first, then falls back
 * to `locale.fieldTitles[fieldKey]`, and finally to the raw key itself.
 *
 * @param locale Locale that supplies the labels.
 * @param typeKey Reference-type key used to look up a per-type override.
 * @param fieldKey Field key to label.
 * @returns The label, or *fieldKey* itself when no label is defined.
 *
 * @example
 * getFieldTitle(locales.en, "book", "author") // → "Author(s)"
 * getFieldTitle(locales.en, "video", "author") // → "Director(s)"
 * getFieldTitle(locales.de, "book", "author") // → "Autor(en)"
 */
declare function getFieldTitle(locale: Locale, typeKey: string, fieldKey: string): string;
/**
 * Return the human-readable label for *typeKey* in *locale*, falling back to
 * the raw key if not found.
 *
 * @param locale Locale that supplies the labels.
 * @param typeKey Reference-type key to label.
 * @returns The label, or *typeKey* itself when no label is defined.
 *
 * @example
 * getTypeTitle(locales.fr, "article-journal") // → "Article de revue"
 */
declare function getTypeTitle(locale: Locale, typeKey: string): string;
/**
 * Return the help/hint text for *fieldKey* in *locale*, or `undefined` when
 * no help text is defined for that field.
 *
 * Unlike the `get…Title` helpers, this one does not fall back to the raw key.
 *
 * @param locale Locale that supplies the help texts.
 * @param fieldKey Field key to look up.
 * @returns The help text, or `undefined`.
 *
 * @example
 * getFieldHelp(locales.en, "date") // → "In <em>Extended Date Time Format</em>…"
 * getFieldHelp(locales.en, "title") // → undefined
 */
declare function getFieldHelp(locale: Locale, fieldKey: string): string | undefined;
/**
 * Return the human-readable label for a `langid` field value in *locale*,
 * falling back to the raw key if not found.
 *
 * @param locale Locale that supplies the labels.
 * @param langidKey `langid` value to label.
 * @returns The label, or *langidKey* itself when no label is defined.
 *
 * @example
 * getLangidTitle(locales.de, "french") // → "Französisch"
 * getLangidTitle(locales.en, "brportuguese") // → "Brazilian Portuguese"
 */
declare function getLangidTitle(locale: Locale, langidKey: string): string;
/**
 * Return the human-readable label for a non-language option value in *locale*
 * (i.e. an `editortype`, `pagination`, `pubstate`, or `type` sub-field value),
 * falling back to the raw key if not found.
 *
 * @param locale Locale that supplies the labels.
 * @param optionKey Option value to label.
 * @returns The label, or *optionKey* itself when no label is defined.
 *
 * @example
 * getOtherOptionTitle(locales.de, "phdthesis") // → "Ph.D. These"
 * getOtherOptionTitle(locales.en, "inpreparation") // → "In preparation"
 */
declare function getOtherOptionTitle(locale: Locale, optionKey: string): string;
/**
* Citavi JSON format parser
*
* Supports the JSON format exported by Citavi (SwissAcademic.Citavi),
* including the "WordPlaceholder" citation format used in docx exports
* and direct project-export arrays.
*
* For the native XML project format (.ctv5 / .ctv6) see citavi-xml.ts.
*
* Field semantics are derived from the official per-type documentation at:
* https://www1.citavi.com/sub/manual-citaviweb/en/fields_in_citavi.html
*
* Every Citavi reference type shares the same underlying JSON field names;
* only the *meaning* and *display label* of each field changes per type.
* This parser maps those semantic differences to the appropriate internal
* BibLaTeX/CSL field names.
*/
/**
 * A person (or institution) record in Citavi JSON. Used for authors, editors,
 * translators, collaborators, other involved persons, and organizations on a
 * `CitaviReference`. All properties are optional.
 */
interface CitaviPerson {
FirstName?: string;
LastName?: string;
MiddleName?: string;
/** Used for institutional/corporate names when no personal name is present */
Name?: string;
Sex?: number;
Id?: string;
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
}
/** A publisher record in Citavi JSON; element type of `CitaviReference.Publishers`. */
interface CitaviPublisher {
Name?: string;
Place?: string;
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
}
/** A periodical record in Citavi JSON; type of `CitaviReference.Periodical`. */
interface CitaviPeriodical {
Name?: string;
StandardAbbreviation?: string;
UserAbbreviation1?: string;
/** Electronic ISSN */
Eissn?: string;
/** Print ISSN */
Issn?: string;
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
}
/**
 * A location record in Citavi JSON; element type of
 * `CitaviReference.Locations`. The meaning of the numeric `LocationType` and
 * `LinkedResourceType` codes is not documented in this file.
 */
interface CitaviLocation {
/** Optional address object with URI and original string forms. */
Address?: {
UriString?: string;
OriginalString?: string;
LinkedResourceType?: number;
[key: string]: unknown;
};
LocationType?: number;
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
}
/** A keyword record in Citavi JSON; element type of `CitaviReference.Keywords`. */
interface CitaviKeyword {
Name?: string;
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
}
/**
 * A bibliographic reference in Citavi JSON. Every property is optional, and
 * scalar values are typed as strings. The type is recursive through
 * `ParentReference`.
 */
interface CitaviReference {
Id?: string;
BibTeXKey?: string;
CitationKey?: string;
ReferenceType?: string;
Title?: string;
/** In the JSON the field is sometimes `Title1` (older exports) */
Title1?: string;
Subtitle?: string;
TitleSupplement?: string;
ShortTitle?: string;
ParallelTitle?: string;
Authors?: CitaviPerson[];
Editors?: CitaviPerson[];
Translators?: CitaviPerson[];
Collaborators?: CitaviPerson[];
OthersInvolved?: CitaviPerson[];
Organizations?: CitaviPerson[];
Publishers?: CitaviPublisher[];
PlaceOfPublication?: string;
Periodical?: CitaviPeriodical;
Year?: string;
YearResolved?: string;
Date?: string;
Date2?: string;
AccessDate?: string;
Volume?: string;
Number?: string;
Issue?: string;
Edition?: string;
NumberOfVolumes?: string;
SeriesTitle?: string;
OnlineAddress?: string;
PageRange?: string;
PageCount?: string;
Isbn?: string;
Issn?: string;
Doi?: string;
Abstract?: string;
Keywords?: CitaviKeyword[];
Language?: string;
LanguageCode?: string;
Locations?: CitaviLocation[];
StorageMedium?: string;
/**
 * Records where the bibliographic metadata was imported from
 * (e.g. `"CrossRef"`, `"PubMed"`).
 */
SourceOfBibliographicInformation?: string;
SpecificField1?: string;
SpecificField2?: string;
SpecificField3?: string;
SpecificField4?: string;
SpecificField5?: string;
SpecificField6?: string;
SpecificField7?: string;
/** Optional parent reference, itself a full `CitaviReference`. */
ParentReference?: CitaviReference;
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
}
/**
 * A single entry in a Citavi `WordPlaceholder.Entries` array.
 * Each entry links a bibliographic reference to citation-specific decorations
 * (page locator, prefix/suffix, etc.). All properties are optional.
 */
interface CitaviEntry {
/** UUID identifying this placeholder entry instance */
Id?: string;
/** UUID of the linked bibliographic reference */
ReferenceId?: string;
/** Embedded bibliographic reference (present in WordPlaceholder format) */
Reference?: CitaviReference;
/**
 * UUID of a Citavi knowledge item (quotation, thought, or summary) that
 * this citation entry is associated with. Present when the citation was
 * inserted from the Citavi knowledge panel rather than directly from the
 * reference list.
 */
AssociateWithKnowledgeItemId?: string;
/**
 * Integer indicating the type of quotation/knowledge item this citation
 * represents within Citavi. Observed value: `1`. The full enum mapping
 * is not yet known.
 */
QuotationType?: number;
/**
 * Text to prepend to the formatted citation (e.g. `"Vgl. "`, `"See "`).
 * Absent when no prefix is set. Citavi formats this according to the
 * active citation style (e.g. auto-capitalising the first word in footnotes).
 */
Prefix?: string;
/**
 * Text to append to the formatted citation (e.g. `", etc."`).
 * Absent when no suffix is set.
 * Existence confirmed by the Citavi manual; not yet observed in real files.
 */
Suffix?: string;
/**
 * Citation-specific page/locator range. Contains `OriginalString` with
 * the full range text plus typed `StartPage`/`EndPage` sub-objects.
 * The property may also be explicitly `null`.
 */
PageRange?: {
/** Full range string, e.g. `"100-105"` or `"Col. 12-14"`. Absent when no pages are set. */
OriginalString?: string;
/**
 * What the locator numbers represent; determines the prefix the
 * citation style renders (e.g. `p.`, `Col.`, `Nr.`, `§`).
 *
 * Known values (integer-to-type mapping inferred from the Citavi
 * manual's prose — exact values NOT confirmed by observed data):
 * 0 = Pages (default, confirmed observed)
 * 1 = Columns (inferred)
 * 2 = Section numbers (inferred)
 * 3 = Margin numbers (inferred)
 * 4 = Other / free-form (inferred)
 */
NumberingType?: number;
/**
 * Whether Arabic or Roman numerals are used.
 *
 * Known values (inferred from the Citavi manual; not confirmed by
 * observed data beyond 0):
 * 0 = Arabic (default, confirmed observed)
 * 1 = Roman (inferred)
 */
NumeralSystem?: number;
/** First page of the range, in original, pretty-printed, and numeric forms. */
StartPage?: {
OriginalString?: string;
PrettyString?: string;
/** Numeric value; absent when the page is not fully numeric */
Number?: number;
IsFullyNumeric?: boolean;
NumberingType?: number;
NumeralSystem?: number;
};
/** Last page of the range; same shape as `StartPage`. */
EndPage?: {
OriginalString?: string;
PrettyString?: string;
Number?: number;
IsFullyNumeric?: boolean;
NumberingType?: number;
NumeralSystem?: number;
};
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
} | null;
/**
 * Controls bibliography inclusion for this citation entry.
 *
 * Known string values (confirmed by Citavi manual; not yet observed in real files):
 * absent / default = reference appears in both citation and bibliography
 * `"/bibonly"` = reference appears only in the bibliography, not in-text
 * `"/nobib"` = reference appears only in-text, not in the bibliography
 */
BibliographyEntry?: string;
/**
 * Overrides which citation-style rule set (formatting variant) is used for
 * this entry — e.g. to use the bibliography layout for a single in-text
 * citation. Serialised form not yet observed in real files.
 */
RuleSet?: unknown;
/**
 * Selects among the citation style's optional formatting variants (1, 2,
 * or 3) for this entry. Commonly used to suppress or force "ibid."-style
 * short forms. Serialised form not yet observed in real files.
 */
FormatOption?: unknown;
/**
 * Overrides where this citation is physically inserted (in-text vs.
 * footnote), independently of what the citation style normally dictates.
 * Serialised form not yet observed in real files.
 */
InsertAs?: unknown;
/**
 * When `true`, the `NumberingType` for the locator is inherited from the
 * document default rather than set per-citation.
 */
UseNumberingTypeOfParentDocument?: boolean;
/**
 * When `true`, the citation style's own default prefix overrides any
 * custom `Prefix` string on this entry. When `false` and `Prefix` is
 * absent, no prefix is added.
 */
UseStandardPrefix?: boolean;
/** Any further property present in the JSON; not modelled here. */
[key: string]: unknown;
}
/**
 * Top-level Citavi JSON payload. Three shapes are supported:
 * 1. WordPlaceholder – `{ Entries: [...] }`
 * 2. Project export – `{ References: [...] }`
 * 3. Plain array – `CitaviReference[]`
 *
 * In the object form both `Entries` and `References` are optional and any
 * further properties are allowed.
 */
type CitaviInput = {
Entries?: CitaviEntry[];
References?: CitaviReference[];
[key: string]: unknown;
} | CitaviReference[];
/** Error or warning record produced by `CitaviParser`; only `type` is required. */
interface ErrorObject$7 {
type: string;
field?: string;
field_name?: string;
value?: unknown;
entry?: string;
}
/**
 * Declared API of the Citavi JSON parser.
 *
 * Construct it with a `CitaviInput` and call `parse()` to obtain the
 * converted entries keyed by number. Problems are collected in `errors` and
 * `warnings`. All conversion helpers are private.
 */
declare class CitaviParser {
input: CitaviInput;
entries: EntryObject[];
errors: ErrorObject$7[];
warnings: ErrorObject$7[];
/** Track processed Citavi Reference IDs to avoid duplicate imports */
private seenIds;
private usedKeys;
/**
 * Maps each Citavi Reference `Id` (UUID) to the final `entry_key` that was
 * assigned to it after normalisation. Populated during `parse()` so that
 * callers (e.g. `DocxCitationsParser`) can resolve a `ReferenceId` from a
 * `CitaviEntry` back to the actual key used in the returned `BibDB`.
 */
referenceIdToEntryKey: Map<string, string>;
constructor(input: CitaviInput);
/** Entry point; returns the converted entries keyed by number. */
parse(): Record<number, EntryObject>;
/**
 * Walk the input structure and return a flat ordered list of all
 * CitaviReference objects, with parent references appearing before their
 * children so they receive lower bibDB indices.
 */
private collectReferences;
private convertReference;
private processTitle;
private processNames;
/** Append `names` to an existing name-list field, or create it. */
private addToNameField;
private processDate;
private processPublisher;
private processPeriodical;
private processNumbering;
private processIdentifiers;
private processLocations;
private processKeywords;
private processLanguage;
private processSpecificFields;
/**
 * Pull relevant fields out of a ParentReference into the child entry.
 * This handles Contribution-in-Book, MusicTrack-in-Album, etc.
 */
private processParentReference;
private convertPersonList;
private convertPerson;
/**
 * Parse a Citavi date string into an ISO-8601 date string.
 * Handles ISO datetime (2007-12-01T00:00:00) and plain date (2007-12-01).
 * Returns null if the input cannot be parsed.
 */
private parseISODate;
/**
 * Citavi stores page ranges either as plain strings or in a small XML
 * dialect embedded in the JSON string:
 *
 * <sp><n>2</n>...<os>2</os>...</sp>
 * <ep><n>6</n>...<os>6</os>...</ep>
 * <os>2-6</os>
 *
 * We extract the *last* `<os>` value, which holds the full human-readable
 * range (e.g. "2-6"). If no XML is present we use the raw string directly.
 */
private parsePageRange;
private convertRange;
private buildEntryKey;
/**
 * Citavi's ShortTitle field often contains an auto-generated prefix like
 * "Burton 2013 – Sweeney Todd" or "Manning (Ed.) 2016 – Food and supply…".
 * Strip everything up to and including the first en-dash / em-dash / hyphen
 * separator so we retain only the actual short title fragment.
 */
private cleanShortTitle;
private convertRichText;
}
/**
 * Parse a Citavi JSON payload.
 *
 * @param input Citavi JSON in any of the shapes accepted by `CitaviInput`.
 * @returns The converted entries keyed by number.
 */
declare function parseCitavi(input: CitaviInput): Record<number, EntryObject>;
/**
* Citavi XML project format parser (.ctv5 / .ctv6)
*
* The XML format stores Persons, Keywords, Publishers, Periodicals and
* SeriesTitles in separate lookup tables and links them to references via
* "OnetoN" relation elements whose text content is:
*
* <referenceId>;<linkedId1>;<linkedId2>…
*
* This parser resolves all those links, reconstructs CitaviReference objects,
* attaches parent references for Contribution-type entries, and then hands the
* result to CitaviParser for the actual field-level conversion.
*
* Accepts any DOM Document object (browser Document, or one produced by a
* library such as @xmldom/xmldom), or a plain XML string that will be parsed
* with the global DOMParser when available (browser / Deno / Node 22+).
*
* Only DOM Level 2 APIs are used (getElementsByTagName, getAttribute,
* childNodes, textContent) so the class is compatible with @xmldom/xmldom.
*/
/** Error or warning record produced by `CitaviXmlParser`; only `type` is required. */
interface XmlErrorObject {
type: string;
entry?: string;
value?: unknown;
}
/**
 * Declared API of the Citavi XML project parser.
 *
 * Construct it with a DOM `Document` or an XML string and call `parse()` to
 * obtain the converted entries keyed by number. Problems are collected in
 * `errors` and `warnings`. All lookup-table builders are private.
 */
declare class CitaviXmlParser {
private doc;
errors: XmlErrorObject[];
warnings: XmlErrorObject[];
constructor(input: Document | string);
/** Entry point; returns the converted entries keyed by number. */
parse(): Record<number, EntryObject>;
private buildPersonMap;
private buildKeywordMap;
private buildPublisherMap;
private buildPeriodicalMap;
private buildSeriesTitleMap;
/**
 * Parse a OnetoN relation section into a Map<referenceId, linkedId[]>.
 *
 * Each <OnetoN> text node has the form:
 * <referenceId>;<linkedId1>;<linkedId2>…
 */
private buildOneToNMap;
/**
 * Build a Map<childReferenceId, parentReferenceId> from ReferenceReferences.
 *
 * Each <OnetoN> text node has the form:
 * <parentId>;<childId1>;<childId2>…
 */
private buildChildToParentMap;
/**
 * Return the first *direct* child element with the given tag name, or null.
 * Uses childNodes iteration rather than getElementsByTagName so that
 * deeply-nested elements with the same tag name are not mistakenly matched.
 */
private firstChildEl;
/**
 * Return the trimmed text content of the first direct child element with
 * the given tag name, or null if no such child exists.
 */
private childText;
private resolvePersons;
}
/**
 * Parse a Citavi XML project.
 *
 * @param input A DOM `Document`, or the XML as a string.
 * @returns The converted entries keyed by number.
 */
declare function parseCitaviXml(input: Document | string): Record<number, EntryObject>;
/**
 * One CSL entry as accepted by `CSLParser`: optional `id` and `type` plus
 * arbitrary further properties.
 */
interface CSLEntry {
id?: string;
type?: string;
/** Any other property; values are untyped (`unknown`). */
[key: string]: unknown;
}
/** Error or warning record produced by `CSLParser`; only `type` is required. */
interface ErrorObject$6 {
type: string;
field?: string;
value?: unknown;
entry?: string;
}
/**
 * Declared API of the CSL parser.
 *
 * Construct it with CSL entries keyed by string and call `parse()` to obtain
 * the converted entries keyed by number. Problems are collected in `errors`
 * and `warnings`. All conversion helpers are private.
 */
declare class CSLParser {
input: Record<string, CSLEntry>;
entries: EntryObject[];
errors: ErrorObject$6[];
warnings: ErrorObject$6[];
private usedKeys;
/**
 * Maps each raw CSL `id` string to the final `entry_key` assigned after
 * normalisation. Populated during `parse()` so that callers (e.g.
 * `DocxCitationsParser`, `OdtCitationsParser`) can resolve a raw CSL id
 * back to the actual key used in the returned BibDB — even when the parser
 * synthesised a lastname+year key that bears no resemblance to the original.
 */
rawIdToEntryKey: Map<string, string>;
constructor(input: Record<string, CSLEntry>);
/** Entry point; returns the converted entries keyed by number. */
parse(): Record<number, EntryObject>;
private convertEntry;
private getBibType;
private convertField;
private convertDate;
private convertNames;
private convertInteger;
private convertKey;
private convertRange;
private convertKeyList;
private convertLiteralList;
private convertTags;
private convertRichText;
}
/**
 * Parse CSL entries.
 *
 * @param input CSL entries keyed by string.
 * @returns The converted entries keyed by number.
 */
declare function parseCSL(input: Record<string, CSLEntry>): Record<number, EntryObject>;
/**
* DOCX Citations importer
*
* Extracts bibliographic citations from the XML of a DOCX file's
* word/document.xml (or any XML fragment containing the same markup).
*
* Supported citation manager formats and how each is handled:
*
* - Word native `CITATION key \l locale` inline field +
* `customXml/item1.xml` sources (passed as
* `sourcesXml` option). Delegated to
* DocxNativeParser in docx-native.ts.
*
* - Zotero `ADDIN ZOTERO_ITEM CSL_CITATION {json}` inline
* field. The embedded CSL-JSON `citationItems`
* array is reshaped into a Record<string, CSLEntry>
* and fed to CSLParser.
*
* - Mendeley Cite v3 (current) — Current add-in: `<w:sdt>` with the citation JSON
* base64-encoded in `w:tag w:val=
* "MENDELEY_CITATION_v3_{base64}"`. Decoded payload
* is CSL-JSON; handled identically to Zotero.
*
* - Mendeley Desktop (legacy) — Legacy add-in: `ADDIN CSL_CITATION {json}` inline
* field. Same CSL-JSON shape; handled identically
* to Zotero.
*
* - EndNote `ADDIN EN.CITE <EndNote>…</EndNote>` — XML is
* either entity-escaped inline or base64-encoded in
* `<w:fldData>`. The `<record>` subtree is
* converted to an EndNoteRecord object and passed to
* EndNoteParser.
*
* - Citavi `<w:sdt>` wrapping `ADDIN CitaviPlaceholder
* {base64}`. The base64-decoded JSON contains a
* WordPlaceholder with `Entries[].ReferenceId`
* UUIDs. Two sub-cases:
*
* A. If the payload already embeds `Reference`
* objects (some Citavi export modes), they are
* passed directly to CitaviParser.
*
* B. In older or incomplete formats, only UUIDs may
* be present without embedded references. Such
* citations cannot be fully resolved and will
* generate warnings.
*
* Usage:
* const parser = new DocxCitationsParser(documentXml, {
* sourcesXml, // contents of customXml/item1.xml (Word-native)
* })
* const result = parser.parse()
* // result.entries → BibDB (Record<number, EntryObject>)
* // result.errors → ErrorObject[]
* // result.warnings → ErrorObject[]
*
* The `sourcesXml` option must be the contents of `customXml/item1.xml` from
* the DOCX ZIP when Word-native citations are present.
*
* Citavi citations embed complete bibliographic data directly in each citation
* field, so no external Citavi project file is required.
*/
/** Result of parsing DOCX citations: the extracted entries plus any errors and warnings. */
interface DocxCitationsParseResult {
/** Extracted entries keyed by number. */
entries: Record<number, EntryObject>;
errors: ErrorObject$5[];
warnings: ErrorObject$5[];
}
/**
 * Mutable accumulator passed to static extraction methods when processing
 * multiple document elements in a single pass. All fields are mutated
 * in place as entries are discovered and keys are deduplicated.
 */
interface CitationAccumulator$1 {
/** Entries collected so far. */
entries: EntryObject[];
errors: ErrorObject$5[];
warnings: ErrorObject$5[];
/** Keys already encountered; used for deduplication. */
seenKeys: Set<string>;
/**
 * Persistent map from raw CSL `id` strings to the normalised `entry_key`
 * values assigned by `CSLParser`. Accumulated across all citation elements
 * processed with the same accumulator so that duplicate items (already in
 * `seenKeys`) can still have their metadata resolved to the correct key.
 */
cslRawIdToEntryKey: Map<string, string>;
}
/**
* Error or warning record reported by the DOCX citations parser. The same
* shape is used for both the `errors` and the `warnings` lists.
*/
interface ErrorObject$5 {
/** Identifier of the kind of problem (the set of values is not visible here). */
type: string;
/** NOTE(review): presumably the name of the affected field — confirm. */
field?: string;
/** NOTE(review): presumably the offending value — confirm. */
value?: unknown;
/** NOTE(review): presumably the key of the affected entry — confirm. */
entry?: string;
}
/**
* Per-entry citation metadata, keyed by `entry_key`.
*
* This captures the cite-specific decorations that surround a bibliographic
* reference inside a single citation: page locators, textual prefixes /
* suffixes, and author-rendering flags. It is returned alongside the
* `entries` BibDB when `retrieveMetadata` is `true` on a static method call.
*
* Only `entry_key` is required; every other field is optional.
*
* Field availability by format:
*
* | Field | Zotero | Mendeley | EndNote | Citavi |
* |------------------|--------|----------|----------------------|-------------------------------------|
* | locator | ✅ | ✅ | ✅ (Pages) | ✅ (PageRange.OriginalString) |
* | label | ✅ | ✅ | – | ✅ (derived from PageRange.NumberingType; mapping inferred from manual) |
* | prefix | ✅ | ✅ | ✅ | ✅ |
* | suffix | ✅ | ✅ | ✅ | ✅ (confirmed by manual; not yet seen in real files) |
* | suppressAuthor | ✅ | ✅ | – | – |
* | authorOnly | ✅ | ✅ | – | – |
* | authorYear | – | – | ✅ (AuthorYear attr) | – |
* | bibliographyEntry| – | – | – | ✅ (confirmed by manual; not yet seen in real files) |
* | ruleSet | – | – | – | ✅ (confirmed by manual; serialised form unknown) |
* | formatOption | – | – | – | ✅ (confirmed by manual; serialised form unknown) |
* | insertAs | – | – | – | ✅ (confirmed by manual; serialised form unknown) |
*/
interface CitationItemMetadata$1 {
/** The `entry_key` of the corresponding entry in the returned `entries` BibDB. */
entry_key: string;
/**
* Pinpoint location within the cited work (page number, chapter, etc.).
* For CSL formats this is the raw `locator` string; for EndNote it is the
* `<Pages>` element text; for Citavi it is `PageRange.OriginalString`.
*/
locator?: string;
/**
* CSL locator type label (e.g. `"page"`, `"chapter"`, `"section"`).
* For CSL-based formats (Zotero, Mendeley) this is the raw `label` string
* from the citation item. For Citavi it is derived from `PageRange.NumberingType`:
* `0` (Pages) → `"page"`, `1` (Columns) → `"column"`,
* `2` (Section numbers) → `"section"`, `3` (Margin numbers) → `"note"`,
* `4` (Other / free-form) → `"custom"`.
* The integer-to-label mapping for Citavi is inferred from the Citavi manual
* and has not been confirmed against observed data beyond value `0`.
*/
label?: string;
/** Text to prepend to the formatted citation (e.g. `"see "`, `"cf. "`). */
prefix?: string;
/** Text to append to the formatted citation (e.g. `", etc."`). */
suffix?: string;
/**
* When `true`, author names are suppressed in the formatted output,
* leaving only the year (and locator) in parentheses: `(2020, p. 45)`.
* Only populated for CSL-based formats (Zotero, Mendeley).
*/
suppressAuthor?: boolean;
/**
* When `true`, only the author name is rendered with nothing else:
* `William T. Williams`.
* Only populated for CSL-based formats (Zotero, Mendeley).
*/
authorOnly?: boolean;
/**
* When `true`, the author name is rendered outside the parentheses while
* the year (and locator) remain inside: `William T. Williams (2020, p. 45)`.
* This reflects the `AuthorYear="1"` attribute on EndNote's `<Cite>` element.
* Only populated for EndNote citations.
*/
authorYear?: boolean;
/**
* Controls whether and where this reference appears in the bibliography.
* Only populated for Citavi citations (from `Entries[].BibliographyEntry`).
*
* Known values:
* `"/bibonly"` – reference appears only in the bibliography, not in-text
* `"/nobib"` – reference appears only in-text, not in the bibliography
*
* When absent the reference appears in both (default behaviour).
* Confirmed by the Citavi manual; not yet observed in real files.
*/
bibliographyEntry?: string;
/**
* Overrides which citation-style rule set (formatting variant) is used for
* this entry. Only populated for Citavi citations (from `Entries[].RuleSet`).
* Serialised form not yet observed in real files.
*/
ruleSet?: unknown;
/**
* Selects among the citation style's optional formatting variants for this
* entry (values 1, 2, or 3). Only populated for Citavi citations (from
* `Entries[].FormatOption`). Serialised form not yet observed in real files.
*/
formatOption?: unknown;
/**
* Overrides where the citation is physically inserted (in-text vs. footnote).
* Only populated for Citavi citations (from `Entries[].InsertAs`).
* Serialised form not yet observed in real files.
*/
insertAs?: unknown;
}
/**
* Result of the static citation checks `DocxCitationsParser.sdtCitation()`
* and `DocxCitationsParser.fieldCitation()`.
*
* Only `isCitation` is always present; all other fields are optional.
*/
interface CitationResult$1 {
/** Whether the inspected SDT block / field instruction is a citation. */
isCitation: boolean;
/**
* Citation format that was detected. The concrete string values are not
* visible in this declaration — confirm against the implementation.
*/
format?: string;
/** Extracted bibliography entries (a BibDB keyed by number). */
entries?: Record<number, EntryObject>;
/** Errors collected during extraction. */
errors?: ErrorObject$5[];
/** Warnings collected during extraction. */
warnings?: ErrorObject$5[];
/**
* Per-entry citation metadata (locators, prefixes, suffixes, flags).
* Only populated when `retrieveMetadata` is `true` on the static method call.
*/
metadata?: CitationItemMetadata$1[];
}
/**
* Result of the static bibliography checks
* `DocxCitationsParser.sdtBibliography()` and
* `DocxCitationsParser.fieldBibliography()`. It carries no bibliographic data.
*/
interface BibliographyResult$1 {
/** Whether the inspected SDT block / field instruction is a bibliography region. */
isBibliography: boolean;
/**
* Format that was detected. The concrete string values are not visible in
* this declaration — confirm against the implementation.
*/
format?: string;
}
/**
* Options accepted by the `DocxCitationsParser` constructor,
* `DocxCitationsParser.fieldCitation()` and `parseDocxCitations()`.
*/
interface DocxCitationsParserOptions {
/**
* Contents of `customXml/item1.xml` from the DOCX ZIP, using the MS
* Office Bibliography XML namespace. Required to resolve Word-native and
* `CITATION` keys into full bibliographic data.
*/
sourcesXml?: string;
}
/**
* Parser for citations embedded in a DOCX `document.xml` string.
*
* Instance usage: construct with the document XML (and optional
* `sourcesXml`), then call `parse()`. The static `sdt*` / `field*` methods
* inspect a single SDT block or field instruction instead.
*/
declare class DocxCitationsParser {
private documentXml;
private options;
/** Entries collected by this instance. */
entries: EntryObject[];
/** Errors collected by this instance. */
errors: ErrorObject$5[];
/** Warnings collected by this instance. */
warnings: ErrorObject$5[];
/** Entry keys already added — prevents duplicates across multiple fields. */
private seenKeys;
/** Persistent raw CSL id → normalised entry_key map for the instance parse. */
private cslRawIdToEntryKey;
/**
* @param documentXml - The document XML to parse
* @param options - Optional parser options (e.g., sourcesXml for Word native)
*/
constructor(documentXml: string, options?: DocxCitationsParserOptions);
/**
* Check if an SDT block contains citation data (without full document parsing).
*
* @param sdtXml - XML string of a single <w:sdt>...</w:sdt> block
* @param retrieve - If true, extract and return full citation data
* @param retrieveMetadata - If true, the result's `metadata` is populated with
* per-entry citation metadata
* @param acc - Optional accumulator shared across several calls; it is mutated
* in place as entries are discovered and keys are deduplicated
* @returns CitationResult indicating whether it's a citation and its format
*/
static sdtCitation(sdtXml: string, retrieve?: boolean, retrieveMetadata?: boolean, acc?: CitationAccumulator$1): CitationResult$1;
/**
* Check whether an SDT block is a bibliography rendering region.
*
* No data is extracted: the result only carries `isBibliography` and an
* optional `format`.
*
* @param sdtXml - XML string of a single <w:sdt>...</w:sdt> block
* @returns BibliographyResult indicating whether it's a bibliography
*/
static sdtBibliography(sdtXml: string): BibliographyResult$1;
/**
* Check or extract citation data from a field instruction.
*
* @param instrText - The concatenated instruction text from w:instrText elements
* @param retrieve - If true, extract and return full citation data
* @param retrieveMetadata - If true, the result's `metadata` is populated with
* per-entry citation metadata
* @param extractWordNative - NOTE(review): undocumented; presumably enables
* extraction of Word-native citations (which need `options.sourcesXml`) —
* confirm against the implementation
* @param fldData - Optional field data (for EndNote base64 payloads)
* @param options - Optional parser options (e.g., sourcesXml for Word native)
* @param acc - Optional accumulator shared across several calls; it is mutated
* in place as entries are discovered and keys are deduplicated
* @returns CitationResult with format and optionally entries/errors/warnings
*/
static fieldCitation(instrText: string, retrieve?: boolean, retrieveMetadata?: boolean, extractWordNative?: boolean, fldData?: string, options?: DocxCitationsParserOptions, acc?: CitationAccumulator$1): CitationResult$1;
/**
* Check whether a field instruction is a bibliography rendering region.
*
* No data is extracted: the result only carries `isBibliography` and an
* optional `format`.
*
* @param instrText - The concatenated instruction text
* @returns BibliographyResult indicating whether it's a bibliography
*/
static fieldBibliography(instrText: string): BibliographyResult$1;
/**
* Extract CSL citation JSON data.
*/
private static extractCslJsonData;
/**
* Extract EndNote citation data.
*/
private static extractEndNoteData;
/**
* Extract Citavi citation data from base64-encoded WordPlaceholder JSON.
*
* Citavi embeds complete bibliographic data directly in each citation via
* `Reference` objects within the `Entries` array. This method checks for
* embedded references and converts them via CitaviParser. If no embedded
* references are found (only UUIDs), a warning is generated.
*/
private static extractCitaviData;
/**
* Extract Word native citation data.
*/
private static extractWordNativeData;
/**
* Parse EndNote XML payload.
*/
private static parseEndNoteXml;
/**
* Parse EndNote record XML.
*/
private static parseEndNoteRecordXml;
/**
* Parse contributors XML.
*/
private static parseContributorsXml;
/**
* Parse dates XML.
*/
private static parseDatesXml;
/**
* Parse URLs XML.
*/
private static parseUrlsXml;
/**
* Strip style tags and decode XML entities.
*/
private static stripStyleTagsStatic;
/**
* Unescape XML entities.
*/
private static unescapeXmlEntitiesStatic;
/**
* Decode base64.
*/
private static decodeBase64Static;
/**
* Parse the document and return the collected entries, errors and warnings.
*
* @returns The parse result; `entries` is a BibDB keyed by number
*/
parse(): DocxCitationsParseResult;
/**
* NOTE(review): undocumented; by its name this handles the `<w:sdt>` blocks
* of the document — confirm against the implementation.
*/
private parseSdtBlocks;
/**
* Tokenises the document XML into field-code events, replays them with a
* depth counter to handle nested fields (EN.CITE wraps EN.CITE.DATA), and
* dispatches each completed instruction to the correct handler.
*
* Per the DOCX spec (and documented in CITATIONS_IN_DOCS.md), all
* <w:instrText> elements between `begin` and `separate` must be
* concatenated before the instruction is interpreted.
*/
private parseFieldCodes;
/**
* Delegates to DocxNativeParser, passing `seenKeys` so that sources
* already imported via other field types are not duplicated, and so that
* newly imported keys are recorded for future deduplication.
*/
private parseSourcesXml;
}
/**
* Function-style entry point for parsing the citations of a DOCX document.
*
* NOTE(review): presumably equivalent to
* `new DocxCitationsParser(documentXml, options).parse()` — confirm against
* the implementation.
*
* @param documentXml - The document XML to parse
* @param options - Optional parser options (e.g., sourcesXml for Word native)
* @returns The parse result with entries, errors and warnings
*/
declare function parseDocxCitations(documentXml: string, options?: DocxCitationsParserOptions): DocxCitationsParseResult;
/**
* EndNote XML parser
* Supports EndNote XML export format (both EndNote.dtd and RSXML.dtd variants)
* as well as EndNote Cite While You Write format
*/
/**
* Error or warning record used in `EndNoteParseResult`. The same shape is
* used for both the `errors` and the `warnings` lists.
*/
interface ErrorObject$4 {
/** Identifier of the kind of problem (the set of values is not visible here). */
type: string;
/** NOTE(review): presumably the name of the affected field — confirm. */
field?: string;
/** NOTE(review): presumably the offending value — confirm. */
value?: unknown;
/** NOTE(review): presumably the key of the affected entry — confirm. */
entry?: string;
}
/**
* Result shape for EndNote parsing: entries plus collected errors and warnings.
*/
interface EndNoteParseResult {
/** Extracted bibliography entries, keyed by number. */
entries: Record<number, EntryObject>;
/** Errors collected while parsing. */
errors: ErrorObject$4[];
/** Warnings collected while parsing. */
warnings: ErrorObject$4[];
}
/**
* Object form of an EndNote `style` value. All members are optional strings.
*
* NOTE(review): `#text` presumably holds the element's text content and the
* remaining keys its attributes — confirm against the XML-to-object conversion.
*/
interface EndNoteStyle {
"#text"?: string;
color?: string;
face?: string;
font?: string;
size?: string;
}
/**
* A possibly styled EndNote value. It is one of: a plain string, an object
* with optional `#text` and nested `style` (single or array), or an
* `EndNoteStyle` object itself.
*/
type EndNoteStyledValue = string | {
"#text"?: string;
style?: EndNoteStyle | EndNoteStyle[];
} | EndNoteStyle;
/**
* Object form of an EndNote author; used as the type of the `author` members
* of `EndNoteRecord.contributors`. All members are optional.
*
* NOTE(review): `#text` presumably holds the element's text content and the
* string-valued keys its attributes — confirm against the XML-to-object
* conversion.
*/
interface EndNoteAuthor {
"#text"?: string;
/** Nested style information, as a single object or an array. */
style?: EndNoteStyle | EndNoteStyle[];
"corp-name"?: string;
"first-name"?: string;
initials?: string;
"last-name"?: string;
"middle-initial"?: string;
role?: string;
salutation?: string;
suffix?: string;
title?: string;
}
/**
* Object form of an EndNote date. All members are optional; the date parts
* (`day`, `julian`, `month`, `year`) are typed as strings, not numbers.
*/
interface EndNoteDate {
"#text"?: string;
/** Nested style information, as a single object or an array. */
style?: EndNoteStyle | EndNoteStyle[];
day?: string;
julian?: string;
month?: string;
year?: string;
}
/**
* Object form of a single EndNote URL; used as the element type of
* `EndNoteUrlGroup.url`. All members are optional.
*/
interface EndNoteUrl {
"#text"?: string;
/** Nested style information, as a single object or an array. */
style?: EndNoteStyle | EndNoteStyle[];
/** Flag restricted to the literal strings `"yes"` or `"no"`. */
"has-ut"?: "yes" | "no";
"ppv-app"?: string;
/** Flag restricted to the literal strings `"yes"` or `"no"`. */
"ppv-ref"?: "yes" | "no";
"ppv-ut"?: string;
}
/**
* A group of EndNote URLs; used as the type of every member of `EndNoteUrls`.
*/
interface EndNoteUrlGroup {
/** One URL or an array of URLs; consumers must handle both forms. */
url?: EndNoteUrl | EndNoteUrl[];
}
/**
* EndNote URLs grouped by category (web, pdf, text, related, image). Every
* category is optional and holds an `EndNoteUrlGroup`.
*/
interface EndNoteUrls {
"web-urls"?: EndNoteUrlGroup;
"pdf-urls"?: EndNoteUrlGroup;
"text-urls"?: EndNoteUrlGroup;
"related-urls"?: EndNoteUrlGroup;
"image-urls"?: EndNoteUrlGroup;
}
interface EndNoteRecord {
database?: string | {
"#text"?: string;
name?: string;
path?: string;
};
"source-app"?: string | {
"#text"?: string;
name?: string;
version?: string;
};
"rec-number"?: string | number;
"foreign-keys"?: {
key?: {
"#text"?: string;
app?: string;
"db-id"?: string;
timestamp?: string;
} | Array<{
"#text"?: string;
app?: string;
"db-id"?: string;
timestamp?: string;
}>;
};
"ref-type"?: string | {
"#text"?: string;
name?: string;
};
contributors?: {
authors?: {
author?: EndNoteAuthor | EndNoteAuthor[];
};
"secondary-authors"?: {
author?: EndNoteAuthor | EndNoteAuthor[];
};
"tertiary-authors"?: {
author?: EndNoteAuthor | EndNoteAuthor[];
};
"subsidiary-authors"?: {
author?: EndNoteAuthor | EndNoteAuthor[];
};
"translated-authors"?: {
author?: EndNoteAuthor | EndNoteAuthor[];
};
editors?: {
editor?: EndNoteAuth