/*
 * onix-parser
 * Version:
 * Parse ONIX 3.0 XML files and extract structured product data for publishing and digital books
 * 283 lines (245 loc) • 9.37 kB
 * JavaScript
 */
const fs = require('fs')
const XMLMapping = require('xml-mapping')
const { header } = require('./Composites/header')
const { product } = require('./Composites/product')
/**
 * Parses an ONIX 3.0 XML file and extracts the first supported digital product.
 *
 * The returned promise never rejects. Every failure (bad argument, missing or
 * empty file, malformed XML, unsupported content, failed validation, or any
 * unexpected exception) is reported as `{ status: false, message: string[] }`.
 * On success it resolves to `{ status: true, data }`, where `data` is the
 * merge of `header(...)` and `product(...)` for the selected product.
 *
 * @param {string} onixPath - Path of the ONIX XML file to read
 * @returns {Promise<Object>} - Resolves to the result object described above
 */
const onix = async onixPath => {
  // Every failure shares one shape; `message` is always an array of strings
  const fail = message => ({ status: false, message })

  try {
    // Input validation
    if (!onixPath || typeof onixPath !== 'string') {
      return fail(['Invalid file path provided'])
    }

    // Read the file without blocking the event loop. A single read replaces
    // the former exists-then-read sequence, so a file that disappears between
    // the two steps is still reported as "not found".
    let onixContentRaw
    try {
      onixContentRaw = await fs.promises.readFile(onixPath, 'utf8')
    } catch (readError) {
      if (readError.code === 'ENOENT' || readError.code === 'ENOTDIR') {
        return fail([`File not found: ${onixPath}`])
      }
      // Anything else (directory, permissions, ...) is reported by the outer catch
      throw readError
    }

    if (!onixContentRaw || onixContentRaw.trim() === '') {
      return fail(['File is empty or contains no content'])
    }

    // Parse XML to JSON
    let onixJSON
    try {
      onixJSON = XMLMapping.load(onixContentRaw, {
        nested: true,
        // NOTE(review): presumably forces these paths to be arrays even when
        // the element occurs once - confirm against the xml-mapping docs
        arrays: [
          '/ONIXmessage/product',
          '/ONIXmessage/product/collateraldetail/textcontent',
          '/ONIXmessage/product/collateraldetail/supportingresource',
          '/ONIXmessage/product/collateraldetail/supportingresource/resourceversion/resourceversionfeature',
          '/ONIXmessage/product/contentdetail/contentitem',
          '/ONIXmessage/product/contentdetail/contentitem/supportingresource',
          '/ONIXmessage/product/contentdetail/contentitem/supportingresource/resourcefeature',
          '/ONIXmessage/product/contentdetail/contentitem/supportingresource/resourceversion',
          '/ONIXmessage/product/contentdetail/contentitem/supportingresource/resourceversion/resourceversionfeature',
          '/ONIXmessage/product/descriptivedetail/contributor',
          '/ONIXmessage/product/descriptivedetail/collection',
          '/ONIXmessage/product/descriptivedetail/collection/titledetail',
          '/ONIXmessage/product/descriptivedetail/collection/titledetail/titleelement',
          '/ONIXmessage/product/descriptivedetail/extent',
          '/ONIXmessage/product/descriptivedetail/language',
          '/ONIXmessage/product/descriptivedetail/audiencerange',
          '/ONIXmessage/product/descriptivedetail/audiencerange/b075',
          '/ONIXmessage/product/descriptivedetail/audiencerange/b076',
          '/ONIXmessage/product/descriptivedetail/subject',
          '/ONIXmessage/product/descriptivedetail/b333', // ProductFormDetail
          '/ONIXmessage/product/productidentifier',
          '/ONIXmessage/product/productsupply/supplydetail/price',
          '/ONIXmessage/product/productsupply/supplydetail/price/pricedate',
          '/ONIXmessage/product/publishingdetail/publishingdate',
          '/ONIXmessage/product/publishingdetail/salesrights',
          '/ONIXmessage/product/publishingdetail/salesrights/territory',
          '/ONIXmessage/product/publishingdetail/salesrights/salesrestriction',
          '/ONIXmessage/product/relatedmaterial/relatedproduct',
        ]
      })
    } catch (xmlError) {
      return fail([`Invalid XML format: ${xmlError.message}`])
    }

    // Validate ONIX structure
    if (!onixJSON || !onixJSON.ONIXmessage) {
      return fail(['Invalid ONIX file structure - missing ONIXmessage element'])
    }

    const { release, header: Header, product: Product } = onixJSON.ONIXmessage

    // Validate ONIX version
    if (release !== '3.0') {
      return fail([`Unsupported ONIX version: ${release || 'unknown'} (only 3.0 is supported)`])
    }

    // Validate products exist
    if (!Array.isArray(Product) || Product.length === 0) {
      return fail(['No products found in ONIX file'])
    }

    // Select the first product whose form is one of the supported digital formats
    const digitalProductResult = findDigitalProduct(Product)
    if (!digitalProductResult.found) {
      return fail([digitalProductResult.error])
    }
    const { index: digitalIndex, form: productForm } = digitalProductResult

    // Validate ProductFormDetail for the selected product
    const validationResult = validateProductFormDetail(Product[digitalIndex], productForm)
    if (!validationResult.valid) {
      return fail([validationResult.error])
    }

    // Build product JSON; the raw XML is passed through to product() as well
    const productJSON = {
      ...header(Header),
      ...product(Product[digitalIndex], onixContentRaw)
    }

    // Validate required fields; all problems are reported together
    const validationErrors = validateRequiredFields(productJSON, productForm)
    if (validationErrors.length > 0) {
      return fail(validationErrors)
    }

    return {
      status: true,
      data: productJSON
    }
  } catch (error) {
    // Keep the never-rejects contract: unexpected failures become a result object
    return fail([`Unexpected error: ${error.message}`])
  }
}
/**
 * Locates the first product whose product form code (b012) is a supported digital format.
 * Products without `descriptivedetail` or without `b012` are skipped.
 * @param {Array} products - Array of product objects
 * @returns {Object} - `{ found, index, form, error }`; on a miss `index` is -1,
 *   `form` is null and `error` carries the explanation
 */
function findDigitalProduct(products) {
  const supportedForms = ['EA', 'ED', 'AJ'] // Digital formats we support

  // Text of the b012 element, or undefined when the element is absent
  const readForm = candidate => {
    const detail = candidate.descriptivedetail
    return detail && detail.b012 ? detail.b012.$t : undefined
  }

  const matchIndex = products.findIndex(candidate =>
    supportedForms.includes(readForm(candidate))
  )

  if (matchIndex === -1) {
    return {
      found: false,
      index: -1,
      form: null,
      error: 'No supported digital product format found (EA, ED, or AJ required)'
    }
  }

  return {
    found: true,
    index: matchIndex,
    form: readForm(products[matchIndex]),
    error: null
  }
}
/**
 * Checks that the product carries at least one supported ProductFormDetail (b333) code.
 * @param {Object} product - Product object
 * @param {string} productForm - Product form code (not consulted by the current checks)
 * @returns {Object} - `{ valid: true, error: null }` on success, otherwise
 *   `{ valid: false, error }` with the reason
 */
function validateProductFormDetail(product, productForm) {
  const supportedDetails = ['A103', 'E101'] // MP3 and EPUB formats

  const rawDetails = product.descriptivedetail
    ? product.descriptivedetail.b333
    : undefined

  if (!rawDetails) {
    return {
      valid: false,
      error: 'Missing ProductFormDetail (b333) in product'
    }
  }

  // Accept both a single element and a list of elements
  const detailList = Array.isArray(rawDetails) ? rawDetails : [rawDetails]

  for (const entry of detailList) {
    if (entry && entry.$t && supportedDetails.includes(entry.$t)) {
      return { valid: true, error: null }
    }
  }

  return {
    valid: false,
    error: 'Unsupported ProductFormDetail (only A103/MP3 and E101/EPUB are supported)'
  }
}
/**
 * Collects every required-field problem in the parsed product data.
 * Checks run in a fixed order (identifiers, title, description, contributors,
 * pricing, then audiobook chapters) and all failures are reported together.
 * @param {Object} productJSON - Parsed product data
 * @param {string} productForm - Product form code; 'AJ' enables the chapter check
 * @returns {Array} - Validation error messages, empty when everything is present
 */
function validateRequiredFields(productJSON, productForm) {
  const isMissingList = list => !list || list.length === 0
  const isBlankText = text => !text || text.trim() === ''
  const acceptedIdTypes = ['15', '03']
  const isBrazilianPrice = entry =>
    entry.currencyCode === 'BRL' &&
    entry.countriesIncluded &&
    entry.countriesIncluded.includes('BR')

  const problems = []

  // Identifiers: at least one entry with an accepted type code
  const identifiers = productJSON.identifiers
  if (isMissingList(identifiers)) {
    problems.push('No identifiers found')
  } else if (!identifiers.some(entry => acceptedIdTypes.includes(entry.productIDTypeCode))) {
    problems.push('No ISBN-13 identifier found')
  }

  // Title text must be present and non-blank
  const title = productJSON.title
  if (!title || isBlankText(title.titleText)) {
    problems.push('No title found')
  }

  // Description must be present and non-blank
  const details = productJSON.details
  if (!details || isBlankText(details.description)) {
    problems.push('No description found')
  }

  // Contributors
  if (isMissingList(productJSON.contributors)) {
    problems.push('No contributors found')
  }

  // Pricing: a BRL price that includes BR is required
  const prices = productJSON.price
  if (isMissingList(prices)) {
    problems.push('No pricing information found')
  } else if (!prices.some(isBrazilianPrice)) {
    problems.push('No BRL pricing found for Brazilian market')
  }

  // Audiobook-specific validation
  if (productForm === 'AJ' && isMissingList(productJSON.chapters)) {
    problems.push('No chapters found (required for audiobooks)')
  }

  return problems
}
// The module's export is the `onix` parser function itself
module.exports = onix