UNPKG

onix-parser

Version:

Parse ONIX 3.0 XML files and extract structured product data for publishing and digital books

283 lines (245 loc) 9.37 kB
const fs = require('fs')
const XMLMapping = require('xml-mapping')
const { header } = require('./Composites/header')
const { product } = require('./Composites/product')

// Element paths handed to xml-mapping's `arrays` option so that these nodes
// are delivered as arrays (the code below relies on `product` being one).
const ARRAY_PATHS = [
  '/ONIXmessage/product',
  '/ONIXmessage/product/collateraldetail/textcontent',
  '/ONIXmessage/product/collateraldetail/supportingresource',
  '/ONIXmessage/product/collateraldetail/supportingresource/resourceversion/resourceversionfeature',
  '/ONIXmessage/product/contentdetail/contentitem',
  '/ONIXmessage/product/contentdetail/contentitem/supportingresource',
  '/ONIXmessage/product/contentdetail/contentitem/supportingresource/resourcefeature',
  '/ONIXmessage/product/contentdetail/contentitem/supportingresource/resourceversion',
  '/ONIXmessage/product/contentdetail/contentitem/supportingresource/resourceversion/resourceversionfeature',
  '/ONIXmessage/product/descriptivedetail/contributor',
  '/ONIXmessage/product/descriptivedetail/collection',
  '/ONIXmessage/product/descriptivedetail/collection/titledetail',
  '/ONIXmessage/product/descriptivedetail/collection/titledetail/titleelement',
  '/ONIXmessage/product/descriptivedetail/extent',
  '/ONIXmessage/product/descriptivedetail/language',
  '/ONIXmessage/product/descriptivedetail/audiencerange',
  '/ONIXmessage/product/descriptivedetail/audiencerange/b075',
  '/ONIXmessage/product/descriptivedetail/audiencerange/b076',
  '/ONIXmessage/product/descriptivedetail/subject',
  '/ONIXmessage/product/descriptivedetail/b333', // ProductFormDetail
  '/ONIXmessage/product/productidentifier',
  '/ONIXmessage/product/productsupply/supplydetail/price',
  '/ONIXmessage/product/productsupply/supplydetail/price/pricedate',
  '/ONIXmessage/product/publishingdetail/publishingdate',
  '/ONIXmessage/product/publishingdetail/salesrights',
  '/ONIXmessage/product/publishingdetail/salesrights/territory',
  '/ONIXmessage/product/publishingdetail/salesrights/salesrestriction',
  '/ONIXmessage/product/relatedmaterial/relatedproduct',
]

// Digital product form codes (b012) we support
const DIGITAL_PRODUCT_FORMS = ['EA', 'ED', 'AJ']

// Supported ProductFormDetail codes (b333): MP3 and EPUB formats
const SUPPORTED_FORM_DETAILS = ['A103', 'E101']

/**
 * Builds the failure result returned to callers.
 * @param {string[]} messages - One or more human-readable error messages
 * @returns {{status: false, message: string[]}}
 */
function failure(messages) {
  return { status: false, message: messages }
}

/**
 * Parses an ONIX 3.0 XML file and extracts the first supported digital product.
 *
 * The returned promise never rejects: every problem (bad path, unreadable or
 * empty file, malformed XML, unsupported release, failed validation, or any
 * unexpected exception) is reported as `{ status: false, message: [...] }`.
 *
 * @param {string} onixPath - Path of the ONIX XML file to read
 * @returns {Promise<{status: boolean, data?: Object, message?: string[]}>}
 *   `{ status: true, data }` on success, where `data` merges the output of
 *   `header()` and `product()`; otherwise `{ status: false, message }`.
 */
const onix = async onixPath => {
  try {
    // Input validation
    if (!onixPath || typeof onixPath !== 'string') {
      return failure(['Invalid file path provided'])
    }

    // Read the file without blocking the event loop. A single read replaces
    // the former exists-then-read pair, so the file cannot vanish in between.
    let onixContentRaw
    try {
      onixContentRaw = await fs.promises.readFile(onixPath, 'utf8')
    } catch (readError) {
      if (readError.code === 'ENOENT' || readError.code === 'ENOTDIR') {
        return failure([`File not found: ${onixPath}`])
      }
      // Anything else (EISDIR, EACCES, ...) is reported by the outer handler
      throw readError
    }

    if (!onixContentRaw || onixContentRaw.trim() === '') {
      return failure(['File is empty or contains no content'])
    }

    // Parse XML to JSON
    let onixJSON
    try {
      onixJSON = XMLMapping.load(onixContentRaw, {
        nested: true,
        arrays: ARRAY_PATHS
      })
    } catch (xmlError) {
      return failure([`Invalid XML format: ${xmlError.message}`])
    }

    // Validate ONIX structure
    if (!onixJSON || !onixJSON.ONIXmessage) {
      return failure(['Invalid ONIX file structure - missing ONIXmessage element'])
    }

    const { release, header: Header, product: Product } = onixJSON.ONIXmessage

    // Validate ONIX version
    if (!release || release !== '3.0') {
      return failure([`Unsupported ONIX version: ${release || 'unknown'} (only 3.0 is supported)`])
    }

    // Validate products exist
    if (!Product || !Array.isArray(Product) || Product.length === 0) {
      return failure(['No products found in ONIX file'])
    }

    // Find digital product (prefer digital over print versions)
    const digitalProductResult = findDigitalProduct(Product)
    if (!digitalProductResult.found) {
      return failure([digitalProductResult.error])
    }

    const { index: digitalIndex, form: productForm } = digitalProductResult
    const selectedProduct = Product[digitalIndex]

    // Validate ProductFormDetail for the selected product
    const validationResult = validateProductFormDetail(selectedProduct, productForm)
    if (!validationResult.valid) {
      return failure([validationResult.error])
    }

    // Build product JSON
    const productJSON = {
      ...header(Header),
      ...product(selectedProduct, onixContentRaw)
    }

    // Validate required fields
    const validationErrors = validateRequiredFields(productJSON, productForm)
    if (validationErrors.length > 0) {
      return failure(validationErrors)
    }

    return { status: true, data: productJSON }
  } catch (error) {
    return failure([`Unexpected error: ${error.message}`])
  }
}

/**
 * Finds the first digital product in the product array.
 *
 * A product qualifies when its `descriptivedetail.b012.$t` value is one of
 * the supported digital form codes; products lacking `descriptivedetail` or
 * `b012` are skipped.
 *
 * @param {Array} products - Array of product objects
 * @returns {Object} - Result object with found status, index, form, and error
 */
function findDigitalProduct(products) {
  for (let i = 0; i < products.length; i++) {
    const element = products[i]

    if (!element.descriptivedetail || !element.descriptivedetail.b012) {
      continue
    }

    const productForm = element.descriptivedetail.b012.$t
    if (DIGITAL_PRODUCT_FORMS.includes(productForm)) {
      return { found: true, index: i, form: productForm, error: null }
    }
  }

  return {
    found: false,
    index: -1,
    form: null,
    error: 'No supported digital product format found (EA, ED, or AJ required)'
  }
}

/**
 * Validates ProductFormDetail for the selected product.
 *
 * Passes when at least one `descriptivedetail.b333` entry carries a supported
 * code in its `$t` value.
 *
 * @param {Object} product - Product object
 * @param {string} productForm - Product form code (accepted for interface
 *   compatibility; not consulted by the current checks)
 * @returns {Object} - Validation result `{ valid, error }`
 */
function validateProductFormDetail(product, productForm) {
  if (!product.descriptivedetail || !product.descriptivedetail.b333) {
    return { valid: false, error: 'Missing ProductFormDetail (b333) in product' }
  }

  // b333 is normally an array, but tolerate a single object as well
  const rawDetails = product.descriptivedetail.b333
  const productFormDetails = Array.isArray(rawDetails) ? rawDetails : [rawDetails]

  const hasValidDetail = productFormDetails.some(
    detail => detail && detail.$t && SUPPORTED_FORM_DETAILS.includes(detail.$t)
  )

  if (!hasValidDetail) {
    return {
      valid: false,
      error: 'Unsupported ProductFormDetail (only A103/MP3 and E101/EPUB are supported)'
    }
  }

  return { valid: true, error: null }
}

/**
 * Validates required fields in the parsed product JSON.
 *
 * Checks identifiers, title, description, contributors and pricing for every
 * product, plus chapters when the product form is 'AJ'. All problems are
 * collected rather than stopping at the first one.
 *
 * @param {Object} productJSON - Parsed product data
 * @param {string} productForm - Product form code
 * @returns {Array} - Array of validation error messages (empty when valid)
 */
function validateRequiredFields(productJSON, productForm) {
  const errors = []

  // Check identifiers: at least one must have ID type code '15' or '03'
  if (!productJSON.identifiers || productJSON.identifiers.length === 0) {
    errors.push('No identifiers found')
  } else {
    const hasISBN = productJSON.identifiers.some(
      id => id.productIDTypeCode === '15' || id.productIDTypeCode === '03'
    )
    if (!hasISBN) {
      errors.push('No ISBN-13 identifier found')
    }
  }

  // Check title
  if (
    !productJSON.title ||
    !productJSON.title.titleText ||
    productJSON.title.titleText.trim() === ''
  ) {
    errors.push('No title found')
  }

  // Check description
  if (
    !productJSON.details ||
    !productJSON.details.description ||
    productJSON.details.description.trim() === ''
  ) {
    errors.push('No description found')
  }

  // Check contributors
  if (!productJSON.contributors || productJSON.contributors.length === 0) {
    errors.push('No contributors found')
  }

  // Check pricing: a BRL price that covers BR is mandatory
  if (!productJSON.price || productJSON.price.length === 0) {
    errors.push('No pricing information found')
  } else {
    const hasBRLPrice = productJSON.price.some(
      price =>
        price.currencyCode === 'BRL' &&
        price.countriesIncluded &&
        price.countriesIncluded.includes('BR')
    )
    if (!hasBRLPrice) {
      errors.push('No BRL pricing found for Brazilian market')
    }
  }

  // Audiobook-specific validation
  if (productForm === 'AJ') {
    if (!productJSON.chapters || productJSON.chapters.length === 0) {
      errors.push('No chapters found (required for audiobooks)')
    }
  }

  return errors
}

module.exports = onix