UNPKG

@rpidanny/pdf2md

Version:
63 lines (58 loc) 2.11 kB
// @flow const ToLineItemTransformation = require('../ToLineItemTransformation') const ParseResult = require('../../ParseResult') const LineItem = require('../../LineItem') const Word = require('../../Word') const { REMOVED_ANNOTATION, ADDED_ANNOTATION, DETECTED_ANNOTATION } = require('../../Annotation') const BlockType = require('../../markdown/BlockType') const { isListItemCharacter, isNumberedListItem } = require('../../../util/string-functions') // Detect items starting with -, •, etc... module.exports = class DetectListItems extends ToLineItemTransformation { constructor () { super('Detect List Items') } transform (parseResult /*: ParseResult */) /*: ParseResult */ { var foundListItems = 0 var foundNumberedItems = 0 parseResult.pages.forEach(page => { const newItems = [] page.items.forEach(item => { newItems.push(item) if (!item.type) { var text = item.text() if (isListItemCharacter(item.words[0].string)) { foundListItems++ if (item.words[0].string === '-') { item.annotation = DETECTED_ANNOTATION item.type = BlockType.LIST } else { item.annotation = REMOVED_ANNOTATION const newWords = item.words.map(word => new Word({ ...word, })) newWords[0].string = '-' newItems.push(new LineItem({ ...item, words: newWords, annotation: ADDED_ANNOTATION, type: BlockType.LIST, })) } } else if (isNumberedListItem(text)) { // TODO check that starts with 1 (kala chakra) foundNumberedItems++ item.annotation = DETECTED_ANNOTATION item.type = BlockType.LIST } } }) page.items = newItems }) return new ParseResult({ ...parseResult, messages: [ 'Detected ' + foundListItems + ' plain list items.', 'Detected ' + foundNumberedItems + ' numbered list items.', ], }) } }