UNPKG

pdf-viewer

Version:

Build including the viewer in PDF.JS

1,336 lines (1,227 loc) 99.8 kB
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ /* Copyright 2012 Mozilla Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* globals assert, CMapFactory, ColorSpace, DecodeStream, Dict, Encodings, error, ErrorFont, Font, FONT_IDENTITY_MATRIX, fontCharsToUnicode, FontFlags, ImageKind, info, isArray, isCmd, isDict, isEOF, isName, isNum, isStream, isString, JpegStream, Lexer, Metrics, IdentityCMap, MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise, RefSetCache, isRef, TextRenderingMode, IdentityToUnicodeMap, OPS, UNSUPPORTED_FEATURES, UnsupportedManager, NormalizedUnicodes, IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability, ToUnicodeMap, getFontType */ 'use strict'; var PartialEvaluator = (function PartialEvaluatorClosure() { function PartialEvaluator(pdfManager, xref, handler, pageIndex, uniquePrefix, idCounters, fontCache) { this.pdfManager = pdfManager; this.xref = xref; this.handler = handler; this.pageIndex = pageIndex; this.uniquePrefix = uniquePrefix; this.idCounters = idCounters; this.fontCache = fontCache; } // Trying to minimize Date.now() usage and check every 100 time var TIME_SLOT_DURATION_MS = 20; var CHECK_TIME_EVERY = 100; function TimeSlotManager() { this.reset(); } TimeSlotManager.prototype = { check: function TimeSlotManager_check() { if (++this.checked < CHECK_TIME_EVERY) { return false; } this.checked = 0; return this.endTime <= Date.now(); }, reset: function TimeSlotManager_reset() { this.endTime = Date.now() + TIME_SLOT_DURATION_MS; this.checked = 0; } }; var deferred = Promise.resolve(); var TILING_PATTERN = 1, SHADING_PATTERN = 2; PartialEvaluator.prototype = { hasBlendModes: function PartialEvaluator_hasBlendModes(resources) { if (!isDict(resources)) { return false; } var processed = Object.create(null); if (resources.objId) { processed[resources.objId] = true; } var nodes = [resources]; while (nodes.length) { var key; var node = nodes.shift(); // First check the current resources for blend modes. var graphicStates = node.get('ExtGState'); if (isDict(graphicStates)) { graphicStates = graphicStates.getAll(); for (key in graphicStates) { var graphicState = graphicStates[key]; var bm = graphicState['BM']; if (isName(bm) && bm.name !== 'Normal') { return true; } } } // Descend into the XObjects to look for more resources and blend modes. var xObjects = node.get('XObject'); if (!isDict(xObjects)) { continue; } xObjects = xObjects.getAll(); for (key in xObjects) { var xObject = xObjects[key]; if (!isStream(xObject)) { continue; } if (xObject.dict.objId) { if (processed[xObject.dict.objId]) { // stream has objId and is processed already continue; } processed[xObject.dict.objId] = true; } var xResources = xObject.dict.get('Resources'); // Checking objId to detect an infinite loop. if (isDict(xResources) && (!xResources.objId || !processed[xResources.objId])) { nodes.push(xResources); if (xResources.objId) { processed[xResources.objId] = true; } } } } return false; }, buildFormXObject: function PartialEvaluator_buildFormXObject(resources, xobj, smask, operatorList, initialState) { var matrix = xobj.dict.getArray('Matrix'); var bbox = xobj.dict.getArray('BBox'); var group = xobj.dict.get('Group'); if (group) { var groupOptions = { matrix: matrix, bbox: bbox, smask: smask, isolated: false, knockout: false }; var groupSubtype = group.get('S'); var colorSpace; if (isName(groupSubtype) && groupSubtype.name === 'Transparency') { groupOptions.isolated = (group.get('I') || false); groupOptions.knockout = (group.get('K') || false); colorSpace = (group.has('CS') ? ColorSpace.parse(group.get('CS'), this.xref, resources) : null); } if (smask && smask.backdrop) { colorSpace = colorSpace || ColorSpace.singletons.rgb; smask.backdrop = colorSpace.getRgb(smask.backdrop, 0); } operatorList.addOp(OPS.beginGroup, [groupOptions]); } operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]); return this.getOperatorList(xobj, (xobj.dict.get('Resources') || resources), operatorList, initialState). then(function () { operatorList.addOp(OPS.paintFormXObjectEnd, []); if (group) { operatorList.addOp(OPS.endGroup, [groupOptions]); } }); }, buildPaintImageXObject: function PartialEvaluator_buildPaintImageXObject(resources, image, inline, operatorList, cacheKey, imageCache) { var self = this; var dict = image.dict; var w = dict.get('Width', 'W'); var h = dict.get('Height', 'H'); if (!(w && isNum(w)) || !(h && isNum(h))) { warn('Image dimensions are missing, or not numbers.'); return; } if (PDFJS.maxImageSize !== -1 && w * h > PDFJS.maxImageSize) { warn('Image exceeded maximum allowed size and was removed.'); return; } var imageMask = (dict.get('ImageMask', 'IM') || false); var imgData, args; if (imageMask) { // This depends on a tmpCanvas being filled with the // current fillStyle, such that processing the pixel // data can't be done here. Instead of creating a // complete PDFImage, only read the information needed // for later. var width = dict.get('Width', 'W'); var height = dict.get('Height', 'H'); var bitStrideLength = (width + 7) >> 3; var imgArray = image.getBytes(bitStrideLength * height); var decode = dict.get('Decode', 'D'); var inverseDecode = (!!decode && decode[0] > 0); imgData = PDFImage.createMask(imgArray, width, height, image instanceof DecodeStream, inverseDecode); imgData.cached = true; args = [imgData]; operatorList.addOp(OPS.paintImageMaskXObject, args); if (cacheKey) { imageCache[cacheKey] = { fn: OPS.paintImageMaskXObject, args: args }; } return; } var softMask = (dict.get('SMask', 'SM') || false); var mask = (dict.get('Mask') || false); var SMALL_IMAGE_DIMENSIONS = 200; // Inlining small images into the queue as RGB data if (inline && !softMask && !mask && !(image instanceof JpegStream) && (w + h) < SMALL_IMAGE_DIMENSIONS) { var imageObj = new PDFImage(this.xref, resources, image, inline, null, null); // We force the use of RGBA_32BPP images here, because we can't handle // any other kind. imgData = imageObj.createImageData(/* forceRGBA = */ true); operatorList.addOp(OPS.paintInlineImageXObject, [imgData]); return; } // If there is no imageMask, create the PDFImage and a lot // of image processing can be done here. var uniquePrefix = (this.uniquePrefix || ''); var objId = 'img_' + uniquePrefix + (++this.idCounters.obj); operatorList.addDependency(objId); args = [objId, w, h]; if (!softMask && !mask && image instanceof JpegStream && image.isNativelySupported(this.xref, resources)) { // These JPEGs don't need any more processing so we can just send it. operatorList.addOp(OPS.paintJpegXObject, args); this.handler.send('obj', [objId, this.pageIndex, 'JpegStream', image.getIR()]); return; } PDFImage.buildImage(self.handler, self.xref, resources, image, inline). then(function(imageObj) { var imgData = imageObj.createImageData(/* forceRGBA = */ false); self.handler.send('obj', [objId, self.pageIndex, 'Image', imgData], [imgData.data.buffer]); }).then(undefined, function (reason) { warn('Unable to decode image: ' + reason); self.handler.send('obj', [objId, self.pageIndex, 'Image', null]); }); operatorList.addOp(OPS.paintImageXObject, args); if (cacheKey) { imageCache[cacheKey] = { fn: OPS.paintImageXObject, args: args }; } }, handleSMask: function PartialEvaluator_handleSmask(smask, resources, operatorList, stateManager) { var smaskContent = smask.get('G'); var smaskOptions = { subtype: smask.get('S').name, backdrop: smask.get('BC') }; return this.buildFormXObject(resources, smaskContent, smaskOptions, operatorList, stateManager.state.clone()); }, handleTilingType: function PartialEvaluator_handleTilingType(fn, args, resources, pattern, patternDict, operatorList) { // Create an IR of the pattern code. var tilingOpList = new OperatorList(); return this.getOperatorList(pattern, (patternDict.get('Resources') || resources), tilingOpList). then(function () { // Add the dependencies to the parent operator list so they are // resolved before sub operator list is executed synchronously. operatorList.addDependencies(tilingOpList.dependencies); operatorList.addOp(fn, getTilingPatternIR({ fnArray: tilingOpList.fnArray, argsArray: tilingOpList.argsArray }, patternDict, args)); }); }, handleSetFont: function PartialEvaluator_handleSetFont(resources, fontArgs, fontRef, operatorList, state) { // TODO(mack): Not needed? var fontName; if (fontArgs) { fontArgs = fontArgs.slice(); fontName = fontArgs[0].name; } var self = this; return this.loadFont(fontName, fontRef, this.xref, resources).then( function (translated) { if (!translated.font.isType3Font) { return translated; } return translated.loadType3Data(self, resources, operatorList).then( function () { return translated; }); }).then(function (translated) { state.font = translated.font; translated.send(self.handler); return translated.loadedName; }); }, handleText: function PartialEvaluator_handleText(chars, state) { var font = state.font; var glyphs = font.charsToGlyphs(chars); var isAddToPathSet = !!(state.textRenderingMode & TextRenderingMode.ADD_TO_PATH_FLAG); if (font.data && (isAddToPathSet || PDFJS.disableFontFace)) { var buildPath = function (fontChar) { if (!font.renderer.hasBuiltPath(fontChar)) { var path = font.renderer.getPathJs(fontChar); this.handler.send('commonobj', [ font.loadedName + '_path_' + fontChar, 'FontPath', path ]); } }.bind(this); for (var i = 0, ii = glyphs.length; i < ii; i++) { var glyph = glyphs[i]; if (glyph === null) { continue; } buildPath(glyph.fontChar); // If the glyph has an accent we need to build a path for its // fontChar too, otherwise CanvasGraphics_paintChar will fail. var accent = glyph.accent; if (accent && accent.fontChar) { buildPath(accent.fontChar); } } } return glyphs; }, setGState: function PartialEvaluator_setGState(resources, gState, operatorList, xref, stateManager) { // This array holds the converted/processed state data. var gStateObj = []; var gStateMap = gState.map; var self = this; var promise = Promise.resolve(); for (var key in gStateMap) { var value = gStateMap[key]; switch (key) { case 'Type': break; case 'LW': case 'LC': case 'LJ': case 'ML': case 'D': case 'RI': case 'FL': case 'CA': case 'ca': gStateObj.push([key, value]); break; case 'Font': promise = promise.then(function () { return self.handleSetFont(resources, null, value[0], operatorList, stateManager.state). then(function (loadedName) { operatorList.addDependency(loadedName); gStateObj.push([key, [loadedName, value[1]]]); }); }); break; case 'BM': gStateObj.push([key, value]); break; case 'SMask': if (isName(value) && value.name === 'None') { gStateObj.push([key, false]); break; } var dict = xref.fetchIfRef(value); if (isDict(dict)) { promise = promise.then(function () { return self.handleSMask(dict, resources, operatorList, stateManager); }); gStateObj.push([key, true]); } else { warn('Unsupported SMask type'); } break; // Only generate info log messages for the following since // they are unlikely to have a big impact on the rendering. case 'OP': case 'op': case 'OPM': case 'BG': case 'BG2': case 'UCR': case 'UCR2': case 'TR': case 'TR2': case 'HT': case 'SM': case 'SA': case 'AIS': case 'TK': // TODO implement these operators. info('graphic state operator ' + key); break; default: info('Unknown graphic state operator ' + key); break; } } return promise.then(function () { if (gStateObj.length >= 0) { operatorList.addOp(OPS.setGState, [gStateObj]); } }); }, loadFont: function PartialEvaluator_loadFont(fontName, font, xref, resources) { function errorFont() { return Promise.resolve(new TranslatedFont('g_font_error', new ErrorFont('Font ' + fontName + ' is not available'), font)); } var fontRef; if (font) { // Loading by ref. assert(isRef(font)); fontRef = font; } else { // Loading by name. var fontRes = resources.get('Font'); if (fontRes) { fontRef = fontRes.getRaw(fontName); } else { warn('fontRes not available'); return errorFont(); } } if (!fontRef) { warn('fontRef not available'); return errorFont(); } if (this.fontCache.has(fontRef)) { return this.fontCache.get(fontRef); } font = xref.fetchIfRef(fontRef); if (!isDict(font)) { return errorFont(); } // We are holding font.translated references just for fontRef that are not // dictionaries (Dict). See explanation below. if (font.translated) { return font.translated; } var fontCapability = createPromiseCapability(); var preEvaluatedFont = this.preEvaluateFont(font, xref); var descriptor = preEvaluatedFont.descriptor; var fontID = fontRef.num + '_' + fontRef.gen; if (isDict(descriptor)) { if (!descriptor.fontAliases) { descriptor.fontAliases = Object.create(null); } var fontAliases = descriptor.fontAliases; var hash = preEvaluatedFont.hash; if (fontAliases[hash]) { var aliasFontRef = fontAliases[hash].aliasRef; if (aliasFontRef && this.fontCache.has(aliasFontRef)) { this.fontCache.putAlias(fontRef, aliasFontRef); return this.fontCache.get(fontRef); } } if (!fontAliases[hash]) { fontAliases[hash] = { fontID: Font.getFontID() }; } fontAliases[hash].aliasRef = fontRef; fontID = fontAliases[hash].fontID; } // Workaround for bad PDF generators that don't reference fonts // properly, i.e. by not using an object identifier. // Check if the fontRef is a Dict (as opposed to a standard object), // in which case we don't cache the font and instead reference it by // fontName in font.loadedName below. var fontRefIsDict = isDict(fontRef); if (!fontRefIsDict) { this.fontCache.put(fontRef, fontCapability.promise); } // Keep track of each font we translated so the caller can // load them asynchronously before calling display on a page. font.loadedName = 'g_font_' + (fontRefIsDict ? fontName.replace(/\W/g, '') : fontID); font.translated = fontCapability.promise; // TODO move promises into translate font var translatedPromise; try { translatedPromise = Promise.resolve( this.translateFont(preEvaluatedFont, xref)); } catch (e) { translatedPromise = Promise.reject(e); } translatedPromise.then(function (translatedFont) { if (translatedFont.fontType !== undefined) { var xrefFontStats = xref.stats.fontTypes; xrefFontStats[translatedFont.fontType] = true; } fontCapability.resolve(new TranslatedFont(font.loadedName, translatedFont, font)); }, function (reason) { // TODO fontCapability.reject? UnsupportedManager.notify(UNSUPPORTED_FEATURES.font); try { // error, but it's still nice to have font type reported var descriptor = preEvaluatedFont.descriptor; var fontFile3 = descriptor && descriptor.get('FontFile3'); var subtype = fontFile3 && fontFile3.get('Subtype'); var fontType = getFontType(preEvaluatedFont.type, subtype && subtype.name); var xrefFontStats = xref.stats.fontTypes; xrefFontStats[fontType] = true; } catch (ex) { } fontCapability.resolve(new TranslatedFont(font.loadedName, new ErrorFont(reason instanceof Error ? reason.message : reason), font)); }); return fontCapability.promise; }, buildPath: function PartialEvaluator_buildPath(operatorList, fn, args) { var lastIndex = operatorList.length - 1; if (!args) { args = []; } if (lastIndex < 0 || operatorList.fnArray[lastIndex] !== OPS.constructPath) { operatorList.addOp(OPS.constructPath, [[fn], args]); } else { var opArgs = operatorList.argsArray[lastIndex]; opArgs[0].push(fn); Array.prototype.push.apply(opArgs[1], args); } }, handleColorN: function PartialEvaluator_handleColorN(operatorList, fn, args, cs, patterns, resources, xref) { // compile tiling patterns var patternName = args[args.length - 1]; // SCN/scn applies patterns along with normal colors var pattern; if (isName(patternName) && (pattern = patterns.get(patternName.name))) { var dict = (isStream(pattern) ? pattern.dict : pattern); var typeNum = dict.get('PatternType'); if (typeNum === TILING_PATTERN) { var color = cs.base ? cs.base.getRgb(args, 0) : null; return this.handleTilingType(fn, color, resources, pattern, dict, operatorList); } else if (typeNum === SHADING_PATTERN) { var shading = dict.get('Shading'); var matrix = dict.get('Matrix'); pattern = Pattern.parseShading(shading, matrix, xref, resources); operatorList.addOp(fn, pattern.getIR()); return Promise.resolve(); } else { return Promise.reject('Unknown PatternType: ' + typeNum); } } // TODO shall we fail here? operatorList.addOp(fn, args); return Promise.resolve(); }, getOperatorList: function PartialEvaluator_getOperatorList(stream, resources, operatorList, initialState) { var self = this; var xref = this.xref; var imageCache = {}; assert(operatorList); resources = (resources || Dict.empty); var xobjs = (resources.get('XObject') || Dict.empty); var patterns = (resources.get('Pattern') || Dict.empty); var stateManager = new StateManager(initialState || new EvalState()); var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); var timeSlotManager = new TimeSlotManager(); return new Promise(function next(resolve, reject) { timeSlotManager.reset(); var stop, operation = {}, i, ii, cs; while (!(stop = timeSlotManager.check())) { // The arguments parsed by read() are used beyond this loop, so we // cannot reuse the same array on each iteration. Therefore we pass // in |null| as the initial value (see the comment on // EvaluatorPreprocessor_read() for why). operation.args = null; if (!(preprocessor.read(operation))) { break; } var args = operation.args; var fn = operation.fn; switch (fn | 0) { case OPS.paintXObject: if (args[0].code) { break; } // eagerly compile XForm objects var name = args[0].name; if (!name) { warn('XObject must be referred to by name.'); continue; } if (imageCache[name] !== undefined) { operatorList.addOp(imageCache[name].fn, imageCache[name].args); args = null; continue; } var xobj = xobjs.get(name); if (xobj) { assert(isStream(xobj), 'XObject should be a stream'); var type = xobj.dict.get('Subtype'); assert(isName(type), 'XObject should have a Name subtype'); if (type.name === 'Form') { stateManager.save(); return self.buildFormXObject(resources, xobj, null, operatorList, stateManager.state.clone()). then(function () { stateManager.restore(); next(resolve, reject); }, reject); } else if (type.name === 'Image') { self.buildPaintImageXObject(resources, xobj, false, operatorList, name, imageCache); args = null; continue; } else if (type.name === 'PS') { // PostScript XObjects are unused when viewing documents. // See section 4.7.1 of Adobe's PDF reference. info('Ignored XObject subtype PS'); continue; } else { error('Unhandled XObject subtype ' + type.name); } } break; case OPS.setFont: var fontSize = args[1]; // eagerly collect all fonts return self.handleSetFont(resources, args, null, operatorList, stateManager.state). then(function (loadedName) { operatorList.addDependency(loadedName); operatorList.addOp(OPS.setFont, [loadedName, fontSize]); next(resolve, reject); }, reject); case OPS.endInlineImage: var cacheKey = args[0].cacheKey; if (cacheKey) { var cacheEntry = imageCache[cacheKey]; if (cacheEntry !== undefined) { operatorList.addOp(cacheEntry.fn, cacheEntry.args); args = null; continue; } } self.buildPaintImageXObject(resources, args[0], true, operatorList, cacheKey, imageCache); args = null; continue; case OPS.showText: args[0] = self.handleText(args[0], stateManager.state); break; case OPS.showSpacedText: var arr = args[0]; var combinedGlyphs = []; var arrLength = arr.length; var state = stateManager.state; for (i = 0; i < arrLength; ++i) { var arrItem = arr[i]; if (isString(arrItem)) { Array.prototype.push.apply(combinedGlyphs, self.handleText(arrItem, state)); } else if (isNum(arrItem)) { combinedGlyphs.push(arrItem); } } args[0] = combinedGlyphs; fn = OPS.showText; break; case OPS.nextLineShowText: operatorList.addOp(OPS.nextLine); args[0] = self.handleText(args[0], stateManager.state); fn = OPS.showText; break; case OPS.nextLineSetSpacingShowText: operatorList.addOp(OPS.nextLine); operatorList.addOp(OPS.setWordSpacing, [args.shift()]); operatorList.addOp(OPS.setCharSpacing, [args.shift()]); args[0] = self.handleText(args[0], stateManager.state); fn = OPS.showText; break; case OPS.setTextRenderingMode: stateManager.state.textRenderingMode = args[0]; break; case OPS.setFillColorSpace: stateManager.state.fillColorSpace = ColorSpace.parse(args[0], xref, resources); continue; case OPS.setStrokeColorSpace: stateManager.state.strokeColorSpace = ColorSpace.parse(args[0], xref, resources); continue; case OPS.setFillColor: cs = stateManager.state.fillColorSpace; args = cs.getRgb(args, 0); fn = OPS.setFillRGBColor; break; case OPS.setStrokeColor: cs = stateManager.state.strokeColorSpace; args = cs.getRgb(args, 0); fn = OPS.setStrokeRGBColor; break; case OPS.setFillGray: stateManager.state.fillColorSpace = ColorSpace.singletons.gray; args = ColorSpace.singletons.gray.getRgb(args, 0); fn = OPS.setFillRGBColor; break; case OPS.setStrokeGray: stateManager.state.strokeColorSpace = ColorSpace.singletons.gray; args = ColorSpace.singletons.gray.getRgb(args, 0); fn = OPS.setStrokeRGBColor; break; case OPS.setFillCMYKColor: stateManager.state.fillColorSpace = ColorSpace.singletons.cmyk; args = ColorSpace.singletons.cmyk.getRgb(args, 0); fn = OPS.setFillRGBColor; break; case OPS.setStrokeCMYKColor: stateManager.state.strokeColorSpace = ColorSpace.singletons.cmyk; args = ColorSpace.singletons.cmyk.getRgb(args, 0); fn = OPS.setStrokeRGBColor; break; case OPS.setFillRGBColor: stateManager.state.fillColorSpace = ColorSpace.singletons.rgb; args = ColorSpace.singletons.rgb.getRgb(args, 0); break; case OPS.setStrokeRGBColor: stateManager.state.strokeColorSpace = ColorSpace.singletons.rgb; args = ColorSpace.singletons.rgb.getRgb(args, 0); break; case OPS.setFillColorN: cs = stateManager.state.fillColorSpace; if (cs.name === 'Pattern') { return self.handleColorN(operatorList, OPS.setFillColorN, args, cs, patterns, resources, xref).then(function() { next(resolve, reject); }, reject); } args = cs.getRgb(args, 0); fn = OPS.setFillRGBColor; break; case OPS.setStrokeColorN: cs = stateManager.state.strokeColorSpace; if (cs.name === 'Pattern') { return self.handleColorN(operatorList, OPS.setStrokeColorN, args, cs, patterns, resources, xref).then(function() { next(resolve, reject); }, reject); } args = cs.getRgb(args, 0); fn = OPS.setStrokeRGBColor; break; case OPS.shadingFill: var shadingRes = resources.get('Shading'); if (!shadingRes) { error('No shading resource found'); } var shading = shadingRes.get(args[0].name); if (!shading) { error('No shading object found'); } var shadingFill = Pattern.parseShading(shading, null, xref, resources); var patternIR = shadingFill.getIR(); args = [patternIR]; fn = OPS.shadingFill; break; case OPS.setGState: var dictName = args[0]; var extGState = resources.get('ExtGState'); if (!isDict(extGState) || !extGState.has(dictName.name)) { break; } var gState = extGState.get(dictName.name); return self.setGState(resources, gState, operatorList, xref, stateManager).then(function() { next(resolve, reject); }, reject); case OPS.moveTo: case OPS.lineTo: case OPS.curveTo: case OPS.curveTo2: case OPS.curveTo3: case OPS.closePath: self.buildPath(operatorList, fn, args); continue; case OPS.rectangle: self.buildPath(operatorList, fn, args); continue; } operatorList.addOp(fn, args); } if (stop) { deferred.then(function () { next(resolve, reject); }); return; } // Some PDFs don't close all restores inside object/form. // Closing those for them. for (i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) { operatorList.addOp(OPS.restore, []); } resolve(); }); }, getTextContent: function PartialEvaluator_getTextContent(stream, resources, stateManager) { stateManager = (stateManager || new StateManager(new TextState())); var textContent = { items: [], styles: Object.create(null) }; var bidiTexts = textContent.items; var SPACE_FACTOR = 0.3; var MULTI_SPACE_FACTOR = 1.5; var self = this; var xref = this.xref; resources = (xref.fetchIfRef(resources) || Dict.empty); // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. var xobjs = null; var xobjsCache = {}; var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); var textState; function newTextChunk() { var font = textState.font; if (!(font.loadedName in textContent.styles)) { textContent.styles[font.loadedName] = { fontFamily: font.fallbackName, ascent: font.ascent, descent: font.descent, vertical: font.vertical }; } return { // |str| is initially an array which we push individual chars to, and // then runBidi() overwrites it with the final string. str: [], dir: null, width: 0, height: 0, transform: null, fontName: font.loadedName }; } function runBidi(textChunk) { var str = textChunk.str.join(''); var bidiResult = PDFJS.bidi(str, -1, textState.font.vertical); textChunk.str = bidiResult.str; textChunk.dir = bidiResult.dir; return textChunk; } function handleSetFont(fontName, fontRef) { return self.loadFont(fontName, fontRef, xref, resources). then(function (translated) { textState.font = translated.font; textState.fontMatrix = translated.font.fontMatrix || FONT_IDENTITY_MATRIX; }); } function buildTextGeometry(chars, textChunk) { var font = textState.font; textChunk = textChunk || newTextChunk(); if (!textChunk.transform) { // 9.4.4 Text Space Details var tsm = [textState.fontSize * textState.textHScale, 0, 0, textState.fontSize, 0, textState.textRise]; if (font.isType3Font && textState.fontMatrix !== FONT_IDENTITY_MATRIX && textState.fontSize === 1) { var glyphHeight = font.bbox[3] - font.bbox[1]; if (glyphHeight > 0) { glyphHeight = glyphHeight * textState.fontMatrix[3]; tsm[3] *= glyphHeight; } } var trm = textChunk.transform = Util.transform(textState.ctm, Util.transform(textState.textMatrix, tsm)); if (!font.vertical) { textChunk.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]); } else { textChunk.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]); } } var width = 0; var height = 0; var glyphs = font.charsToGlyphs(chars); var defaultVMetrics = font.defaultVMetrics; for (var i = 0; i < glyphs.length; i++) { var glyph = glyphs[i]; if (!glyph) { // Previous glyph was a space. width += textState.wordSpacing * textState.textHScale; continue; } var vMetricX = null; var vMetricY = null; var glyphWidth = null; if (font.vertical) { if (glyph.vmetric) { glyphWidth = glyph.vmetric[0]; vMetricX = glyph.vmetric[1]; vMetricY = glyph.vmetric[2]; } else { glyphWidth = glyph.width; vMetricX = glyph.width * 0.5; vMetricY = defaultVMetrics[2]; } } else { glyphWidth = glyph.width; } var glyphUnicode = glyph.unicode; if (NormalizedUnicodes[glyphUnicode] !== undefined) { glyphUnicode = NormalizedUnicodes[glyphUnicode]; } glyphUnicode = reverseIfRtl(glyphUnicode); // The following will calculate the x and y of the individual glyphs. // if (font.vertical) { // tsm[4] -= vMetricX * Math.abs(textState.fontSize) * // textState.fontMatrix[0]; // tsm[5] -= vMetricY * textState.fontSize * // textState.fontMatrix[0]; // } // var trm = Util.transform(textState.textMatrix, tsm); // var pt = Util.applyTransform([trm[4], trm[5]], textState.ctm); // var x = pt[0]; // var y = pt[1]; var charSpacing = 0; if (textChunk.str.length > 0) { // Apply char spacing only when there are chars. // As a result there is only spacing between glyphs. charSpacing = textState.charSpacing; } var tx = 0; var ty = 0; if (!font.vertical) { var w0 = glyphWidth * textState.fontMatrix[0]; tx = (w0 * textState.fontSize + charSpacing) * textState.textHScale; width += tx; } else { var w1 = glyphWidth * textState.fontMatrix[0]; ty = w1 * textState.fontSize + charSpacing; height += ty; } textState.translateTextMatrix(tx, ty); textChunk.str.push(glyphUnicode); } var a = textState.textLineMatrix[0]; var b = textState.textLineMatrix[1]; var scaleLineX = Math.sqrt(a * a + b * b); a = textState.ctm[0]; b = textState.ctm[1]; var scaleCtmX = Math.sqrt(a * a + b * b); if (!font.vertical) { textChunk.width += width * scaleCtmX * scaleLineX; } else { textChunk.height += Math.abs(height * scaleCtmX * scaleLineX); } return textChunk; } var timeSlotManager = new TimeSlotManager(); return new Promise(function next(resolve, reject) { timeSlotManager.reset(); var stop, operation = {}, args = []; while (!(stop = timeSlotManager.check())) { // The arguments parsed by read() are not used beyond this loop, so // we can reuse the same array on every iteration, thus avoiding // unnecessary allocations. args.length = 0; operation.args = args; if (!(preprocessor.read(operation))) { break; } textState = stateManager.state; var fn = operation.fn; args = operation.args; switch (fn | 0) { case OPS.setFont: textState.fontSize = args[1]; return handleSetFont(args[0].name).then(function() { next(resolve, reject); }, reject); case OPS.setTextRise: textState.textRise = args[0]; break; case OPS.setHScale: textState.textHScale = args[0] / 100; break; case OPS.setLeading: textState.leading = args[0]; break; case OPS.moveText: textState.translateTextLineMatrix(args[0], args[1]); textState.textMatrix = textState.textLineMatrix.slice(); break; case OPS.setLeadingMoveText: textState.leading = -args[1]; textState.translateTextLineMatrix(args[0], args[1]); textState.textMatrix = textState.textLineMatrix.slice(); break; case OPS.nextLine: textState.carriageReturn(); break; case OPS.setTextMatrix: textState.setTextMatrix(args[0], args[1], args[2], args[3], args[4], args[5]); textState.setTextLineMatrix(args[0], args[1], args[2], args[3], args[4], args[5]); break; case OPS.setCharSpacing: textState.charSpacing = args[0]; break; case OPS.setWordSpacing: textState.wordSpacing = args[0]; break; case OPS.beginText: textState.textMatrix = IDENTITY_MATRIX.slice(); textState.textLineMatrix = IDENTITY_MATRIX.slice(); break; case OPS.showSpacedText: var items = args[0]; var textChunk = newTextChunk(); var offset; for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { buildTextGeometry(items[j], textChunk); } else { // PDF Specification 5.3.2 states: // The number is expressed in thousandths of a unit of text // space. // This amount is subtracted from the current horizontal or // vertical coordinate, depending on the writing mode. // In the default coordinate system, a positive adjustment // has the effect of moving the next glyph painted either to // the left or down by the given amount. var val = items[j] * textState.fontSize / 1000; if (textState.font.vertical) { offset = val * textState.textMatrix[3]; textState.translateTextMatrix(0, offset); // Value needs to be added to height to paint down. textChunk.height += offset; } else { offset = val * textState.textHScale * textState.textMatrix[0]; textState.translateTextMatrix(offset, 0); // Value needs to be subtracted from width to paint left. textChunk.width -= offset; } if (items[j] < 0 && textState.font.spaceWidth > 0) { var fakeSpaces = -items[j] / textState.font.spaceWidth; if (fakeSpaces > MULTI_SPACE_FACTOR) { fakeSpaces = Math.round(fakeSpaces); while (fakeSpaces--) { textChunk.str.push(' '); } } else if (fakeSpaces > SPACE_FACTOR) { textChunk.str.push(' '); } } } } bidiTexts.push(runBidi(textChunk)); break; case OPS.showText: bidiTexts.push(runBidi(buildTextGeometry(args[0]))); break; case OPS.nextLineShowText: textState.carriageReturn(); bidiTexts.push(runBidi(buildTextGeometry(args[0]))); break; case OPS.nextLineSetSpacingShowText: textState.wordSpacing = args[0]; textState.charSpacing = args[1]; textState.carriageReturn(); bidiTexts.push(runBidi(buildTextGeometry(args[2]))); break; case OPS.paintXObject: if (args[0].code) { break; } if (!xobjs) { xobjs = (resources.get('XObject') || Dict.empty); } var name = args[0].name; if (xobjsCache.key === name) { if (xobjsCache.texts) { Util.appendToArray(bidiTexts, xobjsCache.texts.items); Util.extendObj(textContent.styles, xobjsCache.texts.styles); } break; } var xobj = xobjs.get(name); if (!xobj) { break; } assert(isStream(xobj), 'XObject should be a stream'); var type = xobj.dict.get('Subtype'); assert(isName(type), 'XObject should have a Name subtype'); if ('Form' !== type.name) { xobjsCache.key = name; xobjsCache.texts = null; break; } stateManager.save(); var matrix = xobj.dict.get('Matrix'); if (isArray(matrix) && matrix.length === 6) { stateManager.transform(matrix); } return self.getTextContent(xobj, xobj.dict.get('Resources') || resources, stateManager). then(function (formTextContent) { Util.appendToArray(bidiTexts, formTextContent.items); Util.extendObj(textContent.styles, formTextContent.styles); stateManager.restore(); xobjsCache.key = name; xobjsCache.texts = formTextContent; next(resolve, reject); }, reject); case OPS.setGState: var dictName = args[0]; var extGState = resources.get('ExtGState'); if (!isDict(extGState) || !extGState.has(dictName.name)) { break; } var gsStateMap = extGState.get(dictName.name); var gsStateFont = null; for (var key in gsStateMap) { if (key === 'Font') { assert(!gsStateFont); gsStateFont = gsStateMap[key]; } } if (gsStateFont) { textState.fontSize = gsStateFont[1]; return handleSetFont(gsStateFont[0]).then(function() { next(resolve, reject); }, reject); } break; } // switch } // while if (stop) { deferred.then(function () { next(resolve, reject); }); return; } resolve(textContent); }); }, extractDataStructures: function partialEvaluatorExtractDataStructures(dict, baseDict, xref, properties) { // 9.10.2 var toUnicode = (dict.get('ToUnicode') || baseDict.get('ToUnicode')); if (toUnicode) { properties.toUnicode = this.readToUnicode(toUnicode); } if (properties.composite) { // CIDSystemInfo helps to match CID to glyphs var cidSystemInfo = dict.get('CIDSystemInfo'); if (isDict(cidSystemInfo)) { properties.cidSystemInfo = { registry: cidSystemInfo.get('Registry'), ordering: cidSystemInfo.get('Ordering'), supplement: cidSystemInfo.get('Supplement') }; } var cidToGidMap = dict.get('CIDToGIDMap'); if (isStream(cidToGidMap)) { properties.cidToGidMap = this.readCidToGidMap(cidToGidMap); } } // Based on 9.6.6 of the spec the encoding can come from multiple places // and depends on the font type. The base encoding and differences are // read here, but the encoding that is actually used is chosen during // glyph mapping in the font. // TODO: Loading the built in encoding in the font would allow the // differences to be merged in here not require us to hold on to it. var differences = []; var baseEncodingName = null; var encoding; if (dict.has('Encoding')) { encoding = dict.get('Encoding'); if (isDict(encoding)) { baseEncodingName = encoding.get('BaseEncoding'); baseEncodingName = (isName(baseEncodingName) ? baseEncodingName.name : null); // Load the differences between the base and original if (encoding.has('Differences')) { var diffEncoding = encoding.get('Differences'); var index = 0; for (var j = 0, jj = diffEncoding.length; j < jj; j++) { var data = diffEncoding[j]; if (isNum(data)) { index = data; } else if (isName(data)) { differences[index++] = data.name; } else if (isRef(data)) { diffEncoding[j--] = xref.fetch(data); continue; } else { error('Invalid entry in \'Differences\' array: ' + data); } } } } else if (isName(encoding)) { baseEncodingName = encoding.name; } else { error('Encoding is not a Name nor a Dict'); } // According to table 114 if the encoding is a named encoding it must be // one of these predefined encodings. if ((baseEncodingNam