UNPKG

pdf3json

Version:

A PDF file parser that converts PDF binaries to text-based JSON, powered by a fork of PDF.js ported to Node.js

1,182 lines (1,154 loc) • 252 kB
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2012 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
           ExpertSubsetCharset, FileReaderSync, GlyphsUnicode, info, isArray,
           isNum, ISOAdobeCharset, Stream, stringToBytes, TextDecoder, TODO,
           warn, Lexer, Util, FONT_IDENTITY_MATRIX, FontRendererFactory,
           shadow, isString */

'use strict';

// NOTE: every `//` comment below must stay on its own line. When this file is
// collapsed onto a single line the first line comment swallows all of the
// declarations that follow it and none of these constants gets defined.

// Unicode Private Use Area
var CMAP_GLYPH_OFFSET = 0xE000;
var GLYPH_AREA_SIZE = 0x1900;
var SYMBOLIC_FONT_GLYPH_OFFSET = 0xF000;

// PDF Glyph Space Units are one Thousandth of a TextSpace Unit
// except for Type 3 fonts
var PDF_GLYPH_SPACE_UNITS = 1000;

// Hinting is currently disabled due to unknown problems on windows
// in tracemonkey and various other pdfs with type1 fonts.
var HINTING_ENABLED = false;

// Accented characters are not displayed properly on windows, using this flag
// to control analysis of seac charstrings.
var SEAC_ANALYSIS_ENABLED = false;

/**
 * Font flag bit masks. Every value is a distinct power of two so several
 * flags can be combined in one integer (presumably the /Flags entry of a PDF
 * font descriptor -- confirm against the code that reads it).
 */
var FontFlags = {
  FixedPitch: 1,
  Serif: 2,
  Symbolic: 4,
  Script: 8,
  Nonsymbolic: 32,
  Italic: 64,
  AllCap: 65536,
  SmallCap: 131072,
  ForceBold: 262144
};

/**
 * Predefined encodings. Each entry is an array of glyph names indexed by
 * character code ('space' sits at index 32); an empty string marks a code
 * that has no glyph name in that encoding.
 */
var Encodings = {
  ExpertEncoding: [
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'space', 'exclamsmall', 'Hungarumlautsmall', '', 'dollaroldstyle',
    'dollarsuperior', 'ampersandsmall', 'Acutesmall', 'parenleftsuperior',
    'parenrightsuperior', 'twodotenleader', 'onedotenleader', 'comma',
    'hyphen', 'period', 'fraction', 'zerooldstyle', 'oneoldstyle',
    'twooldstyle', 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle',
    'sixoldstyle', 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', 'colon',
    'semicolon', 'commasuperior', 'threequartersemdash', 'periodsuperior',
    'questionsmall', '', 'asuperior', 'bsuperior', 'centsuperior',
    'dsuperior', 'esuperior', '', '', 'isuperior', '', '', 'lsuperior',
    'msuperior', 'nsuperior', 'osuperior', '', '', 'rsuperior', 'ssuperior',
    'tsuperior', '', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'parenleftinferior', '',
    'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
    'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall',
    'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall',
    'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall',
    'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', 'Zsmall', 'colonmonetary',
    'onefitted', 'rupiah', 'Tildesmall',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '',
    'exclamdownsmall', 'centoldstyle', 'Lslashsmall', '', '', 'Scaronsmall',
    'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall', '',
    'Dotaccentsmall', '', '', 'Macronsmall', '', '', 'figuredash',
    'hypheninferior', '', '', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
    '', '', '', 'onequarter', 'onehalf', 'threequarters',
    'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
    'seveneighths', 'onethird', 'twothirds', '', '', 'zerosuperior',
    'onesuperior', 'twosuperior', 'threesuperior', 'foursuperior',
    'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
    'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior',
    'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior',
    'seveninferior', 'eightinferior', 'nineinferior', 'centinferior',
    'dollarinferior', 'periodinferior', 'commainferior', 'Agravesmall',
    'Aacutesmall', 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall',
    'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', 'Eacutesmall',
    'Ecircumflexsmall', 'Edieresissmall', 'Igravesmall', 'Iacutesmall',
    'Icircumflexsmall', 'Idieresissmall', 'Ethsmall', 'Ntildesmall',
    'Ogravesmall', 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall',
    'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', 'Uacutesmall',
    'Ucircumflexsmall', 'Udieresissmall', 'Yacutesmall', 'Thornsmall',
    'Ydieresissmall'],
  MacExpertEncoding: [
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'space', 'exclamsmall', 'Hungarumlautsmall', 'centoldstyle',
    'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
    'parenleftsuperior', 'parenrightsuperior', 'twodotenleader',
    'onedotenleader', 'comma', 'hyphen', 'period', 'fraction',
    'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle',
    'fouroldstyle', 'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle',
    'eightoldstyle', 'nineoldstyle', 'colon', 'semicolon', '',
    'threequartersemdash', '', 'questionsmall', '', '', '', '', 'Ethsmall',
    '', '', 'onequarter', 'onehalf', 'threequarters', 'oneeighth',
    'threeeighths', 'fiveeighths', 'seveneighths', 'onethird', 'twothirds',
    '', '', '', '', '', '', 'ff', 'fi', 'fl', 'ffi', 'ffl',
    'parenleftinferior', '', 'parenrightinferior', 'Circumflexsmall',
    'hypheninferior', 'Gravesmall', 'Asmall', 'Bsmall', 'Csmall', 'Dsmall',
    'Esmall', 'Fsmall', 'Gsmall', 'Hsmall', 'Ismall', 'Jsmall', 'Ksmall',
    'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall',
    'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall',
    'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall', '', '',
    'asuperior', 'centsuperior', '', '', '', '', 'Aacutesmall',
    'Agravesmall', 'Acircumflexsmall', 'Adieresissmall', 'Atildesmall',
    'Aringsmall', 'Ccedillasmall', 'Eacutesmall', 'Egravesmall',
    'Ecircumflexsmall', 'Edieresissmall', 'Iacutesmall', 'Igravesmall',
    'Icircumflexsmall', 'Idieresissmall', 'Ntildesmall', 'Oacutesmall',
    'Ogravesmall', 'Ocircumflexsmall', 'Odieresissmall', 'Otildesmall',
    'Uacutesmall', 'Ugravesmall', 'Ucircumflexsmall', 'Udieresissmall', '',
    'eightsuperior', 'fourinferior', 'threeinferior', 'sixinferior',
    'eightinferior', 'seveninferior', 'Scaronsmall', '', 'centinferior',
    'twoinferior', '', 'Dieresissmall', '', 'Caronsmall', 'osuperior',
    'fiveinferior', '', 'commainferior', 'periodinferior', 'Yacutesmall', '',
    'dollarinferior', '', 'Thornsmall', '', 'nineinferior', 'zeroinferior',
    'Zcaronsmall', 'AEsmall', 'Oslashsmall', 'questiondownsmall',
    'oneinferior', 'Lslashsmall', '', '', '', '', '', '', 'Cedillasmall',
    '', '', '', '', '', 'OEsmall', 'figuredash', 'hyphensuperior',
    '', '', '', '', 'exclamdownsmall', '', 'Ydieresissmall', '',
    'onesuperior', 'twosuperior', 'threesuperior', 'foursuperior',
    'fivesuperior', 'sixsuperior', 'sevensuperior', 'ninesuperior',
    'zerosuperior', '', 'esuperior', 'rsuperior', 'tsuperior', '', '',
    'isuperior', 'ssuperior', 'dsuperior', '', '', '', '', '', 'lsuperior',
    'Ogoneksmall', 'Brevesmall', 'Macronsmall', 'bsuperior', 'nsuperior',
    'msuperior', 'commasuperior', 'periodsuperior', 'Dotaccentsmall',
    'Ringsmall'],
  MacRomanEncoding: [
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
    'ampersand', 'quotesingle', 'parenleft', 'parenright', 'asterisk',
    'plus', 'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two',
    'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon',
    'semicolon', 'less', 'equal', 'greater', 'question', 'at', 'A', 'B',
    'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft',
    'backslash', 'bracketright', 'asciicircum', 'underscore', 'grave', 'a',
    'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
    'bar', 'braceright', 'asciitilde', '', 'Adieresis', 'Aring', 'Ccedilla',
    'Eacute', 'Ntilde', 'Odieresis', 'Udieresis', 'aacute', 'agrave',
    'acircumflex', 'adieresis', 'atilde', 'aring', 'ccedilla', 'eacute',
    'egrave', 'ecircumflex', 'edieresis', 'iacute', 'igrave', 'icircumflex',
    'idieresis', 'ntilde', 'oacute', 'ograve', 'ocircumflex', 'odieresis',
    'otilde', 'uacute', 'ugrave', 'ucircumflex', 'udieresis', 'dagger',
    'degree', 'cent', 'sterling', 'section', 'bullet', 'paragraph',
    'germandbls', 'registered', 'copyright', 'trademark', 'acute',
    'dieresis', 'notequal', 'AE', 'Oslash', 'infinity', 'plusminus',
    'lessequal', 'greaterequal', 'yen', 'mu', 'partialdiff', 'summation',
    'product', 'pi', 'integral', 'ordfeminine', 'ordmasculine', 'Omega',
    'ae', 'oslash', 'questiondown', 'exclamdown', 'logicalnot', 'radical',
    'florin', 'approxequal', 'Delta', 'guillemotleft', 'guillemotright',
    'ellipsis', '', 'Agrave', 'Atilde', 'Otilde', 'OE', 'oe', 'endash',
    'emdash', 'quotedblleft', 'quotedblright', 'quoteleft', 'quoteright',
    'divide', 'lozenge', 'ydieresis', 'Ydieresis', 'fraction', 'currency',
    'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'daggerdbl',
    'periodcentered', 'quotesinglbase', 'quotedblbase', 'perthousand',
    'Acircumflex', 'Ecircumflex', 'Aacute', 'Edieresis', 'Egrave', 'Iacute',
    'Icircumflex', 'Idieresis', 'Igrave', 'Oacute', 'Ocircumflex', 'apple',
    'Ograve', 'Uacute', 'Ucircumflex', 'Ugrave', 'dotlessi', 'circumflex',
    'tilde', 'macron', 'breve', 'dotaccent', 'ring', 'cedilla',
    'hungarumlaut', 'ogonek', 'caron'],
  StandardEncoding: [
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
    'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
    'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three',
    'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon',
    'less', 'equal', 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E',
    'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
    'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash',
    'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c',
    'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
    'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar',
    'braceright', 'asciitilde',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '',
    'exclamdown', 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section',
    'currency', 'quotesingle', 'quotedblleft', 'guillemotleft',
    'guilsinglleft', 'guilsinglright', 'fi', 'fl', '', 'endash', 'dagger',
    'daggerdbl', 'periodcentered', '', 'paragraph', 'bullet',
    'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
    'ellipsis', 'perthousand', '', 'questiondown', '', 'grave', 'acute',
    'circumflex', 'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', '',
    'ring', 'cedilla', '', 'hungarumlaut', 'ogonek', 'caron', 'emdash',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'AE', '', 'ordfeminine', '', '', '', '', 'Lslash', 'Oslash', 'OE',
    'ordmasculine', '', '', '', '', '', 'ae', '', '', '', 'dotlessi', '', '',
    'lslash', 'oslash', 'oe', 'germandbls'],
  WinAnsiEncoding: [
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
    'ampersand', 'quotesingle', 'parenleft', 'parenright', 'asterisk',
    'plus', 'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two',
    'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon',
    'semicolon', 'less', 'equal', 'greater', 'question', 'at', 'A', 'B',
    'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft',
    'backslash', 'bracketright', 'asciicircum', 'underscore', 'grave', 'a',
    'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
    'bar', 'braceright', 'asciitilde', 'bullet', 'Euro', 'bullet',
    'quotesinglbase', 'florin', 'quotedblbase', 'ellipsis', 'dagger',
    'daggerdbl', 'circumflex', 'perthousand', 'Scaron', 'guilsinglleft',
    'OE', 'bullet', 'Zcaron', 'bullet', 'bullet', 'quoteleft', 'quoteright',
    'quotedblleft', 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde',
    'trademark', 'scaron', 'guilsinglright', 'oe', 'bullet', 'zcaron',
    'Ydieresis', '', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
    'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
    'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
    'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
    'paragraph', 'periodcentered', 'cedilla', 'onesuperior', 'ordmasculine',
    'guillemotright', 'onequarter', 'onehalf', 'threequarters',
    'questiondown', 'Agrave', 'Aacute', 'Acircumflex', 'Atilde', 'Adieresis',
    'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute', 'Ecircumflex',
    'Edieresis', 'Igrave', 'Iacute', 'Icircumflex', 'Idieresis', 'Eth',
    'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex', 'Otilde', 'Odieresis',
    'multiply', 'Oslash', 'Ugrave', 'Uacute', 'Ucircumflex', 'Udieresis',
    'Yacute', 'Thorn', 'germandbls', 'agrave', 'aacute', 'acircumflex',
    'atilde', 'adieresis', 'aring', 'ae', 'ccedilla', 'egrave', 'eacute',
    'ecircumflex', 'edieresis', 'igrave', 'iacute', 'icircumflex',
    'idieresis', 'eth', 'ntilde', 'ograve', 'oacute', 'ocircumflex',
    'otilde', 'odieresis', 'divide', 'oslash', 'ugrave', 'uacute',
    'ucircumflex', 'udieresis', 'yacute', 'thorn', 'ydieresis'],
  SymbolSetEncoding: [
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'space', 'exclam', 'universal', 'numbersign', 'existential', 'percent',
    'ampersand', 'suchthat', 'parenleft', 'parenright', 'asteriskmath',
    'plus', 'comma', 'minus', 'period', 'slash', 'zero', 'one', 'two',
    'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon',
    'semicolon', 'less', 'equal', 'greater', 'question', 'congruent',
    'Alpha', 'Beta', 'Chi', 'Delta', 'Epsilon', 'Phi', 'Gamma', 'Eta',
    'Iota', 'theta1', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Omicron', 'Pi',
    'Theta', 'Rho', 'Sigma', 'Tau', 'Upsilon', 'sigma1', 'Omega', 'Xi',
    'Psi', 'Zeta', 'bracketleft', 'therefore', 'bracketright',
    'perpendicular', 'underscore', 'radicalex', 'alpha', 'beta', 'chi',
    'delta', 'epsilon', 'phi', 'gamma', 'eta', 'iota', 'phi1', 'kappa',
    'lambda', 'mu', 'nu', 'omicron', 'pi', 'theta', 'rho', 'sigma', 'tau',
    'upsilon', 'omega1', 'omega', 'xi', 'psi', 'zeta', 'braceleft', 'bar',
    'braceright', 'similar',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '',
    'Euro', 'Upsilon1', 'minute', 'lessequal', 'fraction', 'infinity',
    'florin', 'club', 'diamond', 'heart', 'spade', 'arrowboth', 'arrowleft',
    'arrowup', 'arrowright', 'arrowdown', 'degree', 'plusminus', 'second',
    'greaterequal', 'multiply', 'proportional', 'partialdiff', 'bullet',
    'divide', 'notequal', 'equivalence', 'approxequal', 'ellipsis',
    'arrowvertex', 'arrowhorizex', 'carriagereturn', 'aleph', 'Ifraktur',
    'Rfraktur', 'weierstrass', 'circlemultiply', 'circleplus', 'emptyset',
    'intersection', 'union', 'propersuperset', 'reflexsuperset', 'notsubset',
    'propersubset', 'reflexsubset', 'element', 'notelement', 'angle',
    'gradient', 'registerserif', 'copyrightserif', 'trademarkserif',
    'product', 'radical', 'dotmath', 'logicalnot', 'logicaland', 'logicalor',
    'arrowdblboth', 'arrowdblleft', 'arrowdblup', 'arrowdblright',
    'arrowdbldown', 'lozenge', 'angleleft', 'registersans', 'copyrightsans',
    'trademarksans', 'summation', 'parenlefttp', 'parenleftex',
    'parenleftbt', 'bracketlefttp', 'bracketleftex', 'bracketleftbt',
    'bracelefttp', 'braceleftmid', 'braceleftbt', 'braceex', '',
    'angleright', 'integral', 'integraltp', 'integralex', 'integralbt',
    'parenrighttp', 'parenrightex', 'parenrightbt', 'bracketrighttp',
    'bracketrightex', 'bracketrightbt', 'bracerighttp', 'bracerightmid',
    'bracerightbt'],
  zapfDingbatsEncoding: [
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    'space', 'a1', 'a2', 'a202', 'a3', 'a4', 'a5', 'a119', 'a118', 'a117',
    'a11', 'a12', 'a13', 'a14', 'a15', 'a16', 'a105', 'a17', 'a18', 'a19',
    'a20', 'a21', 'a22', 'a23', 'a24', 'a25', 'a26', 'a27', 'a28', 'a6',
    'a7', 'a8', 'a9', 'a10', 'a29', 'a30', 'a31', 'a32', 'a33', 'a34',
    'a35', 'a36', 'a37', 'a38', 'a39', 'a40', 'a41', 'a42', 'a43', 'a44',
    'a45', 'a46', 'a47', 'a48', 'a49', 'a50', 'a51', 'a52', 'a53', 'a54',
    'a55', 'a56', 'a57', 'a58', 'a59', 'a60', 'a61', 'a62', 'a63', 'a64',
    'a65', 'a66', 'a67', 'a68', 'a69', 'a70', 'a71', 'a72', 'a73', 'a74',
    'a203', 'a75', 'a204', 'a76', 'a77', 'a78', 'a79', 'a81', 'a82', 'a83',
    'a84', 'a97', 'a98', 'a99', 'a100',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '',
    '', '',
    'a101', 'a102', 'a103', 'a104', 'a106', 'a107', 'a108', 'a112', 'a111',
    'a110', 'a109', 'a120', 'a121', 'a122', 'a123', 'a124', 'a125', 'a126',
    'a127', 'a128', 'a129', 'a130', 'a131', 'a132', 'a133', 'a134', 'a135',
    'a136', 'a137', 'a138', 'a139', 'a140', 'a141', 'a142', 'a143', 'a144',
    'a145', 'a146', 'a147', 'a148', 'a149', 'a150', 'a151', 'a152', 'a153',
    'a154', 'a155', 'a156', 'a157', 'a158', 'a159', 'a160', 'a161', 'a163',
    'a164', 'a196', 'a165', 'a192', 'a166', 'a167', 'a168', 'a169', 'a170',
    'a171', 'a172', 'a173', 'a162', 'a174', 'a175', 'a176', 'a177', 'a178',
    'a179', 'a193', 'a180', 'a199', 'a181', 'a200', 'a182', '', 'a201',
    'a183', 'a184', 'a197', 'a185', 'a194', 'a198', 'a186', 'a195', 'a187',
    'a188', 'a189', 'a190', 'a191']
};

/**
 * Hold a map of decoded fonts and of the standard fourteen Type1
 * fonts and their acronyms.
 */
var stdFontMap = {
  'ArialNarrow': 'Helvetica',
  'ArialNarrow-Bold': 'Helvetica-Bold',
  'ArialNarrow-BoldItalic': 'Helvetica-BoldOblique',
  'ArialNarrow-Italic': 'Helvetica-Oblique',
  'ArialBlack': 'Helvetica',
  'ArialBlack-Bold': 'Helvetica-Bold',
  'ArialBlack-BoldItalic': 'Helvetica-BoldOblique',
  'ArialBlack-Italic': 'Helvetica-Oblique',
  'Arial': 'Helvetica',
  'Arial-Bold': 'Helvetica-Bold',
  'Arial-BoldItalic': 'Helvetica-BoldOblique',
  'Arial-Italic': 'Helvetica-Oblique',
  'Arial-BoldItalicMT': 'Helvetica-BoldOblique',
  'Arial-BoldMT': 'Helvetica-Bold',
  'Arial-ItalicMT': 'Helvetica-Oblique',
  'ArialMT': 'Helvetica',
  'Courier-Bold': 'Courier-Bold',
  'Courier-BoldItalic': 'Courier-BoldOblique',
  'Courier-Italic': 'Courier-Oblique',
  'CourierNew': 'Courier',
  'CourierNew-Bold': 'Courier-Bold',
  'CourierNew-BoldItalic': 'Courier-BoldOblique',
  'CourierNew-Italic': 'Courier-Oblique',
  'CourierNewPS-BoldItalicMT': 'Courier-BoldOblique',
  'CourierNewPS-BoldMT': 'Courier-Bold',
  'CourierNewPS-ItalicMT': 'Courier-Oblique',
  'CourierNewPSMT': 'Courier',
  'Helvetica-Bold': 'Helvetica-Bold',
  'Helvetica-BoldItalic': 'Helvetica-BoldOblique',
  'Helvetica-Italic': 'Helvetica-Oblique',
  'Symbol-Bold': 'Symbol',
  'Symbol-BoldItalic': 'Symbol',
  'Symbol-Italic': 'Symbol',
  'TimesNewRoman': 'Times-Roman',
  'TimesNewRoman-Bold': 'Times-Bold',
  'TimesNewRoman-BoldItalic': 'Times-BoldItalic',
  'TimesNewRoman-Italic': 'Times-Italic',
  'TimesNewRomanPS': 'Times-Roman',
  'TimesNewRomanPS-Bold': 'Times-Bold',
  'TimesNewRomanPS-BoldItalic': 'Times-BoldItalic',
  'TimesNewRomanPS-BoldItalicMT': 'Times-BoldItalic',
  'TimesNewRomanPS-BoldMT': 'Times-Bold',
  'TimesNewRomanPS-Italic': 'Times-Italic',
  'TimesNewRomanPS-ItalicMT': 'Times-Italic',
  'TimesNewRomanPSMT': 'Times-Roman',
  'TimesNewRomanPSMT-Bold': 'Times-Bold',
  'TimesNewRomanPSMT-BoldItalic': 'Times-BoldItalic',
  'TimesNewRomanPSMT-Italic': 'Times-Italic'
};

/**
 * Holds the map of the non-standard fonts that might be included as a standard
 * fonts without glyph data.
 */
var nonStdFontMap = {
  'ComicSansMS': 'Comic Sans MS',
  'ComicSansMS-Bold': 'Comic Sans MS-Bold',
  'ComicSansMS-BoldItalic': 'Comic Sans MS-BoldItalic',
  'ComicSansMS-Italic': 'Comic Sans MS-Italic',
  'LucidaConsole': 'Courier',
  'LucidaConsole-Bold': 'Courier-Bold',
  'LucidaConsole-BoldItalic': 'Courier-BoldOblique',
  'LucidaConsole-Italic': 'Courier-Oblique',
  'MS-Gothic': 'MS Gothic',
  'MS-Gothic-Bold': 'MS Gothic-Bold',
  'MS-Gothic-BoldItalic': 'MS Gothic-BoldItalic',
  'MS-Gothic-Italic': 'MS Gothic-Italic',
  'MS-Mincho': 'MS Mincho',
  'MS-Mincho-Bold': 'MS Mincho-Bold',
  'MS-Mincho-BoldItalic': 'MS Mincho-BoldItalic',
  'MS-Mincho-Italic': 'MS Mincho-Italic',
  'MS-PGothic': 'MS PGothic',
  'MS-PGothic-Bold': 'MS PGothic-Bold',
  'MS-PGothic-BoldItalic': 'MS PGothic-BoldItalic',
  'MS-PGothic-Italic': 'MS PGothic-Italic',
  'MS-PMincho': 'MS PMincho',
  'MS-PMincho-Bold': 'MS PMincho-Bold',
  'MS-PMincho-BoldItalic': 'MS PMincho-BoldItalic',
  'MS-PMincho-Italic': 'MS PMincho-Italic',
};

/**
 * Set (name -> true) of font family names treated as serif fonts.
 * Keys containing spaces (e.g. 'Book Antiqua', 'Ionic No. 5') must stay on a
 * single line: a string literal broken across lines is a syntax error.
 */
var serifFonts = {
  'Adobe Jenson': true,
  'Adobe Text': true,
  'Albertus': true,
  'Aldus': true,
  'Alexandria': true,
  'Algerian': true,
  'American Typewriter': true,
  'Antiqua': true,
  'Apex': true,
  'Arno': true,
  'Aster': true,
  'Aurora': true,
  'Baskerville': true,
  'Bell': true,
  'Bembo': true,
  'Bembo Schoolbook': true,
  'Benguiat': true,
  'Berkeley Old Style': true,
  'Bernhard Modern': true,
  'Berthold City': true,
  'Bodoni': true,
  'Bauer Bodoni': true,
  'Book Antiqua': true,
  'Bookman': true,
  'Bordeaux Roman': true,
  'Californian FB': true,
  'Calisto': true,
  'Calvert': true,
  'Capitals': true,
  'Cambria': true,
  'Cartier': true,
  'Caslon': true,
  'Catull': true,
  'Centaur': true,
  'Century Old Style': true,
  'Century Schoolbook': true,
  'Chaparral': true,
  'Charis SIL': true,
  'Cheltenham': true,
  'Cholla Slab': true,
  'Clarendon': true,
  'Clearface': true,
  'Cochin': true,
  'Colonna': true,
  'Computer Modern': true,
  'Concrete Roman': true,
  'Constantia': true,
  'Cooper Black': true,
  'Corona': true,
  'Ecotype': true,
  'Egyptienne': true,
  'Elephant': true,
  'Excelsior': true,
  'Fairfield': true,
  'FF Scala': true,
  'Folkard': true,
  'Footlight': true,
  'FreeSerif': true,
  'Friz Quadrata': true,
  'Garamond': true,
  'Gentium': true,
  'Georgia': true,
  'Gloucester': true,
  'Goudy Old Style': true,
  'Goudy Schoolbook': true,
  'Goudy Pro Font': true,
  'Granjon': true,
  'Guardian Egyptian': true,
  'Heather': true,
  'Hercules': true,
  'High Tower Text': true,
  'Hiroshige': true,
  'Hoefler Text': true,
  'Humana Serif': true,
  'Imprint': true,
  'Ionic No. 5': true,
  'Janson': true,
  'Joanna': true,
  'Korinna': true,
  'Lexicon': true,
  'Liberation Serif': true,
  'Linux Libertine': true,
  'Literaturnaya': true,
  'Lucida': true,
  'Lucida Bright': true,
  'Melior': true,
  'Memphis': true,
  'Miller': true,
  'Minion': true,
  'Modern': true,
  'Mona Lisa': true,
  'Mrs Eaves': true,
  'MS Serif': true,
  'Museo Slab': true,
  'New York': true,
  'Nimbus Roman': true,
  'NPS Rawlinson Roadway': true,
  'Palatino': true,
  'Perpetua': true,
  'Plantin': true,
  'Plantin Schoolbook': true,
  'Playbill': true,
  'Poor Richard': true,
  'Rawlinson Roadway': true,
  'Renault': true,
  'Requiem': true,
  'Rockwell': true,
  'Roman': true,
  'Rotis Serif': true,
  'Sabon': true,
  'Scala': true,
  'Seagull': true,
  'Sistina': true,
  'Souvenir': true,
  'STIX': true,
  'Stone Informal': true,
  'Stone Serif': true,
  'Sylfaen': true,
  'Times': true,
  'Trajan': true,
  'Trinité': true,
  'Trump Mediaeval': true,
  'Utopia': true,
  'Vale Type': true,
  'Bitstream Vera': true,
  'Vera Serif': true,
  'Versailles': true,
  'Wanted': true,
  'Weiss': true,
  'Wide Latin': true,
  'Windsor': true,
  'XITS': true
};

/** Set (name -> true) of font names treated as symbol fonts. */
var symbolsFonts = {
  'Dingbats': true,
  'Symbol': true,
  'ZapfDingbats': true
};

/**
 * Maps a predefined CMap name to the function that converts a string in that
 * CMap's byte encoding to Unicode. The converters are function declarations
 * defined elsewhere in this file, so they are already usable here.
 */
var CMapConverterList = {
  'H': jis7ToUnicode,
  'V': jis7ToUnicode,
  'EUC-H': eucjpToUnicode,
  'EUC-V': eucjpToUnicode,
  '83pv-RKSJ-H': sjis83pvToUnicode,
  '90pv-RKSJ-H': sjis90pvToUnicode,
  '90ms-RKSJ-H': sjisToUnicode,
  '90ms-RKSJ-V': sjisToUnicode,
  '90msp-RKSJ-H': sjisToUnicode,
  '90msp-RKSJ-V': sjisToUnicode,
  'GBK-EUC-H': gbkToUnicode,
  'GBKp-EUC-H': gbkToUnicode,
  'B5pc-H': big5ToUnicode,
  'ETenms-B5-H': big5ToUnicode,
  'ETenms-B5-V': big5ToUnicode,
};

// CMaps using Hankaku (Halfwidth) Latin glyphs instead of proportional one.
// We need to distinguish them to get correct widths from CIDFont dicts.
var HalfwidthCMaps = {
  'H': true,
  'V': true,
  'EUC-H': true,
  'EUC-V': true,
  '90ms-RKSJ-H': true,
  '90ms-RKSJ-V': true,
  'UniJIS-UCS2-HW-H': true,
  'UniJIS-UCS2-HW-V': true
};

/**
 * decodeBytes(bytes, encoding, fatal) decodes a byte array in the named
 * encoding into a string. It is bound below to whichever decoding API the
 * host exposes and stays undefined when neither API exists.
 */
var decodeBytes;
if (typeof TextDecoder !== 'undefined') {
  // The encodings supported by TextDecoder can be found at:
  // http://encoding.spec.whatwg.org/#concept-encoding-get
  decodeBytes = function(bytes, encoding, fatal) {
    return new TextDecoder(encoding, {fatal: !!fatal}).decode(bytes);
  };
} else if (typeof FileReaderSync !== 'undefined') {
  // This fallback takes no `fatal` argument, so strict decoding requests
  // behave exactly like lenient ones here.
  decodeBytes = function(bytes, encoding) {
    return new FileReaderSync().readAsText(new Blob([bytes]), encoding);
  };
} else {
  // Clear the list so that decodeBytes will never be called.
  CMapConverterList = {};
}

/**
 * Sets the high bit of every byte of `str` and decodes the result as EUC-JP.
 * @param {string} str - input handed to stringToBytes (presumably one byte
 *   per character; stringToBytes is defined outside this part of the file).
 * @returns {string} the decoded text.
 */
function jis7ToUnicode(str) {
  var bytes = stringToBytes(str);
  var length = bytes.length;
  for (var i = 0; i < length; ++i) {
    bytes[i] |= 0x80;
  }
  return decodeBytes(bytes, 'euc-jp');
}

/** Decodes the bytes of `str` as EUC-JP. */
function eucjpToUnicode(str) {
  return decodeBytes(stringToBytes(str), 'euc-jp');
}

/** Decodes the bytes of `str` as Shift_JIS. */
function sjisToUnicode(str) {
  return decodeBytes(stringToBytes(str), 'shift_jis');
}

/**
 * Shared body of the 83pv/90pv converters: tries a strict Shift_JIS decode
 * first and, if that throws, reports the problem through TODO() and decodes
 * again without error checking.
 * @param {string} str - input handed to stringToBytes.
 * @param {string} variant - CMap family name used in the TODO message
 *   ('83pv' or '90pv').
 * @returns {string} the decoded text.
 */
function decodeShiftJisVariant(str, variant) {
  var bytes = stringToBytes(str);
  try {
    return decodeBytes(bytes, 'shift_jis', true);
  } catch (e) {
    TODO('Unsupported ' + variant + ' character found');
    // Just retry without checking errors for now.
    return decodeBytes(bytes, 'shift_jis');
  }
}

/** Decodes the bytes of `str` for the 83pv-RKSJ CMap family. */
function sjis83pvToUnicode(str) {
  // TODO: 83pv has incompatible mappings in ed40..ee9c range.
  return decodeShiftJisVariant(str, '83pv');
}

/** Decodes the bytes of `str` for the 90pv-RKSJ CMap family. */
function sjis90pvToUnicode(str) {
  // TODO: 90pv has incompatible mappings in 8740..879c and eb41..ee9c.
  return decodeShiftJisVariant(str, '90pv');
}

/** Decodes the bytes of `str` as GBK. */
function gbkToUnicode(str) {
  return decodeBytes(stringToBytes(str), 'gbk');
}

/** Decodes the bytes of `str` as Big5. */
function big5ToUnicode(str) {
  return decodeBytes(stringToBytes(str), 'big5');
}

// Some characters, e.g.
copyrightserif, mapped to the private use area and // might not be displayed using standard fonts. Mapping/hacking well-known chars // to the similar equivalents in the normal characters range. function mapPrivateUseChars(code) { switch (code) { case 0xF8E9: // copyrightsans case 0xF6D9: // copyrightserif return 0x00A9; // copyright default: return code; } } var UnicodeRanges = [ { 'begin': 0x0000, 'end': 0x007F }, // Basic Latin { 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement { 'begin': 0x0100, 'end': 0x017F }, // Latin Extended-A { 'begin': 0x0180, 'end': 0x024F }, // Latin Extended-B { 'begin': 0x0250, 'end': 0x02AF }, // IPA Extensions { 'begin': 0x02B0, 'end': 0x02FF }, // Spacing Modifier Letters { 'begin': 0x0300, 'end': 0x036F }, // Combining Diacritical Marks { 'begin': 0x0370, 'end': 0x03FF }, // Greek and Coptic { 'begin': 0x2C80, 'end': 0x2CFF }, // Coptic { 'begin': 0x0400, 'end': 0x04FF }, // Cyrillic { 'begin': 0x0530, 'end': 0x058F }, // Armenian { 'begin': 0x0590, 'end': 0x05FF }, // Hebrew { 'begin': 0xA500, 'end': 0xA63F }, // Vai { 'begin': 0x0600, 'end': 0x06FF }, // Arabic { 'begin': 0x07C0, 'end': 0x07FF }, // NKo { 'begin': 0x0900, 'end': 0x097F }, // Devanagari { 'begin': 0x0980, 'end': 0x09FF }, // Bengali { 'begin': 0x0A00, 'end': 0x0A7F }, // Gurmukhi { 'begin': 0x0A80, 'end': 0x0AFF }, // Gujarati { 'begin': 0x0B00, 'end': 0x0B7F }, // Oriya { 'begin': 0x0B80, 'end': 0x0BFF }, // Tamil { 'begin': 0x0C00, 'end': 0x0C7F }, // Telugu { 'begin': 0x0C80, 'end': 0x0CFF }, // Kannada { 'begin': 0x0D00, 'end': 0x0D7F }, // Malayalam { 'begin': 0x0E00, 'end': 0x0E7F }, // Thai { 'begin': 0x0E80, 'end': 0x0EFF }, // Lao { 'begin': 0x10A0, 'end': 0x10FF }, // Georgian { 'begin': 0x1B00, 'end': 0x1B7F }, // Balinese { 'begin': 0x1100, 'end': 0x11FF }, // Hangul Jamo { 'begin': 0x1E00, 'end': 0x1EFF }, // Latin Extended Additional { 'begin': 0x1F00, 'end': 0x1FFF }, // Greek Extended { 'begin': 0x2000, 'end': 0x206F }, // General 
Punctuation { 'begin': 0x2070, 'end': 0x209F }, // Superscripts And Subscripts { 'begin': 0x20A0, 'end': 0x20CF }, // Currency Symbol { 'begin': 0x20D0, 'end': 0x20FF }, // Combining Diacritical Marks For Symbols { 'begin': 0x2100, 'end': 0x214F }, // Letterlike Symbols { 'begin': 0x2150, 'end': 0x218F }, // Number Forms { 'begin': 0x2190, 'end': 0x21FF }, // Arrows { 'begin': 0x2200, 'end': 0x22FF }, // Mathematical Operators { 'begin': 0x2300, 'end': 0x23FF }, // Miscellaneous Technical { 'begin': 0x2400, 'end': 0x243F }, // Control Pictures { 'begin': 0x2440, 'end': 0x245F }, // Optical Character Recognition { 'begin': 0x2460, 'end': 0x24FF }, // Enclosed Alphanumerics { 'begin': 0x2500, 'end': 0x257F }, // Box Drawing { 'begin': 0x2580, 'end': 0x259F }, // Block Elements { 'begin': 0x25A0, 'end': 0x25FF }, // Geometric Shapes { 'begin': 0x2600, 'end': 0x26FF }, // Miscellaneous Symbols { 'begin': 0x2700, 'end': 0x27BF }, // Dingbats { 'begin': 0x3000, 'end': 0x303F }, // CJK Symbols And Punctuation { 'begin': 0x3040, 'end': 0x309F }, // Hiragana { 'begin': 0x30A0, 'end': 0x30FF }, // Katakana { 'begin': 0x3100, 'end': 0x312F }, // Bopomofo { 'begin': 0x3130, 'end': 0x318F }, // Hangul Compatibility Jamo { 'begin': 0xA840, 'end': 0xA87F }, // Phags-pa { 'begin': 0x3200, 'end': 0x32FF }, // Enclosed CJK Letters And Months { 'begin': 0x3300, 'end': 0x33FF }, // CJK Compatibility { 'begin': 0xAC00, 'end': 0xD7AF }, // Hangul Syllables { 'begin': 0xD800, 'end': 0xDFFF }, // Non-Plane 0 * { 'begin': 0x10900, 'end': 0x1091F }, // Phoenicia { 'begin': 0x4E00, 'end': 0x9FFF }, // CJK Unified Ideographs { 'begin': 0xE000, 'end': 0xF8FF }, // Private Use Area (plane 0) { 'begin': 0x31C0, 'end': 0x31EF }, // CJK Strokes { 'begin': 0xFB00, 'end': 0xFB4F }, // Alphabetic Presentation Forms { 'begin': 0xFB50, 'end': 0xFDFF }, // Arabic Presentation Forms-A { 'begin': 0xFE20, 'end': 0xFE2F }, // Combining Half Marks { 'begin': 0xFE10, 'end': 0xFE1F }, // Vertical Forms { 
'begin': 0xFE50, 'end': 0xFE6F }, // Small Form Variants { 'begin': 0xFE70, 'end': 0xFEFF }, // Arabic Presentation Forms-B { 'begin': 0xFF00, 'end': 0xFFEF }, // Halfwidth And Fullwidth Forms { 'begin': 0xFFF0, 'end': 0xFFFF }, // Specials { 'begin': 0x0F00, 'end': 0x0FFF }, // Tibetan { 'begin': 0x0700, 'end': 0x074F }, // Syriac { 'begin': 0x0780, 'end': 0x07BF }, // Thaana { 'begin': 0x0D80, 'end': 0x0DFF }, // Sinhala { 'begin': 0x1000, 'end': 0x109F }, // Myanmar { 'begin': 0x1200, 'end': 0x137F }, // Ethiopic { 'begin': 0x13A0, 'end': 0x13FF }, // Cherokee { 'begin': 0x1400, 'end': 0x167F }, // Unified Canadian Aboriginal Syllabics { 'begin': 0x1680, 'end': 0x169F }, // Ogham { 'begin': 0x16A0, 'end': 0x16FF }, // Runic { 'begin': 0x1780, 'end': 0x17FF }, // Khmer { 'begin': 0x1800, 'end': 0x18AF }, // Mongolian { 'begin': 0x2800, 'end': 0x28FF }, // Braille Patterns { 'begin': 0xA000, 'end': 0xA48F }, // Yi Syllables { 'begin': 0x1700, 'end': 0x171F }, // Tagalog { 'begin': 0x10300, 'end': 0x1032F }, // Old Italic { 'begin': 0x10330, 'end': 0x1034F }, // Gothic { 'begin': 0x10400, 'end': 0x1044F }, // Deseret { 'begin': 0x1D000, 'end': 0x1D0FF }, // Byzantine Musical Symbols { 'begin': 0x1D400, 'end': 0x1D7FF }, // Mathematical Alphanumeric Symbols { 'begin': 0xFF000, 'end': 0xFFFFD }, // Private Use (plane 15) { 'begin': 0xFE00, 'end': 0xFE0F }, // Variation Selectors { 'begin': 0xE0000, 'end': 0xE007F }, // Tags { 'begin': 0x1900, 'end': 0x194F }, // Limbu { 'begin': 0x1950, 'end': 0x197F }, // Tai Le { 'begin': 0x1980, 'end': 0x19DF }, // New Tai Lue { 'begin': 0x1A00, 'end': 0x1A1F }, // Buginese { 'begin': 0x2C00, 'end': 0x2C5F }, // Glagolitic { 'begin': 0x2D30, 'end': 0x2D7F }, // Tifinagh { 'begin': 0x4DC0, 'end': 0x4DFF }, // Yijing Hexagram Symbols { 'begin': 0xA800, 'end': 0xA82F }, // Syloti Nagri { 'begin': 0x10000, 'end': 0x1007F }, // Linear B Syllabary { 'begin': 0x10140, 'end': 0x1018F }, // Ancient Greek Numbers { 'begin': 0x10380, 'end': 
0x1039F }, // Ugaritic { 'begin': 0x103A0, 'end': 0x103DF }, // Old Persian { 'begin': 0x10450, 'end': 0x1047F }, // Shavian { 'begin': 0x10480, 'end': 0x104AF }, // Osmanya { 'begin': 0x10800, 'end': 0x1083F }, // Cypriot Syllabary { 'begin': 0x10A00, 'end': 0x10A5F }, // Kharoshthi { 'begin': 0x1D300, 'end': 0x1D35F }, // Tai Xuan Jing Symbols { 'begin': 0x12000, 'end': 0x123FF }, // Cuneiform { 'begin': 0x1D360, 'end': 0x1D37F }, // Counting Rod Numerals { 'begin': 0x1B80, 'end': 0x1BBF }, // Sundanese { 'begin': 0x1C00, 'end': 0x1C4F }, // Lepcha { 'begin': 0x1C50, 'end': 0x1C7F }, // Ol Chiki { 'begin': 0xA880, 'end': 0xA8DF }, // Saurashtra { 'begin': 0xA900, 'end': 0xA92F }, // Kayah Li { 'begin': 0xA930, 'end': 0xA95F }, // Rejang { 'begin': 0xAA00, 'end': 0xAA5F }, // Cham { 'begin': 0x10190, 'end': 0x101CF }, // Ancient Symbols { 'begin': 0x101D0, 'end': 0x101FF }, // Phaistos Disc { 'begin': 0x102A0, 'end': 0x102DF }, // Carian { 'begin': 0x1F030, 'end': 0x1F09F } // Domino Tiles ]; var MacStandardGlyphOrdering = [ '.notdef', '.null', 'nonmarkingreturn', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent', 'ampersand', 'quotesingle', 'parenleft', 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal', 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore', 'grave', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', 'asciitilde', 'Adieresis', 'Aring', 'Ccedilla', 'Eacute', 'Ntilde', 'Odieresis', 'Udieresis', 'aacute', 'agrave', 'acircumflex', 'adieresis', 'atilde', 'aring', 'ccedilla', 'eacute', 'egrave', 'ecircumflex', 
'edieresis', 'iacute', 'igrave', 'icircumflex', 'idieresis', 'ntilde', 'oacute', 'ograve', 'ocircumflex', 'odieresis', 'otilde', 'uacute', 'ugrave', 'ucircumflex', 'udieresis', 'dagger', 'degree', 'cent', 'sterling', 'section', 'bullet', 'paragraph', 'germandbls', 'registered', 'copyright', 'trademark', 'acute', 'dieresis', 'notequal', 'AE', 'Oslash', 'infinity', 'plusminus', 'lessequal', 'greaterequal', 'yen', 'mu', 'partialdiff', 'summation', 'product', 'pi', 'integral', 'ordfeminine', 'ordmasculine', 'Omega', 'ae', 'oslash', 'questiondown', 'exclamdown', 'logicalnot', 'radical', 'florin', 'approxequal', 'Delta', 'guillemotleft', 'guillemotright', 'ellipsis', 'nonbreakingspace', 'Agrave', 'Atilde', 'Otilde', 'OE', 'oe', 'endash', 'emdash', 'quotedblleft', 'quotedblright', 'quoteleft', 'quoteright', 'divide', 'lozenge', 'ydieresis', 'Ydieresis', 'fraction', 'currency', 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'daggerdbl', 'periodcentered', 'quotesinglbase', 'quotedblbase', 'perthousand', 'Acircumflex', 'Ecircumflex', 'Aacute', 'Edieresis', 'Egrave', 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Oacute', 'Ocircumflex', 'apple', 'Ograve', 'Uacute', 'Ucircumflex', 'Ugrave', 'dotlessi', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent', 'ring', 'cedilla', 'hungarumlaut', 'ogonek', 'caron', 'Lslash', 'lslash', 'Scaron', 'scaron', 'Zcaron', 'zcaron', 'brokenbar', 'Eth', 'eth', 'Yacute', 'yacute', 'Thorn', 'thorn', 'minus', 'multiply', 'onesuperior', 'twosuperior', 'threesuperior', 'onehalf', 'onequarter', 'threequarters', 'franc', 'Gbreve', 'gbreve', 'Idotaccent', 'Scedilla', 'scedilla', 'Cacute', 'cacute', 'Ccaron', 'ccaron', 'dcroat']; function getUnicodeRangeFor(value) { for (var i = 0, ii = UnicodeRanges.length; i < ii; i++) { var range = UnicodeRanges[i]; if (value >= range.begin && value < range.end) return i; } return -1; } function isRTLRangeFor(value) { var range = UnicodeRanges[13]; if (value >= range.begin && value < range.end) return true; range 
= UnicodeRanges[11]; if (value >= range.begin && value < range.end) return true; return false; } function isSpecialUnicode(unicode) { return (unicode <= 0x1F || (unicode >= 127 && unicode < GLYPH_AREA_SIZE)) || (unicode >= CMAP_GLYPH_OFFSET && unicode < CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE); } // The normalization table is obtained by filtering the Unicode characters // database with <compat> entries. var NormalizedUnicodes = { '\u00A8': '\u0020\u0308', '\u00AF': '\u0020\u0304', '\u00B4': '\u0020\u0301', '\u00B5': '\u03BC', '\u00B8': '\u0020\u0327', '\u0132': '\u0049\u004A', '\u0133': '\u0069\u006A', '\u013F': '\u004C\u00B7', '\u0140': '\u006C\u00B7', '\u0149': '\u02BC\u006E', '\u017F': '\u0073', '\u01C4': '\u0044\u017D', '\u01C5': '\u0044\u017E', '\u01C6': '\u0064\u017E', '\u01C7': '\u004C\u004A', '\u01C8': '\u004C\u006A', '\u01C9': '\u006C\u006A', '\u01CA': '\u004E\u004A', '\u01CB': '\u004E\u006A', '\u01CC': '\u006E\u006A', '\u01F1': '\u0044\u005A', '\u01F2': '\u0044\u007A', '\u01F3': '\u0064\u007A', '\u02D8': '\u0020\u0306', '\u02D9': '\u0020\u0307', '\u02DA': '\u0020\u030A', '\u02DB': '\u0020\u0328', '\u02DC': '\u0020\u0303', '\u02DD': '\u0020\u030B', '\u037A': '\u0020\u0345', '\u0384': '\u0020\u0301', '\u03D0': '\u03B2', '\u03D1': '\u03B8', '\u03D2': '\u03A5', '\u03D5': '\u03C6', '\u03D6': '\u03C0', '\u03F0': '\u03BA', '\u03F1': '\u03C1', '\u03F2': '\u03C2', '\u03F4': '\u0398', '\u03F5': '\u03B5', '\u03F9': '\u03A3', '\u0587': '\u0565\u0582', '\u0675': '\u0627\u0674', '\u0676': '\u0648\u0674', '\u0677': '\u06C7\u0674', '\u0678': '\u064A\u0674', '\u0E33': '\u0E4D\u0E32', '\u0EB3': '\u0ECD\u0EB2', '\u0EDC': '\u0EAB\u0E99', '\u0EDD': '\u0EAB\u0EA1', '\u0F77': '\u0FB2\u0F81', '\u0F79': '\u0FB3\u0F81', '\u1E9A': '\u0061\u02BE', '\u1FBD': '\u0020\u0313', '\u1FBF': '\u0020\u0313', '\u1FC0': '\u0020\u0342', '\u1FFE': '\u0020\u0314', '\u2002': '\u0020', '\u2003': '\u0020', '\u2004': '\u0020', '\u2005': '\u0020', '\u2006': '\u0020', '\u2008': '\u0020', '\u2009': 
'\u0020', '\u200A': '\u0020', '\u2017': '\u0020\u0333', '\u2024': '\u002E', '\u2025': '\u002E\u002E', '\u2026': '\u002E\u002E\u002E', '\u2033': '\u2032\u2032', '\u2034': '\u2032\u2032\u2032', '\u2036': '\u2035\u2035', '\u2037': '\u2035\u2035\u2035', '\u203C': '\u0021\u0021', '\u203E': '\u0020\u0305', '\u2047': '\u003F\u003F', '\u2048': '\u003F\u0021', '\u2049': '\u0021\u003F', '\u2057': '\u2032\u2032\u2032\u2032', '\u205F': '\u0020', '\u20A8': '\u0052\u0073', '\u2100': '\u0061\u002F\u0063', '\u2101': '\u0061\u002F\u0073', '\u2103': '\u00B0\u0043', '\u2105': '\u0063\u002F\u006F', '\u2106': '\u0063\u002F\u0075', '\u2107': '\u0190', '\u2109': '\u00B0\u0046', '\u2116': '\u004E\u006F', '\u2121': '\u0054\u0045\u004C', '\u2135': '\u05D0', '\u2136': '\u05D1', '\u2137': '\u05D2', '\u2138': '\u05D3', '\u213B': '\u0046\u0041\u0058', '\u2160': '\u0049', '\u2161': '\u0049\u0049', '\u2162': '\u0049\u0049\u0049', '\u2163': '\u0049\u0056', '\u2164': '\u0056', '\u2165': '\u0056\u0049', '\u2166': '\u0056\u0049\u0049', '\u2167': '\u0056\u0049\u0049\u0049', '\u2168': '\u0049\u0058', '\u2169': '\u0058', '\u216A': '\u0058\u0049', '\u216B': '\u0058\u0049\u0049', '\u216C': '\u004C', '\u216D': '\u0043', '\u216E': '\u0044', '\u216F': '\u004D', '\u2170': '\u0069', '\u2171': '\u0069\u0069', '\u2172': '\u0069\u0069\u0069', '\u2173': '\u0069\u0076', '\u2174': '\u0076', '\u2175': '\u0076\u0069', '\u2176': '\u0076\u0069\u0069', '\u2177': '\u0076\u0069\u0069\u0069', '\u2178': '\u0069\u0078', '\u2179': '\u0078', '\u217A': '\u0078\u0069', '\u217B': '\u0078\u0069\u0069', '\u217C': '\u006C', '\u217D': '\u0063', '\u217E': '\u0064', '\u217F': '\u006D', '\u222C': '\u222B\u222B', '\u222D': '\u222B\u222B\u222B', '\u222F': '\u222E\u222E', '\u2230': '\u222E\u222E\u222E', '\u2474': '\u0028\u0031\u0029', '\u2475': '\u0028\u0032\u0029', '\u2476': '\u0028\u0033\u0029', '\u2477': '\u0028\u0034\u0029', '\u2478': '\u0028\u0035\u0029', '\u2479': '\u0028\u0036\u0029', '\u247A': '\u0028\u0037\u0029', '\u247B': 
'\u0028\u0038\u0029', '\u247C': '\u0028\u0039\u0029', '\u247D': '\u0028\u0031\u0030\u0029', '\u247E': '\u0028\u0031\u0031\u0029', '\u247F': '\u0028\u0031\u0032\u0029', '\u2480': '\u0028\u0031\u0033\u0029', '\u2481': '\u0028\u0031\u0034\u0029', '\u2482': '\u0028\u0031\u0035\u0029', '\u2483': '\u0028\u0031\u0036\u0029', '\u2484': '\u0028\u0031\u0037\u0029', '\u2485': '\u0028\u0031\u0038\u0029', '\u2486': '\u0028\u0031\u0039\u0029', '\u2487': '\u0028\u0032\u0030\u0029', '\u2488': '\u0031\u002E', '\u2489': '\u0032\u002E', '\u248A': '\u0033\u002E', '\u248B': '\u0034\u002E', '\u248C': '\u0035\u002E', '\u248D': '\u0036\u002E', '\u248E': '\u0037\u002E', '\u248F': '\u0038\u002E', '\u2490': '\u0039\u002E', '\u2491': '\u0031\u0030\u002E', '\u2492': '\u0031\u0031\u002E', '\u2493': '\u0031\u0032\u002E', '\u2494': '\u0031\u0033\u002E', '\u2495': '\u0031\u0034\u002E', '\u2496': '\u0031\u0035\u002E', '\u2497': '\u0031\u0036\u002E', '\u2498': '\u0031\u0037\u002E', '\u2499': '\u0031\u0038\u002E', '\u249A': '\u0031\u0039\u002E', '\u249B': '\u0032\u0030\u002E', '\u249C': '\u0028\u0061\u0029', '\u249D': '\u0028\u0062\u0029', '\u249E': '\u0028\u0063\u0029', '\u249F': '\u0028\u0064\u0029', '\u24A0': '\u0028\u0065\u0029', '\u24A1': '\u0028\u0066\u0029', '\u24A2': '\u0028\u0067\u0029', '\u24A3': '\u0028\u0068\u0029', '\u24A4': '\u0028\u0069\u0029', '\u24A5': '\u0028\u006A\u0029', '\u24A6': '\u0028\u006B\u0029', '\u24A7': '\u0028\u006C\u0029', '\u24A8': '\u0028\u006D\u0029', '\u24A9': '\u0028\u006E\u0029', '\u24AA': '\u0028\u006F\u0029', '\u24AB': '\u0028\u0070\u0029', '\u24AC': '\u0028\u0071\u0029', '\u24AD': '\u0028\u0072\u0029', '\u24AE': '\u0028\u0073\u0029', '\u24AF': '\u0028\u0074\u0029', '\u24B0': '\u0028\u0075\u0029', '\u24B1': '\u0028\u0076\u0029', '\u24B2': '\u0028\u0077\u0029', '\u24B3': '\u0028\u0078\u0029', '\u24B4': '\u0028\u0079\u0029', '\u24B5': '\u0028\u007A\u0029', '\u2A0C': '\u222B\u222B\u222B\u222B', '\u2A74': '\u003A\u003A\u003D', '\u2A75': '\u003D\u003D', '\u2A76': 
'\u003D\u003D\u003D', '\u2E9F': '\u6BCD', '\u2EF3': '\u9F9F', '\u2F00': '\u4E00', '\u2F01': '\u4E28', '\u2F02': '\u4E36', '\u2F03': '\u4E3F', '\u2F04': '\u4E59', '\u2F05': '\u4E85', '\u2F06': '\u4E8C', '\u2F07': '\u4EA0', '\u2F08': '\u4EBA', '\u2F09': '\u513F', '\u2F0A': '\u5165', '\u2F0B': '\u516B', '\u2F0C': '\u5182', '\u2F0D': '\u5196', '\u2F0E': '\u51AB', '\u2F0F': '\u51E0', '\u2F10': '\u51F5', '\u2F11': '\u5200', '\u2F12': '\u529B', '\u2F13': '\u52F9', '\u2F14': '\u5315', '\u2F15': '\u531A', '\u2F16': '\u5338', '\u2F17': '\u5341', '\u2F18': '\u535C', '\u2F19': '\u5369', '\u2F1A': '\u5382', '\u2F1B': '\u53B6', '\u2F1C': '\u53C8', '\u2F1D': '\u53E3', '\u2F1E': '\u56D7', '\u2F1F': '\u571F', '\u2F20': '\u58EB', '\u2F21': '\u5902', '\u2F22': '\u590A', '\u2F23': '\u5915', '\u2F24': '\u5927', '\u2F25': '\u5973', '\u2F26': '\u5B50', '\u2F27': '\u5B80', '\u2F28': '\u5BF8', '\u2F29': '\u5C0F', '\u2F2A': '\u5C22', '\u2F2B': '\u5C38', '\u2F2C': '\u5C6E', '\u2F2D': '\u5C71', '\u2F2E': '\u5DDB', '\u2F2F': '\u5DE5', '\u2F30': '\u5DF1', '\u2F31': '\u5DFE', '\u2F32': '\u5E72', '\u2F33': '\u5E7A', '\u2F34': '\u5E7F', '\u2F35': '\u5EF4', '\u2F36': '\u5EFE', '\u2F37': '\u5F0B', '\u2F38': '\u5F13', '\u2F39': '\u5F50', '\u2F3A': '\u5F61', '\u2F3B': '\u5F73', '\u2F3C': '\u5FC3', '\u2F3D': '\u6208', '\u2F3E': '\u6236', '\u2F3F': '\u624B', '\u2F40': '\u652F', '\u2F41': '\u6534', '\u2F42': '\u6587', '\u2F43': '\u6597', '\u2F44': '\u65A4', '\u2F45': '\u65B9', '\u2F46': '\u65E0', '\u2F47': '\u65E5', '\u2F48': '\u66F0', '\u2F49': '\u6708', '\u2F4A': '\u6728', '\u2F4B': '\u6B20', '\u2F4C': '\u6B62', '\u2F4D': '\u6B79', '\u2F4E': '\u6BB3', '\u2F4F': '\u6BCB', '\u2F50': '\u6BD4', '\u2F51': '\u6BDB', '\u2F52': '\u6C0F', '\u2F53': '\u6C14', '\u2F54': '\u6C34', '\u2F55': '\u706B', '\u2F56': '\u722A', '\u2F57': '\u7236', '\u2F58': '\u723B', '\u2F59': '\u723F', '\u2F5A': '\u7247', '\u2F5B': '\u7259', '\u2F5C': '\u725B', '\u2F5D': '\u72AC', '\u2F5E': '\u7384', '\u2F5F': '\u7389', '\u2F60': 
'\u74DC', '\u2F61': '\u74E6', '\u2F62': '\u7518', '\u2F63': '\u751F', '\u2F64': '\u7528', '\u2F65': '\u7530', '\u2F66': '\u758B', '\u2F67': '\u7592', '\u2F68': '\u7676', '\u2F69': '\u767D', '\u2F6A': '\u76AE', '\u2F6B': '\u76BF', '\u2F6C': '\u76EE', '\u2F6D': '\u77DB', '\u2F6E': '\u77E2', '\u2F6F': '\u77F3', '\u2F70': '\u793A', '\u2F71': '\u79B8', '\u2F72': '\u79BE', '\u2F73': '\u7A74', '\u2F74': '\u7ACB', '\u2F75': '\u7AF9', '\u2F76': '\u7C73', '\u2F77': '\u7CF8', '\u2F78': '\u7F36', '\u2F79': '\u7F51', '\u2F7A': '\u7F8A', '\u2F7B': '\u7FBD', '\u2F7C': '\u8001', '\u2F7D': '\u800C', '\u2F7E': '\u8012', '\u2F7F': '\u8033', '\u2F80': '\u807F', '\u2F81': '\u8089', '\u2F82': '\u81E3', '\u2F83': '\u81EA', '\u2F84': '\u81F3', '\u2F85': '\u81FC', '\u2F86': '\u820C', '\u2F87': '\u821B', '\u2F88': '\u821F', '\u2F89': '\u826E', '\u2F8A': '\u8272', '\u2F8B': '\u8278', '\u2F8C': '\u864D', '\u2F8D': '\u866B', '\u2F8E': '\u8840', '\u2F8F': '\u884C', '\u2F90': '\u8863', '\u2F91': '\u897E', '\u2F92': '\u898B', '\u2F93': '\u89D2', '\u2F94': '\u8A00', '\u2F95': '\u8C37', '\u2F96': '\u8C46', '\u2F97': '\u8C55', '\u2F98': '\u8C78', '\u2F99': '\u8C9D', '\u2F9A': '\u8D64', '\u2F9B': '\u8D70', '\u2F9C': '\u8DB3', '\u2F9D': '\u8EAB', '\u2F9E': '\u8ECA', '\u2F9F': '\u8F9B', '\u2FA0': '\u8FB0', '\u2FA1': '\u8FB5', '\u2FA2': '\u9091', '\u2FA3': '\u9149', '\u2FA4': '\u91C6', '\u2FA5': '\u91CC', '\u2FA6': '\u91D1', '\u2FA7': '\u9577', '\u2FA8': '\u9580', '\u2FA9': '\u961C', '\u2FAA': '\u96B6', '\u2FAB': '\u96B9', '\u2FAC': '\u96E8', '\u2FAD': '\u9751', '\u2FAE': '\u975E', '\u2FAF': '\u9762', '\u2FB0': '\u9769', '\u2FB1': '\u97CB', '\u2FB2': '\u97ED', '\u2FB3': '\u97F3', '\u2FB4': '\u9801', '\u2FB5': '\u98A8', '\u2FB6': '\u98DB', '\u2FB7': '\u98DF', '\u2FB8': '\u9996', '\u2FB9': '\u9999', '\u2FBA': '\u99AC', '\u2FBB': '\u9AA8', '\u2FBC': '\u9AD8', '\u2FBD': '\u9ADF', '\u2FBE': '\u9B25', '\u2FBF': '\u9B2F', '\u2FC0': '\u9B32', '\u2FC1': '\u9B3C', '\u2FC2': '\u9B5A', '\u2FC3': '\u9CE5', '\u2FC4': 
'\u9E75', '\u2FC5': '\u9E7F', '\u2FC6': '\u9EA5', '\u2FC7': '\u9EBB', '\u2FC8': '\u9EC3', '\u2FC9': '\u9ECD', '\u2FCA': '\u9ED1', '\u2FCB': '\u9EF9', '\u2FCC': '\u9EFD', '\u2FCD': '\u9F0E', '\u2FCE': '\u9F13', '\u2FCF': '\u9F20', '\u2FD0': '\u9F3B', '\u2FD1': '\u9F4A', '\u2FD2': '\u9F52', '\u2FD3': '\u9F8D', '\u2FD4': '\u9F9C', '\u2FD5': '\u9FA0', '\u3036': '\u3012', '\u3038': '\u5341', '\u3039': '\u5344', '\u303A': '\u5345', '\u309B': '\u0020\u3099', '\u309C': '\u0020\u309A', '\u3131': '\u1100', '\u3132': '\u1101', '\u3133': '\u11AA', '\u3134': '\u1102', '\u3135': '\u11AC', '\u3136': '\u11AD', '\u3137': '\u1103', '\u3138': '\u1104', '\u3139': '\u1105', '\u313A': '\u11B0', '\u313B': '\u11B1', '\u313C': '\u11B2', '\u313D': '\u11B3', '\u313E': '\u11B4', '\u313F': '\u11B5', '\u3140': '\u111A', '\u3141': '\u1106', '\u3142': '\u1107', '\u3143': '\u1108', '\u3144': '\u1121', '\u3145': '\u1109', '\u3146': '\u110A', '\u3147': '\u110B', '\u3148': '\u110C', '\u3149': '\u110D', '\u314A': '\u110E', '\u314B': '\u110F', '\u314C': '\u1110', '\u314D': '\u1111', '\u314E': '\u1112',