pdf3json
Version:
A PDF file parser that converts PDF binaries to text-based JSON, powered by a fork of PDF.js ported to Node.js
1,182 lines (1,154 loc) • 252 kB
JavaScript
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
ExpertSubsetCharset, FileReaderSync, GlyphsUnicode,
info, isArray, isNum, ISOAdobeCharset, Stream,
stringToBytes, TextDecoder, TODO, warn, Lexer, Util,
FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString */
'use strict';
// Unicode Private Use Area.
// 0xE000 is the first code point of the plane-0 Private Use Area and
// 0xE000 + 0x1900 = 0xF900, i.e. the area size covers U+E000..U+F8FF.
// NOTE(review): presumably glyphs without a usable Unicode value are remapped
// into this area (symbolic fonts starting at 0xF000) — confirm against the
// code that consumes these constants.
var CMAP_GLYPH_OFFSET = 0xE000;
var GLYPH_AREA_SIZE = 0x1900;
var SYMBOLIC_FONT_GLYPH_OFFSET = 0xF000;
// PDF glyph space units are one thousandth of a text space unit,
// except for Type 3 fonts.
var PDF_GLYPH_SPACE_UNITS = 1000;
// Hinting is currently disabled due to unknown problems on Windows
// in tracemonkey and various other PDFs with Type1 fonts.
var HINTING_ENABLED = false;
// Accented characters are not displayed properly on Windows; this flag
// controls whether seac charstrings are analyzed.
var SEAC_ANALYSIS_ENABLED = false;
// Bit masks for the individual font flags. Every mask is a single set bit,
// written as a shift so that the bit position is visible at a glance.
var FontFlags = {
  FixedPitch: 1 << 0,
  Serif: 1 << 1,
  Symbolic: 1 << 2,
  Script: 1 << 3,
  Nonsymbolic: 1 << 5,
  Italic: 1 << 6,
  AllCap: 1 << 16,
  SmallCap: 1 << 17,
  ForceBold: 1 << 18
};
// Glyph-name tables for the built-in encodings.
// Each table is an array indexed by character code: the first 32 entries
// (the control-code range) are empty and index 32 is 'space' in every table.
// An empty string marks a code for which the table lists no glyph name.
// The tables are positional: inserting or removing an entry shifts every
// following code, so edit them only by replacing entries in place.
var Encodings = {
// Expert set: small caps, old-style figures, superiors/inferiors, fractions.
ExpertEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'space', 'exclamsmall', 'Hungarumlautsmall', '', 'dollaroldstyle',
'dollarsuperior', 'ampersandsmall', 'Acutesmall', 'parenleftsuperior',
'parenrightsuperior', 'twodotenleader', 'onedotenleader', 'comma',
'hyphen', 'period', 'fraction', 'zerooldstyle', 'oneoldstyle',
'twooldstyle', 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle',
'sixoldstyle', 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', 'colon',
'semicolon', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', '', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', '', '', 'isuperior', '', '', 'lsuperior', 'msuperior',
'nsuperior', 'osuperior', '', '', 'rsuperior', 'ssuperior', 'tsuperior',
'', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'parenleftinferior', '',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall',
'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall',
'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall',
'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', 'Zsmall', 'colonmonetary',
'onefitted', 'rupiah', 'Tildesmall', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', 'exclamdownsmall', 'centoldstyle', 'Lslashsmall',
'', '', 'Scaronsmall', 'Zcaronsmall', 'Dieresissmall', 'Brevesmall',
'Caronsmall', '', 'Dotaccentsmall', '', '', 'Macronsmall', '', '',
'figuredash', 'hypheninferior', '', '', 'Ogoneksmall', 'Ringsmall',
'Cedillasmall', '', '', '', 'onequarter', 'onehalf', 'threequarters',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', '', '', 'zerosuperior',
'onesuperior', 'twosuperior', 'threesuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior',
'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior',
'seveninferior', 'eightinferior', 'nineinferior', 'centinferior',
'dollarinferior', 'periodinferior', 'commainferior', 'Agravesmall',
'Aacutesmall', 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall',
'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', 'Eacutesmall',
'Ecircumflexsmall', 'Edieresissmall', 'Igravesmall', 'Iacutesmall',
'Icircumflexsmall', 'Idieresissmall', 'Ethsmall', 'Ntildesmall',
'Ogravesmall', 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall',
'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', 'Uacutesmall',
'Ucircumflexsmall', 'Udieresissmall', 'Yacutesmall', 'Thornsmall',
'Ydieresissmall'],
// Expert glyph set arranged in a different code order than ExpertEncoding.
MacExpertEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'space', 'exclamsmall', 'Hungarumlautsmall', 'centoldstyle',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader',
'onedotenleader', 'comma', 'hyphen', 'period', 'fraction', 'zerooldstyle',
'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'colon', 'semicolon', '', 'threequartersemdash', '',
'questionsmall', '', '', '', '', 'Ethsmall', '', '', 'onequarter',
'onehalf', 'threequarters', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', '', '', '', '', '', '', 'ff',
'fi', 'fl', 'ffi', 'ffl', 'parenleftinferior', '', 'parenrightinferior',
'Circumflexsmall', 'hypheninferior', 'Gravesmall', 'Asmall', 'Bsmall',
'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall', 'Ismall',
'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall',
'Xsmall', 'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah',
'Tildesmall', '', '', 'asuperior', 'centsuperior', '', '', '', '',
'Aacutesmall', 'Agravesmall', 'Acircumflexsmall', 'Adieresissmall',
'Atildesmall', 'Aringsmall', 'Ccedillasmall', 'Eacutesmall', 'Egravesmall',
'Ecircumflexsmall', 'Edieresissmall', 'Iacutesmall', 'Igravesmall',
'Icircumflexsmall', 'Idieresissmall', 'Ntildesmall', 'Oacutesmall',
'Ogravesmall', 'Ocircumflexsmall', 'Odieresissmall', 'Otildesmall',
'Uacutesmall', 'Ugravesmall', 'Ucircumflexsmall', 'Udieresissmall', '',
'eightsuperior', 'fourinferior', 'threeinferior', 'sixinferior',
'eightinferior', 'seveninferior', 'Scaronsmall', '', 'centinferior',
'twoinferior', '', 'Dieresissmall', '', 'Caronsmall', 'osuperior',
'fiveinferior', '', 'commainferior', 'periodinferior', 'Yacutesmall', '',
'dollarinferior', '', 'Thornsmall', '', 'nineinferior', 'zeroinferior',
'Zcaronsmall', 'AEsmall', 'Oslashsmall', 'questiondownsmall',
'oneinferior', 'Lslashsmall', '', '', '', '', '', '', 'Cedillasmall', '',
'', '', '', '', 'OEsmall', 'figuredash', 'hyphensuperior', '', '', '', '',
'exclamdownsmall', '', 'Ydieresissmall', '', 'onesuperior', 'twosuperior',
'threesuperior', 'foursuperior', 'fivesuperior', 'sixsuperior',
'sevensuperior', 'ninesuperior', 'zerosuperior', '', 'esuperior',
'rsuperior', 'tsuperior', '', '', 'isuperior', 'ssuperior', 'dsuperior',
'', '', '', '', '', 'lsuperior', 'Ogoneksmall', 'Brevesmall',
'Macronsmall', 'bsuperior', 'nsuperior', 'msuperior', 'commasuperior',
'periodsuperior', 'Dotaccentsmall', 'Ringsmall'],
// Latin text table; accented letters begin right after 'asciitilde'.
MacRomanEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quotesingle', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three',
'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon',
'less', 'equal', 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F',
'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
'asciicircum', 'underscore', 'grave', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', 'asciitilde', '',
'Adieresis', 'Aring', 'Ccedilla', 'Eacute', 'Ntilde', 'Odieresis',
'Udieresis', 'aacute', 'agrave', 'acircumflex', 'adieresis', 'atilde',
'aring', 'ccedilla', 'eacute', 'egrave', 'ecircumflex', 'edieresis',
'iacute', 'igrave', 'icircumflex', 'idieresis', 'ntilde', 'oacute',
'ograve', 'ocircumflex', 'odieresis', 'otilde', 'uacute', 'ugrave',
'ucircumflex', 'udieresis', 'dagger', 'degree', 'cent', 'sterling',
'section', 'bullet', 'paragraph', 'germandbls', 'registered', 'copyright',
'trademark', 'acute', 'dieresis', 'notequal', 'AE', 'Oslash', 'infinity',
'plusminus', 'lessequal', 'greaterequal', 'yen', 'mu', 'partialdiff',
'summation', 'product', 'pi', 'integral', 'ordfeminine', 'ordmasculine',
'Omega', 'ae', 'oslash', 'questiondown', 'exclamdown', 'logicalnot',
'radical', 'florin', 'approxequal', 'Delta', 'guillemotleft',
'guillemotright', 'ellipsis', '', 'Agrave', 'Atilde', 'Otilde', 'OE',
'oe', 'endash', 'emdash', 'quotedblleft', 'quotedblright', 'quoteleft',
'quoteright', 'divide', 'lozenge', 'ydieresis', 'Ydieresis', 'fraction',
'currency', 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'daggerdbl',
'periodcentered', 'quotesinglbase', 'quotedblbase', 'perthousand',
'Acircumflex', 'Ecircumflex', 'Aacute', 'Edieresis', 'Egrave', 'Iacute',
'Icircumflex', 'Idieresis', 'Igrave', 'Oacute', 'Ocircumflex', 'apple',
'Ograve', 'Uacute', 'Ucircumflex', 'Ugrave', 'dotlessi', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'ring', 'cedilla', 'hungarumlaut',
'ogonek', 'caron'],
// Latin text table; note 'quoteright'/'quoteleft' sit where the other Latin
// tables have 'quotesingle'/'grave', and the block after 'asciitilde' is empty.
StandardEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three',
'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon',
'less', 'equal', 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F',
'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f',
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', 'asciitilde',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'exclamdown',
'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', 'currency',
'quotesingle', 'quotedblleft', 'guillemotleft', 'guilsinglleft',
'guilsinglright', 'fi', 'fl', '', 'endash', 'dagger', 'daggerdbl',
'periodcentered', '', 'paragraph', 'bullet', 'quotesinglbase',
'quotedblbase', 'quotedblright', 'guillemotright', 'ellipsis',
'perthousand', '', 'questiondown', '', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', '', 'ring', 'cedilla',
'', 'hungarumlaut', 'ogonek', 'caron', 'emdash', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', 'AE', '', 'ordfeminine', '', '',
'', '', 'Lslash', 'Oslash', 'OE', 'ordmasculine', '', '', '', '', '', 'ae',
'', '', '', 'dotlessi', '', '', 'lslash', 'oslash', 'oe', 'germandbls'],
// Latin text table; several codes after 'asciitilde' are filled with
// 'bullet' rather than left empty.
WinAnsiEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quotesingle', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three',
'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon',
'less', 'equal', 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F',
'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
'asciicircum', 'underscore', 'grave', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', 'asciitilde',
'bullet', 'Euro', 'bullet', 'quotesinglbase', 'florin', 'quotedblbase',
'ellipsis', 'dagger', 'daggerdbl', 'circumflex', 'perthousand', 'Scaron',
'guilsinglleft', 'OE', 'bullet', 'Zcaron', 'bullet', 'bullet', 'quoteleft',
'quoteright', 'quotedblleft', 'quotedblright', 'bullet', 'endash',
'emdash', 'tilde', 'trademark', 'scaron', 'guilsinglright', 'oe', 'bullet',
'zcaron', 'Ydieresis', '', 'exclamdown', 'cent', 'sterling',
'currency', 'yen', 'brokenbar', 'section', 'dieresis', 'copyright',
'ordfeminine', 'guillemotleft', 'logicalnot', 'hyphen', 'registered',
'macron', 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute',
'mu', 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
'ordmasculine', 'guillemotright', 'onequarter', 'onehalf', 'threequarters',
'questiondown', 'Agrave', 'Aacute', 'Acircumflex', 'Atilde', 'Adieresis',
'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute', 'Ecircumflex', 'Edieresis',
'Igrave', 'Iacute', 'Icircumflex', 'Idieresis', 'Eth', 'Ntilde', 'Ograve',
'Oacute', 'Ocircumflex', 'Otilde', 'Odieresis', 'multiply', 'Oslash',
'Ugrave', 'Uacute', 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn',
'germandbls', 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis',
'aring', 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde', 'ograve',
'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide', 'oslash',
'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute', 'thorn',
'ydieresis'],
// Symbol table: Greek letters, mathematical operators, arrows and the
// pieces used to build tall brackets/braces/integrals.
SymbolSetEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'space', 'exclam', 'universal', 'numbersign', 'existential', 'percent',
'ampersand', 'suchthat', 'parenleft', 'parenright', 'asteriskmath', 'plus',
'comma', 'minus', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less',
'equal', 'greater', 'question', 'congruent', 'Alpha', 'Beta', 'Chi',
'Delta', 'Epsilon', 'Phi', 'Gamma', 'Eta', 'Iota', 'theta1', 'Kappa',
'Lambda', 'Mu', 'Nu', 'Omicron', 'Pi', 'Theta', 'Rho', 'Sigma', 'Tau',
'Upsilon', 'sigma1', 'Omega', 'Xi', 'Psi', 'Zeta', 'bracketleft',
'therefore', 'bracketright', 'perpendicular', 'underscore', 'radicalex',
'alpha', 'beta', 'chi', 'delta', 'epsilon', 'phi', 'gamma', 'eta', 'iota',
'phi1', 'kappa', 'lambda', 'mu', 'nu', 'omicron', 'pi', 'theta', 'rho',
'sigma', 'tau', 'upsilon', 'omega1', 'omega', 'xi', 'psi', 'zeta',
'braceleft', 'bar', 'braceright', 'similar', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', 'Euro', 'Upsilon1', 'minute', 'lessequal',
'fraction', 'infinity', 'florin', 'club', 'diamond', 'heart', 'spade',
'arrowboth', 'arrowleft', 'arrowup', 'arrowright', 'arrowdown', 'degree',
'plusminus', 'second', 'greaterequal', 'multiply', 'proportional',
'partialdiff', 'bullet', 'divide', 'notequal', 'equivalence',
'approxequal', 'ellipsis', 'arrowvertex', 'arrowhorizex', 'carriagereturn',
'aleph', 'Ifraktur', 'Rfraktur', 'weierstrass', 'circlemultiply',
'circleplus', 'emptyset', 'intersection', 'union', 'propersuperset',
'reflexsuperset', 'notsubset', 'propersubset', 'reflexsubset', 'element',
'notelement', 'angle', 'gradient', 'registerserif', 'copyrightserif',
'trademarkserif', 'product', 'radical', 'dotmath', 'logicalnot',
'logicaland', 'logicalor', 'arrowdblboth', 'arrowdblleft', 'arrowdblup',
'arrowdblright', 'arrowdbldown', 'lozenge', 'angleleft', 'registersans',
'copyrightsans', 'trademarksans', 'summation', 'parenlefttp',
'parenleftex', 'parenleftbt', 'bracketlefttp', 'bracketleftex',
'bracketleftbt', 'bracelefttp', 'braceleftmid', 'braceleftbt', 'braceex',
'', 'angleright', 'integral', 'integraltp', 'integralex', 'integralbt',
'parenrighttp', 'parenrightex', 'parenrightbt', 'bracketrighttp',
'bracketrightex', 'bracketrightbt', 'bracerighttp', 'bracerightmid',
'bracerightbt'],
// Dingbats table; glyphs are named 'a<number>'. Unlike its siblings this key
// starts with a lowercase letter, so look it up with that exact spelling.
zapfDingbatsEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'space', 'a1', 'a2', 'a202', 'a3', 'a4', 'a5', 'a119', 'a118', 'a117',
'a11', 'a12', 'a13', 'a14', 'a15', 'a16', 'a105', 'a17', 'a18', 'a19',
'a20', 'a21', 'a22', 'a23', 'a24', 'a25', 'a26', 'a27', 'a28', 'a6', 'a7',
'a8', 'a9', 'a10', 'a29', 'a30', 'a31', 'a32', 'a33', 'a34', 'a35', 'a36',
'a37', 'a38', 'a39', 'a40', 'a41', 'a42', 'a43', 'a44', 'a45', 'a46',
'a47', 'a48', 'a49', 'a50', 'a51', 'a52', 'a53', 'a54', 'a55', 'a56',
'a57', 'a58', 'a59', 'a60', 'a61', 'a62', 'a63', 'a64', 'a65', 'a66',
'a67', 'a68', 'a69', 'a70', 'a71', 'a72', 'a73', 'a74', 'a203', 'a75',
'a204', 'a76', 'a77', 'a78', 'a79', 'a81', 'a82', 'a83', 'a84', 'a97',
'a98', 'a99', 'a100', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', 'a101', 'a102', 'a103', 'a104', 'a106', 'a107', 'a108', 'a112',
'a111', 'a110', 'a109', 'a120', 'a121', 'a122', 'a123', 'a124', 'a125',
'a126', 'a127', 'a128', 'a129', 'a130', 'a131', 'a132', 'a133', 'a134',
'a135', 'a136', 'a137', 'a138', 'a139', 'a140', 'a141', 'a142', 'a143',
'a144', 'a145', 'a146', 'a147', 'a148', 'a149', 'a150', 'a151', 'a152',
'a153', 'a154', 'a155', 'a156', 'a157', 'a158', 'a159', 'a160', 'a161',
'a163', 'a164', 'a196', 'a165', 'a192', 'a166', 'a167', 'a168', 'a169',
'a170', 'a171', 'a172', 'a173', 'a162', 'a174', 'a175', 'a176', 'a177',
'a178', 'a179', 'a193', 'a180', 'a199', 'a181', 'a200', 'a182', '', 'a201',
'a183', 'a184', 'a197', 'a185', 'a194', 'a198', 'a186', 'a195', 'a187',
'a188', 'a189', 'a190', 'a191']
};
/**
 * Maps alternative font names (keys) to the name of the standard Type1 font
 * that stands in for them (values).
 *
 * Besides vendor aliases (Arial*, CourierNew*, TimesNewRoman*) the table also
 * normalizes style suffixes inside the standard families, e.g.
 * 'Courier-BoldItalic' becomes 'Courier-BoldOblique', and maps every styled
 * 'Symbol-*' name to plain 'Symbol'.
 *
 * NOTE(review): keys contain no spaces or commas, so callers presumably
 * normalize the PDF font name before the lookup — confirm at the call site.
 */
var stdFontMap = {
// Arial family -> Helvetica
'ArialNarrow': 'Helvetica',
'ArialNarrow-Bold': 'Helvetica-Bold',
'ArialNarrow-BoldItalic': 'Helvetica-BoldOblique',
'ArialNarrow-Italic': 'Helvetica-Oblique',
'ArialBlack': 'Helvetica',
'ArialBlack-Bold': 'Helvetica-Bold',
'ArialBlack-BoldItalic': 'Helvetica-BoldOblique',
'ArialBlack-Italic': 'Helvetica-Oblique',
'Arial': 'Helvetica',
'Arial-Bold': 'Helvetica-Bold',
'Arial-BoldItalic': 'Helvetica-BoldOblique',
'Arial-Italic': 'Helvetica-Oblique',
'Arial-BoldItalicMT': 'Helvetica-BoldOblique',
'Arial-BoldMT': 'Helvetica-Bold',
'Arial-ItalicMT': 'Helvetica-Oblique',
'ArialMT': 'Helvetica',
// Courier family
'Courier-Bold': 'Courier-Bold',
'Courier-BoldItalic': 'Courier-BoldOblique',
'Courier-Italic': 'Courier-Oblique',
'CourierNew': 'Courier',
'CourierNew-Bold': 'Courier-Bold',
'CourierNew-BoldItalic': 'Courier-BoldOblique',
'CourierNew-Italic': 'Courier-Oblique',
'CourierNewPS-BoldItalicMT': 'Courier-BoldOblique',
'CourierNewPS-BoldMT': 'Courier-Bold',
'CourierNewPS-ItalicMT': 'Courier-Oblique',
'CourierNewPSMT': 'Courier',
// Helvetica style-suffix normalization
'Helvetica-Bold': 'Helvetica-Bold',
'Helvetica-BoldItalic': 'Helvetica-BoldOblique',
'Helvetica-Italic': 'Helvetica-Oblique',
// Symbol has no styled variants in this table; all map to 'Symbol'
'Symbol-Bold': 'Symbol',
'Symbol-BoldItalic': 'Symbol',
'Symbol-Italic': 'Symbol',
// Times New Roman family -> Times
'TimesNewRoman': 'Times-Roman',
'TimesNewRoman-Bold': 'Times-Bold',
'TimesNewRoman-BoldItalic': 'Times-BoldItalic',
'TimesNewRoman-Italic': 'Times-Italic',
'TimesNewRomanPS': 'Times-Roman',
'TimesNewRomanPS-Bold': 'Times-Bold',
'TimesNewRomanPS-BoldItalic': 'Times-BoldItalic',
'TimesNewRomanPS-BoldItalicMT': 'Times-BoldItalic',
'TimesNewRomanPS-BoldMT': 'Times-Bold',
'TimesNewRomanPS-Italic': 'Times-Italic',
'TimesNewRomanPS-ItalicMT': 'Times-Italic',
'TimesNewRomanPSMT': 'Times-Roman',
'TimesNewRomanPSMT-Bold': 'Times-Bold',
'TimesNewRomanPSMT-BoldItalic': 'Times-BoldItalic',
'TimesNewRomanPSMT-Italic': 'Times-Italic'
};
/**
 * Maps names of non-standard fonts that a PDF might reference like a standard
 * font, i.e. without embedded glyph data, to the name used in their place.
 *
 * Two kinds of entries exist: names that are only re-spelled with spaces
 * (e.g. 'ComicSansMS' -> 'Comic Sans MS', 'MS-Gothic' -> 'MS Gothic') and
 * names that are redirected to a standard font ('LucidaConsole*' -> 'Courier*').
 */
var nonStdFontMap = {
'ComicSansMS': 'Comic Sans MS',
'ComicSansMS-Bold': 'Comic Sans MS-Bold',
'ComicSansMS-BoldItalic': 'Comic Sans MS-BoldItalic',
'ComicSansMS-Italic': 'Comic Sans MS-Italic',
// Lucida Console is redirected to the standard Courier family
'LucidaConsole': 'Courier',
'LucidaConsole-Bold': 'Courier-Bold',
'LucidaConsole-BoldItalic': 'Courier-BoldOblique',
'LucidaConsole-Italic': 'Courier-Oblique',
// MS Gothic / Mincho families: hyphen after 'MS' replaced by a space
'MS-Gothic': 'MS Gothic',
'MS-Gothic-Bold': 'MS Gothic-Bold',
'MS-Gothic-BoldItalic': 'MS Gothic-BoldItalic',
'MS-Gothic-Italic': 'MS Gothic-Italic',
'MS-Mincho': 'MS Mincho',
'MS-Mincho-Bold': 'MS Mincho-Bold',
'MS-Mincho-BoldItalic': 'MS Mincho-BoldItalic',
'MS-Mincho-Italic': 'MS Mincho-Italic',
'MS-PGothic': 'MS PGothic',
'MS-PGothic-Bold': 'MS PGothic-Bold',
'MS-PGothic-BoldItalic': 'MS PGothic-BoldItalic',
'MS-PGothic-Italic': 'MS PGothic-Italic',
'MS-PMincho': 'MS PMincho',
'MS-PMincho-Bold': 'MS PMincho-Bold',
'MS-PMincho-BoldItalic': 'MS PMincho-BoldItalic',
'MS-PMincho-Italic': 'MS PMincho-Italic',
};
// Set-like lookup of font family names that are treated as serif faces.
// Every value is `true`, so membership is tested with `serifFonts[name]`.
// Keys are human-readable family names (they may contain spaces, digits,
// dots and non-ASCII letters) and are matched exactly, including case.
// NOTE(review): presumably consulted when a font does not declare the Serif
// flag itself — confirm against the code that reads this table.
var serifFonts = {
'Adobe Jenson': true, 'Adobe Text': true, 'Albertus': true,
'Aldus': true, 'Alexandria': true, 'Algerian': true,
'American Typewriter': true, 'Antiqua': true, 'Apex': true,
'Arno': true, 'Aster': true, 'Aurora': true,
'Baskerville': true, 'Bell': true, 'Bembo': true,
'Bembo Schoolbook': true, 'Benguiat': true, 'Berkeley Old Style': true,
'Bernhard Modern': true, 'Berthold City': true, 'Bodoni': true,
'Bauer Bodoni': true, 'Book Antiqua': true, 'Bookman': true,
'Bordeaux Roman': true, 'Californian FB': true, 'Calisto': true,
'Calvert': true, 'Capitals': true, 'Cambria': true,
'Cartier': true, 'Caslon': true, 'Catull': true,
'Centaur': true, 'Century Old Style': true, 'Century Schoolbook': true,
'Chaparral': true, 'Charis SIL': true, 'Cheltenham': true,
'Cholla Slab': true, 'Clarendon': true, 'Clearface': true,
'Cochin': true, 'Colonna': true, 'Computer Modern': true,
'Concrete Roman': true, 'Constantia': true, 'Cooper Black': true,
'Corona': true, 'Ecotype': true, 'Egyptienne': true,
'Elephant': true, 'Excelsior': true, 'Fairfield': true,
'FF Scala': true, 'Folkard': true, 'Footlight': true,
'FreeSerif': true, 'Friz Quadrata': true, 'Garamond': true,
'Gentium': true, 'Georgia': true, 'Gloucester': true,
'Goudy Old Style': true, 'Goudy Schoolbook': true, 'Goudy Pro Font': true,
'Granjon': true, 'Guardian Egyptian': true, 'Heather': true,
'Hercules': true, 'High Tower Text': true, 'Hiroshige': true,
'Hoefler Text': true, 'Humana Serif': true, 'Imprint': true,
'Ionic No. 5': true, 'Janson': true, 'Joanna': true,
'Korinna': true, 'Lexicon': true, 'Liberation Serif': true,
'Linux Libertine': true, 'Literaturnaya': true, 'Lucida': true,
'Lucida Bright': true, 'Melior': true, 'Memphis': true,
'Miller': true, 'Minion': true, 'Modern': true,
'Mona Lisa': true, 'Mrs Eaves': true, 'MS Serif': true,
'Museo Slab': true, 'New York': true, 'Nimbus Roman': true,
'NPS Rawlinson Roadway': true, 'Palatino': true, 'Perpetua': true,
'Plantin': true, 'Plantin Schoolbook': true, 'Playbill': true,
'Poor Richard': true, 'Rawlinson Roadway': true, 'Renault': true,
'Requiem': true, 'Rockwell': true, 'Roman': true,
'Rotis Serif': true, 'Sabon': true, 'Scala': true,
'Seagull': true, 'Sistina': true, 'Souvenir': true,
'STIX': true, 'Stone Informal': true, 'Stone Serif': true,
'Sylfaen': true, 'Times': true, 'Trajan': true,
'Trinité': true, 'Trump Mediaeval': true, 'Utopia': true,
'Vale Type': true, 'Bitstream Vera': true, 'Vera Serif': true,
'Versailles': true, 'Wanted': true, 'Weiss': true,
'Wide Latin': true, 'Windsor': true, 'XITS': true
};
// Set-like lookup of font names that are treated as symbol fonts.
// Every value is `true`; test membership with `symbolsFonts[name]`.
var symbolsFonts = {
  Dingbats: true,
  Symbol: true,
  ZapfDingbats: true
};
// Maps a predefined CMap name to the function that converts a string in that
// CMap's byte encoding to Unicode.
// The table is assembled from (converter, CMap names) groups so that each
// converter is named once; the groups are listed in the order in which the
// keys must appear in the resulting object.
var CMapConverterList = (function buildCMapConverterList() {
  var converterGroups = [
    [jis7ToUnicode, ['H', 'V']],
    [eucjpToUnicode, ['EUC-H', 'EUC-V']],
    [sjis83pvToUnicode, ['83pv-RKSJ-H']],
    [sjis90pvToUnicode, ['90pv-RKSJ-H']],
    [sjisToUnicode, ['90ms-RKSJ-H', '90ms-RKSJ-V',
                     '90msp-RKSJ-H', '90msp-RKSJ-V']],
    [gbkToUnicode, ['GBK-EUC-H', 'GBKp-EUC-H']],
    [big5ToUnicode, ['B5pc-H', 'ETenms-B5-H', 'ETenms-B5-V']]
  ];
  var converters = {};
  converterGroups.forEach(function (group) {
    var converter = group[0];
    group[1].forEach(function (cmapName) {
      converters[cmapName] = converter;
    });
  });
  return converters;
})();
// CMaps that use Hankaku (halfwidth) Latin glyphs instead of proportional
// ones. They are singled out so that the correct widths can be taken from
// CIDFont dicts. Every listed name maps to `true`.
var HalfwidthCMaps = {};
[
  'H',
  'V',
  'EUC-H',
  'EUC-V',
  '90ms-RKSJ-H',
  '90ms-RKSJ-V',
  'UniJIS-UCS2-HW-H',
  'UniJIS-UCS2-HW-V'
].forEach(function (cmapName) {
  HalfwidthCMaps[cmapName] = true;
});
// decodeBytes(bytes, encoding[, fatal]) turns a byte array in the given
// legacy encoding into a JavaScript string. Which implementation is
// installed depends on what the host environment offers.
var decodeBytes;
if (typeof TextDecoder === 'undefined' &&
    typeof FileReaderSync === 'undefined') {
  // No decoder is available: clear the list so that decodeBytes will never
  // be called (it stays undefined).
  CMapConverterList = {};
} else if (typeof TextDecoder !== 'undefined') {
  // Preferred implementation. The encodings supported by TextDecoder can be
  // found at: http://encoding.spec.whatwg.org/#concept-encoding-get
  decodeBytes = function(bytes, encoding, fatal) {
    // Any truthy `fatal` makes the decoder throw on malformed input.
    var decoderOptions = {fatal: fatal ? true : false};
    var decoder = new TextDecoder(encoding, decoderOptions);
    return decoder.decode(bytes);
  };
} else {
  // Fallback for environments that only provide FileReaderSync; this
  // variant takes no `fatal` argument.
  decodeBytes = function(bytes, encoding) {
    var reader = new FileReaderSync();
    var blob = new Blob([bytes]);
    return reader.readAsText(blob, encoding);
  };
}
/**
 * Converts a string of 7-bit JIS character codes to Unicode.
 * The high bit of every byte is set, and the result is decoded as EUC-JP.
 * @param {string} str - the string to convert, one byte per character.
 * @returns {string} whatever decodeBytes returns for the adjusted bytes.
 */
function jis7ToUnicode(str) {
  var eucBytes = stringToBytes(str);
  for (var pos = 0, count = eucBytes.length; pos < count; pos++) {
    eucBytes[pos] = eucBytes[pos] | 0x80;
  }
  return decodeBytes(eucBytes, 'euc-jp');
}
/**
 * Converts an EUC-JP encoded byte string to Unicode.
 * @param {string} str - the string to convert, one byte per character.
 * @returns {string} the result of decoding the bytes as 'euc-jp'.
 */
function eucjpToUnicode(str) {
  var eucBytes = stringToBytes(str);
  return decodeBytes(eucBytes, 'euc-jp');
}
/**
 * Converts a Shift_JIS encoded byte string to Unicode.
 * @param {string} str - the string to convert, one byte per character.
 * @returns {string} the result of decoding the bytes as 'shift_jis'.
 */
function sjisToUnicode(str) {
  var sjisBytes = stringToBytes(str);
  return decodeBytes(sjisBytes, 'shift_jis');
}
/**
 * Converts a string in the 83pv-RKSJ flavour of Shift_JIS to Unicode.
 * A strict (fatal) decode is attempted first; if it throws, the problem is
 * reported through TODO() and the same bytes are decoded again leniently.
 * @param {string} str - the string to convert, one byte per character.
 * @returns {string} the decoded text.
 */
function sjis83pvToUnicode(str) {
  var sjisBytes = stringToBytes(str);
  var decoded;
  try {
    // TODO: 83pv has incompatible mappings in ed40..ee9c range.
    decoded = decodeBytes(sjisBytes, 'shift_jis', true);
  } catch (decodeError) {
    TODO('Unsupported 83pv character found');
    // For now simply retry without error checking.
    decoded = decodeBytes(sjisBytes, 'shift_jis');
  }
  return decoded;
}
/**
 * Converts a string in the 90pv-RKSJ flavour of Shift_JIS to Unicode.
 * A strict (fatal) decode is attempted first; if it throws, the problem is
 * reported through TODO() and the same bytes are decoded again leniently.
 * @param {string} str - the string to convert, one byte per character.
 * @returns {string} the decoded text.
 */
function sjis90pvToUnicode(str) {
  var sjisBytes = stringToBytes(str);
  var decoded;
  try {
    // TODO: 90pv has incompatible mappings in 8740..879c and eb41..ee9c.
    decoded = decodeBytes(sjisBytes, 'shift_jis', true);
  } catch (decodeError) {
    TODO('Unsupported 90pv character found');
    // For now simply retry without error checking.
    decoded = decodeBytes(sjisBytes, 'shift_jis');
  }
  return decoded;
}
/**
 * Converts a GBK encoded byte string to Unicode.
 * @param {string} str - the string to convert, one byte per character.
 * @returns {string} the result of decoding the bytes as 'gbk'.
 */
function gbkToUnicode(str) {
  var gbkBytes = stringToBytes(str);
  return decodeBytes(gbkBytes, 'gbk');
}
/**
 * Converts a Big5 encoded byte string to Unicode.
 * @param {string} str - the string to convert, one byte per character.
 * @returns {string} the result of decoding the bytes as 'big5'.
 */
function big5ToUnicode(str) {
  var big5Bytes = stringToBytes(str);
  return decodeBytes(big5Bytes, 'big5');
}
// Some characters, e.g. copyrightserif, are mapped to the private use area
// and might not be displayed when standard fonts are used. Well-known
// characters of that kind are redirected here to a similar character in the
// normal range; every other code is returned untouched.
function mapPrivateUseChars(code) {
  var isPrivateUseCopyright =
    code === 0xF8E9 || // copyrightsans
    code === 0xF6D9;   // copyrightserif
  return isPrivateUseCopyright ? 0x00A9 /* copyright */ : code;
}
// Table of Unicode code point ranges; `begin` and `end` are both inclusive.
// The POSITION of an entry is significant: consumers identify a range by its
// index in this array, so entries must never be reordered, inserted or
// removed — only corrected in place. That is also why the list is not sorted
// by code point.
// NOTE(review): the order appears to follow the bit assignments of the
// OpenType OS/2 `ulUnicodeRange` fields (bit 0 = Basic Latin ... bit 122 =
// Domino Tiles), with one range listed per bit — confirm against the code
// that consumes the index.
var UnicodeRanges = [
  { 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
  { 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
  { 'begin': 0x0100, 'end': 0x017F }, // Latin Extended-A
  { 'begin': 0x0180, 'end': 0x024F }, // Latin Extended-B
  { 'begin': 0x0250, 'end': 0x02AF }, // IPA Extensions
  { 'begin': 0x02B0, 'end': 0x02FF }, // Spacing Modifier Letters
  { 'begin': 0x0300, 'end': 0x036F }, // Combining Diacritical Marks
  { 'begin': 0x0370, 'end': 0x03FF }, // Greek and Coptic
  { 'begin': 0x2C80, 'end': 0x2CFF }, // Coptic
  { 'begin': 0x0400, 'end': 0x04FF }, // Cyrillic
  { 'begin': 0x0530, 'end': 0x058F }, // Armenian
  { 'begin': 0x0590, 'end': 0x05FF }, // Hebrew
  { 'begin': 0xA500, 'end': 0xA63F }, // Vai
  { 'begin': 0x0600, 'end': 0x06FF }, // Arabic
  { 'begin': 0x07C0, 'end': 0x07FF }, // NKo
  { 'begin': 0x0900, 'end': 0x097F }, // Devanagari
  { 'begin': 0x0980, 'end': 0x09FF }, // Bengali
  { 'begin': 0x0A00, 'end': 0x0A7F }, // Gurmukhi
  { 'begin': 0x0A80, 'end': 0x0AFF }, // Gujarati
  { 'begin': 0x0B00, 'end': 0x0B7F }, // Oriya
  { 'begin': 0x0B80, 'end': 0x0BFF }, // Tamil
  { 'begin': 0x0C00, 'end': 0x0C7F }, // Telugu
  { 'begin': 0x0C80, 'end': 0x0CFF }, // Kannada
  { 'begin': 0x0D00, 'end': 0x0D7F }, // Malayalam
  { 'begin': 0x0E00, 'end': 0x0E7F }, // Thai
  { 'begin': 0x0E80, 'end': 0x0EFF }, // Lao
  { 'begin': 0x10A0, 'end': 0x10FF }, // Georgian
  { 'begin': 0x1B00, 'end': 0x1B7F }, // Balinese
  { 'begin': 0x1100, 'end': 0x11FF }, // Hangul Jamo
  { 'begin': 0x1E00, 'end': 0x1EFF }, // Latin Extended Additional
  { 'begin': 0x1F00, 'end': 0x1FFF }, // Greek Extended
  { 'begin': 0x2000, 'end': 0x206F }, // General Punctuation
  { 'begin': 0x2070, 'end': 0x209F }, // Superscripts And Subscripts
  { 'begin': 0x20A0, 'end': 0x20CF }, // Currency Symbol
  { 'begin': 0x20D0, 'end': 0x20FF }, // Combining Diacritical Marks For Symbols
  { 'begin': 0x2100, 'end': 0x214F }, // Letterlike Symbols
  { 'begin': 0x2150, 'end': 0x218F }, // Number Forms
  { 'begin': 0x2190, 'end': 0x21FF }, // Arrows
  { 'begin': 0x2200, 'end': 0x22FF }, // Mathematical Operators
  { 'begin': 0x2300, 'end': 0x23FF }, // Miscellaneous Technical
  { 'begin': 0x2400, 'end': 0x243F }, // Control Pictures
  { 'begin': 0x2440, 'end': 0x245F }, // Optical Character Recognition
  { 'begin': 0x2460, 'end': 0x24FF }, // Enclosed Alphanumerics
  { 'begin': 0x2500, 'end': 0x257F }, // Box Drawing
  { 'begin': 0x2580, 'end': 0x259F }, // Block Elements
  { 'begin': 0x25A0, 'end': 0x25FF }, // Geometric Shapes
  { 'begin': 0x2600, 'end': 0x26FF }, // Miscellaneous Symbols
  { 'begin': 0x2700, 'end': 0x27BF }, // Dingbats
  { 'begin': 0x3000, 'end': 0x303F }, // CJK Symbols And Punctuation
  { 'begin': 0x3040, 'end': 0x309F }, // Hiragana
  { 'begin': 0x30A0, 'end': 0x30FF }, // Katakana
  { 'begin': 0x3100, 'end': 0x312F }, // Bopomofo
  { 'begin': 0x3130, 'end': 0x318F }, // Hangul Compatibility Jamo
  { 'begin': 0xA840, 'end': 0xA87F }, // Phags-pa
  { 'begin': 0x3200, 'end': 0x32FF }, // Enclosed CJK Letters And Months
  { 'begin': 0x3300, 'end': 0x33FF }, // CJK Compatibility
  { 'begin': 0xAC00, 'end': 0xD7AF }, // Hangul Syllables
  { 'begin': 0xD800, 'end': 0xDFFF }, // Non-Plane 0 *
  { 'begin': 0x10900, 'end': 0x1091F }, // Phoenicia
  { 'begin': 0x4E00, 'end': 0x9FFF }, // CJK Unified Ideographs
  { 'begin': 0xE000, 'end': 0xF8FF }, // Private Use Area (plane 0)
  { 'begin': 0x31C0, 'end': 0x31EF }, // CJK Strokes
  { 'begin': 0xFB00, 'end': 0xFB4F }, // Alphabetic Presentation Forms
  { 'begin': 0xFB50, 'end': 0xFDFF }, // Arabic Presentation Forms-A
  { 'begin': 0xFE20, 'end': 0xFE2F }, // Combining Half Marks
  { 'begin': 0xFE10, 'end': 0xFE1F }, // Vertical Forms
  { 'begin': 0xFE50, 'end': 0xFE6F }, // Small Form Variants
  { 'begin': 0xFE70, 'end': 0xFEFF }, // Arabic Presentation Forms-B
  { 'begin': 0xFF00, 'end': 0xFFEF }, // Halfwidth And Fullwidth Forms
  { 'begin': 0xFFF0, 'end': 0xFFFF }, // Specials
  { 'begin': 0x0F00, 'end': 0x0FFF }, // Tibetan
  { 'begin': 0x0700, 'end': 0x074F }, // Syriac
  { 'begin': 0x0780, 'end': 0x07BF }, // Thaana
  { 'begin': 0x0D80, 'end': 0x0DFF }, // Sinhala
  { 'begin': 0x1000, 'end': 0x109F }, // Myanmar
  { 'begin': 0x1200, 'end': 0x137F }, // Ethiopic
  { 'begin': 0x13A0, 'end': 0x13FF }, // Cherokee
  { 'begin': 0x1400, 'end': 0x167F }, // Unified Canadian Aboriginal Syllabics
  { 'begin': 0x1680, 'end': 0x169F }, // Ogham
  { 'begin': 0x16A0, 'end': 0x16FF }, // Runic
  { 'begin': 0x1780, 'end': 0x17FF }, // Khmer
  { 'begin': 0x1800, 'end': 0x18AF }, // Mongolian
  { 'begin': 0x2800, 'end': 0x28FF }, // Braille Patterns
  { 'begin': 0xA000, 'end': 0xA48F }, // Yi Syllables
  { 'begin': 0x1700, 'end': 0x171F }, // Tagalog
  { 'begin': 0x10300, 'end': 0x1032F }, // Old Italic
  { 'begin': 0x10330, 'end': 0x1034F }, // Gothic
  { 'begin': 0x10400, 'end': 0x1044F }, // Deseret
  { 'begin': 0x1D000, 'end': 0x1D0FF }, // Byzantine Musical Symbols
  { 'begin': 0x1D400, 'end': 0x1D7FF }, // Mathematical Alphanumeric Symbols
  // Plane 15 starts at U+F0000. The table previously began at 0xFF000, which
  // left U+F0000..U+FEFFF outside every range.
  { 'begin': 0xF0000, 'end': 0xFFFFD }, // Private Use (plane 15)
  { 'begin': 0xFE00, 'end': 0xFE0F }, // Variation Selectors
  { 'begin': 0xE0000, 'end': 0xE007F }, // Tags
  { 'begin': 0x1900, 'end': 0x194F }, // Limbu
  { 'begin': 0x1950, 'end': 0x197F }, // Tai Le
  { 'begin': 0x1980, 'end': 0x19DF }, // New Tai Lue
  { 'begin': 0x1A00, 'end': 0x1A1F }, // Buginese
  { 'begin': 0x2C00, 'end': 0x2C5F }, // Glagolitic
  { 'begin': 0x2D30, 'end': 0x2D7F }, // Tifinagh
  { 'begin': 0x4DC0, 'end': 0x4DFF }, // Yijing Hexagram Symbols
  { 'begin': 0xA800, 'end': 0xA82F }, // Syloti Nagri
  { 'begin': 0x10000, 'end': 0x1007F }, // Linear B Syllabary
  { 'begin': 0x10140, 'end': 0x1018F }, // Ancient Greek Numbers
  { 'begin': 0x10380, 'end': 0x1039F }, // Ugaritic
  { 'begin': 0x103A0, 'end': 0x103DF }, // Old Persian
  { 'begin': 0x10450, 'end': 0x1047F }, // Shavian
  { 'begin': 0x10480, 'end': 0x104AF }, // Osmanya
  { 'begin': 0x10800, 'end': 0x1083F }, // Cypriot Syllabary
  { 'begin': 0x10A00, 'end': 0x10A5F }, // Kharoshthi
  { 'begin': 0x1D300, 'end': 0x1D35F }, // Tai Xuan Jing Symbols
  { 'begin': 0x12000, 'end': 0x123FF }, // Cuneiform
  { 'begin': 0x1D360, 'end': 0x1D37F }, // Counting Rod Numerals
  { 'begin': 0x1B80, 'end': 0x1BBF }, // Sundanese
  { 'begin': 0x1C00, 'end': 0x1C4F }, // Lepcha
  { 'begin': 0x1C50, 'end': 0x1C7F }, // Ol Chiki
  { 'begin': 0xA880, 'end': 0xA8DF }, // Saurashtra
  { 'begin': 0xA900, 'end': 0xA92F }, // Kayah Li
  { 'begin': 0xA930, 'end': 0xA95F }, // Rejang
  { 'begin': 0xAA00, 'end': 0xAA5F }, // Cham
  { 'begin': 0x10190, 'end': 0x101CF }, // Ancient Symbols
  { 'begin': 0x101D0, 'end': 0x101FF }, // Phaistos Disc
  { 'begin': 0x102A0, 'end': 0x102DF }, // Carian
  { 'begin': 0x1F030, 'end': 0x1F09F } // Domino Tiles
];
// Ordered list of glyph names, starting with '.notdef', '.null' and
// 'nonmarkingreturn' and ending with 'dcroat'. The position of a name in the
// array is significant, so entries must not be reordered, added or removed.
// NOTE(review): presumably the standard Macintosh glyph ordering referenced
// by TrueType 'post' tables -- confirm against the code that consumes it.
var MacStandardGlyphOrdering = [
'.notdef', '.null', 'nonmarkingreturn', 'space', 'exclam', 'quotedbl',
'numbersign', 'dollar', 'percent', 'ampersand', 'quotesingle', 'parenleft',
'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', 'slash',
'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
'nine', 'colon', 'semicolon', 'less', 'equal', 'greater', 'question', 'at',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft',
'backslash', 'bracketright', 'asciicircum', 'underscore', 'grave', 'a', 'b',
'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
'asciitilde', 'Adieresis', 'Aring', 'Ccedilla', 'Eacute', 'Ntilde',
'Odieresis', 'Udieresis', 'aacute', 'agrave', 'acircumflex', 'adieresis',
'atilde', 'aring', 'ccedilla', 'eacute', 'egrave', 'ecircumflex', 'edieresis',
'iacute', 'igrave', 'icircumflex', 'idieresis', 'ntilde', 'oacute', 'ograve',
'ocircumflex', 'odieresis', 'otilde', 'uacute', 'ugrave', 'ucircumflex',
'udieresis', 'dagger', 'degree', 'cent', 'sterling', 'section', 'bullet',
'paragraph', 'germandbls', 'registered', 'copyright', 'trademark', 'acute',
'dieresis', 'notequal', 'AE', 'Oslash', 'infinity', 'plusminus', 'lessequal',
'greaterequal', 'yen', 'mu', 'partialdiff', 'summation', 'product', 'pi',
'integral', 'ordfeminine', 'ordmasculine', 'Omega', 'ae', 'oslash',
'questiondown', 'exclamdown', 'logicalnot', 'radical', 'florin',
'approxequal', 'Delta', 'guillemotleft', 'guillemotright', 'ellipsis',
'nonbreakingspace', 'Agrave', 'Atilde', 'Otilde', 'OE', 'oe', 'endash',
'emdash', 'quotedblleft', 'quotedblright', 'quoteleft', 'quoteright',
'divide', 'lozenge', 'ydieresis', 'Ydieresis', 'fraction', 'currency',
'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'daggerdbl', 'periodcentered',
'quotesinglbase', 'quotedblbase', 'perthousand', 'Acircumflex',
'Ecircumflex', 'Aacute', 'Edieresis', 'Egrave', 'Iacute', 'Icircumflex',
'Idieresis', 'Igrave', 'Oacute', 'Ocircumflex', 'apple', 'Ograve', 'Uacute',
'Ucircumflex', 'Ugrave', 'dotlessi', 'circumflex', 'tilde', 'macron',
'breve', 'dotaccent', 'ring', 'cedilla', 'hungarumlaut', 'ogonek', 'caron',
'Lslash', 'lslash', 'Scaron', 'scaron', 'Zcaron', 'zcaron', 'brokenbar',
'Eth', 'eth', 'Yacute', 'yacute', 'Thorn', 'thorn', 'minus', 'multiply',
'onesuperior', 'twosuperior', 'threesuperior', 'onehalf', 'onequarter',
'threequarters', 'franc', 'Gbreve', 'gbreve', 'Idotaccent', 'Scedilla',
'scedilla', 'Cacute', 'cacute', 'Ccaron', 'ccaron', 'dcroat'];
/**
 * Finds the index of the UnicodeRanges entry that contains a code point.
 *
 * Every entry stores the first (`begin`) and the last (`end`) code point of
 * its block, so both bounds are inclusive.
 *
 * @param {number} value - Code point to look up.
 * @returns {number} Index of the first matching entry, or -1 when no entry
 *   contains `value`.
 */
function getUnicodeRangeFor(value) {
  for (var i = 0, ii = UnicodeRanges.length; i < ii; i++) {
    var range = UnicodeRanges[i];
    // `end` is the last code point of the block (0x1032F is directly followed
    // by a block that begins at 0x10330), so it has to match as well.
    if (value >= range.begin && value <= range.end) {
      return i;
    }
  }
  return -1;
}
/**
 * Tells whether a code point falls inside one of the two UnicodeRanges
 * entries that are treated as right-to-left.
 *
 * NOTE(review): entries 13 and 11 are expected to be the Arabic and Hebrew
 * blocks -- confirm against the UnicodeRanges table.
 *
 * @param {number} value - Code point to test.
 * @returns {boolean} True when `value` lies within entry 13 or entry 11
 *   (both bounds inclusive), false otherwise.
 */
function isRTLRangeFor(value) {
  // Checked in the same order as before: entry 13 first, then entry 11.
  var rtlRangeIndexes = [13, 11];
  for (var i = 0, ii = rtlRangeIndexes.length; i < ii; i++) {
    var range = UnicodeRanges[rtlRangeIndexes[i]];
    // `end` holds the last code point of the block, so it is inclusive.
    if (value >= range.begin && value <= range.end) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether a code point lies in one of the ranges this file treats
 * as special:
 *   - at or below 0x1F,
 *   - from 127 up to, but excluding, GLYPH_AREA_SIZE,
 *   - from CMAP_GLYPH_OFFSET up to, but excluding,
 *     CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE.
 *
 * @param {number} unicode - Code point to classify.
 * @returns {boolean} True when the code point is in any of those ranges.
 */
function isSpecialUnicode(unicode) {
  if (unicode <= 0x1F) {
    return true;
  }
  if (unicode >= 127 && unicode < GLYPH_AREA_SIZE) {
    return true;
  }
  if (unicode < CMAP_GLYPH_OFFSET) {
    return false;
  }
  var remappedAreaEnd = CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE;
  return unicode < remappedAreaEnd;
}
// The normalization table is obtained by filtering the Unicode Character
// Database for entries that have a <compat> decomposition.
var NormalizedUnicodes = {
'\u00A8': '\u0020\u0308',
'\u00AF': '\u0020\u0304',
'\u00B4': '\u0020\u0301',
'\u00B5': '\u03BC',
'\u00B8': '\u0020\u0327',
'\u0132': '\u0049\u004A',
'\u0133': '\u0069\u006A',
'\u013F': '\u004C\u00B7',
'\u0140': '\u006C\u00B7',
'\u0149': '\u02BC\u006E',
'\u017F': '\u0073',
'\u01C4': '\u0044\u017D',
'\u01C5': '\u0044\u017E',
'\u01C6': '\u0064\u017E',
'\u01C7': '\u004C\u004A',
'\u01C8': '\u004C\u006A',
'\u01C9': '\u006C\u006A',
'\u01CA': '\u004E\u004A',
'\u01CB': '\u004E\u006A',
'\u01CC': '\u006E\u006A',
'\u01F1': '\u0044\u005A',
'\u01F2': '\u0044\u007A',
'\u01F3': '\u0064\u007A',
'\u02D8': '\u0020\u0306',
'\u02D9': '\u0020\u0307',
'\u02DA': '\u0020\u030A',
'\u02DB': '\u0020\u0328',
'\u02DC': '\u0020\u0303',
'\u02DD': '\u0020\u030B',
'\u037A': '\u0020\u0345',
'\u0384': '\u0020\u0301',
'\u03D0': '\u03B2',
'\u03D1': '\u03B8',
'\u03D2': '\u03A5',
'\u03D5': '\u03C6',
'\u03D6': '\u03C0',
'\u03F0': '\u03BA',
'\u03F1': '\u03C1',
'\u03F2': '\u03C2',
'\u03F4': '\u0398',
'\u03F5': '\u03B5',
'\u03F9': '\u03A3',
'\u0587': '\u0565\u0582',
'\u0675': '\u0627\u0674',
'\u0676': '\u0648\u0674',
'\u0677': '\u06C7\u0674',
'\u0678': '\u064A\u0674',
'\u0E33': '\u0E4D\u0E32',
'\u0EB3': '\u0ECD\u0EB2',
'\u0EDC': '\u0EAB\u0E99',
'\u0EDD': '\u0EAB\u0EA1',
'\u0F77': '\u0FB2\u0F81',
'\u0F79': '\u0FB3\u0F81',
'\u1E9A': '\u0061\u02BE',
'\u1FBD': '\u0020\u0313',
'\u1FBF': '\u0020\u0313',
'\u1FC0': '\u0020\u0342',
'\u1FFE': '\u0020\u0314',
'\u2002': '\u0020',
'\u2003': '\u0020',
'\u2004': '\u0020',
'\u2005': '\u0020',
'\u2006': '\u0020',
'\u2008': '\u0020',
'\u2009': '\u0020',
'\u200A': '\u0020',
'\u2017': '\u0020\u0333',
'\u2024': '\u002E',
'\u2025': '\u002E\u002E',
'\u2026': '\u002E\u002E\u002E',
'\u2033': '\u2032\u2032',
'\u2034': '\u2032\u2032\u2032',
'\u2036': '\u2035\u2035',
'\u2037': '\u2035\u2035\u2035',
'\u203C': '\u0021\u0021',
'\u203E': '\u0020\u0305',
'\u2047': '\u003F\u003F',
'\u2048': '\u003F\u0021',
'\u2049': '\u0021\u003F',
'\u2057': '\u2032\u2032\u2032\u2032',
'\u205F': '\u0020',
'\u20A8': '\u0052\u0073',
'\u2100': '\u0061\u002F\u0063',
'\u2101': '\u0061\u002F\u0073',
'\u2103': '\u00B0\u0043',
'\u2105': '\u0063\u002F\u006F',
'\u2106': '\u0063\u002F\u0075',
'\u2107': '\u0190',
'\u2109': '\u00B0\u0046',
'\u2116': '\u004E\u006F',
'\u2121': '\u0054\u0045\u004C',
'\u2135': '\u05D0',
'\u2136': '\u05D1',
'\u2137': '\u05D2',
'\u2138': '\u05D3',
'\u213B': '\u0046\u0041\u0058',
'\u2160': '\u0049',
'\u2161': '\u0049\u0049',
'\u2162': '\u0049\u0049\u0049',
'\u2163': '\u0049\u0056',
'\u2164': '\u0056',
'\u2165': '\u0056\u0049',
'\u2166': '\u0056\u0049\u0049',
'\u2167': '\u0056\u0049\u0049\u0049',
'\u2168': '\u0049\u0058',
'\u2169': '\u0058',
'\u216A': '\u0058\u0049',
'\u216B': '\u0058\u0049\u0049',
'\u216C': '\u004C',
'\u216D': '\u0043',
'\u216E': '\u0044',
'\u216F': '\u004D',
'\u2170': '\u0069',
'\u2171': '\u0069\u0069',
'\u2172': '\u0069\u0069\u0069',
'\u2173': '\u0069\u0076',
'\u2174': '\u0076',
'\u2175': '\u0076\u0069',
'\u2176': '\u0076\u0069\u0069',
'\u2177': '\u0076\u0069\u0069\u0069',
'\u2178': '\u0069\u0078',
'\u2179': '\u0078',
'\u217A': '\u0078\u0069',
'\u217B': '\u0078\u0069\u0069',
'\u217C': '\u006C',
'\u217D': '\u0063',
'\u217E': '\u0064',
'\u217F': '\u006D',
'\u222C': '\u222B\u222B',
'\u222D': '\u222B\u222B\u222B',
'\u222F': '\u222E\u222E',
'\u2230': '\u222E\u222E\u222E',
'\u2474': '\u0028\u0031\u0029',
'\u2475': '\u0028\u0032\u0029',
'\u2476': '\u0028\u0033\u0029',
'\u2477': '\u0028\u0034\u0029',
'\u2478': '\u0028\u0035\u0029',
'\u2479': '\u0028\u0036\u0029',
'\u247A': '\u0028\u0037\u0029',
'\u247B': '\u0028\u0038\u0029',
'\u247C': '\u0028\u0039\u0029',
'\u247D': '\u0028\u0031\u0030\u0029',
'\u247E': '\u0028\u0031\u0031\u0029',
'\u247F': '\u0028\u0031\u0032\u0029',
'\u2480': '\u0028\u0031\u0033\u0029',
'\u2481': '\u0028\u0031\u0034\u0029',
'\u2482': '\u0028\u0031\u0035\u0029',
'\u2483': '\u0028\u0031\u0036\u0029',
'\u2484': '\u0028\u0031\u0037\u0029',
'\u2485': '\u0028\u0031\u0038\u0029',
'\u2486': '\u0028\u0031\u0039\u0029',
'\u2487': '\u0028\u0032\u0030\u0029',
'\u2488': '\u0031\u002E',
'\u2489': '\u0032\u002E',
'\u248A': '\u0033\u002E',
'\u248B': '\u0034\u002E',
'\u248C': '\u0035\u002E',
'\u248D': '\u0036\u002E',
'\u248E': '\u0037\u002E',
'\u248F': '\u0038\u002E',
'\u2490': '\u0039\u002E',
'\u2491': '\u0031\u0030\u002E',
'\u2492': '\u0031\u0031\u002E',
'\u2493': '\u0031\u0032\u002E',
'\u2494': '\u0031\u0033\u002E',
'\u2495': '\u0031\u0034\u002E',
'\u2496': '\u0031\u0035\u002E',
'\u2497': '\u0031\u0036\u002E',
'\u2498': '\u0031\u0037\u002E',
'\u2499': '\u0031\u0038\u002E',
'\u249A': '\u0031\u0039\u002E',
'\u249B': '\u0032\u0030\u002E',
'\u249C': '\u0028\u0061\u0029',
'\u249D': '\u0028\u0062\u0029',
'\u249E': '\u0028\u0063\u0029',
'\u249F': '\u0028\u0064\u0029',
'\u24A0': '\u0028\u0065\u0029',
'\u24A1': '\u0028\u0066\u0029',
'\u24A2': '\u0028\u0067\u0029',
'\u24A3': '\u0028\u0068\u0029',
'\u24A4': '\u0028\u0069\u0029',
'\u24A5': '\u0028\u006A\u0029',
'\u24A6': '\u0028\u006B\u0029',
'\u24A7': '\u0028\u006C\u0029',
'\u24A8': '\u0028\u006D\u0029',
'\u24A9': '\u0028\u006E\u0029',
'\u24AA': '\u0028\u006F\u0029',
'\u24AB': '\u0028\u0070\u0029',
'\u24AC': '\u0028\u0071\u0029',
'\u24AD': '\u0028\u0072\u0029',
'\u24AE': '\u0028\u0073\u0029',
'\u24AF': '\u0028\u0074\u0029',
'\u24B0': '\u0028\u0075\u0029',
'\u24B1': '\u0028\u0076\u0029',
'\u24B2': '\u0028\u0077\u0029',
'\u24B3': '\u0028\u0078\u0029',
'\u24B4': '\u0028\u0079\u0029',
'\u24B5': '\u0028\u007A\u0029',
'\u2A0C': '\u222B\u222B\u222B\u222B',
'\u2A74': '\u003A\u003A\u003D',
'\u2A75': '\u003D\u003D',
'\u2A76': '\u003D\u003D\u003D',
'\u2E9F': '\u6BCD',
'\u2EF3': '\u9F9F',
'\u2F00': '\u4E00',
'\u2F01': '\u4E28',
'\u2F02': '\u4E36',
'\u2F03': '\u4E3F',
'\u2F04': '\u4E59',
'\u2F05': '\u4E85',
'\u2F06': '\u4E8C',
'\u2F07': '\u4EA0',
'\u2F08': '\u4EBA',
'\u2F09': '\u513F',
'\u2F0A': '\u5165',
'\u2F0B': '\u516B',
'\u2F0C': '\u5182',
'\u2F0D': '\u5196',
'\u2F0E': '\u51AB',
'\u2F0F': '\u51E0',
'\u2F10': '\u51F5',
'\u2F11': '\u5200',
'\u2F12': '\u529B',
'\u2F13': '\u52F9',
'\u2F14': '\u5315',
'\u2F15': '\u531A',
'\u2F16': '\u5338',
'\u2F17': '\u5341',
'\u2F18': '\u535C',
'\u2F19': '\u5369',
'\u2F1A': '\u5382',
'\u2F1B': '\u53B6',
'\u2F1C': '\u53C8',
'\u2F1D': '\u53E3',
'\u2F1E': '\u56D7',
'\u2F1F': '\u571F',
'\u2F20': '\u58EB',
'\u2F21': '\u5902',
'\u2F22': '\u590A',
'\u2F23': '\u5915',
'\u2F24': '\u5927',
'\u2F25': '\u5973',
'\u2F26': '\u5B50',
'\u2F27': '\u5B80',
'\u2F28': '\u5BF8',
'\u2F29': '\u5C0F',
'\u2F2A': '\u5C22',
'\u2F2B': '\u5C38',
'\u2F2C': '\u5C6E',
'\u2F2D': '\u5C71',
'\u2F2E': '\u5DDB',
'\u2F2F': '\u5DE5',
'\u2F30': '\u5DF1',
'\u2F31': '\u5DFE',
'\u2F32': '\u5E72',
'\u2F33': '\u5E7A',
'\u2F34': '\u5E7F',
'\u2F35': '\u5EF4',
'\u2F36': '\u5EFE',
'\u2F37': '\u5F0B',
'\u2F38': '\u5F13',
'\u2F39': '\u5F50',
'\u2F3A': '\u5F61',
'\u2F3B': '\u5F73',
'\u2F3C': '\u5FC3',
'\u2F3D': '\u6208',
'\u2F3E': '\u6236',
'\u2F3F': '\u624B',
'\u2F40': '\u652F',
'\u2F41': '\u6534',
'\u2F42': '\u6587',
'\u2F43': '\u6597',
'\u2F44': '\u65A4',
'\u2F45': '\u65B9',
'\u2F46': '\u65E0',
'\u2F47': '\u65E5',
'\u2F48': '\u66F0',
'\u2F49': '\u6708',
'\u2F4A': '\u6728',
'\u2F4B': '\u6B20',
'\u2F4C': '\u6B62',
'\u2F4D': '\u6B79',
'\u2F4E': '\u6BB3',
'\u2F4F': '\u6BCB',
'\u2F50': '\u6BD4',
'\u2F51': '\u6BDB',
'\u2F52': '\u6C0F',
'\u2F53': '\u6C14',
'\u2F54': '\u6C34',
'\u2F55': '\u706B',
'\u2F56': '\u722A',
'\u2F57': '\u7236',
'\u2F58': '\u723B',
'\u2F59': '\u723F',
'\u2F5A': '\u7247',
'\u2F5B': '\u7259',
'\u2F5C': '\u725B',
'\u2F5D': '\u72AC',
'\u2F5E': '\u7384',
'\u2F5F': '\u7389',
'\u2F60': '\u74DC',
'\u2F61': '\u74E6',
'\u2F62': '\u7518',
'\u2F63': '\u751F',
'\u2F64': '\u7528',
'\u2F65': '\u7530',
'\u2F66': '\u758B',
'\u2F67': '\u7592',
'\u2F68': '\u7676',
'\u2F69': '\u767D',
'\u2F6A': '\u76AE',
'\u2F6B': '\u76BF',
'\u2F6C': '\u76EE',
'\u2F6D': '\u77DB',
'\u2F6E': '\u77E2',
'\u2F6F': '\u77F3',
'\u2F70': '\u793A',
'\u2F71': '\u79B8',
'\u2F72': '\u79BE',
'\u2F73': '\u7A74',
'\u2F74': '\u7ACB',
'\u2F75': '\u7AF9',
'\u2F76': '\u7C73',
'\u2F77': '\u7CF8',
'\u2F78': '\u7F36',
'\u2F79': '\u7F51',
'\u2F7A': '\u7F8A',
'\u2F7B': '\u7FBD',
'\u2F7C': '\u8001',
'\u2F7D': '\u800C',
'\u2F7E': '\u8012',
'\u2F7F': '\u8033',
'\u2F80': '\u807F',
'\u2F81': '\u8089',
'\u2F82': '\u81E3',
'\u2F83': '\u81EA',
'\u2F84': '\u81F3',
'\u2F85': '\u81FC',
'\u2F86': '\u820C',
'\u2F87': '\u821B',
'\u2F88': '\u821F',
'\u2F89': '\u826E',
'\u2F8A': '\u8272',
'\u2F8B': '\u8278',
'\u2F8C': '\u864D',
'\u2F8D': '\u866B',
'\u2F8E': '\u8840',
'\u2F8F': '\u884C',
'\u2F90': '\u8863',
'\u2F91': '\u897E',
'\u2F92': '\u898B',
'\u2F93': '\u89D2',
'\u2F94': '\u8A00',
'\u2F95': '\u8C37',
'\u2F96': '\u8C46',
'\u2F97': '\u8C55',
'\u2F98': '\u8C78',
'\u2F99': '\u8C9D',
'\u2F9A': '\u8D64',
'\u2F9B': '\u8D70',
'\u2F9C': '\u8DB3',
'\u2F9D': '\u8EAB',
'\u2F9E': '\u8ECA',
'\u2F9F': '\u8F9B',
'\u2FA0': '\u8FB0',
'\u2FA1': '\u8FB5',
'\u2FA2': '\u9091',
'\u2FA3': '\u9149',
'\u2FA4': '\u91C6',
'\u2FA5': '\u91CC',
'\u2FA6': '\u91D1',
'\u2FA7': '\u9577',
'\u2FA8': '\u9580',
'\u2FA9': '\u961C',
'\u2FAA': '\u96B6',
'\u2FAB': '\u96B9',
'\u2FAC': '\u96E8',
'\u2FAD': '\u9751',
'\u2FAE': '\u975E',
'\u2FAF': '\u9762',
'\u2FB0': '\u9769',
'\u2FB1': '\u97CB',
'\u2FB2': '\u97ED',
'\u2FB3': '\u97F3',
'\u2FB4': '\u9801',
'\u2FB5': '\u98A8',
'\u2FB6': '\u98DB',
'\u2FB7': '\u98DF',
'\u2FB8': '\u9996',
'\u2FB9': '\u9999',
'\u2FBA': '\u99AC',
'\u2FBB': '\u9AA8',
'\u2FBC': '\u9AD8',
'\u2FBD': '\u9ADF',
'\u2FBE': '\u9B25',
'\u2FBF': '\u9B2F',
'\u2FC0': '\u9B32',
'\u2FC1': '\u9B3C',
'\u2FC2': '\u9B5A',
'\u2FC3': '\u9CE5',
'\u2FC4': '\u9E75',
'\u2FC5': '\u9E7F',
'\u2FC6': '\u9EA5',
'\u2FC7': '\u9EBB',
'\u2FC8': '\u9EC3',
'\u2FC9': '\u9ECD',
'\u2FCA': '\u9ED1',
'\u2FCB': '\u9EF9',
'\u2FCC': '\u9EFD',
'\u2FCD': '\u9F0E',
'\u2FCE': '\u9F13',
'\u2FCF': '\u9F20',
'\u2FD0': '\u9F3B',
'\u2FD1': '\u9F4A',
'\u2FD2': '\u9F52',
'\u2FD3': '\u9F8D',
'\u2FD4': '\u9F9C',
'\u2FD5': '\u9FA0',
'\u3036': '\u3012',
'\u3038': '\u5341',
'\u3039': '\u5344',
'\u303A': '\u5345',
'\u309B': '\u0020\u3099',
'\u309C': '\u0020\u309A',
'\u3131': '\u1100',
'\u3132': '\u1101',
'\u3133': '\u11AA',
'\u3134': '\u1102',
'\u3135': '\u11AC',
'\u3136': '\u11AD',
'\u3137': '\u1103',
'\u3138': '\u1104',
'\u3139': '\u1105',
'\u313A': '\u11B0',
'\u313B': '\u11B1',
'\u313C': '\u11B2',
'\u313D': '\u11B3',
'\u313E': '\u11B4',
'\u313F': '\u11B5',
'\u3140': '\u111A',
'\u3141': '\u1106',
'\u3142': '\u1107',
'\u3143': '\u1108',
'\u3144': '\u1121',
'\u3145': '\u1109',
'\u3146': '\u110A',
'\u3147': '\u110B',
'\u3148': '\u110C',
'\u3149': '\u110D',
'\u314A': '\u110E',
'\u314B': '\u110F',
'\u314C': '\u1110',
'\u314D': '\u1111',
'\u314E': '\u1112',