@qooxdoo/framework
Version:
The JS Framework for Coders
445 lines (404 loc) • 19.8 kB
JavaScript
/* ************************************************************************
qooxdoo - the new era of web development
http://qooxdoo.org
Copyright:
2004-2008 1&1 Internet AG, Germany, http://www.1und1.de
License:
MIT: https://opensource.org/licenses/MIT
See the LICENSE file in the project's top-level directory for details.
Authors:
* Fabian Jakobs (fjakobs)
************************************************************************ */
/**
* A Collection of utility functions to escape and unescape strings.
*/
qx.Bootstrap.define("qx.bom.String", {
/*
*****************************************************************************
STATICS
*****************************************************************************
*/
statics: {
/** Mapping of HTML entity names to the corresponding char code */
TO_CHARCODE: {
quot: 34, // " - double-quote
amp: 38, // &
lt: 60, // <
gt: 62, // >
// http://www.w3.org/TR/REC-html40/sgml/entities.html
// ISO 8859-1 characters
nbsp: 160, // no-break space
iexcl: 161, // inverted exclamation mark
cent: 162, // cent sign
pound: 163, // pound sterling sign
curren: 164, // general currency sign
yen: 165, // yen sign
brvbar: 166, // broken (vertical) bar
sect: 167, // section sign
uml: 168, // umlaut (dieresis)
copy: 169, // copyright sign
ordf: 170, // ordinal indicator, feminine
laquo: 171, // angle quotation mark, left
not: 172, // not sign
shy: 173, // soft hyphen
reg: 174, // registered sign
macr: 175, // macron
deg: 176, // degree sign
plusmn: 177, // plus-or-minus sign
sup2: 178, // superscript two
sup3: 179, // superscript three
acute: 180, // acute accent
micro: 181, // micro sign
para: 182, // pilcrow (paragraph sign)
middot: 183, // middle dot
cedil: 184, // cedilla
sup1: 185, // superscript one
ordm: 186, // ordinal indicator, masculine
raquo: 187, // angle quotation mark, right
frac14: 188, // fraction one-quarter
frac12: 189, // fraction one-half
frac34: 190, // fraction three-quarters
iquest: 191, // inverted question mark
Agrave: 192, // capital A, grave accent
Aacute: 193, // capital A, acute accent
Acirc: 194, // capital A, circumflex accent
Atilde: 195, // capital A, tilde
Auml: 196, // capital A, dieresis or umlaut mark
Aring: 197, // capital A, ring
AElig: 198, // capital AE diphthong (ligature)
Ccedil: 199, // capital C, cedilla
Egrave: 200, // capital E, grave accent
Eacute: 201, // capital E, acute accent
Ecirc: 202, // capital E, circumflex accent
Euml: 203, // capital E, dieresis or umlaut mark
Igrave: 204, // capital I, grave accent
Iacute: 205, // capital I, acute accent
Icirc: 206, // capital I, circumflex accent
Iuml: 207, // capital I, dieresis or umlaut mark
ETH: 208, // capital Eth, Icelandic
Ntilde: 209, // capital N, tilde
Ograve: 210, // capital O, grave accent
Oacute: 211, // capital O, acute accent
Ocirc: 212, // capital O, circumflex accent
Otilde: 213, // capital O, tilde
Ouml: 214, // capital O, dieresis or umlaut mark
times: 215, // multiply sign
Oslash: 216, // capital O, slash
Ugrave: 217, // capital U, grave accent
Uacute: 218, // capital U, acute accent
Ucirc: 219, // capital U, circumflex accent
Uuml: 220, // capital U, dieresis or umlaut mark
Yacute: 221, // capital Y, acute accent
THORN: 222, // capital THORN, Icelandic
szlig: 223, // small sharp s, German (sz ligature)
agrave: 224, // small a, grave accent
aacute: 225, // small a, acute accent
acirc: 226, // small a, circumflex accent
atilde: 227, // small a, tilde
auml: 228, // small a, dieresis or umlaut mark
aring: 229, // small a, ring
aelig: 230, // small ae diphthong (ligature)
ccedil: 231, // small c, cedilla
egrave: 232, // small e, grave accent
eacute: 233, // small e, acute accent
ecirc: 234, // small e, circumflex accent
euml: 235, // small e, dieresis or umlaut mark
igrave: 236, // small i, grave accent
iacute: 237, // small i, acute accent
icirc: 238, // small i, circumflex accent
iuml: 239, // small i, dieresis or umlaut mark
eth: 240, // small eth, Icelandic
ntilde: 241, // small n, tilde
ograve: 242, // small o, grave accent
oacute: 243, // small o, acute accent
ocirc: 244, // small o, circumflex accent
otilde: 245, // small o, tilde
ouml: 246, // small o, dieresis or umlaut mark
divide: 247, // divide sign
oslash: 248, // small o, slash
ugrave: 249, // small u, grave accent
uacute: 250, // small u, acute accent
ucirc: 251, // small u, circumflex accent
uuml: 252, // small u, dieresis or umlaut mark
yacute: 253, // small y, acute accent
thorn: 254, // small thorn, Icelandic
yuml: 255, // small y, dieresis or umlaut mark
// Latin Extended-B
fnof: 402, // latin small f with hook = function= florin, U+0192 ISOtech
// Greek
Alpha: 913, // greek capital letter alpha, U+0391
Beta: 914, // greek capital letter beta, U+0392
Gamma: 915, // greek capital letter gamma,U+0393 ISOgrk3
Delta: 916, // greek capital letter delta,U+0394 ISOgrk3
Epsilon: 917, // greek capital letter epsilon, U+0395
Zeta: 918, // greek capital letter zeta, U+0396
Eta: 919, // greek capital letter eta, U+0397
Theta: 920, // greek capital letter theta,U+0398 ISOgrk3
Iota: 921, // greek capital letter iota, U+0399
Kappa: 922, // greek capital letter kappa, U+039A
Lambda: 923, // greek capital letter lambda,U+039B ISOgrk3
Mu: 924, // greek capital letter mu, U+039C
Nu: 925, // greek capital letter nu, U+039D
Xi: 926, // greek capital letter xi, U+039E ISOgrk3
Omicron: 927, // greek capital letter omicron, U+039F
Pi: 928, // greek capital letter pi, U+03A0 ISOgrk3
Rho: 929, // greek capital letter rho, U+03A1
// there is no Sigmaf, and no U+03A2 character either
Sigma: 931, // greek capital letter sigma,U+03A3 ISOgrk3
Tau: 932, // greek capital letter tau, U+03A4
Upsilon: 933, // greek capital letter upsilon,U+03A5 ISOgrk3
Phi: 934, // greek capital letter phi,U+03A6 ISOgrk3
Chi: 935, // greek capital letter chi, U+03A7
Psi: 936, // greek capital letter psi,U+03A8 ISOgrk3
Omega: 937, // greek capital letter omega,U+03A9 ISOgrk3
alpha: 945, // greek small letter alpha,U+03B1 ISOgrk3
beta: 946, // greek small letter beta, U+03B2 ISOgrk3
gamma: 947, // greek small letter gamma,U+03B3 ISOgrk3
delta: 948, // greek small letter delta,U+03B4 ISOgrk3
epsilon: 949, // greek small letter epsilon,U+03B5 ISOgrk3
zeta: 950, // greek small letter zeta, U+03B6 ISOgrk3
eta: 951, // greek small letter eta, U+03B7 ISOgrk3
theta: 952, // greek small letter theta,U+03B8 ISOgrk3
iota: 953, // greek small letter iota, U+03B9 ISOgrk3
kappa: 954, // greek small letter kappa,U+03BA ISOgrk3
lambda: 955, // greek small letter lambda,U+03BB ISOgrk3
mu: 956, // greek small letter mu, U+03BC ISOgrk3
nu: 957, // greek small letter nu, U+03BD ISOgrk3
xi: 958, // greek small letter xi, U+03BE ISOgrk3
omicron: 959, // greek small letter omicron, U+03BF NEW
pi: 960, // greek small letter pi, U+03C0 ISOgrk3
rho: 961, // greek small letter rho, U+03C1 ISOgrk3
sigmaf: 962, // greek small letter final sigma,U+03C2 ISOgrk3
sigma: 963, // greek small letter sigma,U+03C3 ISOgrk3
tau: 964, // greek small letter tau, U+03C4 ISOgrk3
upsilon: 965, // greek small letter upsilon,U+03C5 ISOgrk3
phi: 966, // greek small letter phi, U+03C6 ISOgrk3
chi: 967, // greek small letter chi, U+03C7 ISOgrk3
psi: 968, // greek small letter psi, U+03C8 ISOgrk3
omega: 969, // greek small letter omega,U+03C9 ISOgrk3
thetasym: 977, // greek small letter theta symbol,U+03D1 NEW
upsih: 978, // greek upsilon with hook symbol,U+03D2 NEW
piv: 982, // greek pi symbol, U+03D6 ISOgrk3
// General Punctuation
bull: 8226, // bullet = black small circle,U+2022 ISOpub
// bullet is NOT the same as bullet operator, U+2219
hellip: 8230, // horizontal ellipsis = three dot leader,U+2026 ISOpub
prime: 8242, // prime = minutes = feet, U+2032 ISOtech
Prime: 8243, // double prime = seconds = inches,U+2033 ISOtech
oline: 8254, // overline = spacing overscore,U+203E NEW
frasl: 8260, // fraction slash, U+2044 NEW
// Letterlike Symbols
weierp: 8472, // script capital P = power set= Weierstrass p, U+2118 ISOamso
image: 8465, // blackletter capital I = imaginary part,U+2111 ISOamso
real: 8476, // blackletter capital R = real part symbol,U+211C ISOamso
trade: 8482, // trade mark sign, U+2122 ISOnum
alefsym: 8501, // alef symbol = first transfinite cardinal,U+2135 NEW
// alef symbol is NOT the same as hebrew letter alef,U+05D0 although the same glyph could be used to depict both characters
// Arrows
larr: 8592, // leftwards arrow, U+2190 ISOnum
uarr: 8593, // upwards arrow, U+2191 ISOnum-->
rarr: 8594, // rightwards arrow, U+2192 ISOnum
darr: 8595, // downwards arrow, U+2193 ISOnum
harr: 8596, // left right arrow, U+2194 ISOamsa
crarr: 8629, // downwards arrow with corner leftwards= carriage return, U+21B5 NEW
lArr: 8656, // leftwards double arrow, U+21D0 ISOtech
// ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests
uArr: 8657, // upwards double arrow, U+21D1 ISOamsa
rArr: 8658, // rightwards double arrow,U+21D2 ISOtech
// ISO 10646 does not say this is the 'implies' character but does not have another character with this function so ?rArr can be used for 'implies' as ISOtech suggests
dArr: 8659, // downwards double arrow, U+21D3 ISOamsa
hArr: 8660, // left right double arrow,U+21D4 ISOamsa
// Mathematical Operators
forall: 8704, // for all, U+2200 ISOtech
part: 8706, // partial differential, U+2202 ISOtech
exist: 8707, // there exists, U+2203 ISOtech
empty: 8709, // empty set = null set = diameter,U+2205 ISOamso
nabla: 8711, // nabla = backward difference,U+2207 ISOtech
isin: 8712, // element of, U+2208 ISOtech
notin: 8713, // not an element of, U+2209 ISOtech
ni: 8715, // contains as member, U+220B ISOtech
// should there be a more memorable name than 'ni'?
prod: 8719, // n-ary product = product sign,U+220F ISOamsb
// prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both
sum: 8721, // n-ary summation, U+2211 ISOamsb
// sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both
minus: 8722, // minus sign, U+2212 ISOtech
lowast: 8727, // asterisk operator, U+2217 ISOtech
radic: 8730, // square root = radical sign,U+221A ISOtech
prop: 8733, // proportional to, U+221D ISOtech
infin: 8734, // infinity, U+221E ISOtech
ang: 8736, // angle, U+2220 ISOamso
and: 8743, // logical and = wedge, U+2227 ISOtech
or: 8744, // logical or = vee, U+2228 ISOtech
cap: 8745, // intersection = cap, U+2229 ISOtech
cup: 8746, // union = cup, U+222A ISOtech
int: 8747, // integral, U+222B ISOtech
there4: 8756, // therefore, U+2234 ISOtech
sim: 8764, // tilde operator = varies with = similar to,U+223C ISOtech
// tilde operator is NOT the same character as the tilde, U+007E,although the same glyph might be used to represent both
cong: 8773, // approximately equal to, U+2245 ISOtech
asymp: 8776, // almost equal to = asymptotic to,U+2248 ISOamsr
ne: 8800, // not equal to, U+2260 ISOtech
equiv: 8801, // identical to, U+2261 ISOtech
le: 8804, // less-than or equal to, U+2264 ISOtech
ge: 8805, // greater-than or equal to,U+2265 ISOtech
sub: 8834, // subset of, U+2282 ISOtech
sup: 8835, // superset of, U+2283 ISOtech
// note that nsup, 'not a superset of, U+2283' is not covered by the Symbol font encoding and is not included. Should it be, for symmetry?It is in ISOamsn --> <!ENTITY nsub": 8836, //not a subset of, U+2284 ISOamsn
sube: 8838, // subset of or equal to, U+2286 ISOtech
supe: 8839, // superset of or equal to,U+2287 ISOtech
oplus: 8853, // circled plus = direct sum,U+2295 ISOamsb
otimes: 8855, // circled times = vector product,U+2297 ISOamsb
perp: 8869, // up tack = orthogonal to = perpendicular,U+22A5 ISOtech
sdot: 8901, // dot operator, U+22C5 ISOamsb
// dot operator is NOT the same character as U+00B7 middle dot
// Miscellaneous Technical
lceil: 8968, // left ceiling = apl upstile,U+2308 ISOamsc
rceil: 8969, // right ceiling, U+2309 ISOamsc
lfloor: 8970, // left floor = apl downstile,U+230A ISOamsc
rfloor: 8971, // right floor, U+230B ISOamsc
lang: 9001, // left-pointing angle bracket = bra,U+2329 ISOtech
// lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark'
rang: 9002, // right-pointing angle bracket = ket,U+232A ISOtech
// rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark'
// Geometric Shapes
loz: 9674, // lozenge, U+25CA ISOpub
// Miscellaneous Symbols
spades: 9824, // black spade suit, U+2660 ISOpub
// black here seems to mean filled as opposed to hollow
clubs: 9827, // black club suit = shamrock,U+2663 ISOpub
hearts: 9829, // black heart suit = valentine,U+2665 ISOpub
diams: 9830, // black diamond suit, U+2666 ISOpub
// Latin Extended-A
OElig: 338, // -- latin capital ligature OE,U+0152 ISOlat2
oelig: 339, // -- latin small ligature oe, U+0153 ISOlat2
// ligature is a misnomer, this is a separate character in some languages
Scaron: 352, // -- latin capital letter S with caron,U+0160 ISOlat2
scaron: 353, // -- latin small letter s with caron,U+0161 ISOlat2
Yuml: 376, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2
// Spacing Modifier Letters
circ: 710, // -- modifier letter circumflex accent,U+02C6 ISOpub
tilde: 732, // small tilde, U+02DC ISOdia
// General Punctuation
ensp: 8194, // en space, U+2002 ISOpub
emsp: 8195, // em space, U+2003 ISOpub
thinsp: 8201, // thin space, U+2009 ISOpub
zwnj: 8204, // zero width non-joiner,U+200C NEW RFC 2070
zwj: 8205, // zero width joiner, U+200D NEW RFC 2070
lrm: 8206, // left-to-right mark, U+200E NEW RFC 2070
rlm: 8207, // right-to-left mark, U+200F NEW RFC 2070
ndash: 8211, // en dash, U+2013 ISOpub
mdash: 8212, // em dash, U+2014 ISOpub
lsquo: 8216, // left single quotation mark,U+2018 ISOnum
rsquo: 8217, // right single quotation mark,U+2019 ISOnum
sbquo: 8218, // single low-9 quotation mark, U+201A NEW
ldquo: 8220, // left double quotation mark,U+201C ISOnum
rdquo: 8221, // right double quotation mark,U+201D ISOnum
bdquo: 8222, // double low-9 quotation mark, U+201E NEW
dagger: 8224, // dagger, U+2020 ISOpub
Dagger: 8225, // double dagger, U+2021 ISOpub
permil: 8240, // per mille sign, U+2030 ISOtech
lsaquo: 8249, // single left-pointing angle quotation mark,U+2039 ISO proposed
// lsaquo is proposed but not yet ISO standardized
rsaquo: 8250, // single right-pointing angle quotation mark,U+203A ISO proposed
// rsaquo is proposed but not yet ISO standardized
euro: 8364 // -- euro sign, U+20AC NEW
},
/**
* Escapes the characters in a <code>String</code> using HTML entities.
*
* For example: <tt>"bread" & "butter"</tt> => <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
* Supports all known HTML 4.0 entities, including funky accents.
*
* * <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
* * <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
* * <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
* * <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
*
* @param str {String} the String to escape
* @return {String} a new escaped String
* @see #unescape
*/
escape(str) {
return qx.util.StringEscape.escape(str, qx.bom.String.FROM_CHARCODE);
},
/**
* Unescapes a string containing entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes. Supports HTML 4.0 entities.
*
* For example, the string "&lt;Fran&ccedil;ais&gt;"
* will become "<Français>"
*
* If an entity is unrecognized, it is left alone, and inserted
* verbatim into the result string. e.g. "&gt;&zzzz;x" will
* become ">&zzzz;x".
*
* @param str {String} the String to unescape, may be null
* @return {var} a new unescaped String
* @see #escape
*/
unescape(str) {
return qx.util.StringEscape.unescape(str, qx.bom.String.TO_CHARCODE);
},
/**
* Converts a plain text string into HTML.
* This is similar to {@link #escape} but converts new lines to
* <tt><:br>:</tt> and preserves whitespaces.
*
* @param str {String} the String to convert
* @return {String} a new converted String
* @see #escape
*/
fromText(str) {
return qx.bom.String.escape(str).replace(/( |\n)/g, function (chr) {
var map = {
" ": " ",
"\n": "<br>"
};
return map[chr] || chr;
});
},
/**
* Converts HTML to plain text.
*
* * Strips all HTML tags
* * converts <tt><:br>:</tt> to new line
* * unescapes HTML entities
*
* @param str {String} HTML string to converts
* @return {String} plain text representation of the HTML string
*/
toText(str) {
return qx.bom.String.unescape(
str.replace(
/\s+|<([^>])+>/gi,
function (
chr
) //return qx.bom.String.unescape(str.replace(/<\/?[^>]+(>|$)/gi, function(chr)
{
if (chr.indexOf("<br") === 0) {
return "\n";
} else if (
chr.length > 0 &&
chr.replace(/^\s*/, "").replace(/\s*$/, "") == ""
) {
return " ";
} else {
return "";
}
}
)
);
}
},
/*
*****************************************************************************
DEFER
*****************************************************************************
*/
defer(statics) {
/** Mapping of char codes to HTML entity names */
statics.FROM_CHARCODE = qx.lang.Object.invert(statics.TO_CHARCODE);
}
});