1 | /* | |
2 | * $Id: EntitiesToUnicode.java 3373 2008-05-12 16:21:24Z xlv $ | |
3 | * | |
4 | * Copyright 2003-2007 Paulo Soares and Bruno Lowagie. | |
5 | * | |
6 | * The contents of this file are subject to the Mozilla Public License Version 1.1 | |
7 | * (the "License"); you may not use this file except in compliance with the License. | |
8 | * You may obtain a copy of the License at http://www.mozilla.org/MPL/ | |
9 | * | |
10 | * Software distributed under the License is distributed on an "AS IS" basis, | |
11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
12 | * for the specific language governing rights and limitations under the License. | |
13 | * | |
14 | * The Original Code is 'iText, a free JAVA-PDF library'. | |
15 | * | |
16 | * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by | |
17 | * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. | |
18 | * All Rights Reserved. | |
19 | * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer | |
20 | * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. | |
21 | * | |
22 | * Contributor(s): all the names of the contributors are added in the source code | |
23 | * where applicable. | |
24 | * | |
25 | * Alternatively, the contents of this file may be used under the terms of the | |
26 | * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the | |
27 | * provisions of LGPL are applicable instead of those above. If you wish to | |
28 | * allow use of your version of this file only under the terms of the LGPL | |
29 | * License and not to allow others to use your version of this file under | |
30 | * the MPL, indicate your decision by deleting the provisions above and | |
31 | * replace them with the notice and other provisions required by the LGPL. | |
32 | * If you do not delete the provisions above, a recipient may use your version | |
33 | * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. | |
34 | * | |
35 | * This library is free software; you can redistribute it and/or modify it | |
36 | * under the terms of the MPL as stated above or under the terms of the GNU | |
37 | * Library General Public License as published by the Free Software Foundation; | |
38 | * either version 2 of the License, or any later version. | |
39 | * | |
40 | * This library is distributed in the hope that it will be useful, but WITHOUT | |
41 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
42 | * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more | |
43 | * details. | |
44 | * | |
45 | * If you didn't download this code from the following link, you should check if | |
46 | * you aren't using an obsolete version: | |
47 | * http://www.lowagie.com/iText/ | |
48 | */ | |
49 | ||
50 | package com.lowagie.text.xml.simpleparser; | |
51 | ||
52 | import java.util.HashMap; | |
53 | ||
/**
 * Translates HTML/XML character entities to Unicode characters.
 * <p>
 * Three kinds of entity names are understood (the name is the text between
 * the leading ampersand and the trailing semicolon):
 * <ul>
 * <li>named entities registered in {@link #map}, e.g. {@code nbsp};</li>
 * <li>decimal character references, e.g. {@code #160};</li>
 * <li>hexadecimal character references, e.g. {@code #xA0} or {@code #XA0}.</li>
 * </ul>
 */
public class EntitiesToUnicode {

    /**
     * Maps an entity name (without ampersand and semicolon) to its Unicode character.
     * <p>
     * The map is a plain, publicly reachable {@link HashMap}; callers can add
     * their own entities to it. A {@code HashMap} is not synchronized, so such
     * additions should not happen while other threads are decoding.
     */
    public static final HashMap<String, Character> map = new HashMap<String, Character>();

    static {
        // Latin-1 (ISO 8859-1) characters
        map.put("nbsp", '\u00a0'); // no-break space = non-breaking space, U+00A0 ISOnum
        map.put("iexcl", '\u00a1'); // inverted exclamation mark, U+00A1 ISOnum
        map.put("cent", '\u00a2'); // cent sign, U+00A2 ISOnum
        map.put("pound", '\u00a3'); // pound sign, U+00A3 ISOnum
        map.put("curren", '\u00a4'); // currency sign, U+00A4 ISOnum
        map.put("yen", '\u00a5'); // yen sign = yuan sign, U+00A5 ISOnum
        map.put("brvbar", '\u00a6'); // broken bar = broken vertical bar, U+00A6 ISOnum
        map.put("sect", '\u00a7'); // section sign, U+00A7 ISOnum
        map.put("uml", '\u00a8'); // diaeresis = spacing diaeresis, U+00A8 ISOdia
        map.put("copy", '\u00a9'); // copyright sign, U+00A9 ISOnum
        map.put("ordf", '\u00aa'); // feminine ordinal indicator, U+00AA ISOnum
        map.put("laquo", '\u00ab'); // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
        map.put("not", '\u00ac'); // not sign, U+00AC ISOnum
        map.put("shy", '\u00ad'); // soft hyphen = discretionary hyphen, U+00AD ISOnum
        map.put("reg", '\u00ae'); // registered sign = registered trade mark sign, U+00AE ISOnum
        map.put("macr", '\u00af'); // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
        map.put("deg", '\u00b0'); // degree sign, U+00B0 ISOnum
        map.put("plusmn", '\u00b1'); // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
        map.put("sup2", '\u00b2'); // superscript two = superscript digit two = squared, U+00B2 ISOnum
        map.put("sup3", '\u00b3'); // superscript three = superscript digit three = cubed, U+00B3 ISOnum
        map.put("acute", '\u00b4'); // acute accent = spacing acute, U+00B4 ISOdia
        map.put("micro", '\u00b5'); // micro sign, U+00B5 ISOnum
        map.put("para", '\u00b6'); // pilcrow sign = paragraph sign, U+00B6 ISOnum
        map.put("middot", '\u00b7'); // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
        map.put("cedil", '\u00b8'); // cedilla = spacing cedilla, U+00B8 ISOdia
        map.put("sup1", '\u00b9'); // superscript one = superscript digit one, U+00B9 ISOnum
        map.put("ordm", '\u00ba'); // masculine ordinal indicator, U+00BA ISOnum
        map.put("raquo", '\u00bb'); // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
        map.put("frac14", '\u00bc'); // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
        map.put("frac12", '\u00bd'); // vulgar fraction one half = fraction one half, U+00BD ISOnum
        map.put("frac34", '\u00be'); // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
        map.put("iquest", '\u00bf'); // inverted question mark = turned question mark, U+00BF ISOnum
        map.put("Agrave", '\u00c0'); // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
        map.put("Aacute", '\u00c1'); // latin capital letter A with acute, U+00C1 ISOlat1
        map.put("Acirc", '\u00c2'); // latin capital letter A with circumflex, U+00C2 ISOlat1
        map.put("Atilde", '\u00c3'); // latin capital letter A with tilde, U+00C3 ISOlat1
        map.put("Auml", '\u00c4'); // latin capital letter A with diaeresis, U+00C4 ISOlat1
        map.put("Aring", '\u00c5'); // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
        map.put("AElig", '\u00c6'); // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
        map.put("Ccedil", '\u00c7'); // latin capital letter C with cedilla, U+00C7 ISOlat1
        map.put("Egrave", '\u00c8'); // latin capital letter E with grave, U+00C8 ISOlat1
        map.put("Eacute", '\u00c9'); // latin capital letter E with acute, U+00C9 ISOlat1
        map.put("Ecirc", '\u00ca'); // latin capital letter E with circumflex, U+00CA ISOlat1
        map.put("Euml", '\u00cb'); // latin capital letter E with diaeresis, U+00CB ISOlat1
        map.put("Igrave", '\u00cc'); // latin capital letter I with grave, U+00CC ISOlat1
        map.put("Iacute", '\u00cd'); // latin capital letter I with acute, U+00CD ISOlat1
        map.put("Icirc", '\u00ce'); // latin capital letter I with circumflex, U+00CE ISOlat1
        map.put("Iuml", '\u00cf'); // latin capital letter I with diaeresis, U+00CF ISOlat1
        map.put("ETH", '\u00d0'); // latin capital letter ETH, U+00D0 ISOlat1
        map.put("Ntilde", '\u00d1'); // latin capital letter N with tilde, U+00D1 ISOlat1
        map.put("Ograve", '\u00d2'); // latin capital letter O with grave, U+00D2 ISOlat1
        map.put("Oacute", '\u00d3'); // latin capital letter O with acute, U+00D3 ISOlat1
        map.put("Ocirc", '\u00d4'); // latin capital letter O with circumflex, U+00D4 ISOlat1
        map.put("Otilde", '\u00d5'); // latin capital letter O with tilde, U+00D5 ISOlat1
        map.put("Ouml", '\u00d6'); // latin capital letter O with diaeresis, U+00D6 ISOlat1
        map.put("times", '\u00d7'); // multiplication sign, U+00D7 ISOnum
        map.put("Oslash", '\u00d8'); // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
        map.put("Ugrave", '\u00d9'); // latin capital letter U with grave, U+00D9 ISOlat1
        map.put("Uacute", '\u00da'); // latin capital letter U with acute, U+00DA ISOlat1
        map.put("Ucirc", '\u00db'); // latin capital letter U with circumflex, U+00DB ISOlat1
        map.put("Uuml", '\u00dc'); // latin capital letter U with diaeresis, U+00DC ISOlat1
        map.put("Yacute", '\u00dd'); // latin capital letter Y with acute, U+00DD ISOlat1
        map.put("THORN", '\u00de'); // latin capital letter THORN, U+00DE ISOlat1
        map.put("szlig", '\u00df'); // latin small letter sharp s = ess-zed, U+00DF ISOlat1
        map.put("agrave", '\u00e0'); // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
        map.put("aacute", '\u00e1'); // latin small letter a with acute, U+00E1 ISOlat1
        map.put("acirc", '\u00e2'); // latin small letter a with circumflex, U+00E2 ISOlat1
        map.put("atilde", '\u00e3'); // latin small letter a with tilde, U+00E3 ISOlat1
        map.put("auml", '\u00e4'); // latin small letter a with diaeresis, U+00E4 ISOlat1
        map.put("aring", '\u00e5'); // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
        map.put("aelig", '\u00e6'); // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
        map.put("ccedil", '\u00e7'); // latin small letter c with cedilla, U+00E7 ISOlat1
        map.put("egrave", '\u00e8'); // latin small letter e with grave, U+00E8 ISOlat1
        map.put("eacute", '\u00e9'); // latin small letter e with acute, U+00E9 ISOlat1
        map.put("ecirc", '\u00ea'); // latin small letter e with circumflex, U+00EA ISOlat1
        map.put("euml", '\u00eb'); // latin small letter e with diaeresis, U+00EB ISOlat1
        map.put("igrave", '\u00ec'); // latin small letter i with grave, U+00EC ISOlat1
        map.put("iacute", '\u00ed'); // latin small letter i with acute, U+00ED ISOlat1
        map.put("icirc", '\u00ee'); // latin small letter i with circumflex, U+00EE ISOlat1
        map.put("iuml", '\u00ef'); // latin small letter i with diaeresis, U+00EF ISOlat1
        map.put("eth", '\u00f0'); // latin small letter eth, U+00F0 ISOlat1
        map.put("ntilde", '\u00f1'); // latin small letter n with tilde, U+00F1 ISOlat1
        map.put("ograve", '\u00f2'); // latin small letter o with grave, U+00F2 ISOlat1
        map.put("oacute", '\u00f3'); // latin small letter o with acute, U+00F3 ISOlat1
        map.put("ocirc", '\u00f4'); // latin small letter o with circumflex, U+00F4 ISOlat1
        map.put("otilde", '\u00f5'); // latin small letter o with tilde, U+00F5 ISOlat1
        map.put("ouml", '\u00f6'); // latin small letter o with diaeresis, U+00F6 ISOlat1
        map.put("divide", '\u00f7'); // division sign, U+00F7 ISOnum
        map.put("oslash", '\u00f8'); // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
        map.put("ugrave", '\u00f9'); // latin small letter u with grave, U+00F9 ISOlat1
        map.put("uacute", '\u00fa'); // latin small letter u with acute, U+00FA ISOlat1
        map.put("ucirc", '\u00fb'); // latin small letter u with circumflex, U+00FB ISOlat1
        map.put("uuml", '\u00fc'); // latin small letter u with diaeresis, U+00FC ISOlat1
        map.put("yacute", '\u00fd'); // latin small letter y with acute, U+00FD ISOlat1
        map.put("thorn", '\u00fe'); // latin small letter thorn, U+00FE ISOlat1
        map.put("yuml", '\u00ff'); // latin small letter y with diaeresis, U+00FF ISOlat1
        // Latin Extended-B
        map.put("fnof", '\u0192'); // latin small f with hook = function = florin, U+0192 ISOtech
        // Greek
        map.put("Alpha", '\u0391'); // greek capital letter alpha, U+0391
        map.put("Beta", '\u0392'); // greek capital letter beta, U+0392
        map.put("Gamma", '\u0393'); // greek capital letter gamma, U+0393 ISOgrk3
        map.put("Delta", '\u0394'); // greek capital letter delta, U+0394 ISOgrk3
        map.put("Epsilon", '\u0395'); // greek capital letter epsilon, U+0395
        map.put("Zeta", '\u0396'); // greek capital letter zeta, U+0396
        map.put("Eta", '\u0397'); // greek capital letter eta, U+0397
        map.put("Theta", '\u0398'); // greek capital letter theta, U+0398 ISOgrk3
        map.put("Iota", '\u0399'); // greek capital letter iota, U+0399
        map.put("Kappa", '\u039a'); // greek capital letter kappa, U+039A
        map.put("Lambda", '\u039b'); // greek capital letter lambda, U+039B ISOgrk3
        map.put("Mu", '\u039c'); // greek capital letter mu, U+039C
        map.put("Nu", '\u039d'); // greek capital letter nu, U+039D
        map.put("Xi", '\u039e'); // greek capital letter xi, U+039E ISOgrk3
        map.put("Omicron", '\u039f'); // greek capital letter omicron, U+039F
        map.put("Pi", '\u03a0'); // greek capital letter pi, U+03A0 ISOgrk3
        map.put("Rho", '\u03a1'); // greek capital letter rho, U+03A1
        // there is no Sigmaf, and no U+03A2 character either
        map.put("Sigma", '\u03a3'); // greek capital letter sigma, U+03A3 ISOgrk3
        map.put("Tau", '\u03a4'); // greek capital letter tau, U+03A4
        map.put("Upsilon", '\u03a5'); // greek capital letter upsilon, U+03A5 ISOgrk3
        map.put("Phi", '\u03a6'); // greek capital letter phi, U+03A6 ISOgrk3
        map.put("Chi", '\u03a7'); // greek capital letter chi, U+03A7
        map.put("Psi", '\u03a8'); // greek capital letter psi, U+03A8 ISOgrk3
        map.put("Omega", '\u03a9'); // greek capital letter omega, U+03A9 ISOgrk3
        map.put("alpha", '\u03b1'); // greek small letter alpha, U+03B1 ISOgrk3
        map.put("beta", '\u03b2'); // greek small letter beta, U+03B2 ISOgrk3
        map.put("gamma", '\u03b3'); // greek small letter gamma, U+03B3 ISOgrk3
        map.put("delta", '\u03b4'); // greek small letter delta, U+03B4 ISOgrk3
        map.put("epsilon", '\u03b5'); // greek small letter epsilon, U+03B5 ISOgrk3
        map.put("zeta", '\u03b6'); // greek small letter zeta, U+03B6 ISOgrk3
        map.put("eta", '\u03b7'); // greek small letter eta, U+03B7 ISOgrk3
        map.put("theta", '\u03b8'); // greek small letter theta, U+03B8 ISOgrk3
        map.put("iota", '\u03b9'); // greek small letter iota, U+03B9 ISOgrk3
        map.put("kappa", '\u03ba'); // greek small letter kappa, U+03BA ISOgrk3
        map.put("lambda", '\u03bb'); // greek small letter lambda, U+03BB ISOgrk3
        map.put("mu", '\u03bc'); // greek small letter mu, U+03BC ISOgrk3
        map.put("nu", '\u03bd'); // greek small letter nu, U+03BD ISOgrk3
        map.put("xi", '\u03be'); // greek small letter xi, U+03BE ISOgrk3
        map.put("omicron", '\u03bf'); // greek small letter omicron, U+03BF NEW
        map.put("pi", '\u03c0'); // greek small letter pi, U+03C0 ISOgrk3
        map.put("rho", '\u03c1'); // greek small letter rho, U+03C1 ISOgrk3
        map.put("sigmaf", '\u03c2'); // greek small letter final sigma, U+03C2 ISOgrk3
        map.put("sigma", '\u03c3'); // greek small letter sigma, U+03C3 ISOgrk3
        map.put("tau", '\u03c4'); // greek small letter tau, U+03C4 ISOgrk3
        map.put("upsilon", '\u03c5'); // greek small letter upsilon, U+03C5 ISOgrk3
        map.put("phi", '\u03c6'); // greek small letter phi, U+03C6 ISOgrk3
        map.put("chi", '\u03c7'); // greek small letter chi, U+03C7 ISOgrk3
        map.put("psi", '\u03c8'); // greek small letter psi, U+03C8 ISOgrk3
        map.put("omega", '\u03c9'); // greek small letter omega, U+03C9 ISOgrk3
        map.put("thetasym", '\u03d1'); // greek small letter theta symbol, U+03D1 NEW
        map.put("upsih", '\u03d2'); // greek upsilon with hook symbol, U+03D2 NEW
        map.put("piv", '\u03d6'); // greek pi symbol, U+03D6 ISOgrk3
        // General Punctuation
        map.put("bull", '\u2022'); // bullet = black small circle, U+2022 ISOpub
        // bullet is NOT the same as bullet operator, U+2219
        map.put("hellip", '\u2026'); // horizontal ellipsis = three dot leader, U+2026 ISOpub
        map.put("prime", '\u2032'); // prime = minutes = feet, U+2032 ISOtech
        map.put("Prime", '\u2033'); // double prime = seconds = inches, U+2033 ISOtech
        map.put("oline", '\u203e'); // overline = spacing overscore, U+203E NEW
        map.put("frasl", '\u2044'); // fraction slash, U+2044 NEW
        // Letterlike Symbols
        map.put("weierp", '\u2118'); // script capital P = power set = Weierstrass p, U+2118 ISOamso
        map.put("image", '\u2111'); // blackletter capital I = imaginary part, U+2111 ISOamso
        map.put("real", '\u211c'); // blackletter capital R = real part symbol, U+211C ISOamso
        map.put("trade", '\u2122'); // trade mark sign, U+2122 ISOnum
        map.put("alefsym", '\u2135'); // alef symbol = first transfinite cardinal, U+2135 NEW
        // alef symbol is NOT the same as hebrew letter alef,
        // U+05D0 although the same glyph could be used to depict both characters
        // Arrows
        map.put("larr", '\u2190'); // leftwards arrow, U+2190 ISOnum
        map.put("uarr", '\u2191'); // upwards arrow, U+2191 ISOnum
        map.put("rarr", '\u2192'); // rightwards arrow, U+2192 ISOnum
        map.put("darr", '\u2193'); // downwards arrow, U+2193 ISOnum
        map.put("harr", '\u2194'); // left right arrow, U+2194 ISOamsa
        map.put("crarr", '\u21b5'); // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
        map.put("lArr", '\u21d0'); // leftwards double arrow, U+21D0 ISOtech
        // ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
        // but also does not have any other character for that function. So lArr can
        // be used for 'is implied by' as ISOtech suggests
        map.put("uArr", '\u21d1'); // upwards double arrow, U+21D1 ISOamsa
        map.put("rArr", '\u21d2'); // rightwards double arrow, U+21D2 ISOtech
        // ISO 10646 does not say this is the 'implies' character but does not have
        // another character with this function, so
        // rArr can be used for 'implies' as ISOtech suggests
        map.put("dArr", '\u21d3'); // downwards double arrow, U+21D3 ISOamsa
        map.put("hArr", '\u21d4'); // left right double arrow, U+21D4 ISOamsa
        // Mathematical Operators
        map.put("forall", '\u2200'); // for all, U+2200 ISOtech
        map.put("part", '\u2202'); // partial differential, U+2202 ISOtech
        map.put("exist", '\u2203'); // there exists, U+2203 ISOtech
        map.put("empty", '\u2205'); // empty set = null set = diameter, U+2205 ISOamso
        map.put("nabla", '\u2207'); // nabla = backward difference, U+2207 ISOtech
        map.put("isin", '\u2208'); // element of, U+2208 ISOtech
        map.put("notin", '\u2209'); // not an element of, U+2209 ISOtech
        map.put("ni", '\u220b'); // contains as member, U+220B ISOtech
        // should there be a more memorable name than 'ni'?
        map.put("prod", '\u220f'); // n-ary product = product sign, U+220F ISOamsb
        // prod is NOT the same character as U+03A0 'greek capital letter pi' though
        // the same glyph might be used for both
        map.put("sum", '\u2211'); // n-ary summation, U+2211 ISOamsb
        // sum is NOT the same character as U+03A3 'greek capital letter sigma'
        // though the same glyph might be used for both
        map.put("minus", '\u2212'); // minus sign, U+2212 ISOtech
        map.put("lowast", '\u2217'); // asterisk operator, U+2217 ISOtech
        map.put("radic", '\u221a'); // square root = radical sign, U+221A ISOtech
        map.put("prop", '\u221d'); // proportional to, U+221D ISOtech
        map.put("infin", '\u221e'); // infinity, U+221E ISOtech
        map.put("ang", '\u2220'); // angle, U+2220 ISOamso
        map.put("and", '\u2227'); // logical and = wedge, U+2227 ISOtech
        map.put("or", '\u2228'); // logical or = vee, U+2228 ISOtech
        map.put("cap", '\u2229'); // intersection = cap, U+2229 ISOtech
        map.put("cup", '\u222a'); // union = cup, U+222A ISOtech
        map.put("int", '\u222b'); // integral, U+222B ISOtech
        map.put("there4", '\u2234'); // therefore, U+2234 ISOtech
        map.put("sim", '\u223c'); // tilde operator = varies with = similar to, U+223C ISOtech
        // tilde operator is NOT the same character as the tilde, U+007E,
        // although the same glyph might be used to represent both
        map.put("cong", '\u2245'); // approximately equal to, U+2245 ISOtech
        map.put("asymp", '\u2248'); // almost equal to = asymptotic to, U+2248 ISOamsr
        map.put("ne", '\u2260'); // not equal to, U+2260 ISOtech
        map.put("equiv", '\u2261'); // identical to, U+2261 ISOtech
        map.put("le", '\u2264'); // less-than or equal to, U+2264 ISOtech
        map.put("ge", '\u2265'); // greater-than or equal to, U+2265 ISOtech
        map.put("sub", '\u2282'); // subset of, U+2282 ISOtech
        map.put("sup", '\u2283'); // superset of, U+2283 ISOtech
        // note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
        // font encoding and is not included. Should it be, for symmetry?
        // It is in ISOamsn
        map.put("nsub", '\u2284'); // not a subset of, U+2284 ISOamsn
        map.put("sube", '\u2286'); // subset of or equal to, U+2286 ISOtech
        map.put("supe", '\u2287'); // superset of or equal to, U+2287 ISOtech
        map.put("oplus", '\u2295'); // circled plus = direct sum, U+2295 ISOamsb
        map.put("otimes", '\u2297'); // circled times = vector product, U+2297 ISOamsb
        map.put("perp", '\u22a5'); // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
        map.put("sdot", '\u22c5'); // dot operator, U+22C5 ISOamsb
        // dot operator is NOT the same character as U+00B7 middle dot
        // Miscellaneous Technical
        map.put("lceil", '\u2308'); // left ceiling = apl upstile, U+2308 ISOamsc
        map.put("rceil", '\u2309'); // right ceiling, U+2309 ISOamsc
        map.put("lfloor", '\u230a'); // left floor = apl downstile, U+230A ISOamsc
        map.put("rfloor", '\u230b'); // right floor, U+230B ISOamsc
        map.put("lang", '\u2329'); // left-pointing angle bracket = bra, U+2329 ISOtech
        // lang is NOT the same character as U+003C 'less than'
        // or U+2039 'single left-pointing angle quotation mark'
        map.put("rang", '\u232a'); // right-pointing angle bracket = ket, U+232A ISOtech
        // rang is NOT the same character as U+003E 'greater than'
        // or U+203A 'single right-pointing angle quotation mark'
        // Geometric Shapes
        map.put("loz", '\u25ca'); // lozenge, U+25CA ISOpub
        // Miscellaneous Symbols
        map.put("spades", '\u2660'); // black spade suit, U+2660 ISOpub
        // black here seems to mean filled as opposed to hollow
        map.put("clubs", '\u2663'); // black club suit = shamrock, U+2663 ISOpub
        map.put("hearts", '\u2665'); // black heart suit = valentine, U+2665 ISOpub
        map.put("diams", '\u2666'); // black diamond suit, U+2666 ISOpub
        // C0 Controls and Basic Latin
        map.put("quot", '\u0022'); // quotation mark = APL quote, U+0022 ISOnum
        map.put("amp", '\u0026'); // ampersand, U+0026 ISOnum
        map.put("apos", '\''); // apostrophe, U+0027 (predefined in XML, not part of HTML 4)
        map.put("lt", '\u003c'); // less-than sign, U+003C ISOnum
        map.put("gt", '\u003e'); // greater-than sign, U+003E ISOnum
        // Latin Extended-A
        map.put("OElig", '\u0152'); // latin capital ligature OE, U+0152 ISOlat2
        map.put("oelig", '\u0153'); // latin small ligature oe, U+0153 ISOlat2
        // ligature is a misnomer, this is a separate character in some languages
        map.put("Scaron", '\u0160'); // latin capital letter S with caron, U+0160 ISOlat2
        map.put("scaron", '\u0161'); // latin small letter s with caron, U+0161 ISOlat2
        map.put("Yuml", '\u0178'); // latin capital letter Y with diaeresis, U+0178 ISOlat2
        // Spacing Modifier Letters
        map.put("circ", '\u02c6'); // modifier letter circumflex accent, U+02C6 ISOpub
        map.put("tilde", '\u02dc'); // small tilde, U+02DC ISOdia
        // General Punctuation
        map.put("ensp", '\u2002'); // en space, U+2002 ISOpub
        map.put("emsp", '\u2003'); // em space, U+2003 ISOpub
        map.put("thinsp", '\u2009'); // thin space, U+2009 ISOpub
        map.put("zwnj", '\u200c'); // zero width non-joiner, U+200C NEW RFC 2070
        map.put("zwj", '\u200d'); // zero width joiner, U+200D NEW RFC 2070
        map.put("lrm", '\u200e'); // left-to-right mark, U+200E NEW RFC 2070
        map.put("rlm", '\u200f'); // right-to-left mark, U+200F NEW RFC 2070
        map.put("ndash", '\u2013'); // en dash, U+2013 ISOpub
        map.put("mdash", '\u2014'); // em dash, U+2014 ISOpub
        map.put("lsquo", '\u2018'); // left single quotation mark, U+2018 ISOnum
        map.put("rsquo", '\u2019'); // right single quotation mark, U+2019 ISOnum
        map.put("sbquo", '\u201a'); // single low-9 quotation mark, U+201A NEW
        map.put("ldquo", '\u201c'); // left double quotation mark, U+201C ISOnum
        map.put("rdquo", '\u201d'); // right double quotation mark, U+201D ISOnum
        map.put("bdquo", '\u201e'); // double low-9 quotation mark, U+201E NEW
        map.put("dagger", '\u2020'); // dagger, U+2020 ISOpub
        map.put("Dagger", '\u2021'); // double dagger, U+2021 ISOpub
        map.put("permil", '\u2030'); // per mille sign, U+2030 ISOtech
        map.put("lsaquo", '\u2039'); // single left-pointing angle quotation mark, U+2039 ISO proposed
        // lsaquo is proposed but not yet ISO standardized
        map.put("rsaquo", '\u203a'); // single right-pointing angle quotation mark, U+203A ISO proposed
        // rsaquo is proposed but not yet ISO standardized
        map.put("euro", '\u20ac'); // euro sign, U+20AC NEW
    }

    /**
     * Translates an entity to a unicode character.
     * <p>
     * Because the return type is a single {@code char}, only characters in the
     * Basic Multilingual Plane can be returned. A numeric reference outside
     * that range yields {@code '\0'} instead of a silently truncated value.
     *
     * @param name the name of the entity, without the leading ampersand and the
     *             trailing semicolon (e.g. {@code "nbsp"}, {@code "#160"},
     *             {@code "#xA0"})
     * @return the corresponding unicode character, or {@code '\0'} when the
     *         name is {@code null}, unknown, malformed, or does not fit in a
     *         {@code char}
     */
    public static char decodeEntity(String name) {
        int codePoint = decodeCodePoint(name);
        return codePoint <= Character.MAX_VALUE ? (char) codePoint : '\0';
    }

    /**
     * Translates a String with entities (&amp;...;) to a String without entities,
     * replacing the entity with the right (unicode) character.
     * <p>
     * Entities that cannot be decoded are copied to the result unchanged, and
     * an ampersand that is followed by another ampersand before the next
     * semicolon is treated as literal text. Numeric references above U+FFFF are
     * emitted as a surrogate pair.
     *
     * @param s the text to decode; may be {@code null}
     * @return the decoded text; {@code s} itself when it contains no ampersand,
     *         or {@code null} when {@code s} is {@code null}
     */
    public static String decodeString(String s) {
        if (s == null) {
            return null;
        }
        int amp = s.indexOf('&');
        if (amp == -1) {
            return s;
        }

        StringBuilder buf = new StringBuilder(s.length());
        int copied = 0; // index of the first character of s not yet written to buf
        while (amp != -1) {
            int semi = s.indexOf(';', amp + 1);
            if (semi == -1) {
                break; // no terminator left: the remainder is plain text
            }
            // Only the last '&' before the ';' can start the entity; any
            // earlier ones are stray ampersands and stay in the output as-is.
            amp = s.lastIndexOf('&', semi);
            int codePoint = decodeCodePoint(s.substring(amp + 1, semi));
            if (codePoint != 0) {
                buf.append(s, copied, amp);
                buf.appendCodePoint(codePoint);
                copied = semi + 1;
            }
            // Undecodable entities are not consumed here; they are copied
            // verbatim with the next chunk of plain text.
            amp = s.indexOf('&', semi + 1);
        }
        buf.append(s, copied, s.length());
        return buf.toString();
    }

    /**
     * Resolves an entity name to a Unicode code point.
     *
     * @param name the entity name without ampersand and semicolon
     * @return the code point, or {@code 0} when the name is {@code null},
     *         empty, unknown, malformed, or outside the Unicode range
     */
    private static int decodeCodePoint(String name) {
        if (name == null || name.length() == 0) {
            return 0;
        }
        if (name.charAt(0) != '#') {
            Character known = map.get(name);
            return known == null ? 0 : known.charValue();
        }
        // Numeric character reference: "#<decimal>" or "#x<hex>" / "#X<hex>".
        boolean hex = name.length() > 1 && (name.charAt(1) == 'x' || name.charAt(1) == 'X');
        try {
            int codePoint = hex
                    ? Integer.parseInt(name.substring(2), 16)
                    : Integer.parseInt(name.substring(1));
            // Rejects negative values and anything above U+10FFFF.
            return Character.isValidCodePoint(codePoint) ? codePoint : 0;
        }
        catch (NumberFormatException nfe) {
            return 0; // not a number: report as undecodable
        }
    }
}
Mutations | ||
370 |
1.1 |
|
372 |
1.1 |
|
375 |
1.1 |
|
378 |
1.1 |
|
380 |
1.1 |
|
383 |
1.1 |
|
387 |
1.1 |
|
388 |
1.1 |
|
390 |
1.1 |
|
399 |
1.1 2.2 |
|
407 |
1.1 |
|
409 |
1.1 |
|
411 |
1.1 |
|
412 |
1.1 2.2 3.3 |
|
415 |
1.1 |
|
417 |
1.1 |
|
418 |
1.1 2.2 3.3 |
|
419 |
1.1 |
|
421 |
1.1 |
|
422 |
1.1 |
|
428 |
1.1 |
|
429 |
1.1 |
|
430 |
1.1 |
|
433 |
1.1 |