1 | /* | |
2 | * Copyright 2003 Paulo Soares | |
3 | * | |
4 | * The contents of this file are subject to the Mozilla Public License Version 1.1 | |
5 | * (the "License"); you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at http://www.mozilla.org/MPL/ | |
7 | * | |
8 | * Software distributed under the License is distributed on an "AS IS" basis, | |
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
10 | * for the specific language governing rights and limitations under the License. | |
11 | * | |
12 | * The Original Code is 'iText, a free JAVA-PDF library'. | |
13 | * | |
14 | * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by | |
15 | * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. | |
16 | * All Rights Reserved. | |
17 | * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer | |
18 | * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. | |
19 | * | |
20 | * Contributor(s): all the names of the contributors are added in the source code | |
21 | * where applicable. | |
22 | * | |
23 | * Alternatively, the contents of this file may be used under the terms of the | |
24 | * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the | |
25 | * provisions of LGPL are applicable instead of those above. If you wish to | |
26 | * allow use of your version of this file only under the terms of the LGPL | |
27 | * License and not to allow others to use your version of this file under | |
28 | * the MPL, indicate your decision by deleting the provisions above and | |
29 | * replace them with the notice and other provisions required by the LGPL. | |
30 | * If you do not delete the provisions above, a recipient may use your version | |
31 | * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. | |
32 | * | |
33 | * This library is free software; you can redistribute it and/or modify it | |
34 | * under the terms of the MPL as stated above or under the terms of the GNU | |
35 | * Library General Public License as published by the Free Software Foundation; | |
36 | * either version 2 of the License, or any later version. | |
37 | * | |
38 | * This library is distributed in the hope that it will be useful, but WITHOUT | |
39 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
40 | * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more | |
41 | * details. | |
42 | * | |
43 | */ | |
44 | package com.lowagie.text.xml.simpleparser; | |
45 | ||
46 | import com.lowagie.text.error_messages.MessageLocalization; | |
47 | import org.mozilla.universalchardet.UniversalDetector; | |
48 | ||
49 | import java.io.BufferedReader; | |
50 | import java.io.ByteArrayOutputStream; | |
51 | import java.io.IOException; | |
52 | import java.io.InputStream; | |
53 | import java.io.InputStreamReader; | |
54 | import java.io.Reader; | |
55 | import java.util.HashMap; | |
56 | import java.util.Stack; | |
57 | ||
58 | /** | |
59 | * A simple XML and HTML parser. This parser is, like the SAX parser, | |
60 | * an event based parser, but with much less functionality. | |
61 | * <p> | |
62 | * The parser can: | |
63 | * <p> | |
64 | * <ul> | |
65 | * <li>It recognizes the encoding used | |
66 | * <li>It recognizes all the elements' start tags and end tags | |
67 | * <li>It lists attributes, where attribute values can be enclosed in single or double quotes | |
68 | * <li>It recognizes the <code><![CDATA[ ... ]]></code> construct | |
69 | * <li>It recognizes the standard entities: &amp;, &lt;, &gt;, &quot;, and &apos;, as well as numeric entities | |
70 | * <li>It maps lines ending in <code>\r\n</code> and <code>\r</code> to <code>\n</code> on input, in accordance with the XML Specification, Section 2.11 | |
71 | * </ul> | |
72 | * <p> | |
73 | */ | |
74 | @Deprecated | |
75 | public final class SimpleXMLParser { | |
76 | /** possible states */ | |
77 | private final static int UNKNOWN = 0; | |
78 | private final static int TEXT = 1; | |
79 | private final static int TAG_ENCOUNTERED = 2; | |
80 | private final static int EXAMIN_TAG = 3; | |
81 | private final static int TAG_EXAMINED = 4; | |
82 | private final static int IN_CLOSETAG = 5; | |
83 | private final static int SINGLE_TAG = 6; | |
84 | private final static int CDATA = 7; | |
85 | private final static int COMMENT = 8; | |
86 | private final static int PI = 9; | |
87 | private final static int ENTITY = 10; | |
88 | private final static int QUOTE = 11; | |
89 | private final static int ATTRIBUTE_KEY = 12; | |
90 | private final static int ATTRIBUTE_EQUAL = 13; | |
91 | private final static int ATTRIBUTE_VALUE = 14; | |
92 | | |
93 | /** the state stack */ | |
94 | Stack stack; | |
95 | /** The current character. */ | |
96 | int character = 0; | |
97 | /** The previous character. */ | |
98 | int previousCharacter = -1; | |
99 | /** the line we are currently reading */ | |
100 | int lines = 1; | |
101 | /** the column where the current character occurs */ | |
102 | int columns = 0; | |
103 | /** was the last character equivalent to a newline? */ | |
104 | boolean eol = false; | |
105 | /** | |
106 | * A boolean indicating if the next character should be taken into account | |
107 | * if it's a space character. When nowhite is true, the previous character | |
108 | * wasn't whitespace. | |
109 | * @since 2.1.5 | |
110 | */ | |
111 | boolean nowhite = false; | |
112 | /** the current state */ | |
113 | int state; | |
114 | /** Are we parsing HTML? */ | |
115 | boolean html; | |
116 | /** current text (whatever is encountered between tags) */ | |
117 | StringBuffer text = new StringBuffer(); | |
118 | /** current entity (whatever is encountered between & and ;) */ | |
119 | StringBuffer entity = new StringBuffer(); | |
120 | /** current tagname */ | |
121 | String tag = null; | |
122 | /** current attributes */ | |
123 | HashMap attributes = null; | |
124 | /** The handler to which we are going to forward document content */ | |
125 | SimpleXMLDocHandler doc; | |
126 | /** The handler to which we are going to forward comments. */ | |
127 | SimpleXMLDocHandlerComment comment; | |
128 | /** Keeps track of the number of tags that are open. */ | |
129 | int nested = 0; | |
130 | /** the quote character that was used to open the quote. */ | |
131 | int quoteCharacter = '"'; | |
132 | /** the attribute key. */ | |
133 | String attributekey = null; | |
134 | /** the attribute value. */ | |
135 | String attributevalue = null; | |
136 | | |
/**
 * Builds a parser bound to the given handlers.
 * The caller is expected to invoke {@code go(Reader)} right after construction.
 *
 * @param doc     receives document, element and text events
 * @param comment receives comment text; may be {@code null}, in which case comments are dropped
 * @param html    {@code true} to parse leniently as HTML, {@code false} for XML
 */
private SimpleXMLParser(SimpleXMLDocHandler doc, SimpleXMLDocHandlerComment comment, boolean html) {
    this.stack = new Stack();
    this.html = html;
    this.comment = comment;
    this.doc = doc;
    // HTML may begin with plain text; XML content only starts at the first '<'.
    if (html) {
        this.state = TEXT;
    } else {
        this.state = UNKNOWN;
    }
}
148 | | |
149 | /** | |
150 | * Does the actual parsing. Perform this immediately | |
151 | * after creating the parser object. | |
152 | */ | |
153 | private void go(Reader r) throws IOException { | |
154 | BufferedReader reader; | |
155 |
1
1. go : negated conditional → NO_COVERAGE |
if (r instanceof BufferedReader) |
156 | reader = (BufferedReader)r; | |
157 | else | |
158 | reader = new BufferedReader(r); | |
159 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLDocHandler::startDocument → NO_COVERAGE |
doc.startDocument(); |
160 | while(true) { | |
161 | // read a new character | |
162 |
1
1. go : negated conditional → NO_COVERAGE |
if (previousCharacter == -1) { |
163 | character = reader.read(); | |
164 | } | |
165 | // or re-examine the previous character | |
166 | else { | |
167 | character = previousCharacter; | |
168 | previousCharacter = -1; | |
169 | } | |
170 | | |
171 | // the end of the file was reached | |
172 |
1
1. go : negated conditional → NO_COVERAGE |
if (character == -1) { |
173 |
1
1. go : negated conditional → NO_COVERAGE |
if (html) { |
174 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
if (html && state == TEXT) |
175 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
176 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLDocHandler::endDocument → NO_COVERAGE |
doc.endDocument(); |
177 | } else { | |
178 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::throwException → NO_COVERAGE |
throwException(MessageLocalization.getComposedMessage("missing.end.tag")); |
179 | } | |
180 | return; | |
181 | } | |
182 | | |
183 | // dealing with \n and \r | |
184 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
if (character == '\n' && eol) { |
185 | eol = false; | |
186 | continue; | |
187 |
1
1. go : negated conditional → NO_COVERAGE |
} else if (eol) { |
188 | eol = false; | |
189 |
1
1. go : negated conditional → NO_COVERAGE |
} else if (character == '\n') { |
190 |
1
1. go : Replaced integer addition with subtraction → NO_COVERAGE |
lines++; |
191 | columns = 0; | |
192 |
1
1. go : negated conditional → NO_COVERAGE |
} else if (character == '\r') { |
193 | eol = true; | |
194 | character = '\n'; | |
195 |
1
1. go : Replaced integer addition with subtraction → NO_COVERAGE |
lines++; |
196 | columns = 0; | |
197 | } else { | |
198 |
1
1. go : Replaced integer addition with subtraction → NO_COVERAGE |
columns++; |
199 | } | |
200 | | |
201 | switch(state) { | |
202 | // we are in an unknown state before there's actual content | |
203 | case UNKNOWN: | |
204 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '<') { |
205 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::saveState → NO_COVERAGE |
saveState(TEXT); |
206 | state = TAG_ENCOUNTERED; | |
207 | } | |
208 | break; | |
209 | // we can encounter any content | |
210 | case TEXT: | |
211 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '<') { |
212 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
213 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::saveState → NO_COVERAGE |
saveState(state); |
214 | state = TAG_ENCOUNTERED; | |
215 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(character == '&') { |
216 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::saveState → NO_COVERAGE |
saveState(state); |
217 |
1
1. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
entity.setLength(0); |
218 | state = ENTITY; | |
219 | nowhite = true; | |
220 |
1
1. go : negated conditional → NO_COVERAGE |
} else if (Character.isWhitespace((char)character)) { |
221 |
1
1. go : negated conditional → NO_COVERAGE |
if (nowhite) |
222 | text.append((char)character); | |
223 | nowhite = false; | |
224 | } else { | |
225 | text.append((char)character); | |
226 | nowhite = true; | |
227 | } | |
228 | break; | |
229 | // we have just seen a < and are wondering what we are looking at | |
230 | // <foo>, </foo>, <!-- ... --->, etc. | |
231 | case TAG_ENCOUNTERED: | |
232 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
233 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '/') { |
234 | state = IN_CLOSETAG; | |
235 |
1
1. go : negated conditional → NO_COVERAGE |
} else if (character == '?') { |
236 | restoreState(); | |
237 | state = PI; | |
238 | } else { | |
239 | text.append((char)character); | |
240 | state = EXAMIN_TAG; | |
241 | } | |
242 | break; | |
243 | // we are processing something like this <foo ... >. | |
244 | // It could still be a <!-- ... --> or something. | |
245 | case EXAMIN_TAG: | |
246 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '>') { |
247 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::doTag → NO_COVERAGE |
doTag(); |
248 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(true); |
249 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
250 | state = restoreState(); | |
251 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(character == '/') { |
252 | state = SINGLE_TAG; | |
253 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if(character == '-' && text.toString().equals("!-")) { |
254 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
255 | state = COMMENT; | |
256 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if(character == '[' && text.toString().equals("![CDATA")) { |
257 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
258 | state = CDATA; | |
259 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if(character == 'E' && text.toString().equals("!DOCTYP")) { |
260 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
261 | state = PI; | |
262 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(Character.isWhitespace((char)character)) { |
263 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::doTag → NO_COVERAGE |
doTag(); |
264 | state = TAG_EXAMINED; | |
265 | } else { | |
266 | text.append((char)character); | |
267 | } | |
268 | break; | |
269 | // we know the name of the tag now. | |
270 | case TAG_EXAMINED: | |
271 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '>') { |
272 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(true); |
273 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
274 | state = restoreState(); | |
275 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(character == '/') { |
276 | state = SINGLE_TAG; | |
277 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(Character.isWhitespace((char)character)) { |
278 | // empty | |
279 | } else { | |
280 | text.append((char)character); | |
281 | state = ATTRIBUTE_KEY; | |
282 | } | |
283 | break; | |
284 | | |
285 | // we are processing a closing tag: e.g. </foo> | |
286 | case IN_CLOSETAG: | |
287 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '>') { |
288 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::doTag → NO_COVERAGE |
doTag(); |
289 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(false); |
290 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
if(!html && nested==0) return; |
291 | state = restoreState(); | |
292 | } else { | |
293 |
1
1. go : negated conditional → NO_COVERAGE |
if (!Character.isWhitespace((char)character)) |
294 | text.append((char)character); | |
295 | } | |
296 | break; | |
297 | | |
298 | // we have just seen something like this: <foo a="b"/ | |
299 | // and are looking for the final >. | |
300 | case SINGLE_TAG: | |
301 |
1
1. go : negated conditional → NO_COVERAGE |
if(character != '>') |
302 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::throwException → NO_COVERAGE |
throwException(MessageLocalization.getComposedMessage("expected.gt.for.tag.lt.1.gt", tag)); |
303 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::doTag → NO_COVERAGE |
doTag(); |
304 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(true); |
305 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(false); |
306 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
307 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
if(!html && nested==0) { |
308 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLDocHandler::endDocument → NO_COVERAGE |
doc.endDocument(); |
309 | return; | |
310 | } | |
311 | state = restoreState(); | |
312 | break; | |
313 | | |
314 | // we are processing CDATA | |
315 | case CDATA: | |
316 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '>' |
317 |
1
1. go : negated conditional → NO_COVERAGE |
&& text.toString().endsWith("]]")) { |
318 |
2
1. go : Replaced integer subtraction with addition → NO_COVERAGE 2. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
text.setLength(text.length()-2); |
319 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
320 | state = restoreState(); | |
321 | } else | |
322 | text.append((char)character); | |
323 | break; | |
324 | | |
325 | // we are processing a comment. We are inside | |
326 | // the <!-- .... --> looking for the -->. | |
327 | case COMMENT: | |
328 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '>' |
329 |
1
1. go : negated conditional → NO_COVERAGE |
&& text.toString().endsWith("--")) { |
330 |
2
1. go : Replaced integer subtraction with addition → NO_COVERAGE 2. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
text.setLength(text.length() - 2); |
331 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
332 | state = restoreState(); | |
333 | } else | |
334 | text.append((char)character); | |
335 | break; | |
336 | | |
337 | // We are inside one of these <? ... ?> or one of these <!DOCTYPE ... > | |
338 | case PI: | |
339 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '>') { |
340 | state = restoreState(); | |
341 |
1
1. go : negated conditional → NO_COVERAGE |
if(state == TEXT) state = UNKNOWN; |
342 | } | |
343 | break; | |
344 | | |
345 | // we are processing an entity, e.g. <, », etc. | |
346 | case ENTITY: | |
347 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == ';') { |
348 | state = restoreState(); | |
349 | String cent = entity.toString(); | |
350 |
1
1. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
entity.setLength(0); |
351 | char ce = EntitiesToUnicode.decodeEntity(cent); | |
352 |
1
1. go : negated conditional → NO_COVERAGE |
if (ce == '\0') |
353 | text.append('&').append(cent).append(';'); | |
354 | else | |
355 | text.append(ce); | |
356 |
13
1. go : changed conditional boundary → NO_COVERAGE 2. go : changed conditional boundary → NO_COVERAGE 3. go : changed conditional boundary → NO_COVERAGE 4. go : changed conditional boundary → NO_COVERAGE 5. go : changed conditional boundary → NO_COVERAGE 6. go : changed conditional boundary → NO_COVERAGE 7. go : negated conditional → NO_COVERAGE 8. go : negated conditional → NO_COVERAGE 9. go : negated conditional → NO_COVERAGE 10. go : negated conditional → NO_COVERAGE 11. go : negated conditional → NO_COVERAGE 12. go : negated conditional → NO_COVERAGE 13. go : negated conditional → NO_COVERAGE |
} else if ((character != '#' && (character < '0' || character > '9') && (character < 'a' || character > 'z') |
357 |
2
1. go : changed conditional boundary → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
&& (character < 'A' || character > 'Z')) || entity.length() >= 7) { |
358 | state = restoreState(); | |
359 | previousCharacter = character; | |
360 | text.append('&').append(entity.toString()); | |
361 |
1
1. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
entity.setLength(0); |
362 | } | |
363 | else { | |
364 | entity.append((char)character); | |
365 | } | |
366 | break; | |
367 | // We are processing the quoted right-hand side of an element's attribute. | |
368 | case QUOTE: | |
369 |
3
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE 3. go : negated conditional → NO_COVERAGE |
if (html && quoteCharacter == ' ' && character == '>') { |
370 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
371 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(true); |
372 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
373 | state = restoreState(); | |
374 | } | |
375 |
3
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE 3. go : negated conditional → NO_COVERAGE |
else if (html && quoteCharacter == ' ' && Character.isWhitespace((char)character)) { |
376 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
377 | state = TAG_EXAMINED; | |
378 | } | |
379 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
else if (html && quoteCharacter == ' ') { |
380 | text.append((char)character); | |
381 | } | |
382 |
1
1. go : negated conditional → NO_COVERAGE |
else if(character == quoteCharacter) { |
383 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
384 | state = TAG_EXAMINED; | |
385 |
2
1. go : changed conditional boundary → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if(" \r\n\u0009".indexOf(character)>=0) { |
386 | text.append(' '); | |
387 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(character == '&') { |
388 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::saveState → NO_COVERAGE |
saveState(state); |
389 | state = ENTITY; | |
390 |
1
1. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
entity.setLength(0); |
391 | } else { | |
392 | text.append((char)character); | |
393 | } | |
394 | break; | |
395 | | |
396 | case ATTRIBUTE_KEY: | |
397 |
1
1. go : negated conditional → NO_COVERAGE |
if(Character.isWhitespace((char)character)) { |
398 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
399 | state = ATTRIBUTE_EQUAL; | |
400 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(character == '=') { |
401 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
402 | state = ATTRIBUTE_VALUE; | |
403 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if (html && character == '>') { |
404 |
1
1. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
text.setLength(0); |
405 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(true); |
406 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
407 | state = restoreState(); | |
408 | } else { | |
409 | text.append((char)character); | |
410 | } | |
411 | break; | |
412 | | |
413 | case ATTRIBUTE_EQUAL: | |
414 |
1
1. go : negated conditional → NO_COVERAGE |
if(character == '=') { |
415 | state = ATTRIBUTE_VALUE; | |
416 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(Character.isWhitespace((char)character)) { |
417 | // empty | |
418 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if (html && character == '>') { |
419 |
1
1. go : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
text.setLength(0); |
420 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(true); |
421 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
422 | state = restoreState(); | |
423 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if (html && character == '/') { |
424 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
425 | state = SINGLE_TAG; | |
426 |
1
1. go : negated conditional → NO_COVERAGE |
} else if (html) { |
427 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
428 | text.append((char)character); | |
429 | state = ATTRIBUTE_KEY; | |
430 | } else { | |
431 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::throwException → NO_COVERAGE |
throwException(MessageLocalization.getComposedMessage("error.in.attribute.processing")); |
432 | } | |
433 | break; | |
434 | | |
435 | case ATTRIBUTE_VALUE: | |
436 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
if(character == '"' || character == '\'') { |
437 | quoteCharacter = character; | |
438 | state = QUOTE; | |
439 |
1
1. go : negated conditional → NO_COVERAGE |
} else if(Character.isWhitespace((char)character)) { |
440 | // empty | |
441 |
2
1. go : negated conditional → NO_COVERAGE 2. go : negated conditional → NO_COVERAGE |
} else if (html && character == '>') { |
442 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::flush → NO_COVERAGE |
flush(); |
443 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::processTag → NO_COVERAGE |
processTag(true); |
444 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::initTag → NO_COVERAGE |
initTag(); |
445 | state = restoreState(); | |
446 |
1
1. go : negated conditional → NO_COVERAGE |
} else if (html) { |
447 | text.append((char)character); | |
448 | quoteCharacter = ' '; | |
449 | state = QUOTE; | |
450 | } else { | |
451 |
1
1. go : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::throwException → NO_COVERAGE |
throwException(MessageLocalization.getComposedMessage("error.in.attribute.processing")); |
452 | } | |
453 | break; | |
454 | } | |
455 | } | |
456 | } | |
457 | ||
458 | /** | |
459 | * Gets a state from the stack | |
460 | * @return the previous state | |
461 | */ | |
462 | private int restoreState() { | |
463 |
1
1. restoreState : negated conditional → NO_COVERAGE |
if(!stack.empty()) |
464 |
1
1. restoreState : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE |
return (Integer) stack.pop(); |
465 | else | |
466 |
1
1. restoreState : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE |
return UNKNOWN; |
467 | } | |
468 | /** | |
469 | * Adds a state to the stack. | |
470 | * @param s a state to add to the stack | |
471 | */ | |
472 | private void saveState(int s) { | |
473 | stack.push(s); | |
474 | } | |
475 | /** | |
476 | * Flushes the text that is currently in the buffer. | |
477 | * The text can be ignored, added to the document | |
478 | * as content or as comment,... depending on the current state. | |
479 | */ | |
480 | private void flush() { | |
481 | switch(state){ | |
482 | case TEXT: | |
483 | case CDATA: | |
484 |
2
1. flush : changed conditional boundary → NO_COVERAGE 2. flush : negated conditional → NO_COVERAGE |
if(text.length() > 0) { |
485 |
1
1. flush : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLDocHandler::text → NO_COVERAGE |
doc.text(text.toString()); |
486 | } | |
487 | break; | |
488 | case COMMENT: | |
489 |
1
1. flush : negated conditional → NO_COVERAGE |
if (comment != null) { |
490 |
1
1. flush : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLDocHandlerComment::comment → NO_COVERAGE |
comment.comment(text.toString()); |
491 | } | |
492 | break; | |
493 | case ATTRIBUTE_KEY: | |
494 | attributekey = text.toString(); | |
495 |
1
1. flush : negated conditional → NO_COVERAGE |
if (html) |
496 | attributekey = attributekey.toLowerCase(); | |
497 | break; | |
498 | case QUOTE: | |
499 | case ATTRIBUTE_VALUE: | |
500 | attributevalue = text.toString(); | |
501 | attributes.put(attributekey,attributevalue); | |
502 | break; | |
503 | default: | |
504 | // do nothing | |
505 | } | |
506 |
1
1. flush : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
text.setLength(0); |
507 | } | |
508 | /** | |
509 | * Initialized the tag name and attributes. | |
510 | */ | |
511 | private void initTag() { | |
512 | tag = null; | |
513 | attributes = new HashMap(); | |
514 | } | |
515 | /** Sets the name of the tag. */ | |
516 | private void doTag() { | |
517 |
1
1. doTag : negated conditional → NO_COVERAGE |
if(tag == null) |
518 | tag = text.toString(); | |
519 |
1
1. doTag : negated conditional → NO_COVERAGE |
if (html) |
520 | tag = tag.toLowerCase(); | |
521 |
1
1. doTag : removed call to java/lang/StringBuffer::setLength → NO_COVERAGE |
text.setLength(0); |
522 | } | |
523 | /** | |
524 | * processes the tag. | |
525 | * @param start if true we are dealing with a tag that has just been opened; if false we are closing a tag. | |
526 | */ | |
527 | private void processTag(boolean start) { | |
528 |
1
1. processTag : negated conditional → NO_COVERAGE |
if (start) { |
529 |
1
1. processTag : Replaced integer addition with subtraction → NO_COVERAGE |
nested++; |
530 |
1
1. processTag : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLDocHandler::startElement → NO_COVERAGE |
doc.startElement(tag,attributes); |
531 | } | |
532 | else { | |
533 |
1
1. processTag : Replaced integer subtraction with addition → NO_COVERAGE |
nested--; |
534 |
1
1. processTag : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLDocHandler::endElement → NO_COVERAGE |
doc.endElement(tag); |
535 | } | |
536 | } | |
537 | /** Throws an exception */ | |
538 | private void throwException(String s) throws IOException { | |
539 | throw new IOException(MessageLocalization.getComposedMessage("1.near.line.2.column.3", s, String.valueOf(lines), String.valueOf(columns))); | |
540 | } | |
541 | | |
542 | /** | |
543 | * Parses the XML document firing the events to the handler. | |
544 | * @param doc the document handler | |
545 | * @param r the document. The encoding is already resolved. The reader is not closed | |
546 | * @throws IOException on error | |
547 | */ | |
548 | public static void parse(SimpleXMLDocHandler doc, SimpleXMLDocHandlerComment comment, Reader r, boolean html) throws IOException { | |
549 | SimpleXMLParser parser = new SimpleXMLParser(doc, comment, html); | |
550 |
1
1. parse : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::go → NO_COVERAGE |
parser.go(r); |
551 | } | |
552 | | |
553 | /** | |
554 | * Parses the XML document firing the events to the handler. | |
555 | * @param doc the document handler | |
556 | * @param in the document. The encoding is deduced from the stream. The stream is not closed | |
557 | * @throws IOException on error | |
558 | */ | |
559 | public static void parse(SimpleXMLDocHandler doc, InputStream in) throws IOException { | |
560 | byte[] b4 = new byte[4]; | |
561 | int count = in.read(b4); | |
562 |
1
1. parse : negated conditional → NO_COVERAGE |
if (count != 4) |
563 | throw new IOException(MessageLocalization.getComposedMessage("insufficient.length")); | |
564 | String encoding = UniversalDetector.detectCharsetFromBOM(b4); | |
565 |
1
1. parse : negated conditional → NO_COVERAGE |
if (encoding == null) encoding = "UTF-8"; //UTF-8 is default. |
566 | ||
567 | String decl = null; | |
568 |
1
1. parse : negated conditional → NO_COVERAGE |
if (encoding.equals("UTF-8")) { |
569 | StringBuilder sb = new StringBuilder(); | |
570 | int c; | |
571 |
1
1. parse : negated conditional → NO_COVERAGE |
while ((c = in.read()) != -1) { |
572 |
1
1. parse : negated conditional → NO_COVERAGE |
if (c == '>') |
573 | break; | |
574 | sb.append((char)c); | |
575 | } | |
576 | decl = sb.toString(); | |
577 | } | |
578 |
1
1. parse : negated conditional → NO_COVERAGE |
else if (encoding.equals("CP037")) { |
579 | ByteArrayOutputStream bi = new ByteArrayOutputStream(); | |
580 | int c; | |
581 |
1
1. parse : negated conditional → NO_COVERAGE |
while ((c = in.read()) != -1) { |
582 |
1
1. parse : negated conditional → NO_COVERAGE |
if (c == 0x6e) // that's '>' in ebcdic |
583 | break; | |
584 |
1
1. parse : removed call to java/io/ByteArrayOutputStream::write → NO_COVERAGE |
bi.write(c); |
585 | } | |
586 | decl = new String(bi.toByteArray(), "CP037"); | |
587 | } | |
588 |
1
1. parse : negated conditional → NO_COVERAGE |
if (decl != null) { |
589 | decl = getDeclaredEncoding(decl); | |
590 |
1
1. parse : negated conditional → NO_COVERAGE |
if (decl != null) |
591 | encoding = decl; | |
592 | } | |
593 |
1
1. parse : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::parse → NO_COVERAGE |
parse(doc, new InputStreamReader(in, IanaEncodings.getJavaEncoding(encoding))); |
594 | } | |
595 | | |
596 | private static String getDeclaredEncoding(String decl) { | |
597 |
1
1. getDeclaredEncoding : negated conditional → NO_COVERAGE |
if (decl == null) |
598 |
1
1. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return null; |
599 | int idx = decl.indexOf("encoding"); | |
600 |
2
1. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 2. getDeclaredEncoding : negated conditional → NO_COVERAGE |
if (idx < 0) |
601 |
1
1. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return null; |
602 | int idx1 = decl.indexOf('"', idx); | |
603 | int idx2 = decl.indexOf('\'', idx); | |
604 |
1
1. getDeclaredEncoding : negated conditional → NO_COVERAGE |
if (idx1 == idx2) |
605 |
1
1. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return null; |
606 |
8
1. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 2. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 3. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 4. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 5. getDeclaredEncoding : negated conditional → NO_COVERAGE 6. getDeclaredEncoding : negated conditional → NO_COVERAGE 7. getDeclaredEncoding : negated conditional → NO_COVERAGE 8. getDeclaredEncoding : negated conditional → NO_COVERAGE |
if ((idx1 < 0 && idx2 > 0) || (idx2 > 0 && idx2 < idx1)) { |
607 |
1
1. getDeclaredEncoding : Replaced integer addition with subtraction → NO_COVERAGE |
int idx3 = decl.indexOf('\'', idx2 + 1); |
608 |
2
1. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 2. getDeclaredEncoding : negated conditional → NO_COVERAGE |
if (idx3 < 0) |
609 |
1
1. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return null; |
610 |
2
1. getDeclaredEncoding : Replaced integer addition with subtraction → NO_COVERAGE 2. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return decl.substring(idx2 + 1, idx3); |
611 | } | |
612 |
8
1. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 2. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 3. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 4. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 5. getDeclaredEncoding : negated conditional → NO_COVERAGE 6. getDeclaredEncoding : negated conditional → NO_COVERAGE 7. getDeclaredEncoding : negated conditional → NO_COVERAGE 8. getDeclaredEncoding : negated conditional → NO_COVERAGE |
if ((idx2 < 0 && idx1 > 0) || (idx1 > 0 && idx1 < idx2)) { |
613 |
1
1. getDeclaredEncoding : Replaced integer addition with subtraction → NO_COVERAGE |
int idx3 = decl.indexOf('"', idx1 + 1); |
614 |
2
1. getDeclaredEncoding : changed conditional boundary → NO_COVERAGE 2. getDeclaredEncoding : negated conditional → NO_COVERAGE |
if (idx3 < 0) |
615 |
1
1. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return null; |
616 |
2
1. getDeclaredEncoding : Replaced integer addition with subtraction → NO_COVERAGE 2. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return decl.substring(idx1 + 1, idx3); |
617 | } | |
618 |
1
1. getDeclaredEncoding : mutated return of Object value for com/lowagie/text/xml/simpleparser/SimpleXMLParser::getDeclaredEncoding to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return null; |
619 | } | |
620 | | |
621 | public static void parse(SimpleXMLDocHandler doc,Reader r) throws IOException { | |
622 |
1
1. parse : removed call to com/lowagie/text/xml/simpleparser/SimpleXMLParser::parse → NO_COVERAGE |
parse(doc, null, r, false); |
623 | } | |
624 | ||
625 | } | |
Mutations | ||
146 |
1.1 |
|
155 |
1.1 |
|
159 |
1.1 |
|
162 |
1.1 |
|
172 |
1.1 |
|
173 |
1.1 |
|
174 |
1.1 2.2 |
|
175 |
1.1 |
|
176 |
1.1 |
|
178 |
1.1 |
|
184 |
1.1 2.2 |
|
187 |
1.1 |
|
189 |
1.1 |
|
190 |
1.1 |
|
192 |
1.1 |
|
195 |
1.1 |
|
198 |
1.1 |
|
204 |
1.1 |
|
205 |
1.1 |
|
211 |
1.1 |
|
212 |
1.1 |
|
213 |
1.1 |
|
215 |
1.1 |
|
216 |
1.1 |
|
217 |
1.1 |
|
220 |
1.1 |
|
221 |
1.1 |
|
232 |
1.1 |
|
233 |
1.1 |
|
235 |
1.1 |
|
246 |
1.1 |
|
247 |
1.1 |
|
248 |
1.1 |
|
249 |
1.1 |
|
251 |
1.1 |
|
253 |
1.1 2.2 |
|
254 |
1.1 |
|
256 |
1.1 2.2 |
|
257 |
1.1 |
|
259 |
1.1 2.2 |
|
260 |
1.1 |
|
262 |
1.1 |
|
263 |
1.1 |
|
271 |
1.1 |
|
272 |
1.1 |
|
273 |
1.1 |
|
275 |
1.1 |
|
277 |
1.1 |
|
287 |
1.1 |
|
288 |
1.1 |
|
289 |
1.1 |
|
290 |
1.1 2.2 |
|
293 |
1.1 |
|
301 |
1.1 |
|
302 |
1.1 |
|
303 |
1.1 |
|
304 |
1.1 |
|
305 |
1.1 |
|
306 |
1.1 |
|
307 |
1.1 2.2 |
|
308 |
1.1 |
|
316 |
1.1 |
|
317 |
1.1 |
|
318 |
1.1 2.2 |
|
319 |
1.1 |
|
328 |
1.1 |
|
329 |
1.1 |
|
330 |
1.1 2.2 |
|
331 |
1.1 |
|
339 |
1.1 |
|
341 |
1.1 |
|
347 |
1.1 |
|
350 |
1.1 |
|
352 |
1.1 |
|
356 |
1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9 10.10 11.11 12.12 13.13 |
|
357 |
1.1 2.2 |
|
361 |
1.1 |
|
369 |
1.1 2.2 3.3 |
|
370 |
1.1 |
|
371 |
1.1 |
|
372 |
1.1 |
|
375 |
1.1 2.2 3.3 |
|
376 |
1.1 |
|
379 |
1.1 2.2 |
|
382 |
1.1 |
|
383 |
1.1 |
|
385 |
1.1 2.2 |
|
387 |
1.1 |
|
388 |
1.1 |
|
390 |
1.1 |
|
397 |
1.1 |
|
398 |
1.1 |
|
400 |
1.1 |
|
401 |
1.1 |
|
403 |
1.1 2.2 |
|
404 |
1.1 |
|
405 |
1.1 |
|
406 |
1.1 |
|
414 |
1.1 |
|
416 |
1.1 |
|
418 |
1.1 2.2 |
|
419 |
1.1 |
|
420 |
1.1 |
|
421 |
1.1 |
|
423 |
1.1 2.2 |
|
424 |
1.1 |
|
426 |
1.1 |
|
427 |
1.1 |
|
431 |
1.1 |
|
436 |
1.1 2.2 |
|
439 |
1.1 |
|
441 |
1.1 2.2 |
|
442 |
1.1 |
|
443 |
1.1 |
|
444 |
1.1 |
|
446 |
1.1 |
|
451 |
1.1 |
|
463 |
1.1 |
|
464 |
1.1 |
|
466 |
1.1 |
|
484 |
1.1 2.2 |
|
485 |
1.1 |
|
489 |
1.1 |
|
490 |
1.1 |
|
495 |
1.1 |
|
506 |
1.1 |
|
517 |
1.1 |
|
519 |
1.1 |
|
521 |
1.1 |
|
528 |
1.1 |
|
529 |
1.1 |
|
530 |
1.1 |
|
533 |
1.1 |
|
534 |
1.1 |
|
550 |
1.1 |
|
562 |
1.1 |
|
565 |
1.1 |
|
568 |
1.1 |
|
571 |
1.1 |
|
572 |
1.1 |
|
578 |
1.1 |
|
581 |
1.1 |
|
582 |
1.1 |
|
584 |
1.1 |
|
588 |
1.1 |
|
590 |
1.1 |
|
593 |
1.1 |
|
597 |
1.1 |
|
598 |
1.1 |
|
600 |
1.1 2.2 |
|
601 |
1.1 |
|
604 |
1.1 |
|
605 |
1.1 |
|
606 |
1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 |
|
607 |
1.1 |
|
608 |
1.1 2.2 |
|
609 |
1.1 |
|
610 |
1.1 2.2 |
|
612 |
1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 |
|
613 |
1.1 |
|
614 |
1.1 2.2 |
|
615 |
1.1 |
|
616 |
1.1 2.2 |
|
618 |
1.1 |
|
622 |
1.1 |