1 | /* | |
2 | * Copyright 2008 by Kevin Day. | |
3 | * | |
4 | * The contents of this file are subject to the Mozilla Public License Version 1.1 | |
5 | * (the "License"); you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at http://www.mozilla.org/MPL/ | |
7 | * | |
8 | * Software distributed under the License is distributed on an "AS IS" basis, | |
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
10 | * for the specific language governing rights and limitations under the License. | |
11 | * | |
12 | * The Original Code is 'iText, a free JAVA-PDF library'. | |
13 | * | |
14 | * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by | |
15 | * the Initial Developer are Copyright (C) 1999-2008 by Bruno Lowagie. | |
16 | * All Rights Reserved. | |
17 | * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer | |
18 | * are Copyright (C) 2000-2008 by Paulo Soares. All Rights Reserved. | |
19 | * | |
20 | * Contributor(s): all the names of the contributors are added in the source code | |
21 | * where applicable. | |
22 | * | |
23 | * Alternatively, the contents of this file may be used under the terms of the | |
24 | * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the | |
25 | * provisions of LGPL are applicable instead of those above. If you wish to | |
26 | * allow use of your version of this file only under the terms of the LGPL | |
27 | * License and not to allow others to use your version of this file under | |
28 | * the MPL, indicate your decision by deleting the provisions above and | |
29 | * replace them with the notice and other provisions required by the LGPL. | |
30 | * If you do not delete the provisions above, a recipient may use your version | |
31 | * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. | |
32 | * | |
33 | * This library is free software; you can redistribute it and/or modify it | |
34 | * under the terms of the MPL as stated above or under the terms of the GNU | |
35 | * Library General Public License as published by the Free Software Foundation; | |
36 | * either version 2 of the License, or any later version. | |
37 | * | |
38 | * This library is distributed in the hope that it will be useful, but WITHOUT | |
39 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
40 | * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more | |
41 | * details. | |
42 | * | |
43 | * If you didn't download this code from the following link, you should check if | |
44 | * you aren't using an obsolete version: | |
45 | * http://www.lowagie.com/iText/ | |
46 | */ | |
47 | package com.lowagie.text.pdf.parser; | |
48 | ||
49 | import com.lowagie.text.pdf.PdfDictionary; | |
50 | import com.lowagie.text.pdf.PdfName; | |
51 | import com.lowagie.text.pdf.PdfObject; | |
52 | import com.lowagie.text.pdf.PdfReader; | |
53 | import com.lowagie.text.pdf.RandomAccessFileOrArray; | |
54 | ||
55 | import java.io.ByteArrayInputStream; | |
56 | import java.io.File; | |
57 | import java.io.FileOutputStream; | |
58 | import java.io.IOException; | |
59 | import java.io.InputStream; | |
60 | import java.io.PrintWriter; | |
61 | import java.util.ArrayList; | |
62 | import java.util.List; | |
63 | ||
64 | /** | |
65 | * Tool that parses the content of a PDF document. | |
66 | * | |
67 | * @since 2.1.4 | |
68 | */ | |
69 | @SuppressWarnings("WeakerAccess") | |
70 | public class PdfContentReaderTool { | |
71 | ||
72 | /** | |
73 | * Shows the detail of a dictionary. This is similar to the PdfLister | |
74 | * functionality. | |
75 | * | |
76 | * @param dic | |
77 | * the dictionary of which you want the detail | |
78 | * @return a String representation of the dictionary | |
79 | */ | |
80 | public static String getDictionaryDetail(PdfDictionary dic) { | |
81 |
1
1. getDictionaryDetail : mutated return of Object value for com/lowagie/text/pdf/parser/PdfContentReaderTool::getDictionaryDetail to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return getDictionaryDetail(dic, 0); |
82 | } | |
83 | ||
84 | /** | |
85 | * Shows the detail of a dictionary. | |
86 | * | |
87 | * @param dic | |
88 | * the dictionary of which you want the detail | |
89 | * @param depth | |
90 | * the depth of the current dictionary (for nested dictionaries) | |
91 | * @return a String representation of the dictionary | |
92 | */ | |
93 | public static String getDictionaryDetail(PdfDictionary dic, int depth) { | |
94 | StringBuilder builder = new StringBuilder(); | |
95 | builder.append('('); | |
96 | List<PdfName> subDictionaries = new ArrayList<>(); | |
97 | for (PdfName key : dic.getKeys()) { | |
98 | PdfObject val = dic.getDirectObject(key); | |
99 |
1
1. getDictionaryDetail : negated conditional → NO_COVERAGE |
if (val.isDictionary()) { |
100 | subDictionaries.add(key); | |
101 | } | |
102 | builder.append(key); | |
103 | builder.append('='); | |
104 | builder.append(val); | |
105 | builder.append(", "); | |
106 | } | |
107 |
2
1. getDictionaryDetail : Replaced integer subtraction with addition → NO_COVERAGE 2. getDictionaryDetail : removed call to java/lang/StringBuilder::setLength → NO_COVERAGE |
builder.setLength(builder.length() - 2); |
108 | builder.append(')'); | |
109 | PdfName pdfSubDictionaryName; | |
110 | for (Object subDictionary : subDictionaries) { | |
111 | pdfSubDictionaryName = (PdfName) subDictionary; | |
112 | builder.append('\n'); | |
113 |
4
1. getDictionaryDetail : changed conditional boundary → NO_COVERAGE 2. getDictionaryDetail : Changed increment from 1 to -1 → NO_COVERAGE 3. getDictionaryDetail : Replaced integer addition with subtraction → NO_COVERAGE 4. getDictionaryDetail : negated conditional → NO_COVERAGE |
for (int i = 0; i < depth + 1; i++) { |
114 | builder.append('\t'); | |
115 | } | |
116 | builder.append("Subdictionary "); | |
117 | builder.append(pdfSubDictionaryName); | |
118 | builder.append(" = "); | |
119 | builder.append(getDictionaryDetail( | |
120 |
1
1. getDictionaryDetail : Replaced integer addition with subtraction → NO_COVERAGE |
dic.getAsDict(pdfSubDictionaryName), depth + 1)); |
121 | } | |
122 |
1
1. getDictionaryDetail : mutated return of Object value for com/lowagie/text/pdf/parser/PdfContentReaderTool::getDictionaryDetail to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return builder.toString(); |
123 | } | |
124 | ||
125 | /** | |
126 | * Writes information about a specific page from PdfReader to the specified | |
127 | * output stream. | |
128 | * | |
129 | * @since 2.1.5 | |
130 | * @param reader | |
131 | * the PdfReader to read the page content from | |
132 | * @param pageNum | |
133 | * the page number to read | |
134 | * @param out | |
135 | * the output stream to send the content to | |
136 | * @throws IOException | |
137 | */ | |
138 | public static void listContentStreamForPage(PdfReader reader, int pageNum, PrintWriter out) | |
139 | throws IOException { | |
140 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println("==============Page " + pageNum + "===================="); |
141 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println("- - - - - Dictionary - - - - - -"); |
142 | PdfDictionary pageDictionary = reader.getPageN(pageNum); | |
143 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println(getDictionaryDetail(pageDictionary)); |
144 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println("- - - - - Content Stream - - - - - -"); |
145 | RandomAccessFileOrArray f = reader.getSafeFile(); | |
146 | ||
147 | byte[] contentBytes = reader.getPageContent(pageNum, f); | |
148 |
1
1. listContentStreamForPage : removed call to com/lowagie/text/pdf/RandomAccessFileOrArray::close → NO_COVERAGE |
f.close(); |
149 | ||
150 | InputStream is = new ByteArrayInputStream(contentBytes); | |
151 | int ch; | |
152 |
1
1. listContentStreamForPage : negated conditional → NO_COVERAGE |
while ((ch = is.read()) != -1) { |
153 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::print → NO_COVERAGE |
out.print((char) ch); |
154 | } | |
155 | ||
156 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println("- - - - - Text Extraction - - - - - -"); |
157 | PdfTextExtractor extractor = new PdfTextExtractor(reader, | |
158 | new MarkedUpTextAssembler(reader)); | |
159 | String extractedText = extractor.getTextFromPage(pageNum); | |
160 |
1
1. listContentStreamForPage : negated conditional → NO_COVERAGE |
if (extractedText.length() != 0) { |
161 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println(extractedText); |
162 | } else { | |
163 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println("No text found on page " + pageNum); |
164 | } | |
165 | ||
166 |
1
1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE |
out.println(); |
167 | ||
168 | } | |
169 | ||
170 | /** | |
171 | * Writes information about each page in a PDF file to the specified output | |
172 | * stream. | |
173 | * | |
174 | * @since 2.1.5 | |
175 | * @param pdfFile | |
176 | * a File instance referring to a PDF file | |
177 | * @param out | |
178 | * the output stream to send the content to | |
179 | * @throws IOException | |
180 | */ | |
181 | public static void listContentStream(File pdfFile, PrintWriter out) | |
182 | throws IOException { | |
183 | PdfReader reader = new PdfReader(pdfFile.getCanonicalPath()); | |
184 | ||
185 | int maxPageNum = reader.getNumberOfPages(); | |
186 | ||
187 |
3
1. listContentStream : changed conditional boundary → NO_COVERAGE 2. listContentStream : Changed increment from 1 to -1 → NO_COVERAGE 3. listContentStream : negated conditional → NO_COVERAGE |
for (int pageNum = 1; pageNum <= maxPageNum; pageNum++) { |
188 |
1
1. listContentStream : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStreamForPage → NO_COVERAGE |
listContentStreamForPage(reader, pageNum, out); |
189 | } | |
190 | ||
191 | } | |
192 | ||
193 | /** | |
194 | * Writes information about the specified page in a PDF file to the | |
195 | * specified output stream. | |
196 | * | |
197 | * @since 2.1.5 | |
198 | * @param pdfFile | |
199 | * a File instance referring to a PDF file | |
200 | * @param pageNum | |
201 | * the page number to read | |
202 | * @param out | |
203 | * the output stream to send the content to | |
204 | * @throws IOException | |
205 | */ | |
206 | public static void listContentStream(File pdfFile, int pageNum, | |
207 | PrintWriter out) throws IOException { | |
208 | PdfReader reader = new PdfReader(pdfFile.getCanonicalPath()); | |
209 | ||
210 |
1
1. listContentStream : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStreamForPage → NO_COVERAGE |
listContentStreamForPage(reader, pageNum, out); |
211 | } | |
212 | ||
213 | /** | |
214 | * Writes information about each page in a PDF file to the specified file, | |
215 | * or System.out. | |
216 | * | |
217 | * @param args | |
218 | */ | |
219 | public static void main(String[] args) { | |
220 | try { | |
221 |
4
1. main : changed conditional boundary → NO_COVERAGE 2. main : changed conditional boundary → NO_COVERAGE 3. main : negated conditional → NO_COVERAGE 4. main : negated conditional → NO_COVERAGE |
if (args.length < 1 || args.length > 3) { |
222 |
1
1. main : removed call to java/io/PrintStream::println → NO_COVERAGE |
System.out.println("Usage: PdfContentReaderTool <pdf file> [<output file>|stdout] [<page num>]"); |
223 | return; | |
224 | } | |
225 | ||
226 | PrintWriter writer = new PrintWriter(System.out); | |
227 |
2
1. main : changed conditional boundary → NO_COVERAGE 2. main : negated conditional → NO_COVERAGE |
if (args.length >= 2) { |
228 |
1
1. main : negated conditional → NO_COVERAGE |
if (args[1].compareToIgnoreCase("stdout") != 0) { |
229 |
1
1. main : removed call to java/io/PrintStream::println → NO_COVERAGE |
System.out.println("Writing PDF content to " + args[1]); |
230 | writer = new PrintWriter(new FileOutputStream(new File(args[1]))); | |
231 | } | |
232 | } | |
233 | ||
234 | int pageNum = -1; | |
235 |
2
1. main : changed conditional boundary → NO_COVERAGE 2. main : negated conditional → NO_COVERAGE |
if (args.length >= 3) { |
236 | pageNum = Integer.parseInt(args[2]); | |
237 | } | |
238 | ||
239 |
1
1. main : negated conditional → NO_COVERAGE |
if (pageNum == -1) { |
240 |
1
1. main : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStream → NO_COVERAGE |
listContentStream(new File(args[0]), writer); |
241 | } else { | |
242 |
1
1. main : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStream → NO_COVERAGE |
listContentStream(new File(args[0]), pageNum, writer); |
243 | } | |
244 |
1
1. main : removed call to java/io/PrintWriter::flush → NO_COVERAGE |
writer.flush(); |
245 | ||
246 |
2
1. main : changed conditional boundary → NO_COVERAGE 2. main : negated conditional → NO_COVERAGE |
if (args.length >= 2) { |
247 |
1
1. main : removed call to java/io/PrintWriter::close → NO_COVERAGE |
writer.close(); |
248 |
1
1. main : removed call to java/io/PrintStream::println → NO_COVERAGE |
System.out.println("Finished writing content to " + args[1]); |
249 | } | |
250 | } catch (Exception e) { | |
251 |
1
1. main : removed call to java/lang/Exception::printStackTrace → NO_COVERAGE |
e.printStackTrace(System.err); |
252 | } | |
253 | } | |
254 | ||
255 | } | |
Mutations | ||
81 |
1.1 |
|
99 |
1.1 |
|
107 |
1.1 2.2 |
|
113 |
1.1 2.2 3.3 4.4 |
|
120 |
1.1 |
|
122 |
1.1 |
|
140 |
1.1 |
|
141 |
1.1 |
|
143 |
1.1 |
|
144 |
1.1 |
|
148 |
1.1 |
|
152 |
1.1 |
|
153 |
1.1 |
|
156 |
1.1 |
|
160 |
1.1 |
|
161 |
1.1 |
|
163 |
1.1 |
|
166 |
1.1 |
|
187 |
1.1 2.2 3.3 |
|
188 |
1.1 |
|
210 |
1.1 |
|
221 |
1.1 2.2 3.3 4.4 |
|
222 |
1.1 |
|
227 |
1.1 2.2 |
|
228 |
1.1 |
|
229 |
1.1 |
|
235 |
1.1 2.2 |
|
239 |
1.1 |
|
240 |
1.1 |
|
242 |
1.1 |
|
244 |
1.1 |
|
246 |
1.1 2.2 |
|
247 |
1.1 |
|
248 |
1.1 |
|
251 |
1.1 |