PdfContentReaderTool.java

1
/*
2
 * Copyright 2008 by Kevin Day.
3
 *
4
 * The contents of this file are subject to the Mozilla Public License Version 1.1
5
 * (the "License"); you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
7
 *
8
 * Software distributed under the License is distributed on an "AS IS" basis,
9
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10
 * for the specific language governing rights and limitations under the License.
11
 *
12
 * The Original Code is 'iText, a free JAVA-PDF library'.
13
 *
14
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
15
 * the Initial Developer are Copyright (C) 1999-2008 by Bruno Lowagie.
16
 * All Rights Reserved.
17
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
18
 * are Copyright (C) 2000-2008 by Paulo Soares. All Rights Reserved.
19
 *
20
 * Contributor(s): all the names of the contributors are added in the source code
21
 * where applicable.
22
 *
23
 * Alternatively, the contents of this file may be used under the terms of the
24
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
25
 * provisions of LGPL are applicable instead of those above.  If you wish to
26
 * allow use of your version of this file only under the terms of the LGPL
27
 * License and not to allow others to use your version of this file under
28
 * the MPL, indicate your decision by deleting the provisions above and
29
 * replace them with the notice and other provisions required by the LGPL.
30
 * If you do not delete the provisions above, a recipient may use your version
31
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
32
 *
33
 * This library is free software; you can redistribute it and/or modify it
34
 * under the terms of the MPL as stated above or under the terms of the GNU
35
 * Library General Public License as published by the Free Software Foundation;
36
 * either version 2 of the License, or any later version.
37
 *
38
 * This library is distributed in the hope that it will be useful, but WITHOUT
39
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
40
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
41
 * details.
42
 *
43
 * If you didn't download this code from the following link, you should check if
44
 * you aren't using an obsolete version:
45
 * http://www.lowagie.com/iText/
46
 */
47
package com.lowagie.text.pdf.parser;
48
49
import com.lowagie.text.pdf.PdfDictionary;
50
import com.lowagie.text.pdf.PdfName;
51
import com.lowagie.text.pdf.PdfObject;
52
import com.lowagie.text.pdf.PdfReader;
53
import com.lowagie.text.pdf.RandomAccessFileOrArray;
54
55
import java.io.ByteArrayInputStream;
56
import java.io.File;
57
import java.io.FileOutputStream;
58
import java.io.IOException;
59
import java.io.InputStream;
60
import java.io.PrintWriter;
61
import java.util.ArrayList;
62
import java.util.List;
63
64
/**
65
 * Tool that parses the content of a PDF document.
66
 * 
67
 * @since 2.1.4
68
 */
69
@SuppressWarnings("WeakerAccess")
70
public class PdfContentReaderTool {
71
72
    /**
73
     * Shows the detail of a dictionary. This is similar to the PdfLister
74
     * functionality.
75
     * 
76
     * @param dic
77
     *            the dictionary of which you want the detail
78
     * @return a String representation of the dictionary
79
     */
80
    public static String getDictionaryDetail(PdfDictionary dic) {
81 1 1. getDictionaryDetail : mutated return of Object value for com/lowagie/text/pdf/parser/PdfContentReaderTool::getDictionaryDetail to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return getDictionaryDetail(dic, 0);
82
    }
83
84
    /**
85
     * Shows the detail of a dictionary.
86
     * 
87
     * @param dic
88
     *            the dictionary of which you want the detail
89
     * @param depth
90
     *            the depth of the current dictionary (for nested dictionaries)
91
     * @return a String representation of the dictionary
92
     */
93
    public static String getDictionaryDetail(PdfDictionary dic, int depth) {
94
        StringBuilder builder = new StringBuilder();
95
        builder.append('(');
96
        List<PdfName> subDictionaries = new ArrayList<>();
97
        for (PdfName key : dic.getKeys()) {
98
            PdfObject val = dic.getDirectObject(key);
99 1 1. getDictionaryDetail : negated conditional → NO_COVERAGE
            if (val.isDictionary()) {
100
                subDictionaries.add(key);
101
            }
102
            builder.append(key);
103
            builder.append('=');
104
            builder.append(val);
105
            builder.append(", ");
106
        }
107 2 1. getDictionaryDetail : Replaced integer subtraction with addition → NO_COVERAGE
2. getDictionaryDetail : removed call to java/lang/StringBuilder::setLength → NO_COVERAGE
        builder.setLength(builder.length() - 2);
108
        builder.append(')');
109
        PdfName pdfSubDictionaryName;
110
        for (Object subDictionary : subDictionaries) {
111
            pdfSubDictionaryName = (PdfName) subDictionary;
112
            builder.append('\n');
113 4 1. getDictionaryDetail : changed conditional boundary → NO_COVERAGE
2. getDictionaryDetail : Changed increment from 1 to -1 → NO_COVERAGE
3. getDictionaryDetail : Replaced integer addition with subtraction → NO_COVERAGE
4. getDictionaryDetail : negated conditional → NO_COVERAGE
            for (int i = 0; i < depth + 1; i++) {
114
                builder.append('\t');
115
            }
116
            builder.append("Subdictionary ");
117
            builder.append(pdfSubDictionaryName);
118
            builder.append(" = ");
119
            builder.append(getDictionaryDetail(
120 1 1. getDictionaryDetail : Replaced integer addition with subtraction → NO_COVERAGE
                    dic.getAsDict(pdfSubDictionaryName), depth + 1));
121
        }
122 1 1. getDictionaryDetail : mutated return of Object value for com/lowagie/text/pdf/parser/PdfContentReaderTool::getDictionaryDetail to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return builder.toString();
123
    }
124
125
    /**
126
     * Writes information about a specific page from PdfReader to the specified
127
     * output stream.
128
     * 
129
     * @since 2.1.5
130
     * @param reader
131
     *            the PdfReader to read the page content from
132
     * @param pageNum
133
     *            the page number to read
134
     * @param out
135
     *            the output stream to send the content to
136
     * @throws IOException
137
     */
138
    public static void listContentStreamForPage(PdfReader reader, int pageNum, PrintWriter out)
139
            throws IOException {
140 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
        out.println("==============Page " + pageNum + "====================");
141 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
        out.println("- - - - - Dictionary - - - - - -");
142
        PdfDictionary pageDictionary = reader.getPageN(pageNum);
143 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
        out.println(getDictionaryDetail(pageDictionary));
144 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
        out.println("- - - - - Content Stream - - - - - -");
145
        RandomAccessFileOrArray f = reader.getSafeFile();
146
147
        byte[] contentBytes = reader.getPageContent(pageNum, f);
148 1 1. listContentStreamForPage : removed call to com/lowagie/text/pdf/RandomAccessFileOrArray::close → NO_COVERAGE
        f.close();
149
150
        InputStream is = new ByteArrayInputStream(contentBytes);
151
        int ch;
152 1 1. listContentStreamForPage : negated conditional → NO_COVERAGE
        while ((ch = is.read()) != -1) {
153 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::print → NO_COVERAGE
            out.print((char) ch);
154
        }
155
156 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
        out.println("- - - - - Text Extraction - - - - - -");
157
        PdfTextExtractor extractor = new PdfTextExtractor(reader,
158
                new MarkedUpTextAssembler(reader));
159
        String extractedText = extractor.getTextFromPage(pageNum);
160 1 1. listContentStreamForPage : negated conditional → NO_COVERAGE
        if (extractedText.length() != 0) {
161 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
            out.println(extractedText);
162
        } else {
163 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
            out.println("No text found on page " + pageNum);
164
        }
165
166 1 1. listContentStreamForPage : removed call to java/io/PrintWriter::println → NO_COVERAGE
        out.println();
167
168
    }
169
170
    /**
171
     * Writes information about each page in a PDF file to the specified output
172
     * stream.
173
     * 
174
     * @since 2.1.5
175
     * @param pdfFile
176
     *            a File instance referring to a PDF file
177
     * @param out
178
     *            the output stream to send the content to
179
     * @throws IOException
180
     */
181
    public static void listContentStream(File pdfFile, PrintWriter out)
182
            throws IOException {
183
        PdfReader reader = new PdfReader(pdfFile.getCanonicalPath());
184
185
        int maxPageNum = reader.getNumberOfPages();
186
187 3 1. listContentStream : changed conditional boundary → NO_COVERAGE
2. listContentStream : Changed increment from 1 to -1 → NO_COVERAGE
3. listContentStream : negated conditional → NO_COVERAGE
        for (int pageNum = 1; pageNum <= maxPageNum; pageNum++) {
188 1 1. listContentStream : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStreamForPage → NO_COVERAGE
            listContentStreamForPage(reader, pageNum, out);
189
        }
190
191
    }
192
193
    /**
194
     * Writes information about the specified page in a PDF file to the
195
     * specified output stream.
196
     * 
197
     * @since 2.1.5
198
     * @param pdfFile
199
     *            a File instance referring to a PDF file
200
     * @param pageNum
201
     *            the page number to read
202
     * @param out
203
     *            the output stream to send the content to
204
     * @throws IOException
205
     */
206
    public static void listContentStream(File pdfFile, int pageNum,
207
            PrintWriter out) throws IOException {
208
        PdfReader reader = new PdfReader(pdfFile.getCanonicalPath());
209
210 1 1. listContentStream : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStreamForPage → NO_COVERAGE
        listContentStreamForPage(reader, pageNum, out);
211
    }
212
213
    /**
214
     * Writes information about each page in a PDF file to the specified file,
215
     * or System.out.
216
     * 
217
     * @param args
218
     */
219
    public static void main(String[] args) {
220
        try {
221 4 1. main : changed conditional boundary → NO_COVERAGE
2. main : changed conditional boundary → NO_COVERAGE
3. main : negated conditional → NO_COVERAGE
4. main : negated conditional → NO_COVERAGE
            if (args.length < 1 || args.length > 3) {
222 1 1. main : removed call to java/io/PrintStream::println → NO_COVERAGE
                System.out.println("Usage:  PdfContentReaderTool <pdf file> [<output file>|stdout] [<page num>]");
223
                return;
224
            }
225
226
            PrintWriter writer = new PrintWriter(System.out);
227 2 1. main : changed conditional boundary → NO_COVERAGE
2. main : negated conditional → NO_COVERAGE
            if (args.length >= 2) {
228 1 1. main : negated conditional → NO_COVERAGE
                if (args[1].compareToIgnoreCase("stdout") != 0) {
229 1 1. main : removed call to java/io/PrintStream::println → NO_COVERAGE
                    System.out.println("Writing PDF content to " + args[1]);
230
                    writer = new PrintWriter(new FileOutputStream(new File(args[1])));
231
                }
232
            }
233
234
            int pageNum = -1;
235 2 1. main : changed conditional boundary → NO_COVERAGE
2. main : negated conditional → NO_COVERAGE
            if (args.length >= 3) {
236
                pageNum = Integer.parseInt(args[2]);
237
            }
238
239 1 1. main : negated conditional → NO_COVERAGE
            if (pageNum == -1) {
240 1 1. main : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStream → NO_COVERAGE
                listContentStream(new File(args[0]), writer);
241
            } else {
242 1 1. main : removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStream → NO_COVERAGE
                listContentStream(new File(args[0]), pageNum, writer);
243
            }
244 1 1. main : removed call to java/io/PrintWriter::flush → NO_COVERAGE
            writer.flush();
245
246 2 1. main : changed conditional boundary → NO_COVERAGE
2. main : negated conditional → NO_COVERAGE
            if (args.length >= 2) {
247 1 1. main : removed call to java/io/PrintWriter::close → NO_COVERAGE
                writer.close();
248 1 1. main : removed call to java/io/PrintStream::println → NO_COVERAGE
                System.out.println("Finished writing content to " + args[1]);
249
            }
250
        } catch (Exception e) {
251 1 1. main : removed call to java/lang/Exception::printStackTrace → NO_COVERAGE
            e.printStackTrace(System.err);
252
        }
253
    }
254
255
}

Mutations

81

1.1
Location : getDictionaryDetail
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/parser/PdfContentReaderTool::getDictionaryDetail to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

99

1.1
Location : getDictionaryDetail
Killed by : none
negated conditional → NO_COVERAGE

107

1.1
Location : getDictionaryDetail
Killed by : none
Replaced integer subtraction with addition → NO_COVERAGE

2.2
Location : getDictionaryDetail
Killed by : none
removed call to java/lang/StringBuilder::setLength → NO_COVERAGE

113

1.1
Location : getDictionaryDetail
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : getDictionaryDetail
Killed by : none
Changed increment from 1 to -1 → NO_COVERAGE

3.3
Location : getDictionaryDetail
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

4.4
Location : getDictionaryDetail
Killed by : none
negated conditional → NO_COVERAGE

120

1.1
Location : getDictionaryDetail
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

122

1.1
Location : getDictionaryDetail
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/parser/PdfContentReaderTool::getDictionaryDetail to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

140

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

141

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

143

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

144

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

148

1.1
Location : listContentStreamForPage
Killed by : none
removed call to com/lowagie/text/pdf/RandomAccessFileOrArray::close → NO_COVERAGE

152

1.1
Location : listContentStreamForPage
Killed by : none
negated conditional → NO_COVERAGE

153

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::print → NO_COVERAGE

156

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

160

1.1
Location : listContentStreamForPage
Killed by : none
negated conditional → NO_COVERAGE

161

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

163

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

166

1.1
Location : listContentStreamForPage
Killed by : none
removed call to java/io/PrintWriter::println → NO_COVERAGE

187

1.1
Location : listContentStream
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : listContentStream
Killed by : none
Changed increment from 1 to -1 → NO_COVERAGE

3.3
Location : listContentStream
Killed by : none
negated conditional → NO_COVERAGE

188

1.1
Location : listContentStream
Killed by : none
removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStreamForPage → NO_COVERAGE

210

1.1
Location : listContentStream
Killed by : none
removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStreamForPage → NO_COVERAGE

221

1.1
Location : main
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : main
Killed by : none
changed conditional boundary → NO_COVERAGE

3.3
Location : main
Killed by : none
negated conditional → NO_COVERAGE

4.4
Location : main
Killed by : none
negated conditional → NO_COVERAGE

222

1.1
Location : main
Killed by : none
removed call to java/io/PrintStream::println → NO_COVERAGE

227

1.1
Location : main
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : main
Killed by : none
negated conditional → NO_COVERAGE

228

1.1
Location : main
Killed by : none
negated conditional → NO_COVERAGE

229

1.1
Location : main
Killed by : none
removed call to java/io/PrintStream::println → NO_COVERAGE

235

1.1
Location : main
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : main
Killed by : none
negated conditional → NO_COVERAGE

239

1.1
Location : main
Killed by : none
negated conditional → NO_COVERAGE

240

1.1
Location : main
Killed by : none
removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStream → NO_COVERAGE

242

1.1
Location : main
Killed by : none
removed call to com/lowagie/text/pdf/parser/PdfContentReaderTool::listContentStream → NO_COVERAGE

244

1.1
Location : main
Killed by : none
removed call to java/io/PrintWriter::flush → NO_COVERAGE

246

1.1
Location : main
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : main
Killed by : none
negated conditional → NO_COVERAGE

247

1.1
Location : main
Killed by : none
removed call to java/io/PrintWriter::close → NO_COVERAGE

248

1.1
Location : main
Killed by : none
removed call to java/io/PrintStream::println → NO_COVERAGE

251

1.1
Location : main
Killed by : none
removed call to java/lang/Exception::printStackTrace → NO_COVERAGE

Active mutators

Tests examined


Report generated by PIT 1.4.2