MarkedUpTextAssembler.java

1
/**
2
 * Copyright 2014 by Tizra Inc.
3
 * The contents of this file are subject to the Mozilla Public License Version 1.1
4
 * (the "License"); you may not use this file except in compliance with the License.
5
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
6
 * <p>
7
 * Software distributed under the License is distributed on an "AS IS" basis,
8
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
9
 * for the specific language governing rights and limitations under the License.
10
 * <p>
11
 * The Original Code is 'iText, a free JAVA-PDF library'.
12
 * <p>
13
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
14
 * the Initial Developer are Copyright (C) 1999-2008 by Bruno Lowagie.
15
 * All Rights Reserved.
16
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
17
 * are Copyright (C) 2000-2008 by Paulo Soares. All Rights Reserved.
18
 * <p>
19
 * Contributor(s): all the names of the contributors are added in the source code
20
 * where applicable.
21
 * <p>
22
 * Alternatively, the contents of this file may be used under the terms of the
23
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
24
 * provisions of LGPL are applicable instead of those above.  If you wish to
25
 * allow use of your version of this file only under the terms of the LGPL
26
 * License and not to allow others to use your version of this file under
27
 * the MPL, indicate your decision by deleting the provisions above and
28
 * replace them with the notice and other provisions required by the LGPL.
29
 * If you do not delete the provisions above, a recipient may use your version
30
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
31
 * <p>
32
 * This library is free software; you can redistribute it and/or modify it
33
 * under the terms of the MPL as stated above or under the terms of the GNU
34
 * Library General Public License as published by the Free Software Foundation;
35
 * either version 2 of the License, or any later version.
36
 * <p>
37
 * This library is distributed in the hope that it will be useful, but WITHOUT
38
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
39
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
40
 * details.
41
 */
42
package com.lowagie.text.pdf.parser;
43
44
import com.lowagie.text.pdf.PdfReader;
45
46
import javax.annotation.Nullable;
47
import java.util.ArrayList;
48
import java.util.List;
49
50
/**
51
 * We'll get called on a variety of marked section content (perhaps including
52
 * the results of nested sections), and will assemble it into an order as we
53
 * can.
54
 *
55
 * @author dgd
56
 *
57
 */
58
public class MarkedUpTextAssembler implements TextAssembler {
59
    /**
60
     * our result may be partially processed already, in which case we'll just
61
     * add things to it, once ready.
62
     */
63
    List<FinalText> result = new ArrayList<>();
64
    private PdfReader reader;
65
    @Nullable
66
    private ParsedTextImpl inProgress = null;
67
    private int page;
68
    private int wordIdCounter = 1;
69
    private boolean usePdfMarkupElements = false;
70
    /**
71
     * as we get new content (final or not), we accumulate it until we reach the
72
     * end of a parsing unit
73
     *
74
     * Each parsing unit may have a tag name that should wrap its content
75
     */
76
    private List<TextAssemblyBuffer> partialWords = new ArrayList<>();
77
78
    MarkedUpTextAssembler(PdfReader reader) {
79
        this.reader = reader;
80
    }
81
82
    MarkedUpTextAssembler(PdfReader reader, boolean usePdfMarkupElements) {
83
        this.reader = reader;
84
        this.usePdfMarkupElements = usePdfMarkupElements;
85
    }
86
87
    /**
88
     * Remember an unassembled chunk until we hit the end of this element, or we
89
     * hit an assembled chunk, and need to pull things together.
90
     *
91
     * @param unassembled
92
     *            chunk of text rendering instruction to contribute to final
93
     *            text
94
     */
95
    @Override
96
    public void process(ParsedText unassembled, String contextName) {
97
        partialWords.addAll(unassembled.getAsPartialWords());
98
    }
99
100
    /**
101
     * Slot fully-assembled chunk into our result at the current location. If
102
     * there are unassembled chunks waiting, assemble them first.
103
     *
104
     * @param completed
105
     *            This is a chunk from a nested element
106
     */
107
    @Override
108
    public void process(FinalText completed, String contextName) {
109 1 1. process : removed call to com/lowagie/text/pdf/parser/MarkedUpTextAssembler::clearAccumulator → NO_COVERAGE
        clearAccumulator();
110
        result.add(completed);
111
112
    }
113
114
    /**
115
     * {@inheritDoc}
116
     * @see com.lowagie.text.pdf.parser.TextAssembler#process(com.lowagie.text.pdf.parser.Word,
117
     *      String)
118
     */
119
    @Override
120
    public void process(Word completed, String contextName) {
121
        partialWords.add(completed);
122
    }
123
124
    /**
125
     *
126
     */
127
    private void clearAccumulator() {
128
        for (TextAssemblyBuffer partialWord : partialWords) {
129
            // Visit each partialWord, calling renderText 
130 1 1. clearAccumulator : removed call to com/lowagie/text/pdf/parser/TextAssemblyBuffer::assemble → NO_COVERAGE
            partialWord.assemble(this);
131
        }
132 1 1. clearAccumulator : removed call to java/util/List::clear → NO_COVERAGE
        partialWords.clear();
133 1 1. clearAccumulator : negated conditional → NO_COVERAGE
        if (inProgress != null) {
134
            result.add(inProgress.getFinalText(reader, page, this, usePdfMarkupElements));
135
            inProgress = null;
136
        }
137
    }
138
139
    private FinalText concatenateResult(@Nullable String containingElementName) {
140
        // null element means that this is a formatting artifact, not content.
141 1 1. concatenateResult : negated conditional → NO_COVERAGE
        if (containingElementName == null) {
142
            // at some point, we may want to extract alternate text for some
143
            // artifacts.
144 1 1. concatenateResult : mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::concatenateResult to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
            return null;
145
        }
146
        StringBuilder res = new StringBuilder();
147 2 1. concatenateResult : negated conditional → NO_COVERAGE
2. concatenateResult : negated conditional → NO_COVERAGE
        if (usePdfMarkupElements && !containingElementName.isEmpty()) {
148
            res.append('<').append(containingElementName).append('>');
149
        }
150
        for (FinalText item : result) {
151
            res.append(item.getText());
152
        }
153
        // important, as the stuff buffered in the result is now used up!
154 1 1. concatenateResult : removed call to java/util/List::clear → NO_COVERAGE
        result.clear();
155 2 1. concatenateResult : negated conditional → NO_COVERAGE
2. concatenateResult : negated conditional → NO_COVERAGE
        if (usePdfMarkupElements && !containingElementName.isEmpty()) {
156
            res.append("</");
157
            int spacePos = containingElementName.indexOf(' ');
158 2 1. concatenateResult : changed conditional boundary → NO_COVERAGE
2. concatenateResult : negated conditional → NO_COVERAGE
            if (spacePos >= 0) {
159
                containingElementName = containingElementName.substring(0, spacePos);
160
            }
161
            res.append(containingElementName).append('>');
162
        }
163 1 1. concatenateResult : mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::concatenateResult to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return new FinalText(res.toString());
164
    }
165
166
    /**
167
     * {@inheritDoc}
168
     * @see com.lowagie.text.pdf.parser.TextAssembler#endParsingContext(String)
169
     */
170
    @Override
171
    public FinalText endParsingContext(@Nullable String containingElementName) {
172 1 1. endParsingContext : removed call to com/lowagie/text/pdf/parser/MarkedUpTextAssembler::clearAccumulator → NO_COVERAGE
        clearAccumulator();
173 1 1. endParsingContext : mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::endParsingContext to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return concatenateResult(containingElementName);
174
    }
175
176
    /**
177
     *
178
     * @see com.lowagie.text.pdf.parser.TextAssembler#reset()
179
     */
180
    @Override
181
    public void reset() {
182 1 1. reset : removed call to java/util/List::clear → NO_COVERAGE
        result.clear();
183 1 1. reset : removed call to java/util/List::clear → NO_COVERAGE
        partialWords.clear();
184
        inProgress = null;
185
    }
186
187
    @Override
188
    public void renderText(FinalText finalText) {
189
        result.add(finalText);
190
    }
191
192
    /**
193
     * Captures text using a simplified algorithm for inserting hard returns and
194
     * spaces
195
     *
196
     * @see com.lowagie.text.pdf.parser.AbstractRenderListener#renderText(java.lang.String,
197
     *      com.lowagie.text.pdf.parser.GraphicsState,
198
     *      com.lowagie.text.pdf.parser.Matrix,
199
     *      com.lowagie.text.pdf.parser.Matrix)
200
     */
201
    @Override
202
    public void renderText(ParsedTextImpl partialWord) {
203 1 1. renderText : negated conditional → NO_COVERAGE
        boolean firstRender = inProgress == null;
204
        boolean hardReturn = false;
205 1 1. renderText : negated conditional → NO_COVERAGE
        if (firstRender) {
206
            inProgress = partialWord;
207
            return;
208
        }
209
        Vector start = partialWord.getStartPoint();
210
        Vector lastStart = inProgress.getStartPoint();
211
        Vector lastEnd = inProgress.getEndPoint();
212
213
        // see
214
        // http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html
215
        float dist = inProgress.getBaseline()
216
                .subtract(lastStart)
217
                .cross(lastStart.subtract(start))
218 1 1. renderText : Replaced float division with multiplication → NO_COVERAGE
                .lengthSquared() / inProgress.getBaseline().subtract(lastStart).lengthSquared();
219
220 1 1. renderText : Replaced float multiplication with division → NO_COVERAGE
        float sameLineThreshold = partialWord.getAscent() * 0.5f;
221
        // let's try using 25% of current leading for vertical slop.
222 3 1. renderText : changed conditional boundary → NO_COVERAGE
2. renderText : negated conditional → NO_COVERAGE
3. renderText : negated conditional → NO_COVERAGE
        if (dist > sameLineThreshold || Float.isNaN(dist)) {
223
            hardReturn = true;
224
        }
225
        /*
226
         * Note: Technically, we should check both the start and end positions,
227
         * in case the angle of the text changed without any displacement but
228
         * this sort of thing probably doesn't happen much in reality, so we'll
229
         * leave it alone for now
230
         */
231
        float spacing = lastEnd.subtract(start).length();
232 2 1. renderText : negated conditional → NO_COVERAGE
2. renderText : negated conditional → NO_COVERAGE
        if (hardReturn || partialWord.breakBefore()) {
233
            result.add(inProgress.getFinalText(reader, page, this, usePdfMarkupElements));
234 1 1. renderText : negated conditional → NO_COVERAGE
            if (hardReturn) {
235
                result.add(new FinalText("\n"));
236 1 1. renderText : negated conditional → NO_COVERAGE
                if (usePdfMarkupElements) {
237
                    result.add(new FinalText("<br class='t-pdf' />"));
238
                }
239
            }
240
            inProgress = partialWord;
241
            // System.out.println("<< Hard Return >>");
242 4 1. renderText : changed conditional boundary → NO_COVERAGE
2. renderText : Replaced double division with multiplication → NO_COVERAGE
3. renderText : negated conditional → NO_COVERAGE
4. renderText : negated conditional → NO_COVERAGE
        } else if (spacing < partialWord.getSingleSpaceWidth() / 2.3 || inProgress.shouldNotSplit()) {
243
            inProgress = new Word(inProgress.getText() + partialWord.getText().trim(),
244
                    partialWord.getAscent(),
245
                    partialWord.getDescent(),
246
                    lastStart,
247
                    partialWord.getEndPoint(),
248
                    inProgress.getBaseline(),
249
                    partialWord.getSingleSpaceWidth(),
250
                    inProgress.shouldNotSplit(),
251
                    inProgress.breakBefore());
252
        } else {
253
            result.add(inProgress.getFinalText(reader, page, this, usePdfMarkupElements));
254
            inProgress = partialWord;
255
        }
256
    }
257
258
    /**
259
     * Getter.
260
     *
261
     * @see SimpleTextExtractingPdfContentRenderListener#_reader
262
     * @return reader
263
     */
264
    protected PdfReader getReader() {
265
        return reader;
266
    }
267
268
    /**
269
     * {@inheritDoc}
270
     * @see com.lowagie.text.pdf.parser.TextAssembler#setPage(int)
271
     */
272
    @Override
273
    public void setPage(int page) {
274
        this.page = page;
275
    }
276
277
    /**
278
     * {@inheritDoc}
279
     * @see com.lowagie.text.pdf.parser.TextAssembler#getWordId()
280
     */
281
    @Override
282
    public String getWordId() {
283 2 1. getWordId : Replaced integer addition with subtraction → NO_COVERAGE
2. getWordId : mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::getWordId to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return "word" + wordIdCounter++;
284
    }
285
286
}

Mutations

109

1.1
Location : process
Killed by : none
removed call to com/lowagie/text/pdf/parser/MarkedUpTextAssembler::clearAccumulator → NO_COVERAGE

130

1.1
Location : clearAccumulator
Killed by : none
removed call to com/lowagie/text/pdf/parser/TextAssemblyBuffer::assemble → NO_COVERAGE

132

1.1
Location : clearAccumulator
Killed by : none
removed call to java/util/List::clear → NO_COVERAGE

133

1.1
Location : clearAccumulator
Killed by : none
negated conditional → NO_COVERAGE

141

1.1
Location : concatenateResult
Killed by : none
negated conditional → NO_COVERAGE

144

1.1
Location : concatenateResult
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::concatenateResult to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

147

1.1
Location : concatenateResult
Killed by : none
negated conditional → NO_COVERAGE

2.2
Location : concatenateResult
Killed by : none
negated conditional → NO_COVERAGE

154

1.1
Location : concatenateResult
Killed by : none
removed call to java/util/List::clear → NO_COVERAGE

155

1.1
Location : concatenateResult
Killed by : none
negated conditional → NO_COVERAGE

2.2
Location : concatenateResult
Killed by : none
negated conditional → NO_COVERAGE

158

1.1
Location : concatenateResult
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : concatenateResult
Killed by : none
negated conditional → NO_COVERAGE

163

1.1
Location : concatenateResult
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::concatenateResult to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

172

1.1
Location : endParsingContext
Killed by : none
removed call to com/lowagie/text/pdf/parser/MarkedUpTextAssembler::clearAccumulator → NO_COVERAGE

173

1.1
Location : endParsingContext
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::endParsingContext to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

182

1.1
Location : reset
Killed by : none
removed call to java/util/List::clear → NO_COVERAGE

183

1.1
Location : reset
Killed by : none
removed call to java/util/List::clear → NO_COVERAGE

203

1.1
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

205

1.1
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

218

1.1
Location : renderText
Killed by : none
Replaced float division with multiplication → NO_COVERAGE

220

1.1
Location : renderText
Killed by : none
Replaced float multiplication with division → NO_COVERAGE

222

1.1
Location : renderText
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

3.3
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

232

1.1
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

2.2
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

234

1.1
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

236

1.1
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

242

1.1
Location : renderText
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : renderText
Killed by : none
Replaced double division with multiplication → NO_COVERAGE

3.3
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

4.4
Location : renderText
Killed by : none
negated conditional → NO_COVERAGE

283

1.1
Location : getWordId
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

2.2
Location : getWordId
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/parser/MarkedUpTextAssembler::getWordId to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

Active mutators

Tests examined


Report generated by PIT 1.4.2