1 | /* | |
2 | Copyright 2014 by Tizra Inc. | |
3 | The contents of this file are subject to the Mozilla Public License Version 1.1 | |
4 | (the "License"); you may not use this file except in compliance with the License. | |
5 | You may obtain a copy of the License at http://www.mozilla.org/MPL/ | |
6 | ||
7 | Software distributed under the License is distributed on an "AS IS" basis, | |
8 | WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
9 | for the specific language governing rights and limitations under the License. | |
10 | ||
11 | The Original Code is 'iText, a free JAVA-PDF library'. | |
12 | ||
13 | The Initial Developer of the Original Code is Bruno Lowagie. Portions created by | |
14 | the Initial Developer are Copyright (C) 1999-2008 by Bruno Lowagie. | |
15 | All Rights Reserved. | |
16 | Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer | |
17 | are Copyright (C) 2000-2008 by Paulo Soares. All Rights Reserved. | |
18 | ||
19 | Contributor(s): all the names of the contributors are added in the source code | |
20 | where applicable. | |
21 | ||
22 | Alternatively, the contents of this file may be used under the terms of the | |
23 | LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the | |
24 | provisions of LGPL are applicable instead of those above. If you wish to | |
25 | allow use of your version of this file only under the terms of the LGPL | |
26 | License and not to allow others to use your version of this file under | |
27 | the MPL, indicate your decision by deleting the provisions above and | |
28 | replace them with the notice and other provisions required by the LGPL. | |
29 | If you do not delete the provisions above, a recipient may use your version | |
30 | of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. | |
31 | ||
32 | This library is free software; you can redistribute it and/or modify it | |
33 | under the terms of the MPL as stated above or under the terms of the GNU | |
34 | Library General Public License as published by the Free Software Foundation; | |
35 | either version 2 of the License, or any later version. | |
36 | ||
37 | This library is distributed in the hope that it will be useful, but WITHOUT | |
38 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
39 | FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more | |
40 | details. | |
41 | ||
42 | dgd: com.lowagie.text.pdf.parser | |
43 | */ | |
44 | package com.lowagie.text.pdf.parser; | |
45 | ||
import com.lowagie.text.Rectangle;
import com.lowagie.text.pdf.PdfReader;

import java.util.Locale;

import javax.annotation.Nullable;
50 | ||
51 | /** | |
52 | * @author dgd | |
53 | */ | |
54 | public class Word extends ParsedTextImpl { | |
55 | | |
56 | /** | |
57 | * Is this an indivisible fragment, because it contained a space or was split from a space- | |
58 | * containing string. Non-splittable words can be merged (into new non-splittable words). | |
59 | */ | |
60 | private boolean shouldNotSplit; | |
61 | /** | |
62 | * If this word or fragment was preceded by a space, or a line break, it should never be merged | |
63 | * into a preceding word. | |
64 | */ | |
65 | private boolean breakBefore; | |
66 | ||
67 | /** | |
68 | * @param text text content | |
69 | * @param ascent font ascent (e.g. height) | |
70 | * @param descent How far below the baseline letters go | |
71 | * @param startPoint first point of the text | |
72 | * @param endPoint ending offset of text | |
73 | * @param baseline line along which text is set. | |
74 | * @param spaceWidth how much space is a space supposed to take. | |
75 | * @param isCompleteWord word should never be split | |
76 | * @param breakBefore word starts here, should never combine to the left. | |
77 | */ | |
78 | Word(String text, float ascent, float descent, Vector startPoint, | |
79 | Vector endPoint, Vector baseline, float spaceWidth, boolean isCompleteWord, boolean breakBefore) { | |
80 | super(text, startPoint, endPoint, baseline, ascent, descent, spaceWidth); | |
81 | shouldNotSplit = isCompleteWord; | |
82 | this.breakBefore = breakBefore; | |
83 | } | |
84 | ||
85 | /** | |
86 | * accept a visitor that is assembling text | |
87 | * | |
88 | * @param p the assembler that is visiting us. | |
89 | * @param contextName What is the wrapping markup element name if any | |
90 | * @see com.lowagie.text.pdf.parser.ParsedTextImpl#accumulate(com.lowagie.text.pdf.parser.TextAssembler, String) | |
91 | * @see com.lowagie.text.pdf.parser.TextAssemblyBuffer#accumulate(com.lowagie.text.pdf.parser.TextAssembler, String) | |
92 | */ | |
93 | @Override | |
94 | public void accumulate(TextAssembler p, String contextName) { | |
95 | p.process(this, contextName); | |
96 | } | |
97 | ||
98 | /** | |
99 | * Accept a visitor that is assembling text | |
100 | * @param p the assembler that is visiting us. | |
101 | * @see com.lowagie.text.pdf.parser.TextAssemblyBuffer#assemble(com.lowagie.text.pdf.parser.TextAssembler) | |
102 | * @see com.lowagie.text.pdf.parser.ParsedTextImpl#assemble(com.lowagie.text.pdf.parser.TextAssembler) | |
103 | */ | |
104 | @Override | |
105 | public void assemble(TextAssembler p) { | |
106 | p.renderText(this); | |
107 | } | |
108 | ||
109 | private static String formatPercent(float f) { | |
110 |
1
1. formatPercent : mutated return of Object value for com/lowagie/text/pdf/parser/Word::formatPercent to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return String.format("%.2f%%", f); |
111 | } | |
112 | ||
113 | /** | |
114 | * Generate markup for this word. send the assembler a strings representing | |
115 | * a CSS style that will format us nicely. | |
116 | * | |
117 | * @param text | |
118 | * passed in because we may have wanted to alter it, e.g. by | |
119 | * trimming white space, or filtering characters or something. | |
120 | * @param reader | |
121 | * the file reader from which we are extracting | |
122 | * @param page | |
123 | * number of the page we are reading text from | |
124 | * @param assembler | |
125 | * object to assemble text from fragments and larger strings on a | |
126 | * page. | |
127 | * @return markup to represent this one word. | |
128 | */ | |
129 | private String wordMarkup(@Nullable String text, PdfReader reader, int page, TextAssembler assembler) { | |
130 |
1
1. wordMarkup : negated conditional → NO_COVERAGE |
if (text == null) { |
131 |
1
1. wordMarkup : mutated return of Object value for com/lowagie/text/pdf/parser/Word::wordMarkup to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return ""; |
132 | } | |
133 | Rectangle mediaBox = reader.getPageSize(page); | |
134 | Rectangle cropBox = reader.getBoxSize(page, "crop"); | |
135 | text = text.replaceAll("[\u00A0\u202f]", " ").trim(); | |
136 |
1
1. wordMarkup : negated conditional → NO_COVERAGE |
if (text.length() == 0) { |
137 |
1
1. wordMarkup : mutated return of Object value for com/lowagie/text/pdf/parser/Word::wordMarkup to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return text; |
138 | } | |
139 |
1
1. wordMarkup : removed call to com/lowagie/text/Rectangle::normalize → NO_COVERAGE |
mediaBox.normalize(); |
140 |
1
1. wordMarkup : negated conditional → NO_COVERAGE |
if (cropBox != null) { |
141 |
1
1. wordMarkup : removed call to com/lowagie/text/Rectangle::normalize → NO_COVERAGE |
cropBox.normalize(); |
142 | } else { | |
143 | cropBox = reader.getBoxSize(page, "trim"); | |
144 |
1
1. wordMarkup : negated conditional → NO_COVERAGE |
if (cropBox != null) { |
145 |
1
1. wordMarkup : removed call to com/lowagie/text/Rectangle::normalize → NO_COVERAGE |
cropBox.normalize(); |
146 | } else { | |
147 | cropBox = mediaBox; | |
148 | } | |
149 | } | |
150 |
1
1. wordMarkup : Replaced float subtraction with addition → NO_COVERAGE |
float xOffset = cropBox.getLeft() - mediaBox.getLeft(); |
151 |
1
1. wordMarkup : Replaced float subtraction with addition → NO_COVERAGE |
float yOffset = cropBox.getTop() - mediaBox.getTop(); |
152 | Vector startPoint = getStartPoint(); | |
153 | Vector endPoint = getEndPoint(); | |
154 | float pageWidth = cropBox.getWidth(); | |
155 | float pageHeight = cropBox.getHeight(); | |
156 |
4
1. wordMarkup : Replaced float subtraction with addition → NO_COVERAGE 2. wordMarkup : Replaced float subtraction with addition → NO_COVERAGE 3. wordMarkup : Replaced float division with multiplication → NO_COVERAGE 4. wordMarkup : Replaced double multiplication with division → NO_COVERAGE |
float leftPercent = (float) ((startPoint.get(0) - xOffset - mediaBox.getLeft()) / pageWidth * 100.0); |
157 |
3
1. wordMarkup : Replaced float addition with subtraction → NO_COVERAGE 2. wordMarkup : Replaced float subtraction with addition → NO_COVERAGE 3. wordMarkup : Replaced float subtraction with addition → NO_COVERAGE |
float bottom = endPoint.get(1) + yOffset - getDescent() - mediaBox.getBottom(); |
158 |
2
1. wordMarkup : Replaced float division with multiplication → NO_COVERAGE 2. wordMarkup : Replaced float multiplication with division → NO_COVERAGE |
float bottomPercent = bottom / pageHeight * 100f; |
159 | StringBuilder result = new StringBuilder(); | |
160 | float width = getWidth(); | |
161 |
2
1. wordMarkup : Replaced float division with multiplication → NO_COVERAGE 2. wordMarkup : Replaced float multiplication with division → NO_COVERAGE |
float widthPercent = width / pageWidth * 100.0f; |
162 | ||
163 | float height = getAscent(); | |
164 |
2
1. wordMarkup : Replaced float division with multiplication → NO_COVERAGE 2. wordMarkup : Replaced float multiplication with division → NO_COVERAGE |
float heightPercent = height / pageHeight * 100.0f; |
165 | String myId = assembler.getWordId(); | |
166 |
2
1. wordMarkup : Replaced float addition with subtraction → NO_COVERAGE 2. wordMarkup : Replaced float addition with subtraction → NO_COVERAGE |
Rectangle resultRect = new Rectangle(leftPercent, bottomPercent, leftPercent+widthPercent, bottomPercent+heightPercent); |
167 | result.append("<span class=\"t-word\" style=\"bottom: ") | |
168 | .append(formatPercent(resultRect.getBottom())).append("; left: ") | |
169 | .append(formatPercent(resultRect.getLeft())).append("; width: ") | |
170 | .append(formatPercent(resultRect.getWidth())).append("; height: ") | |
171 | .append(formatPercent(resultRect.getHeight())).append(";\"") | |
172 | .append(" id=\"").append(myId).append("\">") | |
173 | .append(escapeHTML(text)).append(" "); | |
174 | result.append("</span> "); | |
175 | ||
176 |
1
1. wordMarkup : mutated return of Object value for com/lowagie/text/pdf/parser/Word::wordMarkup to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return result.toString(); |
177 | } | |
178 | ||
179 | private static String escapeHTML(String s) { | |
180 |
1
1. escapeHTML : mutated return of Object value for com/lowagie/text/pdf/parser/Word::escapeHTML to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return s.replaceAll("&", "&").replaceAll("<", "<") |
181 | .replaceAll(">", ">"); | |
182 | } | |
183 | ||
184 | /** | |
185 | * @see com.lowagie.text.pdf.parser.TextAssemblyBuffer#getFinalText(PdfReader, | |
186 | * int, TextAssembler, boolean) | |
187 | */ | |
188 | @Override | |
189 | public FinalText getFinalText(PdfReader reader, int page, | |
190 | TextAssembler assembler, boolean useMarkup) { | |
191 |
1
1. getFinalText : negated conditional → NO_COVERAGE |
if (useMarkup) { |
192 |
1
1. getFinalText : mutated return of Object value for com/lowagie/text/pdf/parser/Word::getFinalText to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return new FinalText(wordMarkup(getText(), reader, page, assembler)); |
193 | } else { | |
194 |
1
1. getFinalText : mutated return of Object value for com/lowagie/text/pdf/parser/Word::getFinalText to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return new FinalText(getText() + " "); |
195 | } | |
196 | } | |
197 | ||
198 | @Override | |
199 | public String toString() { | |
200 | return "[Word: [" + getText() + "] " + getStartPoint() + ", " | |
201 | + getEndPoint() + "] lead" + getAscent() + "]"; | |
202 | } | |
203 | ||
204 | /** | |
205 | * @see com.lowagie.text.pdf.parser.ParsedTextImpl#shouldNotSplit() | |
206 | */ | |
207 | @Override | |
208 | public boolean shouldNotSplit() { | |
209 | return shouldNotSplit; | |
210 | } | |
211 | ||
212 | /** | |
213 | * @see com.lowagie.text.pdf.parser.ParsedTextImpl#breakBefore() | |
214 | */ | |
215 | @Override | |
216 | public boolean breakBefore() { | |
217 | return breakBefore; | |
218 | } | |
219 | } | |
Mutations | ||
110 |
1.1 |
|
130 |
1.1 |
|
131 |
1.1 |
|
136 |
1.1 |
|
137 |
1.1 |
|
139 |
1.1 |
|
140 |
1.1 |
|
141 |
1.1 |
|
144 |
1.1 |
|
145 |
1.1 |
|
150 |
1.1 |
|
151 |
1.1 |
|
156 |
1.1 2.2 3.3 4.4 |
|
157 |
1.1 2.2 3.3 |
|
158 |
1.1 2.2 |
|
161 |
1.1 2.2 |
|
164 |
1.1 2.2 |
|
166 |
1.1 2.2 |
|
176 |
1.1 |
|
180 |
1.1 |
|
191 |
1.1 |
|
192 |
1.1 |
|
194 |
1.1 |