1 | /* | |
2 | * Copyright 2008 by Kevin Day. | |
3 | * | |
4 | * The contents of this file are subject to the Mozilla Public License Version 1.1 | |
5 | * (the "License"); you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at http://www.mozilla.org/MPL/ | |
7 | * | |
8 | * Software distributed under the License is distributed on an "AS IS" basis, | |
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
10 | * for the specific language governing rights and limitations under the License. | |
11 | * | |
12 | * The Original Code is 'iText, a free JAVA-PDF library'. | |
13 | * | |
14 | * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by | |
15 | * the Initial Developer are Copyright (C) 1999-2008 by Bruno Lowagie. | |
16 | * All Rights Reserved. | |
17 | * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer | |
18 | * are Copyright (C) 2000-2008 by Paulo Soares. All Rights Reserved. | |
19 | * | |
20 | * Contributor(s): all the names of the contributors are added in the source code | |
21 | * where applicable. | |
22 | * | |
23 | * Alternatively, the contents of this file may be used under the terms of the | |
24 | * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the | |
25 | * provisions of LGPL are applicable instead of those above. If you wish to | |
26 | * allow use of your version of this file only under the terms of the LGPL | |
27 | * License and not to allow others to use your version of this file under | |
28 | * the MPL, indicate your decision by deleting the provisions above and | |
29 | * replace them with the notice and other provisions required by the LGPL. | |
30 | * If you do not delete the provisions above, a recipient may use your version | |
31 | * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. | |
32 | * | |
33 | * This library is free software; you can redistribute it and/or modify it | |
34 | * under the terms of the MPL as stated above or under the terms of the GNU | |
35 | * Library General Public License as published by the Free Software Foundation; | |
36 | * either version 2 of the License, or any later version. | |
37 | * | |
38 | * This library is distributed in the hope that it will be useful, but WITHOUT | |
39 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
40 | * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more | |
41 | * details. | |
42 | * | |
43 | * If you didn't download this code from the following link, you should check if | |
44 | * you aren't using an obsolete version: | |
45 | * http://www.lowagie.com/iText/ | |
46 | */ | |
47 | package com.lowagie.text.pdf; | |
48 | ||
49 | import com.lowagie.text.error_messages.MessageLocalization; | |
50 | import com.lowagie.text.pdf.fonts.cmaps.CMap; | |
51 | import com.lowagie.text.pdf.fonts.cmaps.CMapParser; | |
52 | ||
53 | import java.io.ByteArrayInputStream; | |
54 | import java.io.IOException; | |
55 | ||
56 | /** | |
57 | * Implementation of DocumentFont used while parsing PDF streams. | |
58 | * | |
59 | * @since 2.1.4 | |
60 | */ | |
61 | public class CMapAwareDocumentFont extends DocumentFont { | |
62 | ||
63 | /** The font dictionary. */ | |
64 | private PdfDictionary fontDic; | |
65 | /** the width of a space for this font, in normalized 1000 point units */ | |
66 | private int spaceWidth; | |
67 | /** | |
68 | * The CMap constructed from the ToUnicode map from the font's dictionary, | |
69 | * if present. This CMap transforms CID values into unicode equivalent | |
70 | */ | |
71 | private CMap toUnicodeCmap; | |
72 | /** | |
73 | * Mapping between CID code (single byte only for now) and unicode | |
74 | * equivalent as derived by the font's encoding. Only needed if the | |
75 | * ToUnicode CMap is not provided. | |
76 | */ | |
77 | private char[] cidbyte2uni; | |
78 | ||
79 | /** | |
80 | * Creates an instance of a CMapAwareFont based on an indirect reference to | |
81 | * a font. | |
82 | * | |
83 | * @param refFont | |
84 | * the indirect reference to a font | |
85 | */ | |
86 | public CMapAwareDocumentFont(PRIndirectReference refFont) { | |
87 | super(refFont); | |
88 | fontDic = (PdfDictionary) PdfReader.getPdfObjectRelease(refFont); | |
89 | ||
90 |
1
1. |
processToUnicode(); |
91 |
1
1. |
if (toUnicodeCmap == null) { |
92 |
1
1. |
processUni2Byte(); |
93 | } | |
94 | ||
95 | spaceWidth = super.getWidth(' '); | |
96 |
1
1. |
if (spaceWidth == 0) { |
97 | spaceWidth = computeAverageWidth(); | |
98 | } | |
99 | ||
100 | } | |
101 | ||
102 | /** | |
103 | * Parses the ToUnicode entry, if present, and constructs a CMap for it | |
104 | * | |
105 | * @since 2.1.7 | |
106 | */ | |
107 | private void processToUnicode() { | |
108 | ||
109 | PdfObject toUni = fontDic.get(PdfName.TOUNICODE); | |
110 |
1
1. processToUnicode : negated conditional → NO_COVERAGE |
if (toUni != null) { |
111 | ||
112 | try { | |
113 | byte[] touni = PdfReader.getStreamBytes((PRStream) PdfReader | |
114 | .getPdfObjectRelease(toUni)); | |
115 | ||
116 | CMapParser cmapParser = new CMapParser(); | |
117 | toUnicodeCmap = cmapParser | |
118 | .parse(new ByteArrayInputStream(touni)); | |
119 | } catch (IOException e) { | |
120 | throw new Error("Unable to process ToUnicode map - " | |
121 | + e.getMessage(), e); | |
122 | } | |
123 | } | |
124 | } | |
125 | ||
126 | /** | |
127 | * Inverts DocumentFont's uni2byte mapping to obtain a cid-to-unicode | |
128 | * mapping based on the font's encoding | |
129 | * | |
130 | * @since 2.1.7 | |
131 | */ | |
132 | private void processUni2Byte() { | |
133 | IntHashtable uni2byte = getUni2Byte(); | |
134 | int[] e = uni2byte.toOrderedKeys(); | |
135 | ||
136 | cidbyte2uni = new char[256]; | |
137 | for (int element : e) { | |
138 | int n = uni2byte.get(element); | |
139 | ||
140 | // this is messy, messy - an encoding can have multiple unicode | |
141 | // values mapping to the same cid - we are going to arbitrarily | |
142 | // choose the first one | |
143 | // what we really need to do is to parse the encoding, and handle | |
144 | // the differences info ourselves. This is a huge duplication of | |
145 | // code of what is already | |
146 | // being done in DocumentFont, so I really hate to go down that path | |
147 | // without seriously thinking about a change in the organization of | |
148 | // the Font class hierarchy | |
149 |
1
1. processUni2Byte : negated conditional → NO_COVERAGE |
if (cidbyte2uni[n] == 0) { |
150 | cidbyte2uni[n] = (char) element; | |
151 | } | |
152 | } | |
153 | } | |
154 | ||
155 | /** | |
156 | * For all widths of all glyphs, compute the average width in normalized | |
157 | * 1000 point units. This is used to give some meaningful width in cases | |
158 | * where we need an average font width (such as if the width of a space | |
159 | * isn't specified by a given font) | |
160 | * | |
161 | * @return the average width of all non-zero width glyphs in the font | |
162 | */ | |
163 | private int computeAverageWidth() { | |
164 | int count = 0; | |
165 | int total = 0; | |
166 | for (int width : super.widths) { | |
167 |
1
1. computeAverageWidth : negated conditional → NO_COVERAGE |
if (width != 0) { |
168 |
1
1. computeAverageWidth : Replaced integer addition with subtraction → NO_COVERAGE |
total += width; |
169 |
1
1. computeAverageWidth : Changed increment from 1 to -1 → NO_COVERAGE |
count++; |
170 | } | |
171 | } | |
172 |
3
1. computeAverageWidth : Replaced integer division with multiplication → NO_COVERAGE 2. computeAverageWidth : negated conditional → NO_COVERAGE 3. computeAverageWidth : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE |
return count != 0 ? total / count : 0; |
173 | } | |
174 | ||
175 | /** | |
176 | * @since 2.1.5 Override to allow special handling for fonts that don't | |
177 | * specify width of space character | |
178 | * @see com.lowagie.text.pdf.DocumentFont#getWidth(int) | |
179 | */ | |
180 | @Override | |
181 | public int getWidth(int char1) { | |
182 |
1
1. getWidth : negated conditional → NO_COVERAGE |
if (char1 == ' ') { |
183 |
1
1. getWidth : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE |
return spaceWidth; |
184 | } | |
185 | ||
186 |
1
1. getWidth : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE |
return super.getWidth(char1); |
187 | } | |
188 | ||
189 | /** | |
190 | * Decodes a single CID (represented by one or two bytes) to a unicode | |
191 | * String. | |
192 | * | |
193 | * @param bytes | |
194 | * the bytes making up the character code to convert | |
195 | * @param offset | |
196 | * an offset | |
197 | * @param len | |
198 | * a length | |
199 | * @return a String containing the encoded form of the input bytes using the | |
200 | * font's encoding. | |
201 | */ | |
202 | private String decodeSingleCID(byte[] bytes, int offset, int len) { | |
203 |
1
1. decodeSingleCID : negated conditional → NO_COVERAGE |
if (hasUnicodeCMAP()) { |
204 |
3
1. decodeSingleCID : changed conditional boundary → NO_COVERAGE 2. decodeSingleCID : Replaced integer addition with subtraction → NO_COVERAGE 3. decodeSingleCID : negated conditional → NO_COVERAGE |
if (offset + len > bytes.length) { |
205 |
1
1. decodeSingleCID : Replaced integer addition with subtraction → NO_COVERAGE |
throw new ArrayIndexOutOfBoundsException( |
206 | MessageLocalization.getComposedMessage( | |
207 | "invalid.index.1", offset + len)); | |
208 | } | |
209 |
1
1. decodeSingleCID : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decodeSingleCID to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return toUnicodeCmap.lookup(bytes, offset, len); |
210 | } | |
211 | ||
212 |
1
1. decodeSingleCID : negated conditional → NO_COVERAGE |
if (len == 1) { |
213 |
2
1. decodeSingleCID : Replaced bitwise AND with OR → NO_COVERAGE 2. decodeSingleCID : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decodeSingleCID to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return new String(cidbyte2uni, 0xff & bytes[offset], 1); |
214 | } | |
215 | ||
216 | throw new Error("Multi-byte glyphs not implemented yet"); | |
217 | } | |
218 | ||
219 | /** | |
220 | * @return true if this font has unicode information available. | |
221 | */ | |
222 | public boolean hasUnicodeCMAP() { | |
223 |
2
1. hasUnicodeCMAP : negated conditional → NO_COVERAGE 2. hasUnicodeCMAP : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE |
return toUnicodeCmap != null; |
224 | } | |
225 | ||
226 | /** | |
227 | * Decodes a string of bytes (encoded in the font's encoding) into a unicode string. This will | |
228 | * use the ToUnicode map of the font, if available, otherwise it uses the font's encoding | |
229 | * | |
230 | * @param cidbytes | |
231 | * the bytes that need to be decoded | |
232 | * @return the unicode String that results from decoding | |
233 | * @since 2.1.7 | |
234 | */ | |
235 | public String decode(byte[] cidbytes, | |
236 | final int offset, | |
237 | final int len) { | |
238 | StringBuilder sb = new StringBuilder(); // it's a shame we can't make this | |
239 | // StringBuilder | |
240 |
4
1. decode : changed conditional boundary → NO_COVERAGE 2. decode : Changed increment from 1 to -1 → NO_COVERAGE 3. decode : Replaced integer addition with subtraction → NO_COVERAGE 4. decode : negated conditional → NO_COVERAGE |
for (int i = offset; i < offset + len; i++ ) { |
241 | String rslt = decodeSingleCID(cidbytes, i, 1); | |
242 |
5
1. decode : changed conditional boundary → NO_COVERAGE 2. decode : Replaced integer addition with subtraction → NO_COVERAGE 3. decode : Replaced integer addition with subtraction → NO_COVERAGE 4. decode : negated conditional → NO_COVERAGE 5. decode : negated conditional → NO_COVERAGE |
if (rslt == null && i + 1 < offset + len) { |
243 | rslt = decodeSingleCID(cidbytes, i, 2); | |
244 |
1
1. decode : Changed increment from 1 to -1 → NO_COVERAGE |
i++ ; |
245 | } | |
246 |
1
1. decode : negated conditional → NO_COVERAGE |
if (rslt != null) { |
247 | sb.append(rslt); | |
248 | } | |
249 | } | |
250 | ||
251 |
1
1. decode : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return sb.toString(); |
252 | } | |
253 | ||
254 | /** | |
255 | * Decodes a string. This is a normal Java string, but if the range of character values | |
256 | * exceeds the range of the encoding for the font, this will fail. Required since we need to | |
257 | * process the characters of strings, and we can't determine the character boundaries in | |
258 | * advance, especially because of Identity-H encoded fonts which have two-byte character | |
259 | * indexes. | |
260 | * | |
261 | * PdfString is used to hold character code points, even though the bytes may not map 1-1. It's | |
262 | * not possible to change the encoding once a string is in place. | |
263 | * | |
264 | * @param chars | |
265 | * the Characters that need to be decoded | |
266 | * @return the unicode String that results from decoding | |
267 | * @since 2.1. | |
268 | */ | |
269 | public String decode(String chars) { | |
270 | StringBuilder sb = new StringBuilder(); // it's a shame we can't make this | |
271 | // StringBuilder | |
272 | for (char c : chars.toCharArray()) { | |
273 | String result = decode(c); | |
274 |
1
1. decode : negated conditional → NO_COVERAGE |
if (result != null) { |
275 | sb.append(result); | |
276 | } | |
277 | } | |
278 | ||
279 |
1
1. decode : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return sb.toString(); |
280 | } | |
281 | ||
282 | /** | |
283 | * Decode single character whose value represents a code point in this font. Will fail if | |
284 | * the characters do not have values that correspond to valid code points for the font. | |
285 | * @param c character to decode | |
286 | * @return Unicode character corresponding to the remapped code according to the font's current encoding. | |
287 | * @throws Error if the the character is out of range | |
288 | */ | |
289 | public String decode(char c) throws Error { | |
290 | String result; | |
291 |
1
1. decode : negated conditional → NO_COVERAGE |
if (hasUnicodeCMAP()) { |
292 | result = toUnicodeCmap.lookup(c); | |
293 |
2
1. decode : changed conditional boundary → NO_COVERAGE 2. decode : negated conditional → NO_COVERAGE |
} else if (c <= 0xff) { |
294 |
1
1. decode : Replaced bitwise AND with OR → NO_COVERAGE |
result = new String(cidbyte2uni, 0xff & c, 1); |
295 | } else { | |
296 | throw new Error("Multi-byte glyphs not implemented yet"); | |
297 | } | |
298 |
1
1. decode : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return result; |
299 | } | |
300 | ||
301 | /** | |
302 | * Encodes bytes to a String. | |
303 | * | |
304 | * @param bytes | |
305 | * the bytes from a stream | |
306 | * @param offset | |
307 | * an offset | |
308 | * @param len | |
309 | * a length | |
310 | * @return a String encoded taking into account if the bytes are in unicode | |
311 | * or not. | |
312 | * @deprecated method name is not indicative of what it does. Use | |
313 | * <code>decode</code> instead. | |
314 | */ | |
315 | @Deprecated | |
316 | public String encode(byte[] bytes, int offset, int len) { | |
317 | return decode(bytes, offset, len); | |
318 | } | |
319 | } | |
Mutations | ||
90 |
1.1 |
|
91 |
1.1 |
|
92 |
1.1 |
|
96 |
1.1 |
|
110 |
1.1 |
|
149 |
1.1 |
|
167 |
1.1 |
|
168 |
1.1 |
|
169 |
1.1 |
|
172 |
1.1 2.2 3.3 |
|
182 |
1.1 |
|
183 |
1.1 |
|
186 |
1.1 |
|
203 |
1.1 |
|
204 |
1.1 2.2 3.3 |
|
205 |
1.1 |
|
209 |
1.1 |
|
212 |
1.1 |
|
213 |
1.1 2.2 |
|
223 |
1.1 2.2 |
|
240 |
1.1 2.2 3.3 4.4 |
|
242 |
1.1 2.2 3.3 4.4 5.5 |
|
244 |
1.1 |
|
246 |
1.1 |
|
251 |
1.1 |
|
274 |
1.1 |
|
279 |
1.1 |
|
291 |
1.1 |
|
293 |
1.1 2.2 |
|
294 |
1.1 |
|
298 |
1.1 |