CMapAwareDocumentFont.java

1
/*
2
 * Copyright 2008 by Kevin Day.
3
 *
4
 * The contents of this file are subject to the Mozilla Public License Version 1.1
5
 * (the "License"); you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
7
 *
8
 * Software distributed under the License is distributed on an "AS IS" basis,
9
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10
 * for the specific language governing rights and limitations under the License.
11
 *
12
 * The Original Code is 'iText, a free JAVA-PDF library'.
13
 *
14
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
15
 * the Initial Developer are Copyright (C) 1999-2008 by Bruno Lowagie.
16
 * All Rights Reserved.
17
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
18
 * are Copyright (C) 2000-2008 by Paulo Soares. All Rights Reserved.
19
 *
20
 * Contributor(s): all the names of the contributors are added in the source code
21
 * where applicable.
22
 *
23
 * Alternatively, the contents of this file may be used under the terms of the
24
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
25
 * provisions of LGPL are applicable instead of those above.  If you wish to
26
 * allow use of your version of this file only under the terms of the LGPL
27
 * License and not to allow others to use your version of this file under
28
 * the MPL, indicate your decision by deleting the provisions above and
29
 * replace them with the notice and other provisions required by the LGPL.
30
 * If you do not delete the provisions above, a recipient may use your version
31
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
32
 *
33
 * This library is free software; you can redistribute it and/or modify it
34
 * under the terms of the MPL as stated above or under the terms of the GNU
35
 * Library General Public License as published by the Free Software Foundation;
36
 * either version 2 of the License, or any later version.
37
 *
38
 * This library is distributed in the hope that it will be useful, but WITHOUT
39
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
40
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
41
 * details.
42
 *
43
 * If you didn't download this code from the following link, you should check if
44
 * you aren't using an obsolete version:
45
 * http://www.lowagie.com/iText/
46
 */
47
package com.lowagie.text.pdf;
48
49
import com.lowagie.text.error_messages.MessageLocalization;
50
import com.lowagie.text.pdf.fonts.cmaps.CMap;
51
import com.lowagie.text.pdf.fonts.cmaps.CMapParser;
52
53
import java.io.ByteArrayInputStream;
54
import java.io.IOException;
55
56
/**
57
 * Implementation of DocumentFont used while parsing PDF streams.
58
 * 
59
 * @since 2.1.4
60
 */
61
public class CMapAwareDocumentFont extends DocumentFont {
62
63
    /** The font dictionary. */
64
    private PdfDictionary fontDic;
65
    /** the width of a space for this font, in normalized 1000 point units */
66
    private int spaceWidth;
67
    /**
68
     * The CMap constructed from the ToUnicode map from the font's dictionary,
69
     * if present. This CMap transforms CID values into unicode equivalent
70
     */
71
    private CMap toUnicodeCmap;
72
    /**
73
     * Mapping between CID code (single byte only for now) and unicode
74
     * equivalent as derived by the font's encoding. Only needed if the
75
     * ToUnicode CMap is not provided.
76
     */
77
    private char[] cidbyte2uni;
78
79
    /**
     * Builds a CMap-aware font from an indirect reference to a font
     * dictionary.
     * <p>
     * The ToUnicode map is processed first; only when no CMap results from
     * that step is the encoding-derived byte-to-unicode table built. The
     * width reported for a space is taken from the superclass, and replaced by
     * the average non-zero glyph width when the superclass reports zero.
     *
     * @param refFont
     *            the indirect reference to a font
     */
    public CMapAwareDocumentFont(PRIndirectReference refFont) {
        super(refFont);
        fontDic = (PdfDictionary) PdfReader.getPdfObjectRelease(refFont);

        processToUnicode();
        if (toUnicodeCmap == null) {
            // no ToUnicode CMap available - fall back to the font's encoding
            processUni2Byte();
        }

        int declaredSpaceWidth = super.getWidth(' ');
        spaceWidth = declaredSpaceWidth != 0 ? declaredSpaceWidth : computeAverageWidth();
    }
101
102
    /**
103
     * Parses the ToUnicode entry, if present, and constructs a CMap for it
104
     * 
105
     * @since 2.1.7
106
     */
107
    private void processToUnicode() {
108
109
        PdfObject toUni = fontDic.get(PdfName.TOUNICODE);
110 1 1. processToUnicode : negated conditional → NO_COVERAGE
        if (toUni != null) {
111
112
            try {
113
                byte[] touni = PdfReader.getStreamBytes((PRStream) PdfReader
114
                        .getPdfObjectRelease(toUni));
115
116
                CMapParser cmapParser = new CMapParser();
117
                toUnicodeCmap = cmapParser
118
                        .parse(new ByteArrayInputStream(touni));
119
            } catch (IOException e) {
120
                throw new Error("Unable to process ToUnicode map - "
121
                        + e.getMessage(), e);
122
            }
123
        }
124
    }
125
126
    /**
127
     * Inverts DocumentFont's uni2byte mapping to obtain a cid-to-unicode
128
     * mapping based on the font's encoding
129
     * 
130
     * @since 2.1.7
131
     */
132
    private void processUni2Byte() {
133
        IntHashtable uni2byte = getUni2Byte();
134
        int[] e = uni2byte.toOrderedKeys();
135
136
        cidbyte2uni = new char[256];
137
        for (int element : e) {
138
            int n = uni2byte.get(element);
139
140
            // this is messy, messy - an encoding can have multiple unicode
141
            // values mapping to the same cid - we are going to arbitrarily
142
            // choose the first one
143
            // what we really need to do is to parse the encoding, and handle
144
            // the differences info ourselves. This is a huge duplication of
145
            // code of what is already
146
            // being done in DocumentFont, so I really hate to go down that path
147
            // without seriously thinking about a change in the organization of
148
            // the Font class hierarchy
149 1 1. processUni2Byte : negated conditional → NO_COVERAGE
            if (cidbyte2uni[n] == 0) {
150
                cidbyte2uni[n] = (char) element;
151
            }
152
        }
153
    }
154
155
    /**
156
     * For all widths of all glyphs, compute the average width in normalized
157
     * 1000 point units. This is used to give some meaningful width in cases
158
     * where we need an average font width (such as if the width of a space
159
     * isn't specified by a given font)
160
     * 
161
     * @return the average width of all non-zero width glyphs in the font
162
     */
163
    private int computeAverageWidth() {
164
        int count = 0;
165
        int total = 0;
166
        for (int width : super.widths) {
167 1 1. computeAverageWidth : negated conditional → NO_COVERAGE
            if (width != 0) {
168 1 1. computeAverageWidth : Replaced integer addition with subtraction → NO_COVERAGE
                total += width;
169 1 1. computeAverageWidth : Changed increment from 1 to -1 → NO_COVERAGE
                count++;
170
            }
171
        }
172 3 1. computeAverageWidth : Replaced integer division with multiplication → NO_COVERAGE
2. computeAverageWidth : negated conditional → NO_COVERAGE
3. computeAverageWidth : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE
        return count != 0 ? total / count : 0;
173
    }
174
175
    /**
176
     * @since 2.1.5 Override to allow special handling for fonts that don't
177
     *        specify width of space character
178
     * @see com.lowagie.text.pdf.DocumentFont#getWidth(int)
179
     */
180
    @Override
181
    public int getWidth(int char1) {
182 1 1. getWidth : negated conditional → NO_COVERAGE
        if (char1 == ' ') {
183 1 1. getWidth : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE
            return spaceWidth;
184
        }
185
186 1 1. getWidth : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE
        return super.getWidth(char1);
187
    }
188
189
    /**
190
     * Decodes a single CID (represented by one or two bytes) to a unicode
191
     * String.
192
     * 
193
     * @param bytes
194
     *            the bytes making up the character code to convert
195
     * @param offset
196
     *            an offset
197
     * @param len
198
     *            a length
199
     * @return a String containing the encoded form of the input bytes using the
200
     *         font's encoding.
201
     */
202
    private String decodeSingleCID(byte[] bytes, int offset, int len) {
203 1 1. decodeSingleCID : negated conditional → NO_COVERAGE
        if (hasUnicodeCMAP()) {
204 3 1. decodeSingleCID : changed conditional boundary → NO_COVERAGE
2. decodeSingleCID : Replaced integer addition with subtraction → NO_COVERAGE
3. decodeSingleCID : negated conditional → NO_COVERAGE
            if (offset + len > bytes.length) {
205 1 1. decodeSingleCID : Replaced integer addition with subtraction → NO_COVERAGE
                throw new ArrayIndexOutOfBoundsException(
206
                        MessageLocalization.getComposedMessage(
207
                                "invalid.index.1", offset + len));
208
            }
209 1 1. decodeSingleCID : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decodeSingleCID to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
            return toUnicodeCmap.lookup(bytes, offset, len);
210
        }
211
212 1 1. decodeSingleCID : negated conditional → NO_COVERAGE
        if (len == 1) {
213 2 1. decodeSingleCID : Replaced bitwise AND with OR → NO_COVERAGE
2. decodeSingleCID : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decodeSingleCID to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
            return new String(cidbyte2uni, 0xff & bytes[offset], 1);
214
        }
215
216
        throw new Error("Multi-byte glyphs not implemented yet");
217
    }
218
219
    /**
220
     * @return true if this font has unicode information available.
221
     */
222
    public boolean hasUnicodeCMAP() {
223 2 1. hasUnicodeCMAP : negated conditional → NO_COVERAGE
2. hasUnicodeCMAP : replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE
        return toUnicodeCmap != null;
224
    }
225
226
    /**
227
     * Decodes a string of bytes (encoded in the font's encoding) into a unicode string. This will
228
     * use the ToUnicode map of the font, if available, otherwise it uses the font's encoding
229
     *
230
     * @param cidbytes
231
     *            the bytes that need to be decoded
232
     * @return the unicode String that results from decoding
233
     * @since 2.1.7
234
     */
235
    public String decode(byte[] cidbytes,
236
                         final int offset,
237
                         final int len) {
238
        StringBuilder sb = new StringBuilder(); // it's a shame we can't make this
239
                                             // StringBuilder
240 4 1. decode : changed conditional boundary → NO_COVERAGE
2. decode : Changed increment from 1 to -1 → NO_COVERAGE
3. decode : Replaced integer addition with subtraction → NO_COVERAGE
4. decode : negated conditional → NO_COVERAGE
        for (int i = offset; i < offset + len; i++ ) {
241
            String rslt = decodeSingleCID(cidbytes, i, 1);
242 5 1. decode : changed conditional boundary → NO_COVERAGE
2. decode : Replaced integer addition with subtraction → NO_COVERAGE
3. decode : Replaced integer addition with subtraction → NO_COVERAGE
4. decode : negated conditional → NO_COVERAGE
5. decode : negated conditional → NO_COVERAGE
            if (rslt == null && i + 1 < offset + len) {
243
                rslt = decodeSingleCID(cidbytes, i, 2);
244 1 1. decode : Changed increment from 1 to -1 → NO_COVERAGE
                i++ ;
245
            }
246 1 1. decode : negated conditional → NO_COVERAGE
            if (rslt != null) {
247
                sb.append(rslt);
248
            }
249
        }
250
251 1 1. decode : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return sb.toString();
252
    }
253
254
    /**
255
     * Decodes a string. This is a normal Java string, but if the range of character values
256
     * exceeds the range of the encoding for the font, this will fail. Required since we need to
257
     * process the characters of strings, and we can't determine the character boundaries in
258
     * advance, especially because of Identity-H encoded fonts which have two-byte character
259
     * indexes.
260
     * 
261
     * PdfString is used to hold character code points, even though the bytes may not map 1-1. It's
262
     * not possible to change the encoding once a string is in place. 
263
     * 
264
     * @param chars
265
     *            the Characters that need to be decoded
266
     * @return the unicode String that results from decoding
267
     * @since 2.1.
268
     */
269
    public String decode(String chars) {
270
        StringBuilder sb = new StringBuilder(); // it's a shame we can't make this
271
                                             // StringBuilder
272
        for (char c : chars.toCharArray()) {
273
            String result = decode(c);
274 1 1. decode : negated conditional → NO_COVERAGE
            if (result != null) {
275
                sb.append(result);
276
            }
277
        }
278
279 1 1. decode : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return sb.toString();
280
    }
281
282
    /**
283
     * Decode  single character whose value represents a code point in this font. Will fail if
284
     * the characters do not have values that correspond to valid code points for the font.
285
     * @param c character to decode
286
     * @return Unicode character corresponding to the remapped code according to the font's current encoding.
287
     * @throws Error if the the character is out of range
288
     */
289
    public String decode(char c) throws Error {
290
        String result;
291 1 1. decode : negated conditional → NO_COVERAGE
        if (hasUnicodeCMAP()) {
292
            result = toUnicodeCmap.lookup(c);
293 2 1. decode : changed conditional boundary → NO_COVERAGE
2. decode : negated conditional → NO_COVERAGE
        } else if (c <= 0xff) {
294 1 1. decode : Replaced bitwise AND with OR → NO_COVERAGE
            result = new String(cidbyte2uni, 0xff & c, 1);
295
        } else {
296
            throw new Error("Multi-byte glyphs not implemented yet");
297
        }
298 1 1. decode : mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
        return result;
299
    }
300
301
    /**
302
     * Encodes bytes to a String.
303
     * 
304
     * @param bytes
305
     *            the bytes from a stream
306
     * @param offset
307
     *            an offset
308
     * @param len
309
     *            a length
310
     * @return a String encoded taking into account if the bytes are in unicode
311
     *         or not.
312
     * @deprecated method name is not indicative of what it does. Use
313
     *             <code>decode</code> instead.
314
     */
315
    @Deprecated
316
    public String encode(byte[] bytes, int offset, int len) {
317
        return decode(bytes, offset, len);
318
    }
319
}

Mutations

90

1.1
Location :
Killed by : none
removed call to com/lowagie/text/pdf/CMapAwareDocumentFont::processToUnicode → NO_COVERAGE

91

1.1
Location :
Killed by : none
negated conditional → NO_COVERAGE

92

1.1
Location :
Killed by : none
removed call to com/lowagie/text/pdf/CMapAwareDocumentFont::processUni2Byte → NO_COVERAGE

96

1.1
Location :
Killed by : none
negated conditional → NO_COVERAGE

110

1.1
Location : processToUnicode
Killed by : none
negated conditional → NO_COVERAGE

149

1.1
Location : processUni2Byte
Killed by : none
negated conditional → NO_COVERAGE

167

1.1
Location : computeAverageWidth
Killed by : none
negated conditional → NO_COVERAGE

168

1.1
Location : computeAverageWidth
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

169

1.1
Location : computeAverageWidth
Killed by : none
Changed increment from 1 to -1 → NO_COVERAGE

172

1.1
Location : computeAverageWidth
Killed by : none
Replaced integer division with multiplication → NO_COVERAGE

2.2
Location : computeAverageWidth
Killed by : none
negated conditional → NO_COVERAGE

3.3
Location : computeAverageWidth
Killed by : none
replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE

182

1.1
Location : getWidth
Killed by : none
negated conditional → NO_COVERAGE

183

1.1
Location : getWidth
Killed by : none
replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE

186

1.1
Location : getWidth
Killed by : none
replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE

203

1.1
Location : decodeSingleCID
Killed by : none
negated conditional → NO_COVERAGE

204

1.1
Location : decodeSingleCID
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : decodeSingleCID
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

3.3
Location : decodeSingleCID
Killed by : none
negated conditional → NO_COVERAGE

205

1.1
Location : decodeSingleCID
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

209

1.1
Location : decodeSingleCID
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decodeSingleCID to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

212

1.1
Location : decodeSingleCID
Killed by : none
negated conditional → NO_COVERAGE

213

1.1
Location : decodeSingleCID
Killed by : none
Replaced bitwise AND with OR → NO_COVERAGE

2.2
Location : decodeSingleCID
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decodeSingleCID to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

223

1.1
Location : hasUnicodeCMAP
Killed by : none
negated conditional → NO_COVERAGE

2.2
Location : hasUnicodeCMAP
Killed by : none
replaced return of integer sized value with (x == 0 ? 1 : 0) → NO_COVERAGE

240

1.1
Location : decode
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : decode
Killed by : none
Changed increment from 1 to -1 → NO_COVERAGE

3.3
Location : decode
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

4.4
Location : decode
Killed by : none
negated conditional → NO_COVERAGE

242

1.1
Location : decode
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : decode
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

3.3
Location : decode
Killed by : none
Replaced integer addition with subtraction → NO_COVERAGE

4.4
Location : decode
Killed by : none
negated conditional → NO_COVERAGE

5.5
Location : decode
Killed by : none
negated conditional → NO_COVERAGE

244

1.1
Location : decode
Killed by : none
Changed increment from 1 to -1 → NO_COVERAGE

246

1.1
Location : decode
Killed by : none
negated conditional → NO_COVERAGE

251

1.1
Location : decode
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

274

1.1
Location : decode
Killed by : none
negated conditional → NO_COVERAGE

279

1.1
Location : decode
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

291

1.1
Location : decode
Killed by : none
negated conditional → NO_COVERAGE

293

1.1
Location : decode
Killed by : none
changed conditional boundary → NO_COVERAGE

2.2
Location : decode
Killed by : none
negated conditional → NO_COVERAGE

294

1.1
Location : decode
Killed by : none
Replaced bitwise AND with OR → NO_COVERAGE

298

1.1
Location : decode
Killed by : none
mutated return of Object value for com/lowagie/text/pdf/CMapAwareDocumentFont::decode to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

Active mutators

Tests examined


Report generated by PIT 1.4.2