diff --git a/pdfbox/pom.xml b/pdfbox/pom.xml
index 2a7b9ea45..0833019f9 100644
--- a/pdfbox/pom.xml
+++ b/pdfbox/pom.xml
@@ -77,6 +77,19 @@
test
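+        <dependency>
+            <groupId>org.openjdk.jmh</groupId>
+            <artifactId>jmh-core</artifactId>
+            <version>1.21</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.openjdk.jmh</groupId>
+            <artifactId>jmh-generator-annprocess</artifactId>
+            <version>1.21</version>
+            <scope>test</scope>
+        </dependency>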
+
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java
index 5468c8142..03a891dd1 100644
--- a/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java
+++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java
@@ -179,6 +179,8 @@ public abstract class Filter
{
LOG.warn(ex.getMessage(), ex);
}
- return Math.max(-1, Math.min(Deflater.BEST_COMPRESSION, compressionLevel));
+ compressionLevel = Math.max(-1, Math.min(Deflater.BEST_COMPRESSION, compressionLevel));
+ return compressionLevel;
}
+
}
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
index 7b82548e1..f09526a94 100644
--- a/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
+++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
@@ -135,4 +135,5 @@ final class FlateFilter extends Filter
encoded.flush();
deflater.end();
}
+
}
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java
index 70d71fd9e..585557118 100644
--- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java
+++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java
@@ -15,21 +15,29 @@
*/
package org.apache.pdfbox.pdmodel.graphics.image;
-import java.awt.Transparency;
-import java.awt.image.BufferedImage;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import javax.imageio.stream.MemoryCacheImageOutputStream;
import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.filter.Filter;
import org.apache.pdfbox.filter.FilterFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
-import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceColorSpace;
-import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
-import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
+import org.apache.pdfbox.pdmodel.graphics.color.*;
+
+import javax.imageio.stream.MemoryCacheImageOutputStream;
+import java.awt.*;
+import java.awt.color.ColorSpace;
+import java.awt.color.ICC_ColorSpace;
+import java.awt.color.ICC_Profile;
+import java.awt.image.BufferedImage;
+import java.awt.image.DataBuffer;
+import java.awt.image.Raster;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.zip.Deflater;
+import java.util.zip.DeflaterOutputStream;
/**
* Factory for creating a PDImageXObject containing a lossless compressed image.
@@ -42,6 +50,12 @@ public final class LosslessFactory
{
}
+
+    /**
+     * Internal switch, only for benchmark purposes: when set to false the predictor encoder is not tried.
+     */
+    static boolean usePredictorEncoder = true;
+
/**
* Creates a new lossless encoded Image XObject from a Buffered Image.
*
@@ -60,6 +74,17 @@ public final class LosslessFactory
}
else
{
+ // We try to encode the image with predictor
+ if (usePredictorEncoder)
+ {
+ PDImageXObject pdImageXObject = new PredictorEncoder(document, image).encode();
+ if (pdImageXObject != null)
+ {
+ return pdImageXObject;
+ }
+ }
+
+            // Fallback: We export the image as 8-bit sRGB and might lose color information
return createFromRGBImage(image, document);
}
}
@@ -180,7 +205,7 @@ public final class LosslessFactory
PDColorSpace initColorSpace) throws IOException
{
//pre-size the output stream to half of the input
- ByteArrayOutputStream baos = new ByteArrayOutputStream(byteArray.length/2);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream(byteArray.length / 2);
Filter filter = FilterFactory.INSTANCE.getFilter(COSName.FLATE_DECODE);
filter.encode(new ByteArrayInputStream(byteArray), baos, new COSDictionary(), 0);
@@ -189,5 +214,453 @@ public final class LosslessFactory
return new PDImageXObject(document, encodedByteStream, COSName.FLATE_DECODE,
width, height, bitsPerComponent, initColorSpace);
}
+
+ private static class PredictorEncoder
+ {
+ private final PDDocument document;
+ private final BufferedImage image;
+ private final int componentsPerPixel;
+ private final int transferType;
+ private final int bytesPerComponent;
+ private final int bytesPerPixel;
+
+ private final int height;
+ private final int width;
+
+ private final byte[] dataRawRowNone;
+ private final byte[] dataRawRowSub;
+ private final byte[] dataRawRowUp;
+ private final byte[] dataRawRowAverage;
+ private final byte[] dataRawRowPaeth;
+
+ final int imageType;
+ final boolean hasAlpha;
+ final byte[] alphaImageData;
+
+ final byte[] aValues;
+ final byte[] cValues;
+ final byte[] bValues;
+ final byte[] xValues;
+ final byte[] tmpResultValues;
+
+ /**
+ * Initialise the encoder and set all final fields
+ */
+ PredictorEncoder(PDDocument document, BufferedImage image)
+ {
+ this.document = document;
+ this.image = image;
+
+ // The raw count of components per pixel including optional alpha
+ this.componentsPerPixel = image.getColorModel().getNumComponents();
+ this.transferType = image.getRaster().getTransferType();
+ this.bytesPerComponent = (transferType == DataBuffer.TYPE_SHORT
+ || transferType == DataBuffer.TYPE_USHORT) ? 2 : 1;
+
+ // Only the bytes we need in the output (excluding alpha)
+ this.bytesPerPixel = image.getColorModel().getNumColorComponents() * bytesPerComponent;
+
+ this.height = image.getHeight();
+ this.width = image.getWidth();
+ this.imageType = image.getType();
+ this.hasAlpha = image.getColorModel().getNumComponents() != image.getColorModel()
+ .getNumColorComponents();
+ this.alphaImageData = hasAlpha ? new byte[width * height * bytesPerComponent] : null;
+
+ // The rows have 1-byte encoding marker and width * BYTES_PER_PIXEL pixel-bytes
+ int dataRowByteCount = width * bytesPerPixel + 1;
+ this.dataRawRowNone = new byte[dataRowByteCount];
+ this.dataRawRowSub = new byte[dataRowByteCount];
+ this.dataRawRowUp = new byte[dataRowByteCount];
+ this.dataRawRowAverage = new byte[dataRowByteCount];
+ this.dataRawRowPaeth = new byte[dataRowByteCount];
+
+ // Write the encoding markers
+ dataRawRowNone[0] = 0;
+ dataRawRowSub[0] = 1;
+ dataRawRowUp[0] = 2;
+ dataRawRowAverage[0] = 3;
+ dataRawRowPaeth[0] = 4;
+
+ // c | b
+ // -----
+ // a | x
+ //
+ // x => current pixel
+ this.aValues = new byte[bytesPerPixel];
+ this.cValues = new byte[bytesPerPixel];
+ this.bValues = new byte[bytesPerPixel];
+ this.xValues = new byte[bytesPerPixel];
+ this.tmpResultValues = new byte[bytesPerPixel];
+ }
+
+ /**
+ * Tries to compress the image using a predictor.
+ *
+         * @return the image or null if it is not possible to encode the image (e.g. unsupported raster format)
+ */
+ PDImageXObject encode() throws IOException
+ {
+ Raster imageRaster = image.getRaster();
+ final int elementsInRowPerPixel;
+
+            // These variables each hold one row of the image; the exact type depends
+            // on the image encoding and can be an int[], short[] or byte[]
+ Object prevRow, transferRow;
+
+ switch (imageType)
+ {
+ case BufferedImage.TYPE_CUSTOM:
+ {
+ switch (imageRaster.getTransferType())
+ {
+ case DataBuffer.TYPE_USHORT:
+ elementsInRowPerPixel = componentsPerPixel;
+ prevRow = new short[width * elementsInRowPerPixel];
+ transferRow = new short[width * elementsInRowPerPixel];
+ break;
+ case DataBuffer.TYPE_BYTE:
+ elementsInRowPerPixel = componentsPerPixel;
+ prevRow = new byte[width * elementsInRowPerPixel];
+ transferRow = new byte[width * elementsInRowPerPixel];
+ break;
+ default:
+ return null;
+ }
+ break;
+ }
+
+ case BufferedImage.TYPE_3BYTE_BGR:
+ case BufferedImage.TYPE_4BYTE_ABGR:
+ {
+ elementsInRowPerPixel = componentsPerPixel;
+ prevRow = new byte[width * elementsInRowPerPixel];
+ transferRow = new byte[width * elementsInRowPerPixel];
+ break;
+ }
+
+ case BufferedImage.TYPE_INT_BGR:
+ case BufferedImage.TYPE_INT_ARGB:
+ case BufferedImage.TYPE_INT_RGB:
+ {
+ elementsInRowPerPixel = 1;
+ prevRow = new int[width * elementsInRowPerPixel];
+ transferRow = new int[width * elementsInRowPerPixel];
+ break;
+ }
+
+ default:
+ // We can not handle this unknown format
+ return null;
+ }
+
+ final int elementsInTransferRow = width * elementsInRowPerPixel;
+
+ // pre-size the output stream to half of the maximum size
+ ByteArrayOutputStream stream = new ByteArrayOutputStream(
+ height * width * bytesPerPixel / 2);
+ Deflater deflater = new Deflater(Filter.getCompressionLevel());
+ DeflaterOutputStream zip = new DeflaterOutputStream(stream, deflater);
+
+ int alphaPtr = 0;
+
+ for (int rowNum = 0; rowNum < height; rowNum++)
+ {
+ imageRaster.getDataElements(0, rowNum, width, 1, transferRow);
+
+ // We start to write at index one, as the predictor marker is in index zero
+ int writerPtr = 1;
+ Arrays.fill(aValues, (byte) 0);
+ Arrays.fill(cValues, (byte) 0);
+
+ final byte[] transferRowByte;
+ final byte[] prevRowByte;
+ final int[] transferRowInt;
+ final int[] prevRowInt;
+ final short[] transferRowShort;
+ final short[] prevRowShort;
+
+ if (transferRow instanceof byte[])
+ {
+ transferRowByte = (byte[]) transferRow;
+ prevRowByte = (byte[]) prevRow;
+ transferRowInt = prevRowInt = null;
+ transferRowShort = prevRowShort = null;
+ }
+ else if (transferRow instanceof int[])
+ {
+ transferRowInt = (int[]) transferRow;
+ prevRowInt = (int[]) prevRow;
+ transferRowShort = prevRowShort = null;
+ transferRowByte = prevRowByte = null;
+ }
+ else
+ {
+ // This must be short[]
+ transferRowShort = (short[]) transferRow;
+ prevRowShort = (short[]) prevRow;
+ transferRowInt = prevRowInt = null;
+ transferRowByte = prevRowByte = null;
+ }
+
+ for (int indexInTransferRow = 0; indexInTransferRow < elementsInTransferRow;
+ indexInTransferRow += elementsInRowPerPixel, alphaPtr += bytesPerComponent)
+ {
+ // Copy the pixel values into the byte array
+ if (transferRowByte != null)
+ {
+ copyImageBytes(transferRowByte, indexInTransferRow, xValues, alphaImageData,
+ alphaPtr);
+ copyImageBytes(prevRowByte, indexInTransferRow, bValues, null, 0);
+ }
+ else if (transferRowInt != null)
+ {
+ copyIntToBytes(transferRowInt, indexInTransferRow, xValues, alphaImageData,
+ alphaPtr);
+ copyIntToBytes(prevRowInt, indexInTransferRow, bValues, null, 0);
+ }
+ else
+ {
+ // This must be short[]
+ copyShortsToBytes(transferRowShort, indexInTransferRow, xValues, alphaImageData, alphaPtr);
+ copyShortsToBytes(prevRowShort, indexInTransferRow, bValues, null, 0);
+ }
+
+ // Encode the pixel values in the different encodings
+ int length = xValues.length;
+ for (int bytePtr = 0; bytePtr < length; bytePtr++)
+ {
+ int x = xValues[bytePtr] & 0xFF;
+ int a = aValues[bytePtr] & 0xFF;
+ int b = bValues[bytePtr] & 0xFF;
+ int c = cValues[bytePtr] & 0xFF;
+ dataRawRowNone[writerPtr] = (byte) x;
+ dataRawRowSub[writerPtr] = pngFilterSub(x, a);
+ dataRawRowUp[writerPtr] = pngFilterUp(x, b);
+ dataRawRowAverage[writerPtr] = pngFilterAverage(x, a, b);
+ dataRawRowPaeth[writerPtr] = pngFilterPaeth(x, a, b, c);
+ writerPtr++;
+ }
+
+ // We shift the values into the prev / upper left values for the next pixel
+ System.arraycopy(xValues, 0, aValues, 0, bytesPerPixel);
+ System.arraycopy(bValues, 0, cValues, 0, bytesPerPixel);
+ }
+
+ byte[] rowToWrite = chooseDataRowToWrite();
+ // Write and compress the row as long it is hot (CPU cache wise)
+ zip.write(rowToWrite, 0, rowToWrite.length);
+
+ {
+ // We swap prev and transfer row, so that we have the prev row for the next row.
+ Object temp = prevRow;
+ prevRow = transferRow;
+ transferRow = temp;
+ }
+ }
+ zip.close();
+ deflater.end();
+
+ return preparePredictorPDImage(stream, bytesPerComponent * 8);
+ }
+
+ private void copyIntToBytes(int[] transferRow, int indexInTranferRow, byte[] targetValues,
+ byte[] alphaImageData, int alphaPtr)
+ {
+ int val = transferRow[indexInTranferRow];
+ byte b0 = (byte) ((val & 0xFF));
+ byte b1 = (byte) ((val >> 8) & 0xFF);
+ byte b2 = (byte) ((val >> 16) & 0xFF);
+
+ switch (imageType)
+ {
+ case BufferedImage.TYPE_INT_BGR:
+ {
+ targetValues[0] = b0;
+ targetValues[1] = b1;
+ targetValues[2] = b2;
+ break;
+ }
+ case BufferedImage.TYPE_INT_ARGB:
+ {
+ targetValues[0] = b2;
+ targetValues[1] = b1;
+ targetValues[2] = b0;
+ if (alphaImageData != null)
+ {
+ byte b3 = (byte) ((val >> 24) & 0xFF);
+ alphaImageData[alphaPtr] = b3;
+ }
+ break;
+ }
+ case BufferedImage.TYPE_INT_RGB:
+ targetValues[0] = b2;
+ targetValues[1] = b1;
+ targetValues[2] = b0;
+ break;
+ }
+ }
+
+ private void copyImageBytes(byte[] transferRow, int indexInTranferRow, byte[] targetValues,
+ byte[] alphaImageData, int alphaPtr)
+ {
+ System.arraycopy(transferRow, indexInTranferRow, targetValues, 0, targetValues.length);
+ if (alphaImageData != null)
+ {
+ alphaImageData[alphaPtr] = transferRow[indexInTranferRow + targetValues.length];
+ }
+ }
+
+ private static void copyShortsToBytes(short[] transferRow, int indexInTranferRow,
+ byte[] targetValues, byte[] alphaImageData, int alphaPtr)
+ {
+ for (int i = 0; i < targetValues.length;)
+ {
+ short val = transferRow[indexInTranferRow++];
+ targetValues[i++] = (byte) ((val >> 8) & 0xFF);
+ targetValues[i++] = (byte) (val & 0xFF);
+ }
+ if (alphaImageData != null)
+ {
+ short alpha = transferRow[indexInTranferRow];
+ alphaImageData[alphaPtr] = (byte) ((alpha >> 8) & 0xFF);
+ alphaImageData[alphaPtr + 1] = (byte) (alpha & 0xFF);
+ }
+ }
+
+        /** Wraps the Flate-compressed, predictor-encoded rows in an image XObject and adds an SMask if not opaque. */
+        private PDImageXObject preparePredictorPDImage(ByteArrayOutputStream stream,
+                int bitsPerComponent) throws IOException
+        {
+            int height = image.getHeight();
+            int width = image.getWidth();
+            ColorSpace srcCspace = image.getColorModel().getColorSpace();
+            PDColorSpace pdColorSpace = srcCspace.getType() != ColorSpace.TYPE_CMYK
+                    ? PDDeviceRGB.INSTANCE : PDDeviceCMYK.INSTANCE;
+
+            // Encode the image profile if the image has one
+            if (srcCspace instanceof ICC_ColorSpace)
+            {
+                ICC_ColorSpace icc_colorSpace = (ICC_ColorSpace) srcCspace;
+                ICC_Profile profile = icc_colorSpace.getProfile();
+                // We only encode a color profile if it is not the sRGB profile instance (reference comparison)
+                if (profile != ICC_Profile.getInstance(ColorSpace.CS_sRGB))
+                {
+                    PDICCBased pdProfile = new PDICCBased(document);
+                    OutputStream outputStream = pdProfile.getPDStream()
+                            .createOutputStream(COSName.FLATE_DECODE);
+                    outputStream.write(profile.getData());
+                    outputStream.close();
+                    pdProfile.getPDStream().getCOSObject().setInt(COSName.N,
+                            srcCspace.getNumComponents());
+                }
+            }
+            // NOTE(review): pdProfile is never assigned to pdColorSpace, so the image below uses the device space - confirm
+            PDImageXObject imageXObject = new PDImageXObject(document,
+                    new ByteArrayInputStream(stream.toByteArray()), COSName.FLATE_DECODE, width,
+                    height, bitsPerComponent, pdColorSpace);
+
+            COSDictionary decodeParms = new COSDictionary();
+            decodeParms.setItem(COSName.BITS_PER_COMPONENT, COSInteger.get(bitsPerComponent));
+            decodeParms.setItem(COSName.PREDICTOR, COSInteger.get(15));
+            decodeParms.setItem(COSName.COLUMNS, COSInteger.get(width));
+            decodeParms.setItem(COSName.COLORS, COSInteger.get(srcCspace.getNumComponents()));
+            imageXObject.getCOSObject().setItem(COSName.DECODE_PARMS, decodeParms);
+
+            if (image.getTransparency() != Transparency.OPAQUE)
+            {
+                PDImageXObject pdMask = prepareImageXObject(document, alphaImageData,
+                        image.getWidth(), image.getHeight(), 8 * bytesPerComponent, PDDeviceGray.INSTANCE);
+                imageXObject.getCOSObject().setItem(COSName.SMASK, pdMask);
+            }
+            return imageXObject;
+        }
+
+
+ /**
+         * We look which row encoding is the "best" one, i.e. has the lowest sum. We don't implement anything fancier to choose
+         * the right row encoding. This is just the recommended algorithm in the spec. To get the perfect encoding you would need
+         * to do a brute force check of how all the differently encoded rows compress in the zip stream together. You would have
+         * to check 5^image-height combinations...
+ *
+ * @return the "best" row encoding of the row encodings
+ */
+ private byte[] chooseDataRowToWrite()
+ {
+ byte[] rowToWrite = dataRawRowNone;
+ long estCompressSum = estCompressSum(dataRawRowNone);
+ long estCompressSumSub = estCompressSum(dataRawRowSub);
+ long estCompressSumUp = estCompressSum(dataRawRowUp);
+ long estCompressSumAvg = estCompressSum(dataRawRowAverage);
+ long estCompressSumPaeth = estCompressSum(dataRawRowPaeth);
+ if (estCompressSum > estCompressSumSub)
+ {
+ rowToWrite = dataRawRowSub;
+ estCompressSum = estCompressSumSub;
+ }
+ if (estCompressSum > estCompressSumUp)
+ {
+ rowToWrite = dataRawRowUp;
+ estCompressSum = estCompressSumUp;
+ }
+ if (estCompressSum > estCompressSumAvg)
+ {
+ rowToWrite = dataRawRowAverage;
+ estCompressSum = estCompressSumAvg;
+ }
+ if (estCompressSum > estCompressSumPaeth)
+ {
+ rowToWrite = dataRawRowPaeth;
+ }
+ return rowToWrite;
+ }
+
+ /*
+ * PNG Filters, see https://www.w3.org/TR/PNG-Filters.html
+ */
+ private static byte pngFilterSub(int x, int a )
+ {
+ return (byte) ((x & 0xFF) - (a & 0xFF));
+ }
+
+ private static byte pngFilterUp(int x, int b)
+ {
+ // Same as pngFilterSub, just called with the prior row
+ return pngFilterSub(x, b);
+ }
+
+ private static byte pngFilterAverage(int x, int a, int b)
+ {
+ return (byte)(x - ((b + a) / 2));
+ }
+
+        /**
+         * PNG Paeth filter. With a = left, b = above and c = upper left neighbour of x, the predictor is
+         * whichever of the three is closest to the estimate a + b - c; ties are resolved in the
+         * order a, b, c.
+         *
+         * @return the low byte of x minus the chosen predictor
+         */
+        private static byte pngFilterPaeth(int x, int a, int b, int c)
+        {
+            final int estimate = a + b - c;
+            final int distanceToA = Math.abs(estimate - a);
+            final int distanceToB = Math.abs(estimate - b);
+            final int distanceToC = Math.abs(estimate - c);
+            final boolean aIsNearest = distanceToA <= distanceToB && distanceToA <= distanceToC;
+            final int predictor = aIsNearest ? a : (distanceToB <= distanceToC ? b : c);
+            return (byte) (x - predictor);
+        }
+
+        private static long estCompressSum(byte[] dataRawRowSub) // PNG "minimum sum of absolute differences"
+        {
+            long sum = 0;
+            for (byte filtered : dataRawRowSub)
+            {
+                sum += Math.abs(filtered); // bytes are signed deltas: 0xFF is -1 and must count as 1, not 255
+            }
+            return sum;
+        }
+ }
}
diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LoadGovdocs.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LoadGovdocs.java
new file mode 100644
index 000000000..01f5d2504
--- /dev/null
+++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LoadGovdocs.java
@@ -0,0 +1,257 @@
+package org.apache.pdfbox.pdmodel.graphics.image;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.*;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+/**
+ * Load govdocs and use them to test the encoder. It will fetch the govdocs from the net. You can also pre-download them
+ * to speed up repeated tests. Just give the directory where you downloaded the zip-files as first command line argument
+ *
+ * @author Tilman Hausherr
+ */
+public class LoadGovdocs
+{
+    static Set<String> suffixes = new HashSet<String>( // file suffixes ImageIO reports a reader for
+            Arrays.asList(ImageIO.getReaderFileSuffixes()));
+    static File outputDir = new File("/tmp/loadgovdocs_out"); // mismatching images are dumped here
+
+ public static void main(String[] args) throws IOException
+ {
+ System.out.println("supported suffixes: " + suffixes);
+ System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");
+ outputDir.mkdirs();
+ if (args.length > 0)
+ {
+ String directory = args[0];
+ File[] files = new File(directory).listFiles();
+ if (files != null)
+ {
+ for (File file : files)
+ {
+ if (file.getName().endsWith(".zip"))
+ {
+ FileInputStream inputStream = new FileInputStream(file);
+ try
+ {
+ System.out.println("Processing " + file.getName());
+ processZipStream(inputStream);
+ }
+ finally
+ {
+ inputStream.close();
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int zipNum = 0; zipNum <= 1000; ++zipNum)
+ {
+ String urlStr = String.format(
+ "http://downloads.digitalcorpora.org/corpora/files/govdocs1/zipfiles/%03d.zip",
+ zipNum);
+ String zipName = urlStr.substring(urlStr.lastIndexOf('/') + 1);
+ processZipURL(urlStr, zipName);
+ new File(zipName).delete();
+ }
+ }
+ }
+
+ private static void loadZip(String urlStr, String zipName) throws IOException
+ {
+ System.out.println(urlStr);
+ URL url = new URL(urlStr);
+ InputStream is = url.openStream();
+ FileOutputStream output = new FileOutputStream(zipName);
+ try
+ {
+ int len;
+ byte[] buffer = new byte[1024];
+ while ((len = is.read(buffer)) > 0)
+ {
+ output.write(buffer, 0, len);
+ }
+ }
+ finally
+ {
+ output.close();
+ }
+ }
+
+    /**
+     * Opens the zip archive at urlStr and feeds the stream directly to processZipStream; nothing is stored on disk.
+     * zipName is not used here; only a download-to-file variant based on loadZip(urlStr, zipName) would need it.
+     */
+    private static void processZipURL(String urlStr, String zipName) throws IOException
+    {
+        InputStream is = new URL(urlStr).openStream();
+        try
+        {
+            processZipStream(is);
+        }
+        finally
+        {
+            is.close();
+        }
+    }
+
+    /** Encodes every readable image entry of the zip stream with LosslessFactory and checks the decoded result. */
+    private static void processZipStream(InputStream is) throws IOException
+    {
+        try
+        {
+            ZipInputStream zip = new ZipInputStream(new BufferedInputStream(is));
+            try
+            {
+                ZipEntry ze;
+                while ((ze = zip.getNextEntry()) != null)
+                {
+                    if (ze.isDirectory())
+                    {
+                        continue;
+                    }
+                    String suffix = ze.getName().replaceFirst(".*\\.", ""); // works with a.b.c.d.png because regexp is greedy!
+                    if (suffixes.contains(suffix))
+                    {
+                        BufferedImage bim1;
+                        String fileName = ze.getName();
+                        fileName = fileName.substring(fileName.lastIndexOf('/') + 1);
+                        byte[] originalImageBytes;
+                        try
+                        {
+                            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                            try
+                            {
+                                int len;
+                                byte[] buffer = new byte[1024];
+                                while ((len = zip.read(buffer)) > 0)
+                                {
+                                    baos.write(buffer, 0, len);
+                                }
+                                originalImageBytes = baos.toByteArray();
+                                bim1 = ImageIO.read(new ByteArrayInputStream(originalImageBytes));
+                            }
+                            finally
+                            {
+                                baos.close();
+                            }
+                        }
+                        catch (Throwable ex)
+                        {
+                            System.err.println(ze.getName() + " bad, skipped");
+                            continue;
+                        }
+
+                        // now create PDFBox image and compare
+                        PDDocument doc = new PDDocument();
+                        try
+                        {
+                            // create the image XObject and decode it again
+                            PDImageXObject imgXObject = LosslessFactory.createFromImage(doc, bim1);
+                            System.out.println(imgXObject.getCOSObject());
+                            if (imgXObject.getBitsPerComponent() != 8)
+                            {
+                                System.out.println("bpc: " + imgXObject.getBitsPerComponent());
+                            }
+                            BufferedImage bim2 = imgXObject.getImage();
+
+                            // compare
+                            boolean good = isEqual(ze, bim1, bim2);
+
+                            if (!good)
+                            {
+                                LosslessFactory.usePredictorEncoder = false;
+                                imgXObject = LosslessFactory.createFromImage(doc, bim1);
+                                boolean isOldEncoderEqual = isEqual(ze, bim1,
+                                        imgXObject.getImage());
+                                if (!isOldEncoderEqual)
+                                {
+                                    // Old encoder also had a color mismatch, so this is not counted as an error
+                                    good = true;
+                                }
+                                LosslessFactory.usePredictorEncoder = true;
+                            }
+                            if (!good)
+                            {
+                                System.err.println(ze.getName() + ": images not equal");
+
+                                FileOutputStream outputStream = new FileOutputStream(
+                                        new File(outputDir, "org-" + fileName));
+                                try
+                                {
+                                    outputStream.write(originalImageBytes);
+                                }
+                                finally
+                                {
+                                    outputStream.close();
+                                }
+                                ImageIO.write(bim1, "png",
+                                        new File(outputDir, "src-" + fileName + ".png"));
+                                ImageIO.write(bim2, "png", // the decoded predictor result, not the source again
+                                        new File(outputDir, "dst-" + fileName + ".png"));
+                                System.err.println(ze.getName() + " error");
+                            }
+                            else
+                            {
+                                System.out.println(ze.getName() + " ok");
+                            }
+                        }
+                        finally
+                        {
+                            doc.close();
+                        }
+                    }
+                }
+            }
+            finally
+            {
+                zip.close();
+            }
+        }
+        catch (EOFException ex)
+        {
+            // EOF, maybe network error. Skip.
+            ex.printStackTrace();
+        }
+    }
+
+    /** Returns true if both images have the same size and no RGB channel of any pixel differs by more than 1. */
+    private static boolean isEqual(ZipEntry ze, BufferedImage bim1, BufferedImage bim2)
+    {
+        if (bim1.getWidth() != bim2.getWidth() || bim1.getHeight() != bim2.getHeight())
+        {
+            System.err.println(ze.getName() + ": sizes not equal");
+            return false; // a pixel comparison would be meaningless and could run out of bounds
+        }
+        for (int y = 0; y < bim1.getHeight(); ++y)
+        {
+            for (int x = 0; x < bim1.getWidth(); ++x)
+            {
+                int rgb1 = bim1.getRGB(x, y);
+                int rgb2 = bim2.getRGB(x, y);
+                if (rgb1 != rgb2
+                        // don't bother about small differences
+                        && (Math.abs((rgb1 & 0xFF) - (rgb2 & 0xFF)) > 1
+                        || Math.abs(((rgb1 >> 8) & 0xFF) - ((rgb2 >> 8) & 0xFF)) > 1
+                        || Math.abs(((rgb1 >> 16) & 0xFF) - ((rgb2 >> 16) & 0xFF)) > 1))
+                {
+                    System.err.println(ze.getName() + ": "
+                            + String.format("(%d,%d) %08X != %08X", x, y, rgb1, rgb2));
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+}
diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryBenchmark.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryBenchmark.java
new file mode 100644
index 000000000..75982d928
--- /dev/null
+++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryBenchmark.java
@@ -0,0 +1,134 @@
+package org.apache.pdfbox.pdmodel.graphics.image;
+
+import org.apache.pdfbox.filter.Filter;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import javax.imageio.ImageIO;
+import java.awt.*;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+
+@State(Scope.Benchmark)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 4)
+@Threads(1)
+public class LosslessFactoryBenchmark
+{
+ private BufferedImage imgSmall;
+ private PDDocument doc;
+ private BufferedImage imgBig;
+ private BufferedImage imgBigBytes;
+
+    /** Loads png.png and prepares 10x upscaled INT_RGB and 3BYTE_BGR copies plus the target document. */
+    @Setup
+    public void setupBenchmark() throws IOException
+    {
+        System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");
+        imgSmall = ImageIO.read(LosslessFactory.class.getResourceAsStream("png.png"));
+        imgBig = new BufferedImage(imgSmall.getWidth() * 10, imgSmall.getHeight() * 10,
+                BufferedImage.TYPE_INT_RGB);
+        imgBigBytes = new BufferedImage(imgSmall.getWidth() * 10, imgSmall.getHeight() * 10,
+                BufferedImage.TYPE_3BYTE_BGR);
+        // drawImage takes the target width AND height; scale the small image to the full canvas of each copy
+        Graphics2D graphics = imgBig.createGraphics();
+        graphics.drawImage(imgSmall, 0, 0, imgBig.getWidth(), imgBig.getHeight(), null);
+        graphics.dispose();
+        graphics = imgBigBytes.createGraphics();
+        graphics.drawImage(imgSmall, 0, 0, imgBigBytes.getWidth(), imgBigBytes.getHeight(), null);
+        graphics.dispose();
+
+        doc = new PDDocument();
+    }
+
+ @SuppressWarnings("WeakerAccess")
+ @Param({ "3", "6", "9" })
+ public String zipLevel;
+
+ @Benchmark()
+ public PDImageXObject rgbOnly() throws IOException
+ {
+ System.setProperty(Filter.SYSPROP_DEFLATELEVEL, zipLevel);
+ LosslessFactory.usePredictorEncoder = false;
+ return LosslessFactory.createFromImage(doc, imgSmall);
+ }
+
+ @Benchmark
+ public PDImageXObject predictor() throws IOException
+ {
+ System.setProperty(Filter.SYSPROP_DEFLATELEVEL, zipLevel);
+ LosslessFactory.usePredictorEncoder = true;
+ return LosslessFactory.createFromImage(doc, imgSmall);
+ }
+
+ @Benchmark()
+ public PDImageXObject rgbOnlyBig() throws IOException
+ {
+ System.setProperty(Filter.SYSPROP_DEFLATELEVEL, zipLevel);
+ LosslessFactory.usePredictorEncoder = false;
+ return LosslessFactory.createFromImage(doc, imgBig);
+ }
+
+ @Benchmark
+ public PDImageXObject predictorBig() throws IOException
+ {
+ System.setProperty(Filter.SYSPROP_DEFLATELEVEL, zipLevel);
+ LosslessFactory.usePredictorEncoder = true;
+ return LosslessFactory.createFromImage(doc, imgBig);
+ }
+
+ @Benchmark()
+ public PDImageXObject rgbOnlyBigBytes() throws IOException
+ {
+ System.setProperty(Filter.SYSPROP_DEFLATELEVEL, zipLevel);
+ LosslessFactory.usePredictorEncoder = false;
+ return LosslessFactory.createFromImage(doc, imgBigBytes);
+ }
+
+ @Benchmark
+ public PDImageXObject predictorBigBytes() throws IOException
+ {
+ System.setProperty(Filter.SYSPROP_DEFLATELEVEL, zipLevel);
+ LosslessFactory.usePredictorEncoder = true;
+ return LosslessFactory.createFromImage(doc, imgBigBytes);
+ }
+
+ private static boolean DO_PROFILE_LOOP = false;
+
+ public static void main(String[] args) throws RunnerException, IOException
+ {
+ if (DO_PROFILE_LOOP)
+ {
+ LosslessFactoryBenchmark benchmark = new LosslessFactoryBenchmark();
+ benchmark.setupBenchmark();
+ benchmark.zipLevel = "3";
+ PDImageXObject last = null;
+
+ benchmark.zipLevel = "9";
+ PDImageXObject tst = benchmark.predictorBig();
+ PDImageXObject tst2 = benchmark.rgbOnlyBig();
+
+
+ System.out.println(" " + tst.getStream().getLength() + " vs. " + tst2.getStream().getLength());
+
+ for (int i = 0; i < 1000000; i++)
+ {
+ PDImageXObject now = benchmark.predictorBig();
+ // Those lines are to keep the VM from optimizing the whole call out...
+ if (last == now)
+ throw new IllegalStateException();
+ last = now;
+ }
+ }
+
+ Options opt = new OptionsBuilder()
+ .include(".*" + LosslessFactoryBenchmark.class.getSimpleName() + ".*").forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java
index 6152dd432..e08e8ab6f 100644
--- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java
+++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java
@@ -15,29 +15,29 @@
*/
package org.apache.pdfbox.pdmodel.graphics.image;
-import java.awt.Color;
-import java.awt.Graphics;
-import java.awt.Graphics2D;
-import java.awt.GraphicsConfiguration;
-import java.awt.Transparency;
-import java.awt.image.BufferedImage;
-import java.io.File;
-import java.io.IOException;
-import java.util.Random;
-import javax.imageio.ImageIO;
import junit.framework.TestCase;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
-import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.checkIdent;
-import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.colorCount;
-import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.doWritePDF;
-import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.validate;
import org.apache.pdfbox.rendering.PDFRenderer;
+import javax.imageio.ImageIO;
+import java.awt.*;
+import java.awt.color.ColorSpace;
+import java.awt.color.ICC_ColorSpace;
+import java.awt.color.ICC_Profile;
+import java.awt.image.*;
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+import java.util.Random;
+
+import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.*;
+
/**
* Unit tests for LosslessFactory
*
@@ -169,7 +169,7 @@ public class LosslessFactoryTest extends TestCase
*/
public void testCreateLosslessFromImageBITMASK4BYTE_ABGR() throws IOException
{
- doBitmaskTransparencyTest(BufferedImage.TYPE_INT_ARGB, "bitmask4babgr.pdf");
+ doBitmaskTransparencyTest(BufferedImage.TYPE_4BYTE_ABGR, "bitmask4babgr.pdf");
}
/**
@@ -399,4 +399,133 @@ public class LosslessFactoryTest extends TestCase
document.close();
}
+    /**
+     * Tests lossless encoding of a CMYK image.
+     *
+     * <p>The test image "png.png" is converted into the color space described by the
+     * ISOcoated_v2_300_bas ICC profile, passed to {@code LosslessFactory.createFromImage},
+     * validated as an 8 bit DeviceCMYK image and written to "cmyk.pdf".
+     *
+     * @throws IOException if a test resource cannot be read or the PDF cannot be written
+     */
+    public void testCreateLosslessFromImageCMYK() throws IOException
+    {
+        PDDocument document = new PDDocument();
+        BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png"));
+
+        // This test opened the profile stream, so it is also responsible for closing it.
+        java.io.InputStream profileStream = this.getClass()
+                .getResourceAsStream("/org/apache/pdfbox/resources/icc/ISOcoated_v2_300_bas.icc");
+        assertNotNull("ICC profile resource not found", profileStream);
+        final ColorSpace targetCS;
+        try
+        {
+            targetCS = new ICC_ColorSpace(ICC_Profile.getInstance(profileStream));
+        }
+        finally
+        {
+            profileStream.close();
+        }
+        ColorConvertOp op = new ColorConvertOp(image.getColorModel().getColorSpace(), targetCS, null);
+        BufferedImage imageCMYK = op.filter(image, null);
+
+        PDImageXObject ximage = LosslessFactory.createFromImage(document, imageCMYK);
+        validate(ximage, 8, imageCMYK.getWidth(), imageCMYK.getHeight(), "png", PDDeviceCMYK.INSTANCE.getName());
+
+        doWritePDF(document, ximage, testResultsDir, "cmyk.pdf");
+
+        // The image in CMYK got color-truncated because the ISO Coated color space is much
+        // smaller than the sRGB color space, and the image is converted back to sRGB when
+        // calling PDImageXObject.getImage(). To compare the pixel data, the CMYK image would
+        // therefore have to be converted back to sRGB as well. That comparison is disabled:
+        //BufferedImage compareImageRGB = new BufferedImage(imageCMYK.getWidth(), imageCMYK.getHeight(),
+        //        BufferedImage.TYPE_INT_BGR);
+        //Graphics2D graphics = compareImageRGB.createGraphics();
+        //graphics.drawImage(imageCMYK, 0, 0, null);
+        //graphics.dispose();
+        //checkIdent(compareImageRGB, ximage.getImage());
+    }
+
+    /**
+     * Tests lossless encoding of an opaque image with 16 bits per component.
+     *
+     * <p>The test image "png.png" is converted into an interleaved {@code TYPE_USHORT} sRGB
+     * raster, passed to {@code LosslessFactory.createFromImage}, validated as a 16 bit
+     * DeviceRGB image, compared with the source image and written to "misc-16bit.pdf".
+     *
+     * @throws IOException if the test resource cannot be read or the PDF cannot be written
+     */
+    public void testCreateLosslessFrom16Bit() throws IOException
+    {
+        PDDocument document = new PDDocument();
+        BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png"));
+
+        ColorSpace targetCS = ColorSpace.getInstance(ColorSpace.CS_sRGB);
+        int dataBufferType = DataBuffer.TYPE_USHORT;
+        final ColorModel colorModel = new ComponentColorModel(targetCS, false, false,
+                ColorModel.OPAQUE, dataBufferType);
+        WritableRaster targetRaster = Raster.createInterleavedRaster(dataBufferType, image.getWidth(), image.getHeight(),
+                targetCS.getNumComponents(), new Point(0, 0));
+        // parameterized instead of raw Hashtable to avoid an unchecked warning
+        BufferedImage img16Bit = new BufferedImage(colorModel, targetRaster, false,
+                new Hashtable<Object, Object>());
+        ColorConvertOp op = new ColorConvertOp(image.getColorModel().getColorSpace(), targetCS, null);
+        op.filter(image, img16Bit);
+
+        PDImageXObject ximage = LosslessFactory.createFromImage(document, img16Bit);
+        validate(ximage, 16, img16Bit.getWidth(), img16Bit.getHeight(), "png", PDDeviceRGB.INSTANCE.getName());
+        checkIdent(image, ximage.getImage());
+        doWritePDF(document, ximage, testResultsDir, "misc-16bit.pdf");
+    }
+
+    /**
+     * Tests lossless encoding of a {@code TYPE_INT_BGR} image.
+     *
+     * <p>The test image "png.png" is drawn into a {@code TYPE_INT_BGR} image, which is passed
+     * to {@code LosslessFactory.createFromImage}, validated as an 8 bit DeviceRGB image and
+     * compared with the source image.
+     *
+     * @throws IOException if the test resource cannot be read or the document cannot be closed
+     */
+    public void testCreateLosslessFromImageINT_BGR() throws IOException
+    {
+        PDDocument document = new PDDocument();
+        try
+        {
+            BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png"));
+
+            BufferedImage imgBgr = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_BGR);
+            Graphics2D graphics = imgBgr.createGraphics();
+            graphics.drawImage(image, 0, 0, null);
+            // release the graphics context as soon as the copy is done
+            graphics.dispose();
+
+            PDImageXObject ximage = LosslessFactory.createFromImage(document, imgBgr);
+            validate(ximage, 8, imgBgr.getWidth(), imgBgr.getHeight(), "png", PDDeviceRGB.INSTANCE.getName());
+            checkIdent(image, ximage.getImage());
+        }
+        finally
+        {
+            // this test never hands the document to doWritePDF, so close it here
+            document.close();
+        }
+    }
+
+    /**
+     * Tests lossless encoding of a {@code TYPE_INT_RGB} image.
+     *
+     * <p>The test image "png.png" is drawn into a {@code TYPE_INT_RGB} image, which is passed
+     * to {@code LosslessFactory.createFromImage}, validated as an 8 bit DeviceRGB image and
+     * compared with the source image.
+     *
+     * @throws IOException if the test resource cannot be read or the document cannot be closed
+     */
+    public void testCreateLosslessFromImageINT_RGB() throws IOException
+    {
+        PDDocument document = new PDDocument();
+        try
+        {
+            BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png"));
+
+            BufferedImage imgRgb = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_RGB);
+            Graphics2D graphics = imgRgb.createGraphics();
+            graphics.drawImage(image, 0, 0, null);
+            // release the graphics context as soon as the copy is done
+            graphics.dispose();
+
+            PDImageXObject ximage = LosslessFactory.createFromImage(document, imgRgb);
+            validate(ximage, 8, imgRgb.getWidth(), imgRgb.getHeight(), "png", PDDeviceRGB.INSTANCE.getName());
+            checkIdent(image, ximage.getImage());
+        }
+        finally
+        {
+            // this test never hands the document to doWritePDF, so close it here
+            document.close();
+        }
+    }
+
+    /**
+     * Tests lossless encoding of a {@code TYPE_3BYTE_BGR} image.
+     *
+     * <p>The test image "png.png" is drawn into a {@code TYPE_3BYTE_BGR} image, which is passed
+     * to {@code LosslessFactory.createFromImage}, validated as an 8 bit DeviceRGB image and
+     * compared with the source image.
+     *
+     * @throws IOException if the test resource cannot be read or the document cannot be closed
+     */
+    public void testCreateLosslessFromImageBYTE_3BGR() throws IOException
+    {
+        PDDocument document = new PDDocument();
+        try
+        {
+            BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png"));
+
+            BufferedImage imgBgr = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_3BYTE_BGR);
+            Graphics2D graphics = imgBgr.createGraphics();
+            graphics.drawImage(image, 0, 0, null);
+            // release the graphics context as soon as the copy is done
+            graphics.dispose();
+
+            PDImageXObject ximage = LosslessFactory.createFromImage(document, imgBgr);
+            validate(ximage, 8, imgBgr.getWidth(), imgBgr.getHeight(), "png", PDDeviceRGB.INSTANCE.getName());
+            checkIdent(image, ximage.getImage());
+        }
+        finally
+        {
+            // this test never hands the document to doWritePDF, so close it here
+            document.close();
+        }
+    }
+
+    /**
+     * Tests lossless encoding of the JPEG test image "032163.jpg".
+     *
+     * <p>The decoded image is passed to {@code LosslessFactory.createFromImage}, validated as
+     * an 8 bit DeviceRGB image and compared with the source image.
+     *
+     * @throws IOException if the test resource cannot be read or the document cannot be closed
+     */
+    public void testCreateLosslessFromGovdocs_032_163() throws IOException
+    {
+        PDDocument document = new PDDocument();
+        try
+        {
+            BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("032163.jpg"));
+
+            PDImageXObject ximage = LosslessFactory.createFromImage(document, image);
+            validate(ximage, 8, image.getWidth(), image.getHeight(), "png", PDDeviceRGB.INSTANCE.getName());
+            checkIdent(image, ximage.getImage());
+        }
+        finally
+        {
+            // this test never hands the document to doWritePDF, so close it here
+            document.close();
+        }
+    }
+
+    /**
+     * Tests lossless encoding of the translucent 16 bit test image "16bit.png".
+     *
+     * <p>The test first asserts the properties of the decoded source image (64 bit pixels,
+     * translucent, four {@code TYPE_USHORT} data elements), then checks the created image
+     * XObject and its 16 bit DeviceGray soft mask, and finally writes "png16bit.pdf".
+     *
+     * @throws IOException if the test resource cannot be read or the PDF cannot be written
+     */
+    public void testCreateLosslessFrom16BitPNG() throws IOException
+    {
+        PDDocument document = new PDDocument();
+        BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("16bit.png"));
+
+        // guard against the image reader decoding the resource into an unexpected layout
+        assertEquals(64, image.getColorModel().getPixelSize());
+        assertEquals(Transparency.TRANSLUCENT, image.getColorModel().getTransparency());
+        assertEquals(4, image.getRaster().getNumDataElements());
+        assertEquals(DataBuffer.TYPE_USHORT, image.getRaster().getDataBuffer().getDataType());
+
+        PDImageXObject ximage = LosslessFactory.createFromImage(document, image);
+
+        int w = image.getWidth();
+        int h = image.getHeight();
+        validate(ximage, 16, w, h, "png", PDDeviceRGB.INSTANCE.getName());
+        checkIdent(image, ximage.getImage());
+        checkIdentRGB(image, ximage.getOpaqueImage());
+
+        assertNotNull(ximage.getSoftMask());
+        validate(ximage.getSoftMask(), 16, w, h, "png", PDDeviceGray.INSTANCE.getName());
+        assertEquals(35, colorCount(ximage.getSoftMask().getImage()));
+
+        doWritePDF(document, ximage, testResultsDir, "png16bit.pdf");
+    }
}
diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/032163.jpg b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/032163.jpg
new file mode 100644
index 000000000..e0da70415
Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/032163.jpg differ
diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/16bit.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/16bit.png
new file mode 100644
index 000000000..2b0777891
Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/16bit.png differ