/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.cos;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.io.PagedMultiRandomAccessFile;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
import org.apache.pdfbox.pdfparser.PDFXrefStreamParser;
import org.apache.pdfbox.persistence.util.COSObjectKey;
/**
* This is the in-memory representation of the PDF document. You need to call
* close() on this object when you are done using it!!
*
* @author Ben Litchfield
* @version $Revision: 1.28 $
*/
public class COSDocument extends COSBase {
/**
* Log instance.
*/
private static final Log log = LogFactory.getLog(COSDocument.class);
private float version;
/**
* Maps ObjectKeys to a COSObject. Note that references to these objects
* are also stored in COSDictionary objects that map a name to a specific object.
*/
private final Map objectPool = new HashMap();
/**
* Maps object and generation ids to object byte offsets.
*/
private final Map xrefTable = new HashMap();
/**
* Document trailer dictionary.
*/
private COSDictionary trailer;
/**
* This file will store the streams in order to conserve memory.
*/
private PagedMultiRandomAccessFile scratchFile = null;
private File tmpFile = null;
private String headerString = "%PDF-1.4";
private boolean warnMissingClose = true;
/**
* Constructor. Uses the java.io.tmpdir value to create a file
* to store the streams.
*
* @throws IOException If there is an error creating the tmp file.
*/
public COSDocument() throws IOException {
this(new File(System.getProperty("java.io.tmpdir")));
}
/**
* Constructor that will create a create a scratch file in the
* following directory.
*
* @param scratchDir The directory to store a scratch file.
*
* @throws IOException If there is an error creating the tmp file.
*/
public COSDocument(File scratchDir) throws IOException {
tmpFile = File.createTempFile("pdfbox", "tmp", scratchDir);
scratchFile = new PagedMultiRandomAccessFile(new RandomAccessFile(tmpFile, "rw"));
}
/**
* Constructor that will use the following random access file for storage
* of the PDF streams. The client of this method is responsible for deleting
* the storage if necessary that this file will write to. The close method
* will close the file though.
*
* @param file The random access file to use for storage.
*/
public COSDocument(RandomAccess file) {
// scratchFile = file;
throw new RuntimeException("Not yet implemented."); //$NON-NLS-1$
}
/**
* Returns a new scratch file.
*
* @return the newly created scratch file
*/
public RandomAccess getScratchFile() {
return scratchFile.getNewRandomAcess();
}
/**
* This will get the first dictionary object by type.
*
* @param type The type of the object.
*
* @return This will return an object with the specified type.
* @throws IOException If there is an error getting the object
*/
public COSObject getObjectByType(String type) throws IOException {
return getObjectByType(COSName.getPDFName(type));
}
/**
* This will get the first dictionary object by type.
*
* @param type The type of the object.
*
* @return This will return an object with the specified type.
* @throws IOException If there is an error getting the object
*/
public COSObject getObjectByType(COSName type) throws IOException {
for (COSObject object : objectPool.values()) {
COSBase realObject = object.getObject();
if (realObject instanceof COSDictionary) {
try {
COSDictionary dic = (COSDictionary) realObject;
COSName objectType = (COSName) dic.getItem(COSName.TYPE);
if (objectType != null && objectType.equals(type)) {
return object;
}
} catch (ClassCastException e) {
log.warn(e, e);
}
}
}
return null;
}
/**
* This will get all dictionary objects by type.
*
* @param type The type of the object.
*
* @return This will return an object with the specified type.
* @throws IOException If there is an error getting the object
*/
public List getObjectsByType(String type) throws IOException {
return getObjectsByType(COSName.getPDFName(type));
}
/**
* This will get a dictionary object by type.
*
* @param type The type of the object.
*
* @return This will return an object with the specified type.
* @throws IOException If there is an error getting the object
*/
public List getObjectsByType(COSName type) throws IOException {
List retval = new ArrayList();
for (COSObject object : objectPool.values()) {
COSBase realObject = object.getObject();
if (realObject instanceof COSDictionary) {
try {
COSDictionary dic = (COSDictionary) realObject;
COSName objectType = (COSName) dic.getItem(COSName.TYPE);
if (objectType != null && objectType.equals(type)) {
retval.add(object);
}
} catch (ClassCastException e) {
log.warn(e, e);
}
}
}
return retval;
}
/**
* This will print contents to stdout.
*/
public void print() {
for (COSObject object : objectPool.values()) {
System.out.println(object);
}
}
/**
* This will set the version of this PDF document.
*
* @param versionValue The version of the PDF document.
*/
public void setVersion(float versionValue) {
version = versionValue;
}
/**
* This will get the version of this PDF document.
*
* @return This documents version.
*/
public float getVersion() {
return version;
}
/**
* This will tell if this is an encrypted document.
*
* @return true If this document is encrypted.
*/
public boolean isEncrypted() {
boolean encrypted = false;
if (trailer != null) {
encrypted = trailer.getDictionaryObject(COSName.ENCRYPT) != null;
}
return encrypted;
}
/**
* This will get the encryption dictionary if the document is encrypted or null
* if the document is not encrypted.
*
* @return The encryption dictionary.
*/
public COSDictionary getEncryptionDictionary() {
return (COSDictionary) trailer.getDictionaryObject(COSName.ENCRYPT);
}
/**
* This will set the encryption dictionary, this should only be called when
* encypting the document.
*
* @param encDictionary The encryption dictionary.
*/
public void setEncryptionDictionary(COSDictionary encDictionary) {
trailer.setItem(COSName.ENCRYPT, encDictionary);
}
/**
* This will get the document ID.
*
* @return The document id.
*/
public COSArray getDocumentID() {
return (COSArray) getTrailer().getItem(COSName.ID);
}
/**
* This will set the document ID.
*
* @param id The document id.
*/
public void setDocumentID(COSArray id) {
getTrailer().setItem(COSName.ID, id);
}
/**
* This will get the document catalog.
*
* Maybe this should move to an object at PDFEdit level
*
* @return catalog is the root of all document activities
*
* @throws IOException If no catalog can be found.
*/
public COSObject getCatalog() throws IOException {
COSObject catalog = getObjectByType(COSName.CATALOG);
if (catalog == null) {
throw new IOException("Catalog cannot be found");
}
return catalog;
}
/**
* This will get a list of all available objects.
*
* @return A list of all objects.
*/
public List getObjects() {
return new ArrayList(objectPool.values());
}
/**
* This will get the document trailer.
*
* @return the document trailer dict
*/
public COSDictionary getTrailer() {
return trailer;
}
/**
* // MIT added, maybe this should not be supported as trailer is a persistence construct.
* This will set the document trailer.
*
* @param newTrailer the document trailer dictionary
*/
public void setTrailer(COSDictionary newTrailer) {
trailer = newTrailer;
}
/**
* visitor pattern double dispatch method.
*
* @param visitor The object to notify when visiting this object.
* @return any object, depending on the visitor implementation, or null
* @throws COSVisitorException If an error occurs while visiting this object.
*/
@Override
public Object accept(ICOSVisitor visitor) throws COSVisitorException {
return visitor.visitFromDocument(this);
}
/**
* This will close all storage and delete the tmp files.
*
* @throws IOException If there is an error close resources.
*/
public void close() throws IOException {
if (scratchFile != null) {
scratchFile.close();
scratchFile = null;
}
if (tmpFile != null) {
tmpFile.delete();
tmpFile = null;
}
}
/**
* Warn the user in the finalizer if he didn't close the PDF document. The method also
* closes the document just in case, to avoid abandoned temporary files. It's still a good
* idea for the user to close the PDF document at the earliest possible to conserve resources.
* @throws IOException if an error occurs while closing the temporary files
*/
@Override
protected void finalize() throws IOException {
if (this.warnMissingClose && (tmpFile != null || scratchFile != null)) {
Throwable t = new Throwable("Warning: You did not close the PDF Document");
t.printStackTrace();
}
close();
}
/**
* Controls whether this instance shall issue a warning if the PDF document wasn't closed
* properly through a call to the {@link #close()} method. If the PDF document is held in
* a cache governed by soft references it is impossible to reliably close the document
* before the warning is raised. By default, the warning is enabled.
* @param warn true enables the warning, false disables it.
*/
public void setWarnMissingClose(boolean warn) {
this.warnMissingClose = warn;
}
/**
* @return Returns the headerString.
*/
public String getHeaderString() {
return headerString;
}
/**
* @param header The headerString to set.
*/
public void setHeaderString(String header) {
headerString = header;
}
/**
* This method will search the list of objects for types of ObjStm. If it finds
* them then it will parse out all of the objects from the stream that is contains.
*
* @throws IOException If there is an error parsing the stream.
*/
public void dereferenceObjectStreams() throws IOException {
for (COSObject objStream : getObjectsByType("ObjStm")) {
COSStream stream = (COSStream) objStream.getObject();
PDFObjectStreamParser parser = new PDFObjectStreamParser(stream, this);
parser.parse();
for (COSObject next : parser.getObjects()) {
COSObjectKey key = new COSObjectKey(next);
COSObject obj = getObjectFromPool(key);
obj.setObject(next.getObject());
}
}
}
/**
* This will get an object from the pool.
*
* @param key The object key.
*
* @return The object in the pool or a new one if it has not been parsed yet.
*
* @throws IOException If there is an error getting the proxy object.
*/
public COSObject getObjectFromPool(COSObjectKey key) throws IOException {
COSObject obj = null;
if (key != null) {
obj = objectPool.get(key);
}
if (obj == null) {
// this was a forward reference, make "proxy" object
obj = new COSObject(null);
if (key != null) {
obj.setObjectNumber(COSInteger.get(key.getNumber()));
obj.setGenerationNumber(COSInteger.get(key.getGeneration()));
objectPool.put(key, obj);
}
}
return obj;
}
/**
* Used to populate the XRef HashMap. Will add an Xreftable entry
* that maps ObjectKeys to byte offsets in the file.
* @param objKey The objkey, with id and gen numbers
* @param offset The byte offset in this file
*/
public void setXRef(COSObjectKey objKey, int offset) {
xrefTable.put(objKey, offset);
}
/**
* Returns the xrefTable which is a mapping of ObjectKeys
* to byte offsets in the file.
* @return mapping of ObjectsKeys to byte offsets
*/
public Map getXrefTable() {
return xrefTable;
}
/**
* This method will search the list of objects for types of XRef and
* uses the parsed data to populate the trailer information as well as
* the xref Map.
*
* @throws IOException if there is an error parsing the stream
*/
public void parseXrefStreams() throws IOException {
COSDictionary trailerDict = new COSDictionary();
for (COSObject xrefStream : getObjectsByType("XRef")) {
COSStream stream = (COSStream) xrefStream.getObject();
trailerDict.addAll(stream);
PDFXrefStreamParser parser = new PDFXrefStreamParser(stream, this);
parser.parse();
}
setTrailer(trailerDict);
}
}