### Eclipse Workspace Patch 1.0
#P pdfbox-trunk
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java (revision 909807)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java (working copy)
@@ -16,10 +16,14 @@
*/
package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
+import java.io.IOException;
+import java.util.Hashtable;
+import java.util.Map;
+
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.pdmodel.common.COSObjectable;
+import org.apache.pdfbox.pdmodel.common.COSDictionaryMap;
+import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
/**
* A root of a structure tree.
@@ -27,18 +31,19 @@
* @author Ben Litchfield
* @version $Revision: 1.2 $
*/
-public class PDStructureTreeRoot implements COSObjectable
+public class PDStructureTreeRoot extends PDStructureNode
{
- private COSDictionary dictionary;
+ public static final String TYPE = "StructTreeRoot";
+
+
/**
* Default Constructor.
*
*/
public PDStructureTreeRoot()
{
- dictionary = new COSDictionary();
- dictionary.setName( COSName.TYPE, "StructTreeRoot" );
+ super(TYPE);
}
/**
@@ -48,26 +53,82 @@
*/
public PDStructureTreeRoot( COSDictionary dic )
{
- dictionary = dic;
+ super(dic);
}
+
/**
- * Convert this standard java object to a COS object.
- *
- * @return The cos object that matches this Java object.
+ * Returns the ID tree.
+ *
+ * @return the ID tree
*/
- public COSBase getCOSObject()
+ public PDNameTreeNode getIDTree()
{
- return dictionary;
+ COSDictionary idTreeDic = (COSDictionary) this.getCOSDictionary()
+ .getDictionaryObject("IDTree");
+ if (idTreeDic != null)
+ {
+ return new PDNameTreeNode(idTreeDic, PDStructureElement.class);
+ }
+ return null;
}
/**
- * Get the low level dictionary that this object wraps.
- *
- * @return The cos dictionary that matches this Java object.
+ * Sets the ID tree.
+ *
+ * @param idTree the ID tree
*/
- public COSDictionary getCOSDictionary()
+ public void setIDTree(PDNameTreeNode idTree)
{
- return dictionary;
+ this.getCOSDictionary().setItem("IDTree", idTree);
}
+
+ /**
+ * Returns the next key in the parent tree.
+ *
+ * @return the next key in the parent tree
+ */
+ public int getParentTreeNextKey()
+ {
+ return this.getCOSDictionary().getInt("ParentTreeNextKey");
+ }
+
+ /**
+ * Returns the role map read from the RoleMap entry of this node's dictionary.
+ *
+ * @return the converted role map; an empty Hashtable if the RoleMap entry is
+ * not a dictionary or if converting it throws an IOException
+ */
+ @SuppressWarnings("unchecked")
+ public Map getRoleMap()
+ {
+ COSBase rm = this.getCOSDictionary().getDictionaryObject("RoleMap");
+ if (rm instanceof COSDictionary)
+ {
+ try
+ {
+ return COSDictionaryMap.convertBasicTypesToMap((COSDictionary) rm);
+ }
+ catch (IOException e)
+ {
+ // conversion failed: report it and fall through to the empty map below
+ e.printStackTrace();
+ }
+ }
+ return new Hashtable();
+ }
+
+    /**
+     * Sets the role map (RoleMap). Every entry of the given map is written as
+     * a name value into a new dictionary, which then replaces any existing
+     * RoleMap entry.
+     *
+     * @param roleMap the role map; keys become dictionary keys, values are
+     *                stored as names
+     */
+    public void setRoleMap(Map<String, String> roleMap)
+    {
+        COSDictionary rmDic = new COSDictionary();
+        // iterate entries so each key is looked up only once
+        for (Map.Entry<String, String> entry : roleMap.entrySet())
+        {
+            rmDic.setName(entry.getKey(), entry.getValue());
+        }
+        this.getCOSDictionary().setItem("RoleMap", rmDic);
+    }
+
}
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html (revision 0)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html (revision 0)
@@ -0,0 +1,26 @@
+<html>
+<head>
+
+</head>
+<body>
+
+
+The marked content package provides a mechanism for modeling marked-content
+sequences.
+</body>
+</html>
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java (revision 0)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java (revision 0)
@@ -0,0 +1,426 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.pdmodel.common.COSArrayList;
+import org.apache.pdfbox.pdmodel.common.COSObjectable;
+import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+
+/**
+ * A node in the structure tree.
+ *
+ * @author Koch
+ * @version $Revision: $
+ */
+public abstract class PDStructureNode implements COSObjectable
+{
+
+    /**
+     * Factory for structure nodes: wraps the given dictionary in the node
+     * class that matches its Type entry.
+     *
+     * @param node the node dictionary
+     * @return a {@link PDStructureTreeRoot} if Type is StructTreeRoot, a
+     *         {@link PDStructureElement} if Type is absent or StructElem
+     * @throws IllegalArgumentException if Type has any other value
+     */
+    public static PDStructureNode create(COSDictionary node)
+    {
+        final String nodeType = node.getNameAsString(COSName.TYPE);
+        // a missing Type entry is treated like StructElem
+        if ((nodeType == null) || "StructElem".equals(nodeType))
+        {
+            return new PDStructureElement(node);
+        }
+        if ("StructTreeRoot".equals(nodeType))
+        {
+            return new PDStructureTreeRoot(node);
+        }
+        throw new IllegalArgumentException("Dictionary must not include a Type entry with a value that is neither StructTreeRoot nor StructElem.");
+    }
+
+
+ // The COS dictionary wrapped by this node.
+ private COSDictionary dictionary;
+
+ /**
+ * Returns the dictionary wrapped by this node.
+ *
+ * @return the COS dictionary
+ */
+ protected COSDictionary getCOSDictionary()
+ {
+ return dictionary;
+ }
+
+    /**
+     * Creates a node backed by a new dictionary whose Type entry is set to the
+     * given type.
+     *
+     * @param type the value for the Type entry
+     */
+    protected PDStructureNode(String type)
+    {
+        COSDictionary fresh = new COSDictionary();
+        fresh.setName(COSName.TYPE, type);
+        this.dictionary = fresh;
+    }
+
+ /**
+ * Constructor for an existing structure node. The given dictionary is
+ * stored as is; no copy is made and no Type entry is written.
+ *
+ * @param dictionary The existing dictionary.
+ */
+ protected PDStructureNode(COSDictionary dictionary)
+ {
+ this.dictionary = dictionary;
+ }
+
+
+ /**
+ * Returns the dictionary wrapped by this node as its COS object.
+ *
+ * @return the wrapped dictionary
+ */
+ public COSBase getCOSObject()
+ {
+ return this.dictionary;
+ }
+
+    /**
+     * Returns the value of the Type entry of this node's dictionary.
+     *
+     * @return the type
+     */
+    public String getType()
+    {
+        final COSDictionary node = this.getCOSDictionary();
+        return node.getNameAsString(COSName.TYPE);
+    }
+
+    /**
+     * Returns a list of objects for the kids (K). The K entry may hold either
+     * a single kid or an array of kids; each kid is converted with
+     * {@link #createObject(COSBase)} and kids that cannot be converted are
+     * left out.
+     *
+     * @return a list of objects for the kids; empty if there are none
+     */
+    public List<Object> getKids()
+    {
+        List<Object> kidObjects = new ArrayList<Object>();
+        COSBase k = this.getCOSDictionary().getDictionaryObject("K");
+        if (k instanceof COSArray)
+        {
+            Iterator<COSBase> kids = ((COSArray) k).iterator();
+            while (kids.hasNext())
+            {
+                COSBase kid = kids.next();
+                Object kidObject = this.createObject(kid);
+                if (kidObject != null)
+                {
+                    kidObjects.add(kidObject);
+                }
+            }
+        }
+        else
+        {
+            // single kid, or no K entry at all (createObject returns null for null)
+            Object kidObject = this.createObject(k);
+            if (kidObject != null)
+            {
+                kidObjects.add(kidObject);
+            }
+        }
+        return kidObjects;
+    }
+
+ /**
+ * Sets the kids (K). The list is converted to a COS array with
+ * COSArrayList.converterToCOSArray and stored as the K entry, replacing
+ * any previous value.
+ *
+ * @param kids the kids
+ */
+ public void setKids(List kids)
+ {
+ this.getCOSDictionary().setItem("K",
+ COSArrayList.converterToCOSArray(kids));
+ }
+
+    /**
+     * Appends a structure element kid and makes this node its parent.
+     * A <code>null</code> element is ignored.
+     *
+     * @param structureElement the structure element
+     */
+    public void appendKid(PDStructureElement structureElement)
+    {
+        if (structureElement == null)
+        {
+            // appendObjectableKid ignores null too; return before setParent
+            // would fail with a NullPointerException
+            return;
+        }
+        this.appendObjectableKid(structureElement);
+        structureElement.setParent(this);
+    }
+
+    /**
+     * Appends the COS object of the given objectable as a kid.
+     * A <code>null</code> objectable is ignored.
+     *
+     * @param objectable the objectable
+     */
+    protected void appendObjectableKid(COSObjectable objectable)
+    {
+        if (objectable != null)
+        {
+            this.appendKid(objectable.getCOSObject());
+        }
+    }
+
+    /**
+     * Appends a COS base kid to the K entry. A <code>null</code> object is
+     * ignored. K is kept as a single object while there is one kid and turned
+     * into an array once a second kid is added.
+     *
+     * @param object the COS base
+     */
+    protected void appendKid(COSBase object)
+    {
+        if (object == null)
+        {
+            return;
+        }
+        final COSDictionary node = this.getCOSDictionary();
+        final COSBase existing = node.getDictionaryObject("K");
+        if (existing instanceof COSArray)
+        {
+            // K already is an array: extend it in place
+            ((COSArray) existing).add(object);
+            return;
+        }
+        if (existing == null)
+        {
+            // no kid so far: the new kid becomes the K entry itself
+            node.setItem("K", object);
+            return;
+        }
+        // exactly one kid so far: wrap the old and the new kid in an array
+        COSArray both = new COSArray();
+        both.add(existing);
+        both.add(object);
+        node.setItem("K", both);
+    }
+
+ /**
+ * Inserts a structure element kid before a reference kid.
+ * The cast selects the COSObjectable overload of insertBefore.
+ * NOTE(review): unlike appendKid(PDStructureElement), this method does not
+ * call newKid.setParent(this) - confirm whether callers are expected to set
+ * the parent themselves.
+ *
+ * @param newKid the structure element
+ * @param refKid the reference kid
+ */
+ public void insertBefore(PDStructureElement newKid, Object refKid)
+ {
+ this.insertBefore((COSObjectable) newKid, refKid);
+ }
+
+    /**
+     * Inserts the COS object of an objectable kid before a reference kid.
+     * A <code>null</code> objectable is ignored.
+     *
+     * @param newKid the objectable
+     * @param refKid the reference kid
+     */
+    protected void insertBefore(COSObjectable newKid, Object refKid)
+    {
+        if (newKid != null)
+        {
+            this.insertBefore(newKid.getCOSObject(), refKid);
+        }
+    }
+
+    /**
+     * Inserts a COS base kid before a reference kid. Nothing is changed if
+     * either argument is <code>null</code>, if this node has no kids, if the
+     * reference kid is neither a COSObjectable nor a COSInteger, or if the
+     * reference kid is not among the kids.
+     *
+     * @param newKid the COS base
+     * @param refKid the reference kid
+     */
+    protected void insertBefore(COSBase newKid, Object refKid)
+    {
+        if ((newKid == null) || (refKid == null))
+        {
+            return;
+        }
+        COSBase k = this.getCOSDictionary().getDictionaryObject("K");
+        if (k == null)
+        {
+            return;
+        }
+        COSBase refKidBase = null;
+        if (refKid instanceof COSObjectable)
+        {
+            refKidBase = ((COSObjectable) refKid).getCOSObject();
+        }
+        else if (refKid instanceof COSInteger)
+        {
+            refKidBase = (COSInteger) refKid;
+        }
+        if (refKidBase == null)
+        {
+            // reference kid could not be resolved to a COS object
+            return;
+        }
+        if (k instanceof COSArray)
+        {
+            COSArray array = (COSArray) k;
+            int refIndex = array.indexOfObject(refKidBase);
+            if (refIndex < 0)
+            {
+                // reference kid is not in the array; add(-1, ...) would fail
+                return;
+            }
+            array.add(refIndex, newKid);
+        }
+        else
+        {
+            boolean onlyKid = k.equals(refKidBase);
+            if (!onlyKid && (k instanceof COSObject))
+            {
+                COSBase kObj = ((COSObject) k).getObject();
+                onlyKid = kObj.equals(refKidBase);
+            }
+            if (onlyKid)
+            {
+                // keep the existing K value itself as the second array entry
+                COSArray array = new COSArray();
+                array.add(newKid);
+                array.add(k);
+                this.getCOSDictionary().setItem("K", array);
+            }
+        }
+    }
+
+ /**
+ * Removes a structure element kid and, if it was removed, clears its parent.
+ *
+ * @param structureElement the structure element
+ * @return <code>true</code> if the kid was removed, <code>false</code> otherwise
+ */
+ public boolean removeKid(PDStructureElement structureElement)
+ {
+ boolean removed = this.removeObjectableKid(structureElement);
+ if (removed)
+ {
+ structureElement.setParent(null);
+ }
+ return removed;
+ }
+
+ /**
+ * Removes the COS object of an objectable kid.
+ *
+ * @param objectable the objectable; <code>null</code> is never a kid
+ * @return <code>true</code> if the kid was removed, <code>false</code> otherwise
+ */
+ protected boolean removeObjectableKid(COSObjectable objectable)
+ {
+ if (objectable == null)
+ {
+ return false;
+ }
+ return this.removeKid(objectable.getCOSObject());
+ }
+
+ /**
+ * Removes a COS base kid from the K entry.
+ *
+ * @param object the COS base; <code>null</code> is never a kid
+ * @return <code>true</code> if the kid was removed, <code>false</code> otherwise
+ */
+ protected boolean removeKid(COSBase object)
+ {
+ if (object == null)
+ {
+ return false;
+ }
+ COSBase k = this.getCOSDictionary().getDictionaryObject("K");
+ if (k == null)
+ {
+ // no kids: objectable is not a kid
+ return false;
+ }
+ else if (k instanceof COSArray)
+ {
+ // K is an array of kids: remove kid from existing array
+ COSArray array = (COSArray) k;
+ boolean removed = array.removeObject(object);
+ // if exactly one kid is left, store it directly instead of the array
+ if (array.size() == 1)
+ {
+ this.getCOSDictionary().setItem("K", array.getObject(0));
+ }
+ return removed;
+ }
+ else
+ {
+ // currently one kid: if current kid equals given object, remove kids entry
+ boolean onlyKid = k.equals(object);
+ if (!onlyKid && (k instanceof COSObject))
+ {
+ // compare against the object behind the indirect reference as well
+ COSBase kObj = ((COSObject) k).getObject();
+ onlyKid = kObj.equals(object);
+ }
+ if (onlyKid)
+ {
+ this.getCOSDictionary().setItem("K", null);
+ return true;
+ }
+ return false;
+ }
+ }
+
+ /**
+ * Creates an object for a kid of this structure node.
+ * The type of object depends on the type of the kid. It can be
+ * <ul>
+ * <li>a {@link PDStructureElement},</li>
+ * <li>a {@link PDObjectReference} (presumably referring to an object such as
+ * a {@link PDAnnotation} or a {@link PDXObject} - confirm),</li>
+ * <li>a {@link PDMarkedContentReference}</li>
+ * <li>a {@link Integer}</li>
+ * </ul>
+ *
+ * @param kid the kid
+ * @return the object, or <code>null</code> if the kid is none of the above
+ */
+ protected Object createObject(COSBase kid)
+ {
+ // resolve the kid to a dictionary, directly or through an indirect reference
+ COSDictionary kidDic = null;
+ if (kid instanceof COSDictionary)
+ {
+ kidDic = (COSDictionary) kid;
+ }
+ else if (kid instanceof COSObject)
+ {
+ COSBase base = ((COSObject) kid).getObject();
+ if (base instanceof COSDictionary)
+ {
+ kidDic = (COSDictionary) base;
+ }
+ }
+ if (kidDic != null)
+ {
+ String type = kidDic.getNameAsString("Type");
+ if ((type == null) || PDStructureElement.TYPE.equals(type))
+ {
+ // A structure element dictionary denoting another structure
+ // element
+ return new PDStructureElement(kidDic);
+ }
+ else if (PDObjectReference.TYPE.equals(type))
+ {
+ // An object reference dictionary denoting a PDF object
+ return new PDObjectReference(kidDic);
+ }
+ else if ("MCR".equals(type))
+ {
+ // A marked-content reference dictionary denoting a
+ // marked-content sequence
+ return new PDMarkedContentReference(kidDic);
+ }
+ }
+ else if (kid instanceof COSInteger)
+ {
+ // An integer marked-content identifier denoting a
+ // marked-content sequence
+ COSInteger mcid = (COSInteger) kid;
+ return mcid.intValue();
+ }
+ // dictionary with an unknown Type, or any other kind of object
+ return null;
+ }
+
+}
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java (revision 909807)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java (working copy)
@@ -16,10 +16,17 @@
*/
package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.pdmodel.common.COSObjectable;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
/**
* A structure element.
@@ -27,18 +34,22 @@
* @author Ben Litchfield
* @version $Revision: 1.3 $
*/
-public class PDStructureElement implements COSObjectable
+public class PDStructureElement extends PDStructureNode
{
- private COSDictionary dictionary;
+ public static final String TYPE = "StructElem";
+
/**
- * Default Constructor.
+ * Constructor with required values.
*
+ * @param structureType the structure type
+ * @param parent the parent structure node
*/
- public PDStructureElement()
+ public PDStructureElement(String structureType, PDStructureNode parent)
{
- dictionary = new COSDictionary();
- dictionary.setName( COSName.TYPE, "StructElem" );
+ super(TYPE);
+ this.setStructureType(structureType);
+ this.setParent(parent);
}
/**
@@ -48,26 +59,500 @@
*/
public PDStructureElement( COSDictionary dic )
{
- dictionary = dic;
+ super(dic);
}
+
/**
- * Convert this standard java object to a COS object.
- *
- * @return The cos object that matches this Java object.
+ * Returns the structure type (S).
+ *
+ * @return the structure type
*/
- public COSBase getCOSObject()
+ public String getStructureType()
{
- return dictionary;
+ return this.getCOSDictionary().getNameAsString("S");
}
/**
- * Get the low level dictionary that this object wraps.
- *
- * @return The cos dictionary that matches this Java object.
+ * Sets the structure type (S).
+ *
+ * @param structureType the structure type
*/
- public COSDictionary getCOSDictionary()
+ public void setStructureType(String structureType)
{
- return dictionary;
+ this.getCOSDictionary().setName("S", structureType);
}
+
+    /**
+     * Returns the parent in the structure hierarchy (P).
+     *
+     * @return the parent in the structure hierarchy, or <code>null</code> if
+     *         the P entry is missing or is not a dictionary
+     */
+    public PDStructureNode getParent()
+    {
+        COSBase p = this.getCOSDictionary().getDictionaryObject(COSName.P);
+        // check the type instead of casting blindly: a malformed P entry
+        // must not end in a ClassCastException
+        if (p instanceof COSDictionary)
+        {
+            return PDStructureNode.create((COSDictionary) p);
+        }
+        return null;
+    }
+
+ /**
+ * Sets the parent in the structure hierarchy (P).
+ *
+ * @param structureNode the parent in the structure hierarchy
+ */
+ public void setParent(PDStructureNode structureNode)
+ {
+ this.getCOSDictionary().setItem(COSName.P, structureNode);
+ }
+
+ /**
+ * Returns the element identifier (ID).
+ *
+ * @return the element identifier
+ */
+ public String getElementIdentifier()
+ {
+ return this.getCOSDictionary().getString("ID");
+ }
+
+ /**
+ * Sets the element identifier (ID).
+ *
+ * @param id the element identifier
+ */
+ public void setElementIdentifier(String id)
+ {
+ this.getCOSDictionary().setString("ID", id);
+ }
+
+    /**
+     * Returns the page on which some or all of the content items designated by
+     * the K entry shall be rendered (Pg).
+     *
+     * @return the page on which some or all of the content items designated by
+     *         the K entry shall be rendered, or <code>null</code> if the Pg
+     *         entry is missing or is not a dictionary
+     */
+    public PDPage getPage()
+    {
+        COSBase pageDic = this.getCOSDictionary().getDictionaryObject("Pg");
+        // check the type instead of casting blindly: a malformed Pg entry
+        // must not end in a ClassCastException
+        if (pageDic instanceof COSDictionary)
+        {
+            return new PDPage((COSDictionary) pageDic);
+        }
+        return null;
+    }
+
+ /**
+ * Sets the page on which some or all of the content items designated by
+ * the K entry shall be rendered (Pg).
+ * @param page the page on which some or all of the content items designated
+ * by the K entry shall be rendered.
+ */
+ public void setPage(PDPage page)
+ {
+ this.getCOSDictionary().setItem("Pg", page);
+ }
+
+    /**
+     * Returns the class names together with their revision numbers (C).
+     * The C entry is either a single name (revision number 0) or an array of
+     * names, each optionally followed by its revision number.
+     *
+     * @return the class names; empty if there is no usable C entry
+     */
+    public Revisions getClassNames()
+    {
+        String key = "C";
+        Revisions classNames = new Revisions();
+        COSBase c = this.getCOSDictionary().getDictionaryObject(key);
+        if (c instanceof COSName)
+        {
+            classNames.addObject(((COSName) c).getName(), 0);
+        }
+        else if (c instanceof COSArray)
+        {
+            Iterator<COSBase> it = ((COSArray) c).iterator();
+            String className = null;
+            while (it.hasNext())
+            {
+                COSBase item = it.next();
+                if (item instanceof COSName)
+                {
+                    className = ((COSName) item).getName();
+                    classNames.addObject(className, 0);
+                }
+                else if ((item instanceof COSInteger) && (className != null))
+                {
+                    // a number only counts as the revision of the name before
+                    // it; a leading number has no name to attach to
+                    classNames.setRevisionNumber(className,
+                        ((COSInteger) item).intValue());
+                }
+            }
+        }
+        return classNames;
+    }
+
+ /**
+ * Sets the class names together with their revision numbers (C).
+ * A single class name with revision number 0 is stored as a plain name;
+ * anything else is stored as an array of alternating names and revision
+ * numbers.
+ *
+ * @param classNames the class names
+ */
+ public void setClassNames(Revisions classNames)
+ {
+ String key = "C";
+ // short form: one class with revision 0 needs no array
+ if ((classNames.size() == 1) && (classNames.getRevisionNumber(0) == 0))
+ {
+ String className = classNames.getObject(0);
+ this.getCOSDictionary().setName(key, className);
+ return;
+ }
+ COSArray array = new COSArray();
+ for (int i = 0; i < classNames.size(); i++)
+ {
+ String className = classNames.getObject(i);
+ int revisionNumber = classNames.getRevisionNumber(i);
+ if (revisionNumber < 0)
+ {
+ // TODO throw Exception because revision number must be > -1?
+ // (negative revision numbers are currently written unchanged)
+ }
+ array.add(COSName.getPDFName(className));
+ array.add(COSInteger.get(revisionNumber));
+ }
+ this.getCOSDictionary().setItem(key, array);
+ }
+
+    /**
+     * Adds a class name to the C entry, paired with the current revision
+     * number of this element. A C entry that is not yet an array is converted
+     * to one, keeping the previous value with revision number 0.
+     *
+     * @param className the class name
+     */
+    public void addClassName(String className)
+    {
+        final COSDictionary element = this.getCOSDictionary();
+        final COSBase current = element.getDictionaryObject("C");
+        final COSArray classes;
+        if (current instanceof COSArray)
+        {
+            classes = (COSArray) current;
+        }
+        else
+        {
+            classes = new COSArray();
+            if (current != null)
+            {
+                // carry over the single existing value with revision 0
+                classes.add(current);
+                classes.add(COSInteger.get(0));
+            }
+        }
+        element.setItem("C", classes);
+        classes.add(COSName.getPDFName(className));
+        classes.add(COSInteger.get(this.getRevisionNumber()));
+    }
+
+    /**
+     * Removes a class name from the C entry, together with the revision
+     * number that follows it in the array form. If only one class name with
+     * revision number 0 is left, C is stored as a plain name again. A
+     * <code>null</code> class name is ignored.
+     *
+     * @param className the class name
+     */
+    public void removeClassName(String className)
+    {
+        if (className == null)
+        {
+            return;
+        }
+        String key = "C";
+        COSBase c = this.getCOSDictionary().getDictionaryObject(key);
+        COSName name = COSName.getPDFName(className);
+        if (c instanceof COSArray)
+        {
+            COSArray array = (COSArray) c;
+            int index = array.indexOf(name);
+            if (index >= 0)
+            {
+                array.remove(index);
+                // the element now at this index is the revision number of the
+                // removed name, if it had one; leaving it behind would attach
+                // it to the preceding class name
+                if ((index < array.size())
+                    && (array.getObject(index) instanceof COSInteger))
+                {
+                    array.remove(index);
+                }
+            }
+            if ((array.size() == 2) && (array.getInt(1) == 0))
+            {
+                this.getCOSDictionary().setItem(key, array.getObject(0));
+            }
+        }
+        else
+        {
+            COSBase directC = c;
+            if (c instanceof COSObject)
+            {
+                directC = ((COSObject) c).getObject();
+            }
+            if (name.equals(directC))
+            {
+                this.getCOSDictionary().setItem(key, null);
+            }
+        }
+    }
+
+ /**
+ * Returns the revision number (R).
+ *
+ * @return the revision number
+ */
+ public int getRevisionNumber()
+ {
+ return this.getCOSDictionary().getInt(COSName.R, 0);
+ }
+
+ /**
+ * Sets the revision number (R).
+ *
+ * @param revisionNumber the revision number
+ */
+ public void setRevisionNumber(int revisionNumber)
+ {
+ this.getCOSDictionary().setInt(COSName.R, revisionNumber);
+ }
+
+ /**
+ * Returns the title (T).
+ *
+ * @return the title
+ */
+ public String getTitle()
+ {
+ return this.getCOSDictionary().getString("T");
+ }
+
+ /**
+ * Sets the title (T).
+ *
+ * @param title the title
+ */
+ public void setTitle(String title)
+ {
+ this.getCOSDictionary().setString("T", title);
+ }
+
+ /**
+ * Returns the language (Lang).
+ *
+ * @return the language
+ */
+ public String getLanguage()
+ {
+ return this.getCOSDictionary().getString("Lang");
+ }
+
+ /**
+ * Sets the language (Lang).
+ *
+ * @param language the language
+ */
+ public void setLanguage(String language)
+ {
+ this.getCOSDictionary().setString("Lang", language);
+ }
+
+ /**
+ * Returns the alternate description (Alt).
+ *
+ * @return the alternate description
+ */
+ public String getAlternateDescription()
+ {
+ return this.getCOSDictionary().getString("Alt");
+ }
+
+ /**
+ * Sets the alternate description (Alt).
+ *
+ * @param alternateDescription the alternate description
+ */
+ public void setAlternateDescription(String alternateDescription)
+ {
+ this.getCOSDictionary().setString("Alt", alternateDescription);
+ }
+
+ /**
+ * Returns the expanded form (E).
+ *
+ * @return the expanded form
+ */
+ public String getExpandedForm()
+ {
+ return this.getCOSDictionary().getString("E");
+ }
+
+ /**
+ * Sets the expanded form (E).
+ *
+ * @param expandedForm the expanded form
+ */
+ public void setExpandedForm(String expandedForm)
+ {
+ this.getCOSDictionary().setString("E", expandedForm);
+ }
+
+ /**
+ * Returns the actual text (ActualText).
+ *
+ * @return the actual text
+ */
+ public String getActualText()
+ {
+ return this.getCOSDictionary().getString("ActualText");
+ }
+
+ /**
+ * Sets the actual text (ActualText).
+ *
+ * @param actualText the actual text
+ */
+ public void setActualText(String actualText)
+ {
+ this.getCOSDictionary().setString("ActualText", actualText);
+ }
+
+    /**
+     * Returns the standard structure type, the actual structure type is mapped
+     * to in the role map. The role map is followed until a type has no further
+     * mapping or maps to itself.
+     *
+     * @return the standard structure type; the structure type itself if there
+     *         is no role map or no mapping, <code>null</code> if no structure
+     *         type is set. For a cyclic role map the type reached when the
+     *         lookup is cut off is returned.
+     */
+    public String getStandardStructureType()
+    {
+        String type = this.getStructureType();
+        // resolve the role map once: every call walks up to the tree root
+        Map<?, ?> roleMap = this.getRoleMap();
+        if ((type == null) || (roleMap == null))
+        {
+            return type;
+        }
+        // A chain without a cycle uses every key at most once, so size() hops
+        // are enough; the bound keeps a cyclic map (A -> B -> A) from looping
+        // forever.
+        for (int hops = roleMap.size(); hops > 0; hops--)
+        {
+            Object mappedType = roleMap.get(type);
+            if (!(mappedType instanceof String) || type.equals(mappedType))
+            {
+                break;
+            }
+            type = (String) mappedType;
+        }
+        return type;
+    }
+
+    /**
+     * Appends a marked-content sequence kid, stored as its integer
+     * marked-content identifier. A <code>null</code> argument is ignored.
+     *
+     * @param markedContent the marked-content sequence
+     */
+    public void appendKid(PDMarkedContent markedContent)
+    {
+        if (markedContent == null)
+        {
+            // consistent with the other append methods, which ignore null
+            return;
+        }
+        this.appendKid(COSInteger.get(markedContent.getMCID()));
+    }
+
+ /**
+ * Appends a marked-content reference kid.
+ *
+ * @param markedContentReference the marked-content reference
+ */
+ public void appendKid(PDMarkedContentReference markedContentReference)
+ {
+ this.appendObjectableKid(markedContentReference);
+ }
+
+ /**
+ * Appends an object reference kid.
+ *
+ * @param objectReference the object reference
+ */
+ public void appendKid(PDObjectReference objectReference)
+ {
+ this.appendObjectableKid(objectReference);
+ }
+
+    /**
+     * Inserts a marked-content identifier kid before a reference kid.
+     *
+     * @param markedContentIdentifier the marked-content identifier
+     * @param refKid the reference kid
+     */
+    public void insertBefore(COSInteger markedContentIdentifier, Object refKid)
+    {
+        // The cast selects the inherited insertBefore(COSBase, Object).
+        // Without it this method is the most specific overload and would
+        // call itself until the stack overflows.
+        this.insertBefore((COSBase) markedContentIdentifier, refKid);
+    }
+
+    /**
+     * Inserts a marked-content reference kid before a reference kid.
+     * A <code>null</code> marked-content reference is ignored.
+     *
+     * @param markedContentReference the marked-content reference
+     * @param refKid the reference kid
+     */
+    public void insertBefore(PDMarkedContentReference markedContentReference, Object refKid)
+    {
+        if (markedContentReference == null)
+        {
+            return;
+        }
+        // Pass the COS object so that the inherited COS-level insertBefore is
+        // selected. Passing the reference itself resolves to this very method
+        // and recurses until the stack overflows.
+        this.insertBefore(markedContentReference.getCOSObject(), refKid);
+    }
+
+    /**
+     * Inserts an object reference kid before a reference kid.
+     * A <code>null</code> object reference is ignored.
+     *
+     * @param objectReference the object reference
+     * @param refKid the reference kid
+     */
+    public void insertBefore(PDObjectReference objectReference, Object refKid)
+    {
+        if (objectReference == null)
+        {
+            return;
+        }
+        // Pass the COS object so that the inherited COS-level insertBefore is
+        // selected. Passing the reference itself resolves to this very method
+        // and recurses until the stack overflows.
+        this.insertBefore(objectReference.getCOSObject(), refKid);
+    }
+
+ /**
+ * Removes a marked-content identifier kid.
+ *
+ * @param markedContentIdentifier the marked-content identifier
+ */
+ public void removeKid(COSInteger markedContentIdentifier)
+ {
+ this.removeKid((COSBase) markedContentIdentifier);
+ }
+
+ /**
+ * Removes a marked-content reference kid.
+ *
+ * @param markedContentReference the marked-content reference
+ */
+ public void removeKid(PDMarkedContentReference markedContentReference)
+ {
+ this.removeObjectableKid(markedContentReference);
+ }
+
+ /**
+ * Removes an object reference kid.
+ *
+ * @param objectReference the object reference
+ */
+ public void removeKid(PDObjectReference objectReference)
+ {
+ this.removeObjectableKid(objectReference);
+ }
+
+
+    /**
+     * Walks up the parent chain until a node that is not a structure element
+     * is reached.
+     *
+     * @return the structure tree root, or <code>null</code> if the chain does
+     *         not end in one
+     */
+    private PDStructureTreeRoot getStructureTreeRoot()
+    {
+        PDStructureNode ancestor = this.getParent();
+        while (ancestor instanceof PDStructureElement)
+        {
+            ancestor = ((PDStructureElement) ancestor).getParent();
+        }
+        return (ancestor instanceof PDStructureTreeRoot)
+            ? (PDStructureTreeRoot) ancestor
+            : null;
+    }
+
+    /**
+     * Returns the role map of the structure tree root this element belongs to.
+     *
+     * @return the role map; an empty map if no structure tree root can be
+     *         reached from this element
+     */
+    @SuppressWarnings("unchecked")
+    private Map<String, String> getRoleMap()
+    {
+        PDStructureTreeRoot root = this.getStructureTreeRoot();
+        if (root != null)
+        {
+            return root.getRoleMap();
+        }
+        // never hand out null: callers look up types without a null check
+        return java.util.Collections.emptyMap();
+    }
+
}
Index: src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequenceWithProperties.java
===================================================================
--- src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequenceWithProperties.java (revision 0)
+++ src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequenceWithProperties.java (revision 0)
@@ -0,0 +1,41 @@
+package org.apache.pdfbox.util.operator;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.util.PDFMarkedContentExtractor;
+import org.apache.pdfbox.util.PDFOperator;
+
+/**
+ * Operator processor for BDC: begins a marked-content sequence with a
+ * property list.
+ */
+public class BeginMarkedContentSequenceWithProperties extends OperatorProcessor
+{
+
+    /**
+     * process : BDC : Begins a marked-content sequence with property list.
+     * The first name operand is taken as the tag and a dictionary operand as
+     * the property list. The sequence is only reported if the context is a
+     * {@link PDFMarkedContentExtractor}.
+     *
+     * @param operator the operator being processed
+     * @param arguments the operands of the operator
+     * @throws IOException declared by the overridden method
+     */
+    @Override
+    public void process(PDFOperator operator, List<COSBase> arguments)
+        throws IOException
+    {
+        COSName tag = null;
+        COSDictionary properties = null;
+        for (COSBase argument : arguments)
+        {
+            if (argument instanceof COSName)
+            {
+                // Only the first name is the tag. A second name refers to a
+                // named property list and must not replace the tag.
+                if (tag == null)
+                {
+                    tag = (COSName) argument;
+                }
+            }
+            else if (argument instanceof COSDictionary)
+            {
+                properties = (COSDictionary) argument;
+            }
+        }
+        if (this.context instanceof PDFMarkedContentExtractor)
+        {
+            ((PDFMarkedContentExtractor) this.context).beginMarkedContentSequence(tag, properties);
+        }
+    }
+
+}
Index: src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequence.java
===================================================================
--- src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequence.java (revision 0)
+++ src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequence.java (revision 0)
@@ -0,0 +1,35 @@
+package org.apache.pdfbox.util.operator;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.util.PDFMarkedContentExtractor;
+import org.apache.pdfbox.util.PDFOperator;
+
+/**
+ * Operator processor for BMC: begins a marked-content sequence.
+ */
+public class BeginMarkedContentSequence extends OperatorProcessor
+{
+
+    /**
+     * process : BMC : Begins a marked-content sequence.
+     * The first name operand is taken as the tag. The sequence is only
+     * reported if the context is a {@link PDFMarkedContentExtractor}.
+     *
+     * @param operator the operator being processed
+     * @param arguments the operands of the operator
+     * @throws IOException declared by the overridden method
+     */
+    @Override
+    public void process(PDFOperator operator, List<COSBase> arguments)
+        throws IOException
+    {
+        COSName tag = null;
+        for (COSBase argument : arguments)
+        {
+            if (argument instanceof COSName)
+            {
+                tag = (COSName) argument;
+                // the tag is the only operand of interest
+                break;
+            }
+        }
+        if (this.context instanceof PDFMarkedContentExtractor)
+        {
+            ((PDFMarkedContentExtractor) this.context).beginMarkedContentSequence(tag, null);
+        }
+    }
+
+}
Index: src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
===================================================================
--- src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (revision 0)
+++ src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (revision 0)
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Stack;
+
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
+import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
+
+public class PDFMarkedContentExtractor extends PDFStreamEngine
+{
+ private boolean suppressDuplicateOverlappingText = true;
+
+ protected List<PDMarkedContent> markedContents = new ArrayList<PDMarkedContent>();
+ private Stack<PDMarkedContent> currentMarkedContents = new Stack<PDMarkedContent>();
+ // Lists keyed by character string; looked up by processTextPosition when suppressing duplicates.
+ private Map<String, List<TextPosition>> characterListMapping =
+ new HashMap<String, List<TextPosition>>();
+
+ /**
+ * encoding that text will be written in (or null).
+ */
+ protected String outputEncoding;
+
+ /**
+ * The normalizer is used to remove text ligatures/presentation forms
+ * and to correct the direction of right to left text, such as Arabic and Hebrew.
+ */
+ private TextNormalize normalize = null;
+
+ /**
+ * Instantiate a new PDFMarkedContentExtractor object. This object will load properties from
+ * Resources/PDFMarkedContentExtractor.properties and will not use an output encoding
+ * (the encoding is left as null).
+ * @throws IOException If there is an error loading the properties.
+ */
+ public PDFMarkedContentExtractor() throws IOException
+ {
+ super( ResourceLoader.loadProperties( "Resources/PDFMarkedContentExtractor.properties", true ) );
+ this.outputEncoding = null;
+ this.normalize = new TextNormalize(this.outputEncoding);
+ }
+
+
+ /**
+ * Instantiate a new PDFMarkedContentExtractor object. Loads all of the operator mappings
+ * from the properties object that is passed in. Does not use an output encoding
+ * (the encoding is left as null).
+ *
+ * @param props The properties containing the mapping of operators to PDFOperator
+ * classes.
+ *
+ * @throws IOException If there is an error reading the properties.
+ */
+ public PDFMarkedContentExtractor( Properties props ) throws IOException
+ {
+ super( props );
+ this.outputEncoding = null;
+ this.normalize = new TextNormalize(this.outputEncoding);
+ }
+ /**
+ * Instantiate a new PDFMarkedContentExtractor object. This object will load properties from
+ * Resources/PDFMarkedContentExtractor.properties and will pass the given encoding
+ * on to the TextNormalize instance it creates.
+ *
+ * @param encoding The encoding that the output will be written in.
+ *
+ * @throws IOException If there is an error reading the properties.
+ */
+ public PDFMarkedContentExtractor( String encoding ) throws IOException
+ {
+ super( ResourceLoader.loadProperties( "Resources/PDFMarkedContentExtractor.properties", true ));
+ this.outputEncoding = encoding;
+ this.normalize = new TextNormalize(this.outputEncoding);
+ }
+
+
+ /**
+ * This will determine if two floating point numbers are within a specified variance.
+ * @param first The first number to compare to.
+ * @param second The second number to compare to.
+ * @param variance The allowed variance.
+ * @return true if second lies strictly between first - variance and first + variance.
+ */
+ private boolean within( float first, float second, float variance )
+ {
+ return second > first - variance && second < first + variance;
+ }
+
+
+ /**
+ * Opens a marked-content sequence, nesting it under the innermost open sequence if there is one.
+ * @param tag the tag of the sequence; may be null
+ * @param properties the properties of the sequence; may be null
+ */
+ public void beginMarkedContentSequence(COSName tag, COSDictionary properties)
+ {
+ PDMarkedContent opened = new PDMarkedContent(tag, properties);
+ if (this.currentMarkedContents.isEmpty())
+ {
+ this.markedContents.add(opened);
+ }
+ else if (this.currentMarkedContents.peek() != null)
+ {
+ this.currentMarkedContents.peek().addMarkedContent(opened);
+ }
+ this.currentMarkedContents.push(opened);
+ }
+ /** Closes the innermost open marked-content sequence; does nothing when none is open. */
+ public void endMarkedContentSequence()
+ {
+ if (!this.currentMarkedContents.isEmpty())
+ {
+ this.currentMarkedContents.pop();
+ }
+ }
+ /** Adds the given XObject to the innermost open marked-content sequence, if there is one. */
+ public void xobject(PDXObject xobject)
+ {
+ if (!this.currentMarkedContents.isEmpty())
+ {
+ this.currentMarkedContents.peek().addXObject(xobject);
+ }
+ }
+
+
+ /**
+ * This will process a TextPosition object and add the
+ * text to the list of characters on a page. It takes care of
+ * overlapping text.
+ *
+ * @param text The text to process.
+ */
+ protected void processTextPosition( TextPosition text )
+ {
+ boolean showCharacter = true;
+ if( this.suppressDuplicateOverlappingText )
+ {
+ showCharacter = false;
+ String textCharacter = text.getCharacter();
+ float textX = text.getX();
+ float textY = text.getY();
+ List sameTextCharacters = this.characterListMapping.get( textCharacter );
+ if( sameTextCharacters == null )
+ {
+ sameTextCharacters = new ArrayList();
+ this.characterListMapping.put( textCharacter, sameTextCharacters );
+ }
+
+ // RDD - Here we compute the value that represents the end of the rendered
+ // text. This value is used to determine whether subsequent text rendered
+ // on the same line overwrites the current text.
+ //
+ // We subtract any positive padding to handle cases where extreme amounts
+ // of padding are applied, then backed off (not sure why this is done, but there
+ // are cases where the padding is on the order of 10x the character width, and
+ // the TJ just backs up to compensate after each character). Also, we subtract
+ // an amount to allow for kerning (a percentage of the width of the last
+ // character).
+ //
+ boolean suppressCharacter = false;
+ float tolerance = (text.getWidth()/textCharacter.length())/3.0f;
+ for( int i=0; i textList = new ArrayList();
+
+ /* In the wild, some PDF encoded documents put diacritics (accents on
+ * top of characters) into a separate Tj element. When displaying them
+ * graphically, the two chunks get overlaid. With text output though,
+ * we need to do the overlay. This code recombines the diacritic with
+ * its associated character if the two are consecutive.
+ */
+ if(textList.isEmpty())
+ {
+ textList.add(text);
+ }
+ else
+ {
+ /* test if we overlap the previous entry.
+ * Note that we are making an assumption that we need to only look back
+ * one TextPosition to find what we are overlapping.
+ * This may not always be true. */
+ TextPosition previousTextPosition = (TextPosition)textList.get(textList.size()-1);
+ if(text.isDiacritic() && previousTextPosition.contains(text))
+ {
+ previousTextPosition.mergeDiacritic(text, this.normalize);
+ }
+ /* If the previous TextPosition was the diacritic, merge it into this
+ * one and remove it from the list. */
+ else if(previousTextPosition.isDiacritic() && text.contains(previousTextPosition))
+ {
+ text.mergeDiacritic(previousTextPosition, this.normalize);
+ textList.remove(textList.size()-1);
+ textList.add(text);
+ }
+ else
+ {
+ textList.add(text);
+ }
+ }
+ if (!this.currentMarkedContents.isEmpty())
+ {
+ this.currentMarkedContents.peek().addText(text);
+ }
+ }
+ }
+
+ /** Returns the list to which beginMarkedContentSequence adds the top-level marked contents. */
+ public List<PDMarkedContent> getMarkedContents()
+ {
+ return this.markedContents;
+ }
+
+}
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java (revision 0)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java (revision 0)
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.documentinterchange.markedcontent;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
+import org.apache.pdfbox.util.TextPosition;
+
+/**
+ * A marked content.
+ *
+ * @author Koch
+ * @version $Revision: $
+ */
+public class PDMarkedContent
+{
+
+ private String tag;
+ private COSDictionary properties;
+ private List<Object> contents;
+
+
+ /**
+ * Creates a new marked content object.
+ *
+ * @param tag the tag; may be null, in which case {@link #getTag()} returns null
+ * @param properties the properties; may be null
+ */
+ public PDMarkedContent(COSName tag, COSDictionary properties)
+ {
+ this.tag = tag == null ? null : tag.getName();
+ this.properties = properties;
+ this.contents = new ArrayList<Object>();
+ }
+
+
+ /**
+ * Gets the tag.
+ *
+ * @return the name of the tag, or null if no tag was given
+ */
+ public String getTag()
+ {
+ return this.tag;
+ }
+
+ /**
+ * Gets the properties.
+ *
+ * @return the properties, or null if none were given
+ */
+ public COSDictionary getProperties()
+ {
+ return this.properties;
+ }
+
+ /**
+ * Gets the marked-content identifier.
+ *
+ * @return the marked-content identifier, or -1 if there are no properties
+ */
+ public int getMCID()
+ {
+ return this.getProperties() == null ? -1 :
+ this.getProperties().getInt("MCID");
+ }
+
+ /**
+ * Gets the language (Lang).
+ *
+ * @return the language, or null if there are no properties
+ */
+ public String getLanguage()
+ {
+ return this.getProperties() == null ? null :
+ this.getProperties().getNameAsString("Lang");
+ }
+
+ /**
+ * Gets the actual text (ActualText).
+ *
+ * @return the actual text, or null if there are no properties
+ */
+ public String getActualText()
+ {
+ return this.getProperties() == null ? null :
+ this.getProperties().getString("ActualText");
+ }
+
+ /**
+ * Gets the alternate description (Alt).
+ *
+ * @return the alternate description, or null if there are no properties
+ */
+ public String getAlternateDescription()
+ {
+ return this.getProperties() == null ? null :
+ this.getProperties().getString("Alt");
+ }
+
+ /**
+ * Gets the contents of the marked content sequence. Can be
+ * <ul>
+ * <li>{@link TextPosition},</li>
+ * <li>{@link PDMarkedContent}, or</li>
+ * <li>{@link PDXObject}.</li>
+ * </ul>
+ *
+ * @return the contents of the marked content sequence
+ */
+ public List<Object> getContents()
+ {
+ return this.contents;
+ }
+
+ /**
+ * Adds a text position to the contents.
+ *
+ * @param text the text position
+ */
+ public void addText(TextPosition text)
+ {
+ this.getContents().add(text);
+ }
+
+ /**
+ * Adds a marked content to the contents.
+ *
+ * @param markedContent the marked content
+ */
+ public void addMarkedContent(PDMarkedContent markedContent)
+ {
+ this.getContents().add(markedContent);
+ }
+
+ /**
+ * Adds an XObject to the contents.
+ *
+ * @param xobject the XObject
+ */
+ public void addXObject(PDXObject xobject)
+ {
+ this.getContents().add(xobject);
+ }
+
+
+ @Override
+ public String toString()
+ {
+ StringBuilder sb = new StringBuilder("tag=").append(this.tag)
+ .append(", properties=").append(this.properties);
+ sb.append(", contents=").append(this.contents);
+ return sb.toString();
+ }
+
+}
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java (revision 0)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java (revision 0)
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Stores objects together with a revision number for each of them.
+ * @author Koch
+ * @version $Revision: $
+ *
+ * @param <T> the type of object to store the revision numbers with
+ */
+public class Revisions<T>
+{
+
+ private List<T> objects;
+ private List<Integer> revisionNumbers;
+
+ private List<T> getObjects()
+ {
+ if (this.objects == null)
+ {
+ this.objects = new ArrayList<T>();
+ }
+ return this.objects;
+ }
+
+ private List<Integer> getRevisionNumbers()
+ {
+ if (this.revisionNumbers == null)
+ {
+ this.revisionNumbers = new ArrayList<Integer>();
+ }
+ return this.revisionNumbers;
+ }
+
+
+ /**
+ * Creates an empty instance; the backing lists are created on first use.
+ */
+ public Revisions()
+ {
+ }
+
+
+ /**
+ * Returns the object at the specified position.
+ *
+ * @param index the position
+ * @return the object
+ * @throws IndexOutOfBoundsException if the index is out of range
+ */
+ public T getObject(int index) throws IndexOutOfBoundsException
+ {
+ return this.getObjects().get(index);
+ }
+
+ /**
+ * Returns the revision number at the specified position.
+ *
+ * @param index the position
+ * @return the revision number
+ * @throws IndexOutOfBoundsException if the index is out of range
+ */
+ public int getRevisionNumber(int index) throws IndexOutOfBoundsException
+ {
+ return this.getRevisionNumbers().get(index);
+ }
+
+ /**
+ * Adds an object with a specified revision number.
+ *
+ * @param object the object
+ * @param revisionNumber the revision number
+ */
+ protected void addObject(T object, int revisionNumber)
+ {
+ this.getObjects().add(object);
+ this.getRevisionNumbers().add(revisionNumber);
+ }
+
+ /**
+ * Sets the revision number of a specified object.
+ * Has no effect if the object has not been added before.
+ * @param object the object
+ * @param revisionNumber the revision number
+ */
+ protected void setRevisionNumber(T object, int revisionNumber)
+ {
+ int index = this.getObjects().indexOf(object);
+ if (index > -1)
+ {
+ this.getRevisionNumbers().set(index, revisionNumber);
+ }
+ }
+
+ /**
+ * Returns the number of stored objects.
+ *
+ * @return the size
+ */
+ public int size()
+ {
+ return this.getObjects().size();
+ }
+
+ @Override
+ public String toString()
+ {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < this.getObjects().size(); i++)
+ {
+ if (i > 0)
+ {
+ sb.append("; ");
+ }
+ sb.append("object=").append(this.getObjects().get(i))
+ .append(", revisionNumber=").append(this.getRevisionNumber(i));
+ }
+ return sb.toString();
+ }
+
+}
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java (revision 0)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java (revision 0)
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
+
+import java.io.IOException;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.common.COSObjectable;
+import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+
+/**
+ * An object reference.
+ *
+ * @author Koch
+ * @version $Revision: $
+ */
+public class PDObjectReference implements COSObjectable
+{
+ /** Value written to the Type entry of dictionaries created by the default constructor. */
+ public static final String TYPE = "OBJR";
+
+ private COSDictionary dictionary;
+ /** Returns the dictionary that backs this object reference. */
+ protected COSDictionary getCOSDictionary()
+ {
+ return this.dictionary;
+ }
+
+ /**
+ * Default Constructor.
+ * Creates a new dictionary and sets its Type entry to {@link #TYPE}.
+ */
+ public PDObjectReference()
+ {
+ this.dictionary = new COSDictionary();
+ this.dictionary.setName(COSName.TYPE, TYPE);
+ }
+
+ /**
+ * Constructor for an existing object reference.
+ *
+ * @param dictionary The existing dictionary.
+ */
+ public PDObjectReference(COSDictionary dictionary)
+ {
+ this.dictionary = dictionary;
+ }
+
+ /** {@inheritDoc} */
+ public COSBase getCOSObject()
+ {
+ return this.dictionary;
+ }
+
+ /**
+ * Gets a higher-level object for the referenced object.
+ * Currently this method may return a {@link PDAnnotation},
+ * a {@link PDXObject} or {@code null}.
+ *
+ * @return a higher-level object for the referenced object
+ */
+ public COSObjectable getReferencedObject()
+ {
+ COSBase obj = this.getCOSDictionary().getDictionaryObject("Obj");
+ try
+ {
+ return PDAnnotation.createAnnotation(obj);
+ }
+ catch (IOException e)
+ {
+ // Not an annotation (createAnnotation failed); try an XObject next
+ try
+ {
+ return PDXObject.createXObject(obj);
+ }
+ catch (IOException e1)
+ {
+ // Not an XObject either; fall through and return null
+ // TODO what else can be the target of the object reference?
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Sets the referenced annotation.
+ *
+ * @param annotation the referenced annotation
+ */
+ public void setReferencedObject(PDAnnotation annotation)
+ {
+ this.getCOSDictionary().setItem("Obj", annotation);
+ }
+
+ /**
+ * Sets the referenced XObject.
+ *
+ * @param xobject the referenced XObject
+ */
+ public void setReferencedObject(PDXObject xobject)
+ {
+ this.getCOSDictionary().setItem("Obj", xobject);
+ }
+
+}
Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java (revision 0)
+++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java (revision 0)
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.common.COSObjectable;
+
+/**
+ * A marked-content reference.
+ *
+ * @author Koch
+ * @version $Revision: $
+ */
+public class PDMarkedContentReference implements COSObjectable
+{
+ /** Value written to the Type entry of dictionaries created by the default constructor. */
+ public static final String TYPE = "MCR";
+
+ private COSDictionary dictionary;
+ /** Returns the dictionary that backs this marked-content reference. */
+ protected COSDictionary getCOSDictionary()
+ {
+ return this.dictionary;
+ }
+
+ /**
+ * Default constructor. Creates a new dictionary and sets its Type entry to {@link #TYPE}.
+ */
+ public PDMarkedContentReference()
+ {
+ this.dictionary = new COSDictionary();
+ this.dictionary.setName(COSName.TYPE, TYPE);
+ }
+
+ /**
+ * Constructor for an existing marked content reference.
+ *
+ * @param dictionary the existing dictionary of the
+ * marked content reference
+ */
+ public PDMarkedContentReference(COSDictionary dictionary)
+ {
+ this.dictionary = dictionary;
+ }
+
+ /** {@inheritDoc} */
+ public COSBase getCOSObject()
+ {
+ return this.dictionary;
+ }
+
+ /**
+ * Gets the page.
+ *
+ * @return the page, or null if the Pg entry is missing or is not a dictionary
+ */
+ public PDPage getPage()
+ {
+ COSBase pg = this.getCOSDictionary().getDictionaryObject("Pg");
+ if (pg instanceof COSDictionary)
+ {
+ return new PDPage((COSDictionary) pg);
+ }
+ return null;
+ }
+
+ /**
+ * Sets the page.
+ *
+ * @param page the page
+ */
+ public void setPage(PDPage page)
+ {
+ this.getCOSDictionary().setItem("Pg", page);
+ }
+
+ /**
+ * Gets the marked content identifier.
+ *
+ * @return the marked content identifier
+ */
+ public int getMCID()
+ {
+ return this.getCOSDictionary().getInt("MCID");
+ }
+
+ /**
+ * Sets the marked content identifier.
+ *
+ * @param mcid the marked content identifier
+ */
+ public void setMCID(int mcid)
+ {
+ this.getCOSDictionary().setInt("MCID", mcid);
+ }
+
+
+ @Override
+ public String toString()
+ {
+ return new StringBuilder()
+ .append("mcid=").append(this.getMCID()).toString();
+ }
+
+}
Index: src/main/java/org/apache/pdfbox/util/operator/EndMarkedContentSequence.java
===================================================================
--- src/main/java/org/apache/pdfbox/util/operator/EndMarkedContentSequence.java (revision 0)
+++ src/main/java/org/apache/pdfbox/util/operator/EndMarkedContentSequence.java (revision 0)
@@ -0,0 +1,26 @@
+package org.apache.pdfbox.util.operator;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.util.PDFMarkedContentExtractor;
+import org.apache.pdfbox.util.PDFOperator;
+
+public class EndMarkedContentSequence extends OperatorProcessor
+{
+ /**
+ * process : EMC : Ends a marked-content sequence begun by BMC or BDC.
+ * @param operator the operator that is being processed
+ * @param arguments the operands; not used by this operator
+ * @throws IOException not thrown by this implementation
+ */
+ @Override
+ public void process(PDFOperator operator, List<COSBase> arguments) throws IOException
+ {
+ if (this.context instanceof PDFMarkedContentExtractor)
+ {
+ ((PDFMarkedContentExtractor) this.context).endMarkedContentSequence();
+ }
+ }
+}
Index: src/main/java/org/apache/pdfbox/util/operator/Invoke.java
===================================================================
--- src/main/java/org/apache/pdfbox/util/operator/Invoke.java (revision 909807)
+++ src/main/java/org/apache/pdfbox/util/operator/Invoke.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm;
+import org.apache.pdfbox.util.PDFMarkedContentExtractor;
import org.apache.pdfbox.util.PDFOperator;
import java.io.IOException;
@@ -54,6 +55,10 @@
Map xobjects = context.getXObjects();
PDXObject xobject = (PDXObject) xobjects.get(name.getName());
+ if (this.context instanceof PDFMarkedContentExtractor)
+ {
+ ((PDFMarkedContentExtractor) this.context).xobject(xobject);
+ }
if(xobject instanceof PDXObjectForm)
{