### Eclipse Workspace Patch 1.0 #P pdfbox-trunk Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java (revision 909807) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java (working copy) @@ -16,10 +16,14 @@ */ package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; +import java.io.IOException; +import java.util.Hashtable; +import java.util.Map; + import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.common.COSDictionaryMap; +import org.apache.pdfbox.pdmodel.common.PDNameTreeNode; /** * A root of a structure tree. @@ -27,18 +31,19 @@ * @author Ben Litchfield * @version $Revision: 1.2 $ */ -public class PDStructureTreeRoot implements COSObjectable +public class PDStructureTreeRoot extends PDStructureNode { - private COSDictionary dictionary; + public static final String TYPE = "StructTreeRoot"; + + /** * Default Constructor. * */ public PDStructureTreeRoot() { - dictionary = new COSDictionary(); - dictionary.setName( COSName.TYPE, "StructTreeRoot" ); + super(TYPE); } /** @@ -48,26 +53,82 @@ */ public PDStructureTreeRoot( COSDictionary dic ) { - dictionary = dic; + super(dic); } + /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. + * Returns the ID tree. 
+ * + * @return the ID tree */ - public COSBase getCOSObject() + public PDNameTreeNode getIDTree() { - return dictionary; + COSDictionary idTreeDic = (COSDictionary) this.getCOSDictionary() + .getDictionaryObject("IDTree"); + if (idTreeDic != null) + { + return new PDNameTreeNode(idTreeDic, PDStructureElement.class); + } + return null; } /** - * Get the low level dictionary that this object wraps. - * - * @return The cos dictionary that matches this Java object. + * Sets the ID tree. + * + * @param idTree the ID tree */ - public COSDictionary getCOSDictionary() + public void setIDTree(PDNameTreeNode idTree) { - return dictionary; + this.getCOSDictionary().setItem("IDTree", idTree); } + + /** + * Returns the next key in the parent tree. + * + * @return the next key in the parent tree + */ + public int getParentTreeNextKey() + { + return this.getCOSDictionary().getInt("ParentTreeNextKey"); + } + + /** + * Returns the role map. + * + * @return the role map + */ + @SuppressWarnings("unchecked") + public Map getRoleMap() + { + COSBase rm = this.getCOSDictionary().getDictionaryObject("RoleMap"); + if (rm instanceof COSDictionary) + { + try + { + return COSDictionaryMap.convertBasicTypesToMap((COSDictionary) rm); + } + catch (IOException e) + { + e.printStackTrace(); + } + } + return new Hashtable(); + } + + /** + * Sets the role map. 
+ * + * @param roleMap the role map + */ + public void setRoleMap(Map roleMap) + { + COSDictionary rmDic = new COSDictionary(); + for (String key : roleMap.keySet()) + { + rmDic.setName(key, roleMap.get(key)); + } + this.getCOSDictionary().setItem("RoleMap", rmDic); + } + } Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html (revision 0) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html (revision 0) @@ -0,0 +1,26 @@ + + + + + + + +The marked content package provides a mechanism for modeling marked-content +sequences. + + Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java (revision 0) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java (revision 0) @@ -0,0 +1,426 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSInteger; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.pdmodel.common.COSArrayList; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; + +/** + * A node in the structure tree. + * + * @author Koch + * @version $Revision: $ + */ +public abstract class PDStructureNode implements COSObjectable +{ + + /** + * Creates a node in the structure tree. Can be either a structure tree root, + * or a structure element. + * + * @param node the node dictionary + * @return the structure node + */ + public static PDStructureNode create(COSDictionary node) + { + String type = node.getNameAsString(COSName.TYPE); + if ("StructTreeRoot".equals(type)) + { + return new PDStructureTreeRoot(node); + } + if ((type == null) || "StructElem".equals(type)) + { + return new PDStructureElement(node); + } + throw new IllegalArgumentException("Dictionary must not include a Type entry with a value that is neither StructTreeRoot nor StructElem."); + } + + + private COSDictionary dictionary; + + protected COSDictionary getCOSDictionary() + { + return dictionary; + } + + /** + * Constructor. + * + * @param type the type + */ + protected PDStructureNode(String type) + { + this.dictionary = new COSDictionary(); + this.dictionary.setName(COSName.TYPE, type); + } + + /** + * Constructor for an existing structure node. + * + * @param dictionary The existing dictionary. 
+ */ + protected PDStructureNode(COSDictionary dictionary) + { + this.dictionary = dictionary; + } + + + public COSBase getCOSObject() + { + return this.dictionary; + } + + /** + * Returns the type. + * + * @return the type + */ + public String getType() + { + return this.getCOSDictionary().getNameAsString(COSName.TYPE); + } + + /** + * Returns a list of objects for the kids (K). + * + * @return a list of objects for the kids + */ + public List getKids() + { + List kidObjects = new ArrayList(); + COSBase k = this.getCOSDictionary().getDictionaryObject("K"); + if (k instanceof COSArray) + { + Iterator kids = ((COSArray) k).iterator(); + while (kids.hasNext()) + { + COSBase kid = kids.next(); + Object kidObject = this.createObject(kid); + if (kidObject != null) + { + kidObjects.add(kidObject); + } + } + } + else + { + Object kidObject = this.createObject(k); + if (kidObject != null) + { + kidObjects.add(kidObject); + } + } + return kidObjects; + } + + /** + * Sets the kids (K). + * + * @param kids the kids + */ + public void setKids(List kids) + { + this.getCOSDictionary().setItem("K", + COSArrayList.converterToCOSArray(kids)); + } + + /** + * Appends a structure element kid. + * + * @param structureElement the structure element + */ + public void appendKid(PDStructureElement structureElement) + { + this.appendObjectableKid(structureElement); + structureElement.setParent(this); + } + + /** + * Appends an objectable kid. + * + * @param objectable the objectable + */ + protected void appendObjectableKid(COSObjectable objectable) + { + if (objectable == null) + { + return; + } + this.appendKid(objectable.getCOSObject()); + } + + /** + * Appends a COS base kid. 
+ * + * @param object the COS base + */ + protected void appendKid(COSBase object) + { + if (object == null) + { + return; + } + COSBase k = this.getCOSDictionary().getDictionaryObject("K"); + if (k == null) + { + // currently no kid: set new kid as kids + this.getCOSDictionary().setItem("K", object); + } + else if (k instanceof COSArray) + { + // currently more than one kid: add new kid to existing array + COSArray array = (COSArray) k; + array.add(object); + } + else + { + // currently one kid: put current and new kid into array and set array as kids + COSArray array = new COSArray(); + array.add(k); + array.add(object); + this.getCOSDictionary().setItem("K", array); + } + } + + /** + * Inserts a structure element kid before a reference kid. + * + * @param newKid the structure element + * @param refKid the reference kid + */ + public void insertBefore(PDStructureElement newKid, Object refKid) + { + this.insertBefore((COSObjectable) newKid, refKid); + } + + /** + * Inserts an objectable kid before a reference kid. + * + * @param newKid the objectable + * @param refKid the reference kid + */ + protected void insertBefore(COSObjectable newKid, Object refKid) + { + if (newKid == null) + { + return; + } + this.insertBefore(newKid.getCOSObject(), refKid); + } + + /** + * Inserts an COS base kid before a reference kid. 
+ * + * @param newKid the COS base + * @param refKid the reference kid + */ + protected void insertBefore(COSBase newKid, Object refKid) + { + if ((newKid == null) || (refKid == null)) + { + return; + } + COSBase k = this.getCOSDictionary().getDictionaryObject("K"); + if (k == null) + { + return; + } + COSBase refKidBase = null; + if (refKid instanceof COSObjectable) + { + refKidBase = ((COSObjectable) refKid).getCOSObject(); + } + else if (refKid instanceof COSInteger) + { + refKidBase = (COSInteger) refKid; + } + if (k instanceof COSArray) + { + COSArray array = (COSArray) k; + int refIndex = array.indexOfObject(refKidBase); + array.add(refIndex, newKid.getCOSObject()); + } + else + { + boolean onlyKid = k.equals(refKidBase); + if (!onlyKid && (k instanceof COSObject)) + { + COSBase kObj = ((COSObject) k).getObject(); + onlyKid = kObj.equals(refKidBase); + } + if (onlyKid) + { + COSArray array = new COSArray(); + array.add(newKid); + array.add(refKidBase); + this.getCOSDictionary().setItem("K", array); + } + } + } + + /** + * Removes a structure element kid. + * + * @param structureElement the structure element + * @return true if the kid was removed, false otherwise + */ + public boolean removeKid(PDStructureElement structureElement) + { + boolean removed = this.removeObjectableKid(structureElement); + if (removed) + { + structureElement.setParent(null); + } + return removed; + } + + /** + * Removes an objectable kid. + * + * @param objectable the objectable + * @return true if the kid was removed, false otherwise + */ + protected boolean removeObjectableKid(COSObjectable objectable) + { + if (objectable == null) + { + return false; + } + return this.removeKid(objectable.getCOSObject()); + } + + /** + * Removes a COS base kid. 
+ * + * @param object the COS base + * @return true if the kid was removed, false otherwise + */ + protected boolean removeKid(COSBase object) + { + if (object == null) + { + return false; + } + COSBase k = this.getCOSDictionary().getDictionaryObject("K"); + if (k == null) + { + // no kids: objectable is not a kid + return false; + } + else if (k instanceof COSArray) + { + // currently more than one kid: remove kid from existing array + COSArray array = (COSArray) k; + boolean removed = array.removeObject(object); + // if now only one kid: set remaining kid as kids + if (array.size() == 1) + { + this.getCOSDictionary().setItem("K", array.getObject(0)); + } + return removed; + } + else + { + // currently one kid: if current kid equals given object, remove kids entry + boolean onlyKid = k.equals(object); + if (!onlyKid && (k instanceof COSObject)) + { + COSBase kObj = ((COSObject) k).getObject(); + onlyKid = kObj.equals(object); + } + if (onlyKid) + { + this.getCOSDictionary().setItem("K", null); + return true; + } + return false; + } + } + + /** + * Creates an object for a kid of this structure node. + * The type of object depends on the type of the kid. It can be + *
    + *
  • a {@link PDStructureElement},
  • + *
  • a {@link PDAnnotation},
  • + *
  • a {@link PDXObject},
  • + *
  • a {@link PDMarkedContentReference}
  • + *
  • a {@link Integer}
  • + *
+ * + * @param kid the kid + * @return the object + */ + protected Object createObject(COSBase kid) + { + COSDictionary kidDic = null; + if (kid instanceof COSDictionary) + { + kidDic = (COSDictionary) kid; + } + else if (kid instanceof COSObject) + { + COSBase base = ((COSObject) kid).getObject(); + if (base instanceof COSDictionary) + { + kidDic = (COSDictionary) base; + } + } + if (kidDic != null) + { + String type = kidDic.getNameAsString("Type"); + if ((type == null) || PDStructureElement.TYPE.equals(type)) + { + // A structure element dictionary denoting another structure + // element + return new PDStructureElement(kidDic); + } + else if (PDObjectReference.TYPE.equals(type)) + { + // An object reference dictionary denoting a PDF object + return new PDObjectReference(kidDic); + } + else if ("MCR".equals(type)) + { + // A marked-content reference dictionary denoting a + // marked-content sequence + return new PDMarkedContentReference(kidDic); + } + } + else if (kid instanceof COSInteger) + { + // An integer marked-content identifier denoting a + // marked-content sequence + COSInteger mcid = (COSInteger) kid; + return mcid.intValue(); + } + return null; + } + +} Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java (revision 909807) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java (working copy) @@ -16,10 +16,17 @@ */ package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; +import java.util.Iterator; +import java.util.Map; + +import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; -import 
org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent; /** * A structure element. @@ -27,18 +34,22 @@ * @author Ben Litchfield * @version $Revision: 1.3 $ */ -public class PDStructureElement implements COSObjectable +public class PDStructureElement extends PDStructureNode { - private COSDictionary dictionary; + public static final String TYPE = "StructElem"; + /** - * Default Constructor. + * Constructor with required values. * + * @param structureType the structure type + * @param parent the parent structure node */ - public PDStructureElement() + public PDStructureElement(String structureType, PDStructureNode parent) { - dictionary = new COSDictionary(); - dictionary.setName( COSName.TYPE, "StructElem" ); + super(TYPE); + this.setStructureType(structureType); + this.setParent(parent); } /** @@ -48,26 +59,500 @@ */ public PDStructureElement( COSDictionary dic ) { - dictionary = dic; + super(dic); } + /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. + * Returns the structure type (S). + * + * @return the structure type */ - public COSBase getCOSObject() + public String getStructureType() { - return dictionary; + return this.getCOSDictionary().getNameAsString("S"); } /** - * Get the low level dictionary that this object wraps. - * - * @return The cos dictionary that matches this Java object. + * Sets the structure type (S). + * + * @param structureType the structure type */ - public COSDictionary getCOSDictionary() + public void setStructureType(String structureType) { - return dictionary; + this.getCOSDictionary().setName("S", structureType); } + + /** + * Returns the parent in the structure hierarchy (P). 
+ * + * @return the parent in the structure hierarchy + */ + public PDStructureNode getParent() + { + COSDictionary p = (COSDictionary) this.getCOSDictionary() + .getDictionaryObject(COSName.P); + if (p == null) + { + return null; + } + return PDStructureNode.create((COSDictionary) p); + } + + /** + * Sets the parent in the structure hierarchy (P). + * + * @param structureNode the parent in the structure hierarchy + */ + public void setParent(PDStructureNode structureNode) + { + this.getCOSDictionary().setItem(COSName.P, structureNode); + } + + /** + * Returns the element identifier (ID). + * + * @return the element identifier + */ + public String getElementIdentifier() + { + return this.getCOSDictionary().getString("ID"); + } + + /** + * Sets the element identifier (ID). + * + * @param id the element identifier + */ + public void setElementIdentifier(String id) + { + this.getCOSDictionary().setString("ID", id); + } + + /** + * Returns the page on which some or all of the content items designated by + * the K entry shall be rendered (Pg). + * + * @return the page on which some or all of the content items designated by + * the K entry shall be rendered + */ + public PDPage getPage() + { + COSDictionary pageDic = (COSDictionary) this.getCOSDictionary() + .getDictionaryObject("Pg"); + if (pageDic == null) + { + return null; + } + return new PDPage(pageDic); + } + + /** + * Sets the page on which some or all of the content items designated by + * the K entry shall be rendered (Pg). + * @param page the page on which some or all of the content items designated + * by the K entry shall be rendered. + */ + public void setPage(PDPage page) + { + this.getCOSDictionary().setItem("Pg", page); + } + + /** + * Returns the class names together with their revision numbers (C). 
+ * + * @return the class names + */ + public Revisions getClassNames() + { + String key = "C"; + Revisions classNames = new Revisions(); + COSBase c = this.getCOSDictionary().getDictionaryObject(key); + if (c instanceof COSName) + { + classNames.addObject(((COSName) c).getName(), 0); + } + if (c instanceof COSArray) + { + COSArray array = (COSArray) c; + Iterator it = array.iterator(); + String className = null; + while (it.hasNext()) + { + COSBase item = it.next(); + if (item instanceof COSName) + { + className = ((COSName) item).getName(); + classNames.addObject(className, 0); + } + else if (item instanceof COSInteger) + { + classNames.setRevisionNumber(className, + ((COSInteger) item).intValue()); + } + } + } + return classNames; + } + + /** + * Sets the class names together with their revision numbers (C). + * + * @param classNames the class names + */ + public void setClassNames(Revisions classNames) + { + String key = "C"; + if ((classNames.size() == 1) && (classNames.getRevisionNumber(0) == 0)) + { + String className = classNames.getObject(0); + this.getCOSDictionary().setName(key, className); + return; + } + COSArray array = new COSArray(); + for (int i = 0; i < classNames.size(); i++) + { + String className = classNames.getObject(i); + int revisionNumber = classNames.getRevisionNumber(i); + if (revisionNumber < 0) + { + // TODO throw Exception because revision number must be > -1? + } + array.add(COSName.getPDFName(className)); + array.add(COSInteger.get(revisionNumber)); + } + this.getCOSDictionary().setItem(key, array); + } + + /** + * Adds a class name. 
+ * + * @param className the class name + */ + public void addClassName(String className) + { + String key = "C"; + COSBase c = this.getCOSDictionary().getDictionaryObject(key); + COSArray array = null; + if (c instanceof COSArray) + { + array = (COSArray) c; + } + else + { + array = new COSArray(); + if (c != null) + { + array.add(c); + array.add(COSInteger.get(0)); + } + } + this.getCOSDictionary().setItem(key, array); + array.add(COSName.getPDFName(className)); + array.add(COSInteger.get(this.getRevisionNumber())); + } + + /** + * Removes a class name. + * + * @param className the class name + */ + public void removeClassName(String className) + { + String key = "C"; + COSBase c = this.getCOSDictionary().getDictionaryObject(key); + COSName name = COSName.getPDFName(className); + if (c instanceof COSArray) + { + COSArray array = (COSArray) c; + array.remove(name); + if ((array.size() == 2) && (array.getInt(1) == 0)) + { + this.getCOSDictionary().setItem(key, array.getObject(0)); + } + } + else + { + COSBase directC = c; + if (c instanceof COSObject) + { + directC = ((COSObject) c).getObject(); + } + if (name.equals(directC)) + { + this.getCOSDictionary().setItem(key, null); + } + } + } + + /** + * Returns the revision number (R). + * + * @return the revision number + */ + public int getRevisionNumber() + { + return this.getCOSDictionary().getInt(COSName.R, 0); + } + + /** + * Sets the revision number (R). + * + * @param revisionNumber the revision number + */ + public void setRevisionNumber(int revisionNumber) + { + this.getCOSDictionary().setInt(COSName.R, revisionNumber); + } + + /** + * Returns the title (T). + * + * @return the title + */ + public String getTitle() + { + return this.getCOSDictionary().getString("T"); + } + + /** + * Sets the title (T). + * + * @param title the title + */ + public void setTitle(String title) + { + this.getCOSDictionary().setString("T", title); + } + + /** + * Returns the language (Lang). 
+ * + * @return the language + */ + public String getLanguage() + { + return this.getCOSDictionary().getString("Lang"); + } + + /** + * Sets the language (Lang). + * + * @param language the language + */ + public void setLanguage(String language) + { + this.getCOSDictionary().setString("Lang", language); + } + + /** + * Returns the alternate description (Alt). + * + * @return the alternate description + */ + public String getAlternateDescription() + { + return this.getCOSDictionary().getString("Alt"); + } + + /** + * Sets the alternate description (Alt). + * + * @param alternateDescription the alternate description + */ + public void setAlternateDescription(String alternateDescription) + { + this.getCOSDictionary().setString("Alt", alternateDescription); + } + + /** + * Returns the expanded form (E). + * + * @return the expanded form + */ + public String getExpandedForm() + { + return this.getCOSDictionary().getString("E"); + } + + /** + * Sets the expanded form (E). + * + * @param expandedForm the expanded form + */ + public void setExpandedForm(String expandedForm) + { + this.getCOSDictionary().setString("E", expandedForm); + } + + /** + * Returns the actual text (ActualText). + * + * @return the actual text + */ + public String getActualText() + { + return this.getCOSDictionary().getString("ActualText"); + } + + /** + * Sets the actual text (ActualText). + * + * @param actualText the actual text + */ + public void setActualText(String actualText) + { + this.getCOSDictionary().setString("ActualText", actualText); + } + + /** + * Returns the standard structure type, the actual structure type is mapped + * to in the role map. 
+ * + * @return the standard structure type + */ + public String getStandardStructureType() + { + String type = this.getStructureType(); + String mappedType; + while (true) + { + mappedType = this.getRoleMap().get(type); + if ((mappedType == null) || type.equals(mappedType)) + { + break; + } + type = mappedType; + } + return type; + } + + /** + * Appends a marked-content sequence kid. + * + * @param markedContent the marked-content sequence + */ + public void appendKid(PDMarkedContent markedContent) + { + this.appendKid(COSInteger.get(markedContent.getMCID())); + } + + /** + * Appends a marked-content reference kid. + * + * @param markedContentReference the marked-content reference + */ + public void appendKid(PDMarkedContentReference markedContentReference) + { + this.appendObjectableKid(markedContentReference); + } + + /** + * Appends an object reference kid. + * + * @param objectReference the object reference + */ + public void appendKid(PDObjectReference objectReference) + { + this.appendObjectableKid(objectReference); + } + + /** + * Inserts a marked-content identifier kid before a reference kid. + * + * @param markedContentIdentifier the marked-content identifier + * @param refKid the reference kid + */ + public void insertBefore(COSInteger markedContentIdentifier, Object refKid) + { + this.insertBefore(markedContentIdentifier, refKid); + } + + /** + * Inserts a marked-content reference kid before a reference kid. + * + * @param markedContentReference the marked-content reference + * @param refKid the reference kid + */ + public void insertBefore(PDMarkedContentReference markedContentReference, Object refKid) + { + this.insertBefore(markedContentReference, refKid); + } + + /** + * Inserts an object reference kid before a reference kid. 
+ * + * @param objectReference the object reference + * @param refKid the reference kid + */ + public void insertBefore(PDObjectReference objectReference, Object refKid) + { + this.insertBefore(objectReference, refKid); + } + + /** + * Removes a marked-content identifier kid. + * + * @param markedContentIdentifier the marked-content identifier + */ + public void removeKid(COSInteger markedContentIdentifier) + { + this.removeKid((COSBase) markedContentIdentifier); + } + + /** + * Removes a marked-content reference kid. + * + * @param markedContentReference the marked-content reference + */ + public void removeKid(PDMarkedContentReference markedContentReference) + { + this.removeObjectableKid(markedContentReference); + } + + /** + * Removes an object reference kid. + * + * @param objectReference the object reference + */ + public void removeKid(PDObjectReference objectReference) + { + this.removeObjectableKid(objectReference); + } + + + /** + * Returns the structure tree root. + * + * @return the structure tree root + */ + private PDStructureTreeRoot getStructureTreeRoot() + { + PDStructureNode parent = this.getParent(); + while (parent instanceof PDStructureElement) + { + parent = ((PDStructureElement) parent).getParent(); + } + if (parent instanceof PDStructureTreeRoot) + { + return (PDStructureTreeRoot) parent; + } + return null; + } + + /** + * Returns the role map. 
+ * + * @return the role map + */ + private Map getRoleMap() + { + PDStructureTreeRoot root = this.getStructureTreeRoot(); + if (root != null) + { + return root.getRoleMap(); + } + return null; + } + } Index: src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequenceWithProperties.java =================================================================== --- src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequenceWithProperties.java (revision 0) +++ src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequenceWithProperties.java (revision 0) @@ -0,0 +1,41 @@ +package org.apache.pdfbox.util.operator; + +import java.io.IOException; +import java.util.List; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.util.PDFMarkedContentExtractor; +import org.apache.pdfbox.util.PDFOperator; + +public class BeginMarkedContentSequenceWithProperties extends OperatorProcessor +{ + + /** + * process : BDC : Begins a marked-content sequence with property list. 
+ */ + @Override + public void process(PDFOperator operator, List arguments) + throws IOException + { + COSName tag = null; + COSDictionary properties = null; + for (COSBase argument : arguments) + { + if (argument instanceof COSName) + { + tag = (COSName) argument; + } + else if (argument instanceof COSDictionary) + { + properties = (COSDictionary) argument; + } + } + if (this.context instanceof PDFMarkedContentExtractor) + { + ((PDFMarkedContentExtractor) this.context).beginMarkedContentSequence(tag, properties); + } + } + +} Index: src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequence.java =================================================================== --- src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequence.java (revision 0) +++ src/main/java/org/apache/pdfbox/util/operator/BeginMarkedContentSequence.java (revision 0) @@ -0,0 +1,35 @@ +package org.apache.pdfbox.util.operator; + +import java.io.IOException; +import java.util.List; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.util.PDFMarkedContentExtractor; +import org.apache.pdfbox.util.PDFOperator; + +public class BeginMarkedContentSequence extends OperatorProcessor +{ + + /** + * process : BMC : Begins a marked-content sequence. 
+ */ + @Override + public void process(PDFOperator operator, List arguments) + throws IOException + { + COSName tag = null; + for (COSBase argument : arguments) + { + if (argument instanceof COSName) + { + tag = (COSName) argument; + } + } + if (this.context instanceof PDFMarkedContentExtractor) + { + ((PDFMarkedContentExtractor) this.context).beginMarkedContentSequence(tag, null); + } + } + +} Index: src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java =================================================================== --- src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (revision 0) +++ src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (revision 0) @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.util; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Stack; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent; +import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; + +public class PDFMarkedContentExtractor extends PDFStreamEngine +{ + private boolean suppressDuplicateOverlappingText = true; + + protected List markedContents = new ArrayList(); + private Stack currentMarkedContents = new Stack(); + + private Map> characterListMapping = + new HashMap>(); + + /** + * encoding that text will be written in (or null). + */ + protected String outputEncoding; + + /** + * The normalizer is used to remove text ligatures/presentation forms + * and to correct the direction of right to left text, such as Arabic and Hebrew. + */ + private TextNormalize normalize = null; + + /** + * Instantiate a new PDFTextStripper object. This object will load properties from + * Resources/PDFTextStripper.properties and will not do anything special to + * convert the text to a more encoding-specific output. + * @throws IOException If there is an error loading the properties. + */ + public PDFMarkedContentExtractor() throws IOException + { + super( ResourceLoader.loadProperties( "Resources/PDFMarkedContentExtractor.properties", true ) ); + this.outputEncoding = null; + this.normalize = new TextNormalize(this.outputEncoding); + } + + + /** + * Instantiate a new PDFTextStripper object. Loading all of the operator mappings + * from the properties object that is passed in. Does not convert the text + * to more encoding-specific output. + * + * @param props The properties containing the mapping of operators to PDFOperator + * classes. + * + * @throws IOException If there is an error reading the properties. 
+ */ + public PDFMarkedContentExtractor( Properties props ) throws IOException + { + super( props ); + this.outputEncoding = null; + this.normalize = new TextNormalize(this.outputEncoding); + } + /** + * Instantiate a new PDFMarkedContentExtractor object. This object will load properties from + * Resources/PDFMarkedContentExtractor.properties and will apply encoding-specific + * conversions to the output text. + * + * @param encoding The encoding that the output will be written in. + * + * @throws IOException If there is an error reading the properties. + */ + public PDFMarkedContentExtractor( String encoding ) throws IOException + { + super( ResourceLoader.loadProperties( "Resources/PDFMarkedContentExtractor.properties", true )); + this.outputEncoding = encoding; + this.normalize = new TextNormalize(this.outputEncoding); + } + + + /** + * This will determine if two floating point numbers are within a specified variance. + * + * @param first The first number to compare to. + * @param second The second number to compare to. + * @param variance The allowed variance. + * @return true if second lies strictly between first - variance and first + variance.
+ */ + private boolean within( float first, float second, float variance ) + { + return second > first - variance && second < first + variance; + } + + + public void beginMarkedContentSequence(COSName tag, COSDictionary properties) + { + PDMarkedContent markedContent = new PDMarkedContent(tag, properties); + if (this.currentMarkedContents.isEmpty()) + { + this.markedContents.add(markedContent); + } + else + { + PDMarkedContent currentMarkedContent = + this.currentMarkedContents.peek(); + if (currentMarkedContent != null) + { + currentMarkedContent.addMarkedContent(markedContent); + } + } + this.currentMarkedContents.push(markedContent); + } + + public void endMarkedContentSequence() + { + if (!this.currentMarkedContents.isEmpty()) + { + this.currentMarkedContents.pop(); + } + } + + public void xobject(PDXObject xobject) + { + if (!this.currentMarkedContents.isEmpty()) + { + this.currentMarkedContents.peek().addXObject(xobject); + } + } + + + /** + * This will process a TextPosition object and add the + * text to the list of characters on a page. It takes care of + * overlapping text. + * + * @param text The text to process. + */ + protected void processTextPosition( TextPosition text ) + { + boolean showCharacter = true; + if( this.suppressDuplicateOverlappingText ) + { + showCharacter = false; + String textCharacter = text.getCharacter(); + float textX = text.getX(); + float textY = text.getY(); + List sameTextCharacters = this.characterListMapping.get( textCharacter ); + if( sameTextCharacters == null ) + { + sameTextCharacters = new ArrayList(); + this.characterListMapping.put( textCharacter, sameTextCharacters ); + } + + // RDD - Here we compute the value that represents the end of the rendered + // text. This value is used to determine whether subsequent text rendered + // on the same line overwrites the current text. 
+ // + // We subtract any positive padding to handle cases where extreme amounts + // of padding are applied, then backed off (not sure why this is done, but there + // are cases where the padding is on the order of 10x the character width, and + // the TJ just backs up to compensate after each character). Also, we subtract + // an amount to allow for kerning (a percentage of the width of the last + // character). + // + boolean suppressCharacter = false; + float tolerance = (text.getWidth()/textCharacter.length())/3.0f; + for( int i=0; i textList = new ArrayList(); + + /* In the wild, some PDF encoded documents put diacritics (accents on + * top of characters) into a separate Tj element. When displaying them + * graphically, the two chunks get overlayed. With text output though, + * we need to do the overlay. This code recombines the diacritic with + * its associated character if the two are consecutive. + */ + if(textList.isEmpty()) + { + textList.add(text); + } + else + { + /* test if we overlap the previous entry. + * Note that we are making an assumption that we need to only look back + * one TextPosition to find what we are overlapping. + * This may not always be true. */ + TextPosition previousTextPosition = (TextPosition)textList.get(textList.size()-1); + if(text.isDiacritic() && previousTextPosition.contains(text)) + { + previousTextPosition.mergeDiacritic(text, this.normalize); + } + /* If the previous TextPosition was the diacritic, merge it into this + * one and remove it from the list. 
*/ + else if(previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) + { + text.mergeDiacritic(previousTextPosition, this.normalize); + textList.remove(textList.size()-1); + textList.add(text); + } + else + { + textList.add(text); + } + } + if (!this.currentMarkedContents.isEmpty()) + { + this.currentMarkedContents.peek().addText(text); + } + } + } + + + public List getMarkedContents() + { + return this.markedContents; + } + +} Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java (revision 0) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java (revision 0) @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.documentinterchange.markedcontent; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; +import org.apache.pdfbox.util.TextPosition; + +/** + * A marked content. + * + * @author Koch + * @version $Revision: $ + */ +public class PDMarkedContent +{ + + private String tag; + private COSDictionary properties; + private List contents; + + + /** + * Creates a new marked content object. + * + * @param tag the tag + * @param properties the properties + */ + public PDMarkedContent(COSName tag, COSDictionary properties) + { + this.tag = tag == null ? null : tag.getName(); + this.properties = properties; + this.contents = new ArrayList(); + } + + + /** + * Gets the tag. + * + * @return the tag + */ + public String getTag() + { + return this.tag; + } + + /** + * Gets the properties. + * + * @return the properties + */ + public COSDictionary getProperties() + { + return this.properties; + } + + /** + * Gets the marked-content identifier. Returns -1 instead of null when there are no + * properties, because a null result would be unboxed to int and throw a NullPointerException. + * + * @return the marked-content identifier, or -1 if this marked content has no properties + */ + public int getMCID() + { + return this.getProperties() == null ? -1 : + this.getProperties().getInt("MCID"); + } + + /** + * Gets the language (Lang). + * + * @return the language + */ + public String getLanguage() + { + return this.getProperties() == null ? null : + this.getProperties().getNameAsString("Lang"); + } + + /** + * Gets the actual text (ActualText). + * + * @return the actual text + */ + public String getActualText() + { + return this.getProperties() == null ? null : + this.getProperties().getString("ActualText"); + } + + /** + * Gets the alternate description (Alt). + * + * @return the alternate description + */ + public String getAlternateDescription() + { + return this.getProperties() == null ? 
null : + this.getProperties().getString("Alt"); + } + + /** + * Gets the contents of the marked content sequence. Can be + *
<ul> + * <li>{@link TextPosition},</li> + * <li>{@link PDMarkedContent}, or</li> + * <li>{@link PDXObject}.</li> + * </ul>
+ * + * @return the contents of the marked content sequence + */ + public List getContents() + { + return this.contents; + } + + /** + * Adds a text position to the contents. + * + * @param text the text position + */ + public void addText(TextPosition text) + { + this.getContents().add(text); + } + + /** + * Adds a marked content to the contents. + * + * @param markedContent the marked content + */ + public void addMarkedContent(PDMarkedContent markedContent) + { + this.getContents().add(markedContent); + } + + /** + * Adds an XObject to the contents. + * + * @param xobject the XObject + */ + public void addXObject(PDXObject xobject) + { + this.getContents().add(xobject); + } + + + @Override + public String toString() + { + StringBuilder sb = new StringBuilder("tag=").append(this.tag) + .append(", properties=").append(this.properties); + sb.append(", contents=").append(this.contents); + return sb.toString(); + } + +} Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java (revision 0) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java (revision 0) @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; + +import java.util.ArrayList; +import java.util.List; + +/** + * + * @author Koch + * @version $Revision: $ + * + * @param the type of object to store the revision numbers with + */ +public class Revisions +{ + + private List objects; + private List revisionNumbers; + + private List getObjects() + { + if (this.objects == null) + { + this.objects = new ArrayList(); + } + return this.objects; + } + + private List getRevisionNumbers() + { + if (this.revisionNumbers == null) + { + this.revisionNumbers = new ArrayList(); + } + return this.revisionNumbers; + } + + + /** + * + */ + public Revisions() + { + } + + + /** + * Returns the object at the specified position. + * + * @param index the position + * @return the object + * @throws IndexOutOfBoundsException if the index is out of range + */ + public T getObject(int index) throws IndexOutOfBoundsException + { + return this.getObjects().get(index); + } + + /** + * Returns the revision number at the specified position. + * + * @param index the position + * @return the revision number + * @throws IndexOutOfBoundsException if the index is out of range + */ + public int getRevisionNumber(int index) throws IndexOutOfBoundsException + { + return this.getRevisionNumbers().get(index); + } + + /** + * Adds an object with a specified revision number. 
+ * + * @param object the object + * @param revisionNumber the revision number + */ + protected void addObject(T object, int revisionNumber) + { + this.getObjects().add(object); + this.getRevisionNumbers().add(revisionNumber); + } + + /** + * Sets the revision number of a specified object. + * + * @param object the object + * @param revisionNumber the revision number + */ + protected void setRevisionNumber(T object, int revisionNumber) + { + int index = this.getObjects().indexOf(object); + if (index > -1) + { + this.getRevisionNumbers().set(index, revisionNumber); + } + } + + /** + * Returns the size. + * + * @return the size + */ + public int size() + { + return this.getObjects().size(); + } + + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < this.getObjects().size(); i++) + { + if (i > 0) + { + sb.append("; "); + } + sb.append("object=").append(this.getObjects().get(i)) + .append(", revisionNumber=").append(this.getRevisionNumber(i)); + } + return sb.toString(); + } + +} Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java (revision 0) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java (revision 0) @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; + +import java.io.IOException; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; + +/** + * An object reference. + * + * @author Koch + * @version $Revision: $ + */ +public class PDObjectReference implements COSObjectable +{ + + public static final String TYPE = "OBJR"; + + private COSDictionary dictionary; + + protected COSDictionary getCOSDictionary() + { + return this.dictionary; + } + + /** + * Default Constructor. + * + */ + public PDObjectReference() + { + this.dictionary = new COSDictionary(); + this.dictionary.setName(COSName.TYPE, TYPE); + } + + /** + * Constructor for an existing object reference. + * + * @param dictionary The existing dictionary. + */ + public PDObjectReference(COSDictionary dictionary) + { + this.dictionary = dictionary; + } + + + public COSBase getCOSObject() + { + return this.dictionary; + } + + /** + * Gets a higher-level object for the referenced object. + * Currently this method may return a {@link PDAnnotation}, + * a {@link PDXObject} or null. 
+ * + * @return a higher-level object for the referenced object + */ + public COSObjectable getReferencedObject() + { + COSBase obj = this.getCOSDictionary().getDictionaryObject("Obj"); + try + { + return PDAnnotation.createAnnotation(obj); + } + catch (IOException e) + { + // No Annotation + try + { + return PDXObject.createXObject(obj); + } + catch (IOException e1) + { + // No XObject + // TODO what else can be the target of the object reference? + } + } + return null; + } + + /** + * Sets the referenced annotation. + * + * @param annotation the referenced annotation + */ + public void setReferencedObject(PDAnnotation annotation) + { + this.getCOSDictionary().setItem("Obj", annotation); + } + + /** + * Sets the referenced XObject. + * + * @param xobject the referenced XObject + */ + public void setReferencedObject(PDXObject xobject) + { + this.getCOSDictionary().setItem("Obj", xobject); + } + +} Index: src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java =================================================================== --- src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java (revision 0) +++ src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java (revision 0) @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.COSObjectable; + +/** + * A marked-content reference. + * + * @author Koch + * @version $Revision: $ + */ +public class PDMarkedContentReference implements COSObjectable +{ + + public static final String TYPE = "MCR"; + + private COSDictionary dictionary; + + protected COSDictionary getCOSDictionary() + { + return this.dictionary; + } + + /** + * Default constructor + */ + public PDMarkedContentReference() + { + this.dictionary = new COSDictionary(); + this.dictionary.setName(COSName.TYPE, TYPE); + } + + /** + * Constructor for an existing marked content reference. + * + * @param dictionary the existing dictionary of the + * marked-content reference + */ + public PDMarkedContentReference(COSDictionary dictionary) + { + this.dictionary = dictionary; + } + + + public COSBase getCOSObject() + { + return this.dictionary; + } + + /** + * Gets the page. + * + * @return the page + */ + public PDPage getPage() + { + COSDictionary pg = (COSDictionary) this.getCOSDictionary().getDictionaryObject("Pg"); + if (pg != null) + { + return new PDPage(pg); + } + return null; + } + + /** + * Sets the page. + * + * @param page the page + */ + public void setPage(PDPage page) + { + this.getCOSDictionary().setItem("Pg", page); + } + + /** + * Gets the marked content identifier.
+ * + * @return the marked content identifier + */ + public int getMCID() + { + return this.getCOSDictionary().getInt("MCID"); + } + + /** + * Sets the marked content identifier. + * + * @param mcid the marked content identifier + */ + public void setMCID(int mcid) + { + this.getCOSDictionary().setInt("MCID", mcid); + } + + + @Override + public String toString() + { + return new StringBuilder() + .append("mcid=").append(this.getMCID()).toString(); + } + +} Index: src/main/java/org/apache/pdfbox/util/operator/EndMarkedContentSequence.java =================================================================== --- src/main/java/org/apache/pdfbox/util/operator/EndMarkedContentSequence.java (revision 0) +++ src/main/java/org/apache/pdfbox/util/operator/EndMarkedContentSequence.java (revision 0) @@ -0,0 +1,26 @@ +package org.apache.pdfbox.util.operator; + +import java.io.IOException; +import java.util.List; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.util.PDFMarkedContentExtractor; +import org.apache.pdfbox.util.PDFOperator; + +public class EndMarkedContentSequence extends OperatorProcessor +{ + + /** + * process : EMC : Ends a marked-content sequence begun by BMC or BDC. 
+ */ + @Override + public void process(PDFOperator operator, List arguments) + throws IOException + { + if (this.context instanceof PDFMarkedContentExtractor) + { + ((PDFMarkedContentExtractor) this.context).endMarkedContentSequence(); + } + } + +} Index: src/main/java/org/apache/pdfbox/util/operator/Invoke.java =================================================================== --- src/main/java/org/apache/pdfbox/util/operator/Invoke.java (revision 909807) +++ src/main/java/org/apache/pdfbox/util/operator/Invoke.java (working copy) @@ -23,6 +23,7 @@ import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; +import org.apache.pdfbox.util.PDFMarkedContentExtractor; import org.apache.pdfbox.util.PDFOperator; import java.io.IOException; @@ -54,6 +55,10 @@ Map xobjects = context.getXObjects(); PDXObject xobject = (PDXObject) xobjects.get(name.getName()); + if (this.context instanceof PDFMarkedContentExtractor) + { + ((PDFMarkedContentExtractor) this.context).xobject(xobject); + } if(xobject instanceof PDXObjectForm) {