Index: src/main/java/org/apache/pdfbox/cos/COSDocument.java
===================================================================
--- src/main/java/org/apache/pdfbox/cos/COSDocument.java	(revision 767669)
+++ src/main/java/org/apache/pdfbox/cos/COSDocument.java	(working copy)
@@ -45,16 +45,16 @@
     private float version;
 
     /**
-     * added objects (actually preserving original sequence).
+     * Maps ObjectKeys to a COSObject in insertion order. References to these objects
+     * are also stored in COSDictionary objects that map a name to a specific object.
      */
-    private List objects = new ArrayList();
-
+    private Map objectPool = new java.util.LinkedHashMap();
+    
     /**
-     * a pool of objects read/referenced so far
-     * used to resolve indirect object references.
+     * Maps object and generation ids to object byte offsets
      */
-    private Map objectPool = new HashMap();
-
+    private Map xrefTable = new HashMap();
+    
     /**
      * Document trailer dictionary.
      */
@@ -139,7 +139,7 @@
     public COSObject getObjectByType( COSName type ) throws IOException
     {
         COSObject retval = null;
-        Iterator iter = objects.iterator();
+        Iterator iter = objectPool.values().iterator();
         while( iter.hasNext() && retval == null)
         {
             COSObject object = (COSObject)iter.next();
@@ -185,7 +185,7 @@
     public List getObjectsByType( COSName type ) throws IOException
     {
         List retval = new ArrayList();
-        Iterator iter = objects.iterator();
+        Iterator iter = objectPool.values().iterator();
         while( iter.hasNext() )
         {
             COSObject object = (COSObject)iter.next();
@@ -213,7 +213,7 @@
      */
     public void print()
     {
-        Iterator iter = objects.iterator();
+        Iterator iter = objectPool.values().iterator();
         while( iter.hasNext() )
         {
             COSObject object = (COSObject)iter.next();
@@ -299,29 +299,6 @@
     }
 
     /**
-     * This will create an object for this document.
-     *
-     * Create an indirect object out of the direct type and include in the document
-     * for later lookup via document a map from direct object to indirect object
-     * is maintained. this provides better support for manual PDF construction.
-     *
-     * @param base the base object to wrap in an indirect object.
-     *
-     * @return The pdf object that wraps the base, or creates a new one.
-     */
-    /**
-    public COSObject createObject( COSBase base )
-    {
-        COSObject obj = (COSObject)objectMap.get(base);
-        if (obj == null)
-        {
-            obj = new COSObject( base );
-            obj.addTo(this);
-        }
-        return obj;
-    }**/
-
-    /**
      * This will get the document catalog.
      *
      * Maybe this should move to an object at PDFEdit level
@@ -347,7 +324,7 @@
      */
     public List getObjects()
     {
-        return new ArrayList(objects);
+        return new ArrayList(objectPool.values());
     }
 
     /**
@@ -456,28 +433,6 @@
     }
 
     /**
-     * This will add an object to this document.
-     * the method checks if obj is already present as there may be cyclic dependencies
-     *
-     * @param obj The object to add to the document.
-     * @return The object that was actually added to this document, if an object reference already
-     * existed then that will be returned.
-     *
-     * @throws IOException If there is an error adding the object.
-     */
-    public COSObject addObject(COSObject obj) throws IOException
-    {
-        COSObjectKey key = null;
-        if( obj.getObjectNumber() != null )
-        {
-            key = new COSObjectKey( obj );
-        }
-        COSObject fromPool = getObjectFromPool( key );
-        fromPool.setObject( obj.getObject() );
-        return fromPool;
-    }
-
-    /**
      * This will get an object from the pool.
      *
      * @param key The object key.
@@ -503,9 +458,25 @@
                 obj.setGenerationNumber( new COSInteger( key.getGeneration() ) );
                 objectPool.put(key, obj);
             }
-            objects.add( obj );
-        }
-
+        }  
         return obj;
     }
+    /**
+     * Adds one entry to the xref map, recording the byte offset at which
+     * the object identified by the given key is located in the file.
+     * @param objKey The object key, carrying the object and generation numbers
+     * @param offset The byte offset in the file; stored boxed as an Integer
+     */
+    public void setXRef(COSObjectKey objKey, int offset) {
+        xrefTable.put(objKey, new Integer(offset));
+    }
+    
+    /**
+     * Returns the xrefTable which is a mapping of ObjectKeys
+     * to byte offsets in the file.
+     * @return the live internal map (not a copy) of COSObjectKey to Integer offset
+     */
+    public Map getXrefTable(){
+        return xrefTable;
+    }
 }
Index: src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java	(revision 767669)
+++ src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java	(working copy)
@@ -20,15 +20,16 @@
 import java.io.InputStream;
 import java.io.IOException;
 
-import java.rmi.server.LogStream;
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 import java.util.logging.Level;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.exceptions.LoggingObject;
@@ -53,10 +54,15 @@
 
     private static final String PDF_HEADER = "%PDF-";
     private static final String FDF_HEADER = "%FDF-";
-    private COSDocument document;
-    private boolean forceParsing = false;
-
+    private boolean forceParsing = false; 
+    
     /**
+     * A list of duplicate objects found when Parsing the PDF
+     * File. 
+     */
+    private List conflictList = new ArrayList();
+   
+    /**
      * Temp file directory.
      */
     private File tempDirectory = null;
@@ -193,6 +199,7 @@
                 {
                     document.dereferenceObjectStreams();
                 }
+                ConflictObj.resolveConflicts(document, conflictList);     
             }
             catch( IOException e ){
                 /*
@@ -226,6 +233,7 @@
             pdfSource.close();
         }
     }
+    
     /**
      * Skip to the start of the next object.  This is used to recover
      * from a corrupt object. This should handle all cases that parseObject
@@ -299,14 +307,14 @@
          */
         if (header.startsWith(PDF_HEADER)) {
             if(!header.matches(PDF_HEADER + "\\d.\\d")) {
-                String headerGarbage = header.substring(PDF_HEADER.length()+3, header.length());
+                String headerGarbage = header.substring(PDF_HEADER.length()+3, header.length()) + "\n";
                 header = header.substring(0, PDF_HEADER.length()+3);
                 pdfSource.unread(headerGarbage.getBytes());
             }
         }
         else {
             if(!header.matches(FDF_HEADER + "\\d.\\d")) {
-                String headerGarbage = header.substring(FDF_HEADER.length()+3, header.length());
+                String headerGarbage = header.substring(FDF_HEADER.length()+3, header.length()) + "\n";
                 header = header.substring(0, FDF_HEADER.length()+3);
                 pdfSource.unread(headerGarbage.getBytes());
             }
@@ -383,6 +391,7 @@
      * @throws IOException If an IO error occurs.
      */
     private boolean parseObject() throws IOException{
+        int currentObjByteOffset = pdfSource.getOffset();
         boolean isEndOfFile = false; 
         skipSpaces();
         //peek at the next character to determine the type of object we are parsing
@@ -488,10 +497,20 @@
                 }
                 endObjectKey = readString();
             }
+            
             COSObjectKey key = new COSObjectKey( number, genNum );
             COSObject pdfObject = document.getObjectFromPool( key );
-            pdfObject.setObject(pb);
-
+            if(pdfObject.getObject() == null){
+                pdfObject.setObject(pb);
+            }
+            /*
+             * If the object we returned already has a baseobject, then we have a conflict
+             * which we will resolve using information after we parse the xref table.
+             */
+            else{
+                addObjectToConflicts(currentObjByteOffset, key, pb); 
+            }
+            
             if( !endObjectKey.equals( "endobj" ) )
             {
                 if( !pdfSource.isEOF() )
@@ -533,6 +552,22 @@
         }
         return isEndOfFile;
     }
+    
+   /**
+    * Wraps a duplicate definition in a new COSObject and queues it on the conflictList.
+    * @param offset the stream offset recorded by the caller for this definition
+    * @param key The COSObjectKey supplying the object and generation numbers
+    * @param pb The COSBase parsed for this definition
+    * @throws IOException declared by the signature; presumably raised by the COSObject setters - TODO confirm
+    */
+    private void addObjectToConflicts(int offset, COSObjectKey key, COSBase pb) throws IOException{
+        COSObject obj = new COSObject(null);
+        obj.setObjectNumber( new COSInteger( key.getNumber() ) );
+        obj.setGenerationNumber( new COSInteger( key.getGeneration() ) );
+        obj.setObject(pb);
+        ConflictObj conflictObj = new ConflictObj(offset, key, obj);
+        conflictList.add(conflictObj);   
+    }
 
     /**
      * This will parse the startxref section from the stream.
@@ -578,7 +613,7 @@
          * Each starts with a starting object id and a count.
          */
         while(true){
-            int start = readInt(); // first obj id
+            int currObjID = readInt(); // first obj id
             int count = readInt(); // the number of objects in the xref table
             skipSpaces();
             for(int i = 0; i < count; i++){
@@ -589,10 +624,25 @@
                     break;
                 }
                 //Ignore table contents
-                readLine();
+                String currentLine = readLine();
+                String[] splitString = currentLine.split(" ");
+                // A short or empty line has no type field; report it as corrupt below.
+                String entryType = splitString.length < 3 ? "" : splitString[2];
+                if(entryType.equals("n")){
+                    try{
+                        int currOffset = Integer.parseInt(splitString[0]);
+                        document.setXRef(new COSObjectKey(currObjID, Integer.parseInt(splitString[1])), currOffset);
+                    }
+                    catch(NumberFormatException e){
+                        throw new IOException(e.getMessage());
+                    }
+                }
+                else if(!entryType.equals("f")){
+                    throw new IOException("Corrupt XRefTable Entry - ObjID:" + currObjID);
+                }
+                currObjID++;
                 skipSpaces();
             }
-            addXref(new PDFXref(start, count));
             skipSpaces();
             char c = (char)pdfSource.peek();
             if(c < '0' || c > '9'){
@@ -622,6 +672,7 @@
             if (nextLine.startsWith("trailer")) {
                 byte[] b = nextLine.getBytes();
                 int len = "trailer".length();
+                pdfSource.unread('\n');
                 pdfSource.unread(b, len, b.length-len);
             }
             else {
@@ -647,4 +698,50 @@
         skipSpaces();
         return true;
     }
+    
+    /**
+     * Used to resolve conflicts when a PDF Document has multiple objects with
+     * the same id number. Ideally, we could use the Xref table when parsing
+     * the document to be able to determine which of the objects with the same ID
+     * is correct, but we do not have access to the Xref Table during parsing.
+     * Instead, we queue up the conflicts and resolve them after the Xref has
+     * been parsed. The Objects listed in the Xref Table are kept and the
+     * others are ignored.
+     */
+    private static class ConflictObj{
+
+        private int offset;
+        private COSObjectKey key;
+        private COSObject pdfObject;
+
+        public ConflictObj(int offset, COSObjectKey key,
+                COSObject pdfObject) {
+            this.offset = offset;
+            this.key = key;
+            this.pdfObject = pdfObject;
+        }
+        public String toString(){
+            return "Object(" + offset + ", " + key + ")";
+        }
+
+        /**
+         * Replaces the pooled object with a queued duplicate when the xref table
+         * lists that duplicate's byte offset for its own key. The lookup is by
+         * key, so an offset that belongs to a different object never matches.
+         * @param document the document providing getXrefTable() and getObjectFromPool()
+         * @param conflictList the ConflictObj instances queued during parsing
+         * @throws IOException as declared; propagated from the COSDocument/COSObject calls
+         */
+        private static void resolveConflicts(COSDocument document, List conflictList) throws IOException{
+            Iterator conflicts = conflictList.iterator();
+            while(conflicts.hasNext()){
+                ConflictObj o = (ConflictObj)conflicts.next();
+                Integer offset = new Integer(o.offset);
+                if(offset.equals(document.getXrefTable().get(o.key))){
+                    COSObject pdfObject = document.getObjectFromPool(o.key);
+                    pdfObject.setObject(o.pdfObject.getObject());
+                }
+            }
+        }
+    }
 }
Index: src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
===================================================================
--- src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java	(revision 767669)
+++ src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java	(working copy)
@@ -21,8 +21,6 @@
 import java.io.IOException;
 import java.io.OutputStream;
 
-import java.util.ArrayList;
-import java.util.List;
 import java.util.logging.Level;
 
 import org.apache.pdfbox.io.ByteArrayPushBackInputStream;
@@ -72,16 +70,9 @@
     //protected PushBackByteArrayStream pdfSource;
     protected PushBackInputStream pdfSource;
 
+    protected COSDocument document;
+    
     /**
-     * moved xref here, is a persistence construct
-     * maybe not needed anyway when not read from behind with delayed
-     * access to objects.
-     */
-    private List xrefs = new ArrayList();
-
-    private COSDocument document;
-
-    /**
      * Constructor.
      *
      * @param input The input stream to read the data from.
@@ -1066,34 +1057,23 @@
     }
 
     /**
-     * This will read bytes until the end of line marker occurs.
+     * This will read bytes until the first end of line byte, which is consumed but not
+     * returned; a following EOL byte (presumably the LF of a CR LF pair) is left unread.
+     * Note: if you later unread the result, append a newline character to it first.
      *
      * @return The characters between the current position and the end of the line.
      *
      * @throws IOException If there is an error reading from the stream.
      */
-    protected String readLine() throws IOException
-    {
-        int c = pdfSource.read();
-        while(isWhitespace(c) && c != -1)
-        {
-            c = pdfSource.read();
-        }
+    protected String readLine() throws IOException {
         StringBuffer buffer = new StringBuffer( 11 );
-
-        while( !isEOL(c) && c != -1 )
-        {
+        
+        int c;
+        while ((c = pdfSource.read()) != -1) {
+            if (isEOL(c))
+                break;
             buffer.append( (char)c );
-            c = pdfSource.read();
         }
-        while( isEOL(c) && c != -1 )
-        {
-            c = pdfSource.read();
-        }
-        if (c != -1)
-        {
-            pdfSource.unread(c);
-        }
         return buffer.toString();
     }
 
@@ -1218,24 +1198,4 @@
         }
         return retval;
     }
-
-    /**
-     * This will add an xref.
-     *
-     * @param xref The xref to add.
-     */
-    public void addXref( PDFXref xref )
-    {
-        xrefs.add(xref);
-    }
-
-    /**
-     * This will get all of the xrefs.
-     *
-     * @return A list of all xrefs.
-     */
-    public List getXrefs()
-    {
-        return xrefs;
-    }
 }
Index: src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
===================================================================
--- src/main/java/org/apache/pdfbox/io/PushBackInputStream.java	(revision 767669)
+++ src/main/java/org/apache/pdfbox/io/PushBackInputStream.java	(working copy)
@@ -27,7 +27,11 @@
  */
 public class PushBackInputStream extends java.io.PushbackInputStream
 {
-
+    /*
+     * The current position in the file. 
+     */
+    private int offset = 0;
+    
     /**
      * Constructor.
      *
@@ -61,8 +65,65 @@
         }
         return result;
     }
-
+    
     /**
+     * Returns the tracked position: bytes read so far minus bytes pushed back.
+     * @return the int byte offset (maintained by the read/unread overrides of this class)
+     */
+    public int getOffset(){
+        return offset;
+    }
+    
+    /**
+     * {@inheritDoc} Advances the tracked offset by one unless -1 is returned.
+     */
+    public int read() throws IOException{
+        int retval = super.read();
+        if (retval != -1)
+            offset++;
+        return retval;
+    }
+    
+    /**
+     * {@inheritDoc} Delegates to read(b, 0, b.length), which maintains the offset.
+     */
+    public int read(byte[] b) throws IOException{
+        return this.read(b, 0, b.length);
+    }
+    /**
+     * {@inheritDoc} Advances the tracked offset by the returned count unless it is -1.
+     */
+    public int read(byte[] b, int off, int len) throws IOException{
+        int retval = super.read(b, off, len);
+        if (retval != -1)
+            offset += retval;
+        return retval;
+    }
+    
+    /**
+     * {@inheritDoc} The tracked offset moves back only after the push-back succeeds.
+     */
+    public void unread(int b) throws IOException{
+        super.unread(b);
+        offset--;
+    }
+    
+    /**
+     * {@inheritDoc} Delegates to unread(b, 0, b.length), which maintains the offset.
+     */
+    public void unread(byte[] b) throws IOException{
+        this.unread(b, 0, b.length);
+    }
+    
+    /**
+     * {@inheritDoc} The tracked offset moves back by len only after the push-back succeeds.
+     */
+    public void unread(byte[] b, int off, int len) throws IOException{
+        super.unread(b, off, len);
+        offset -= len;
+    }
+    
+    /**
      * A simple test to see if we are at the end of the stream.
      *
      * @return true if we are at the end of the stream.
