Index: src/changes/changes.xml
===================================================================
--- src/changes/changes.xml	(revision 1049113)
+++ src/changes/changes.xml	(working copy)
@@ -45,6 +45,9 @@
   </properties>
   <body>
     <release version="1.2" date="as in SVN" description="Release 1.2">
+      <action issue="COMPRESS-124" type="add" date="2010-12-15">
+        The TAR package now detects GNU sparse entries and skips them.
+      </action>
       <action issue="COMPRESS-122" type="add" date="2010-10-29">
         TarArchiveEntry provides access to the flags that determine
         whether it is an archived symbolic link, pipe or other
Index: src/main/java/org/apache/commons/compress/archivers/tar/package.html
===================================================================
--- src/main/java/org/apache/commons/compress/archivers/tar/package.html	(revision 1049113)
+++ src/main/java/org/apache/commons/compress/archivers/tar/package.html	(working copy)
@@ -24,7 +24,16 @@
     <p>There are many different format dialects that call themselves
       TAR.  The classes of this package can read and write archives in
       the traditional pre-POSIX <b>ustar</b> format and support GNU
-      specific extensions for long filenames that GNU tar itself by
-      now refers to as <b>oldgnu</b>.</p>
+      specific extensions for long filenames and sparse files that GNU
+      tar itself by now refers to as <b>oldgnu</b>.</p>
+
+    <p><b>ATTENTION:</b> GNU sparse file support is only partially
+      implemented: sparse chunks are not filled back in while reading an
+      archive. Reading an archive with sparse files and writing those files
+      out therefore leads to invalid output files.<br>
+      It was not the intent of the contributor to offer fully implemented
+      GNU sparse file support, as it was only necessary to be able to
+      read archives containing sparse files without the need to actually
+      write the sparse files correctly.</p>
   </body>
 </html>
Index: src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
===================================================================
--- src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java	(revision 1049113)
+++ src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java	(working copy)
@@ -19,6 +19,7 @@
 package org.apache.commons.compress.archivers.tar;
 
 import java.io.File;
+import java.nio.ByteBuffer;
 import java.util.Date;
 import java.util.Locale;
 
@@ -51,7 +52,7 @@
  * defaults and the File is set to null.
  *
  * <p>
- * The C structure for a Tar Entry's header is:
+ * The C structure for a POSIX Tar Entry's header is:
  * <pre>
  * struct header {
  * char name[100];     // TarConstants.NAMELEN    - offset   0
@@ -77,6 +78,30 @@
  * All unused bytes are set to null.
  * New-style GNU tar files are slightly different from the above.
  * </pre>
+ *
+ * <p>
+ * The C structure for an old GNU Tar Entry's header is:
+ * <pre>
+ * struct oldgnu_header {
+ * char unused_pad1[345]; // TarConstants.PAD1LEN_GNU       - offset 0
+ * char atime[12];        // TarConstants.ATIMELEN_GNU      - offset 345
+ * char ctime[12];        // TarConstants.CTIMELEN_GNU      - offset 357
+ * char offset[12];       // TarConstants.OFFSETLEN_GNU     - offset 369
+ * char longnames[4];     // TarConstants.LONGNAMESLEN_GNU  - offset 381
+ * char unused_pad2;      // TarConstants.PAD2LEN_GNU       - offset 385
+ * struct sparse sp[4];   // TarConstants.SPARSELEN_GNU     - offset 386
+ * char isextended;       // TarConstants.ISEXTENDEDLEN_GNU - offset 482
+ * char realsize[12];     // TarConstants.REALSIZELEN_GNU   - offset 483
+ * char unused_pad[17];   // TarConstants.PAD3LEN_GNU       - offset 495
+ * };
+ * </pre>
+ * Whereas, "struct sparse" is:
+ * <pre>
+ * struct sparse {
+ * char offset[12];   // offset 0
+ * char numbytes[12]; // offset 12
+ * };
+ * </pre>
  * 
  * @NotThreadSafe
  */
@@ -123,6 +148,12 @@
     /** The entry's minor device number. */
     private int devMinor;
 
+    /** If an extension sparse header follows. */
+    private boolean isExtended;
+
+    /** The entry's real size in case of a sparse file. */
+    private long realSize;
+
     /** The entry's file reference */
     private File file;
 
@@ -538,8 +569,35 @@
         this.size = size;
     }
 
+    /**
+     * Indicates in case of a sparse file if an extension sparse header
+     * follows.
+     *
+     * @return true if an extension sparse header follows.
+     */
+    public boolean isExtended() {
+        return isExtended;
+    }
 
     /**
+     * Get this entry's real file size in case of a sparse file.
+     *
+     * @return This entry's real file size.
+     */
+    public long getRealSize() {
+        return realSize;
+    }
+
+    /**
+     * Indicate if this entry is a GNU sparse block 
+     *
+     * @return true if this is a sparse extension provided by GNU tar
+     */
+    public boolean isGNUSparse() {
+        return linkFlag == LF_GNUTYPE_SPARSE;
+    }
+
+    /**
      * Indicate if this entry is a GNU long name block
      *
      * @return true if this is a long name extension provided by GNU tar
@@ -749,14 +807,35 @@
         offset += DEVLEN;
         devMinor = (int) TarUtils.parseOctal(header, offset, DEVLEN);
         offset += DEVLEN;
-        String prefix = TarUtils.parseName(header, offset, PREFIXLEN);
-        // SunOS tar -E does not add / to directory names, so fix up to be consistent
-        if (isDirectory() && !name.endsWith("/")){
-            name = name + "/";
+
+        int type = evaluateType(header);
+        switch (type) {
+        	  case FORMAT_OLDGNU: {
+                offset += ATIMELEN_GNU;
+                offset += CTIMELEN_GNU;
+                offset += OFFSETLEN_GNU;
+                offset += LONGNAMESLEN_GNU;
+                offset += PAD2LEN_GNU;
+                offset += SPARSELEN_GNU;
+                isExtended = TarUtils.parseBoolean(header, offset);
+                offset += ISEXTENDEDLEN_GNU;
+                realSize = TarUtils.parseOctal(header, offset, REALSIZELEN_GNU);
+                offset += REALSIZELEN_GNU;
+                break;
+            }
+        	  case FORMAT_POSIX:
+            default: {
+                String prefix = TarUtils.parseName(header, offset, PREFIXLEN);
+                // SunOS tar -E does not add / to directory names, so fix up to be consistent
+                if (isDirectory() && !name.endsWith("/")){
+                  name = name + "/";
+                  }
+                if (prefix.length() >0){
+                  name = prefix + "/" + name;
+                  }
+                break;
+            }
         }
-        if (prefix.length() >0){
-            name = prefix + "/" + name;
-        }
     }
 
     /**
@@ -801,5 +880,20 @@
         }
         return fileName;
     }
+
+    /**
+     * Evaluate an entry's header format from a header buffer.
+     *
+     * @param header The tar entry header buffer to evaluate the format for.
+     * @return format type
+     */
+    private int evaluateType(byte[] header) {
+        final ByteBuffer magic = ByteBuffer.wrap(header, MAGIC_OFFSET, MAGICLEN);
+        if (magic.compareTo(ByteBuffer.wrap(MAGIC_GNU.getBytes())) == 0)
+            return FORMAT_OLDGNU;
+        if (magic.compareTo(ByteBuffer.wrap(MAGIC_POSIX.getBytes())) == 0)
+            return FORMAT_POSIX;
+        return 0;
+    }
 }
 
Index: src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
===================================================================
--- src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java	(revision 1049113)
+++ src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java	(working copy)
@@ -185,23 +185,17 @@
             readBuf = null;
         }
 
-        byte[] headerBuf = buffer.readRecord();
-
-        if (headerBuf == null) {
-            hasHitEOF = true;
-        } else if (buffer.isEOFRecord(headerBuf)) {
-            hasHitEOF = true;
-        }
-
+        byte[] headerBuf = getRecord();
         if (hasHitEOF) {
             currEntry = null;
-        } else {
-            currEntry = new TarArchiveEntry(headerBuf);
-            entryOffset = 0;
-            entrySize = currEntry.getSize();
+            return null;
         }
 
-        if (currEntry != null && currEntry.isGNULongNameEntry()) {
+        currEntry = new TarArchiveEntry(headerBuf);
+        entryOffset = 0;
+        entrySize = currEntry.getSize();
+
+        if (currEntry.isGNULongNameEntry()) {
             // read in the name
             StringBuffer longName = new StringBuffer();
             byte[] buf = new byte[SMALL_BUFFER_SIZE];
@@ -223,13 +217,45 @@
             currEntry.setName(longName.toString());
         }
 
-        if (currEntry != null && currEntry.isPaxHeader()){ // Process Pax headers
+        if (currEntry.isPaxHeader()){ // Process Pax headers
             paxHeaders();
         }
 
+        if (currEntry.isGNUSparse()){ // Process sparse files
+            readGNUSparse();
+        }
+
         return currEntry;
     }
 
+    /**
+     * Get the next record in this tar archive. This reads a
+     * single record from the underlying buffer; it does not
+     * itself skip over any remaining data of the current
+     * entry.
+     * If the end of the archive has been reached (no record
+     * could be read, or an EOF record was read), hasHitEOF is
+     * set and null is returned.
+     *
+     * @return The next record in the archive, or null at EOF.
+     * @throws IOException on error
+     */
+    private byte[] getRecord() throws IOException {
+        if (hasHitEOF) {
+            return null;
+        }
+
+        byte[] headerBuf = buffer.readRecord();
+
+        if (headerBuf == null) {
+            hasHitEOF = true;
+        } else if (buffer.isEOFRecord(headerBuf)) {
+            hasHitEOF = true;
+        }
+
+        return hasHitEOF ? null : headerBuf;
+    }
+
     private void paxHeaders() throws IOException{
         BufferedReader br = new BufferedReader(new InputStreamReader(this, "UTF-8"));
         Map headers = new HashMap();
@@ -301,6 +327,35 @@
         }
     }
 
+    /**
+     * Reads and discards any sparse extension headers that follow the
+     * current entry; the sparse chunks themselves are not collected yet.
+     * If the archive ends in between, currEntry is set to null.
+     *
+     * @throws IOException on error
+     * @todo Sparse files are not really processed yet.
+     */
+    private void readGNUSparse() throws IOException {
+        /* we do not really process sparse files yet
+        sparses = new ArrayList();
+        sparses.addAll(currEntry.getSparses());
+        */
+        if (currEntry.isExtended()) {
+            TarArchiveSparseEntry entry;
+            do {
+                byte[] headerBuf = getRecord();
+                if (hasHitEOF) {
+                    currEntry = null;
+                    break;
+                }
+                entry = new TarArchiveSparseEntry(headerBuf);
+                /* we do not really process sparse files yet
+                sparses.addAll(entry.getSparses());
+                */
+            } while (entry.isExtended());
+        }
+    }
+
     public ArchiveEntry getNextEntry() throws IOException {
         return getNextTarEntry();
     }
@@ -317,8 +372,13 @@
      * @param numToRead The number of bytes to read.
      * @return The number of bytes read, or -1 at EOF.
      * @throws IOException on error
+     * 
+     * @todo Sparse files are not really processed yet.
      */
     public int read(byte[] buf, int offset, int numToRead) throws IOException {
+        /* we do not really process sparse files yet;
+         * sparse chunks would have to be filled in while reading
+         */
         int totalRead = 0;
 
         if (entryOffset >= entrySize) {
Index: src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
===================================================================
--- src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java	(revision 0)
+++ src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java	(revision 0)
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.archivers.tar;
+
+import java.io.IOException;
+
+/**
+ * This class represents a sparse entry in a Tar archive.
+ *
+ * <p>
+ * The C structure for a sparse entry is:
+ * <pre>
+ * struct sparse_header {
+ * struct sparse sp[21]; // TarConstants.SPARSELEN_GNU_SPARSE     - offset 0
+ * char isextended;      // TarConstants.ISEXTENDEDLEN_GNU_SPARSE - offset 504
+ * };
+ * </pre>
+ * Whereas, "struct sparse" is:
+ * <pre>
+ * struct sparse {
+ * char offset[12];   // offset 0
+ * char numbytes[12]; // offset 12
+ * };
+ * </pre>
+ */
+
+public class TarArchiveSparseEntry implements TarConstants {
+    /** If an extension sparse header follows. */
+    private boolean isExtended;
+
+    /**
+     * Construct an entry from the bytes of a sparse extension header;
+     * only the isextended flag is evaluated.
+     *
+     * @param headerBuf The header bytes from a tar archive entry.
+     * @throws IOException never thrown by this constructor as written
+     */
+    public TarArchiveSparseEntry(byte[] headerBuf) throws IOException {
+        int offset = 0;
+        offset += SPARSELEN_GNU_SPARSE;
+        isExtended = TarUtils.parseBoolean(headerBuf, offset);
+    }
+
+    public boolean isExtended() {
+        return isExtended;
+    }
+}
Index: src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
===================================================================
--- src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java	(revision 1049113)
+++ src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java	(working copy)
@@ -21,11 +21,23 @@
 /**
  * This interface contains all the definitions used in the package.
  *
+ * For tar formats (FORMAT_OLDGNU, FORMAT_POSIX, etc.) see GNU tar
+ * <I>tar.h</I> type <I>enum archive_format</I>
  */
 // CheckStyle:InterfaceIsTypeCheck OFF (bc)
 public interface TarConstants {
 
     /**
+     * GNU format as per before tar 1.12.
+     */
+    int    FORMAT_OLDGNU = 2;
+
+    /**
+     * Pure Posix format.
+     */
+    int    FORMAT_POSIX = 3;
+
+    /**
      * The length of the name field in a header buffer.
      */
     int    NAMELEN = 100;
@@ -102,6 +114,66 @@
     int    PREFIXLEN = 155;
 
     /**
+     * The length of the access time field in an old GNU header buffer.
+     * 
+     */
+    int    ATIMELEN_GNU = 12;
+
+    /**
+     * The length of the change time (ctime) field in an old GNU header buffer.
+     * 
+     */
+    int    CTIMELEN_GNU = 12;
+
+    /**
+     * The length of the multivolume start offset field in an old GNU header buffer. 
+     * 
+     */
+    int    OFFSETLEN_GNU = 12;
+
+    /**
+     * The length of the long names field in an old GNU header buffer. 
+     * 
+     */
+    int    LONGNAMESLEN_GNU = 4;
+
+    /**
+     * The length of the padding field in an old GNU header buffer. 
+     * 
+     */
+    int    PAD2LEN_GNU = 1;
+
+    /**
+     * The sum of the length of all sparse headers in an old GNU header buffer. 
+     * 
+     */
+    int    SPARSELEN_GNU = 96;
+
+    /**
+     * The length of the isextended field in an old GNU header buffer.
+     * 
+     */
+    int    ISEXTENDEDLEN_GNU = 1;
+
+    /**
+     * The length of the real size field in an old GNU header buffer. 
+     * 
+     */
+    int    REALSIZELEN_GNU = 12;
+
+    /**
+     * The sum of the length of all sparse headers in a sparse header buffer. 
+     * 
+     */
+    int    SPARSELEN_GNU_SPARSE = 504;
+
+    /**
+     * The length of the isextended field in a sparse header buffer.
+     * 
+     */
+    int    ISEXTENDEDLEN_GNU_SPARSE = 1;
+
+    /**
      * LF_ constants represent the "link flag" of an entry, or more commonly,
      * the "entry type". This is the "old way" of indicating a normal file.
      */
@@ -152,6 +224,12 @@
      */
     byte LF_GNUTYPE_LONGNAME = (byte) 'L';
 
+    /**
+     * Sparse file type.
+     * @since Apache Commons Compress 1.1.1
+     */
+    byte LF_GNUTYPE_SPARSE = (byte) 'S';
+
     // See "http://www.opengroup.org/onlinepubs/009695399/utilities/pax.html#tag_04_100_13_02"
 
     /**
Index: src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
===================================================================
--- src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java	(revision 1049113)
+++ src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java	(working copy)
@@ -106,6 +106,21 @@
         return result;
     }
 
+    /**
+     * Parse a boolean byte from a buffer.
+     * Only the single byte at the given offset is evaluated; the
+     * result is true if and only if that byte has the value 1.
+     *
+     * @param buffer The buffer from which to parse.
+     * @param offset The offset into the buffer from which to parse.
+     * @return The boolean value of the byte.
+     * @throws ArrayIndexOutOfBoundsException if offset lies outside
+     *         the buffer.
+     */
+    public static boolean parseBoolean(final byte[] buffer, final int offset) {
+        return (buffer[offset] == 1);
+    }
+
     // Helper method to generate the exception message
     private static String exceptionMessage(byte[] buffer, final int offset,
             final int length, int current, final byte currentByte) {
Index: src/site/xdoc/index.xml
===================================================================
--- src/site/xdoc/index.xml	(revision 1049113)
+++ src/site/xdoc/index.xml	(working copy)
@@ -69,7 +69,9 @@
           <p>The ar, cpio, tar and zip formats are supported as
             archivers where the <a href="zip.html">zip</a>
             implementation provides capabilities that go beyond the
-            features found in java.util.zip.</p>
+            features found in java.util.zip and the <a href="tar.html">tar</a>
+            implementation is able to handle archives containing so called
+            GNU sparse files.</p>
 
           <p>The compress component provides abstract base classes for
             compressors and archivers together with factories that can
Index: src/site/xdoc/tar.xml
===================================================================
--- src/site/xdoc/tar.xml	(revision 0)
+++ src/site/xdoc/tar.xml	(revision 0)
@@ -0,0 +1,56 @@
+<?xml version="1.0"?>
+<!--
+
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+-->
+<document>
+  <properties>
+    <title>Commons Compress TAR package</title>
+    <author email="dev@commons.apache.org">Commons Documentation Team</author>
+  </properties>
+  <body>
+    <section name="The TAR package">
+
+      <p>The TAR package provides features not found
+        in other Java implementations:</p>
+
+      <ul>
+        <li>Support for sparse files in old GNU format.</li>
+      </ul>
+
+      <subsection name="Old GNU format sparse files">
+
+        <p>With <a href="http://www.gnu.org/software/tar/">GNU Tar</a>
+          one can generate TAR archives storing so-called GNU sparse files
+          by invoking GNU Tar with the <code>--sparse</code> option (see the
+          <a href="http://www.gnu.org/software/tar/manual/tar.html#SEC134">
+          online manual</a> for further details).</p>
+
+        <p>At the time of writing the support for sparse files, it was not
+          necessary to correctly extract sparse files contained in a TAR
+          archive. As the extraction of such files is tedious, it was decided
+          to enhance the TAR package in a way that it at least extracts all
+          non-sparse files correctly but simply skips any contained sparse
+          file chunks. This means any extracted sparse files are corrupt as
+          their respective sparse chunks are not filled back in during the
+          extraction.</p>
+
+      </subsection>
+
+    </section>
+  </body>
+</document>
