Index: src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java =================================================================== --- src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java (revision 1297936) +++ src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java (working copy) @@ -25,19 +25,35 @@ public void testName(){ byte [] buff = new byte[20]; - String sb1 = "abcdefghijklmnopqrstuvwxyz"; - int off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1); + String sb1 = "abcefghijklmnopqrstuvwxyz"; + int off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1, TarArchiveCodec.CODEC_8BIT); assertEquals(off, 20); - String sb2 = TarUtils.parseName(buff, 1, 10); + String sb2 = TarUtils.parseName(buff, 1, 10, TarArchiveCodec.CODEC_8BIT); assertEquals(sb2,sb1.substring(0,10)); - sb2 = TarUtils.parseName(buff, 1, 19); + sb2 = TarUtils.parseName(buff, 1, 19, TarArchiveCodec.CODEC_8BIT); assertEquals(sb2,sb1.substring(0,19)); buff = new byte[30]; - off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1); + off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1, TarArchiveCodec.CODEC_8BIT); assertEquals(off, 30); - sb2 = TarUtils.parseName(buff, 1, buff.length-1); + sb2 = TarUtils.parseName(buff, 1, buff.length-1, TarArchiveCodec.CODEC_8BIT); assertEquals(sb1, sb2); } + + public void testNameUTF8() throws Exception{ + byte [] buff = new byte[20]; + String sb1 = "abcd\uC3A8efghijklmnopqrstuvwxyz"; + int off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1, TarArchiveCodec.CODEC_UTF8); + assertEquals(off, 20); + String sb2 = TarUtils.parseName(buff, 1, 10, TarArchiveCodec.CODEC_UTF8); + assertEquals(sb2,sb1.substring(0,8)); + sb2 = TarUtils.parseName(buff, 1, 19, TarArchiveCodec.CODEC_UTF8); + assertEquals(sb2,sb1.substring(0,17)); + buff = new byte[30]; + off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1, TarArchiveCodec.CODEC_UTF8); + assertEquals(off, 30); + sb2 = 
TarUtils.parseName(buff, 1, buff.length-1, TarArchiveCodec.CODEC_UTF8); + assertEquals(sb1, sb2); + } public void testParseOctal() throws Exception{ long value; @@ -198,8 +214,8 @@ private void checkName(String string) { byte buff[] = new byte[100]; - int len = TarUtils.formatNameBytes(string, buff, 0, buff.length); - assertEquals(string, TarUtils.parseName(buff, 0, len)); + int len = TarUtils.formatNameBytes(string, buff, 0, buff.length, TarArchiveCodec.CODEC_8BIT); + assertEquals(string, TarUtils.parseName(buff, 0, len, TarArchiveCodec.CODEC_8BIT)); } public void testReadNegativeBinary8Byte() { Index: src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java =================================================================== --- src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java (revision 1297936) +++ src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java (working copy) @@ -52,32 +52,48 @@ private byte[] readBuf; protected final TarBuffer buffer; private TarArchiveEntry currEntry; + private TarArchiveCodec codec; /** * Constructor for TarInputStream. * @param is the input stream to use */ public TarArchiveInputStream(InputStream is) { - this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); + this(is, TarArchiveCodec.CODEC_8BIT, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); } /** * Constructor for TarInputStream. * @param is the input stream to use + * @param encoding the encoding to use for converting from + * bytes to file names. + */ + public TarArchiveInputStream(InputStream is, String encoding) { + this(is, TarArchiveCodec.getArchiveCodec(encoding), + TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); + } + + /** + * Constructor for TarInputStream. 
+ * @param is the input stream to use + * @param codec the codec to use for converting file names * @param blockSize the block size to use */ - public TarArchiveInputStream(InputStream is, int blockSize) { - this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE); + public TarArchiveInputStream(InputStream is, TarArchiveCodec codec, int blockSize) { + this(is, codec, blockSize, TarBuffer.DEFAULT_RCDSIZE); } /** * Constructor for TarInputStream. * @param is the input stream to use + * @param codec the codec to use for converting file names * @param blockSize the block size to use * @param recordSize the record size to use */ - public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) { + public TarArchiveInputStream(InputStream is, TarArchiveCodec codec, + int blockSize, int recordSize) { this.buffer = new TarBuffer(is, blockSize, recordSize); + this.codec = codec; this.readBuf = null; this.hasHitEOF = false; } @@ -196,7 +212,7 @@ } try { - currEntry = new TarArchiveEntry(headerBuf); + currEntry = new TarArchiveEntry(headerBuf, codec); } catch (IllegalArgumentException e) { IOException ioe = new IOException("Error detected parsing the header"); ioe.initCause(e); Index: src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java =================================================================== --- src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (revision 1297936) +++ src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (working copy) @@ -67,6 +67,8 @@ private int longFileMode = LONGFILE_ERROR; private int bigFileMode = BIGFILE_ERROR; + private TarArchiveCodec codec; + private boolean closed = false; /** Indicates if putArchiveEntry has been called without closeArchiveEntry */ @@ -82,28 +84,41 @@ * @param os the output stream to use */ public TarArchiveOutputStream(OutputStream os) { - this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); + this(os, 
TarArchiveCodec.CODEC_8BIT, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); } /** * Constructor for TarInputStream. * @param os the output stream to use + * @param encoding the encoding to use for converting name to bytes. + */ + public TarArchiveOutputStream(OutputStream os, String encoding) { + this(os, TarArchiveCodec.getArchiveCodec(encoding), + TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); + } + + + /** + * Constructor for TarInputStream. + * @param os the output stream to use * @param blockSize the block size to use */ public TarArchiveOutputStream(OutputStream os, int blockSize) { - this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE); + this(os, TarArchiveCodec.CODEC_8BIT, blockSize, TarBuffer.DEFAULT_RCDSIZE); } /** * Constructor for TarInputStream. * @param os the output stream to use + * @param codec the codec to use for converting name to bytes * @param blockSize the block size to use * @param recordSize the record size to use */ - public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) { + public TarArchiveOutputStream(OutputStream os, TarArchiveCodec codec, int blockSize, int recordSize) { out = new CountingOutputStream(os); this.buffer = new TarBuffer(out, blockSize, recordSize); + this.codec = codec; this.assemLen = 0; this.assemBuf = new byte[recordSize]; this.recordBuf = new byte[recordSize]; @@ -247,7 +262,7 @@ writePaxHeaders(entry.getName(), paxHeaders); } - entry.writeEntryHeader(recordBuf, bigFileMode == BIGFILE_STAR); + entry.writeEntryHeader(recordBuf, bigFileMode == BIGFILE_STAR, codec); buffer.writeRecord(recordBuf); currBytes = 0; Index: src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveCodec.java =================================================================== --- src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveCodec.java (revision 0) +++ src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveCodec.java (revision 0) @@ -0,0 +1,134 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.tar; + +import java.io.UnsupportedEncodingException; + +/** + * Class for encoder/decoder functionality which can be + * used to encode / decode the file names in encodings other + * than the standard 8bit only. + */ +abstract class TarArchiveCodec { + + final static TarArchiveCodec CODEC_8BIT = new Tar8BitArchiveCodec(); + final static TarArchiveCodec CODEC_UTF8 = new TarDefaultArchiveCodec("UTF-8"); + + /** + * The decoding method for decoding the file name. + * @param bytes the byte array containing the encoded name. + * @param offset the offset in the byte array to start decoding the name from. + * @param len the number of bytes in the byte array that make up the name. + * @return the string representation of the name. + * @throws UnsupportedEncodingException thrown when using an unsupported encoding. + */ + abstract String decode(byte[] bytes, int offset, int len) throws UnsupportedEncodingException; + + /** + * The encoding method for encoding the file name. + * @param bytes the byte array where the name is to be stored. + * @param offset the offset in the byte array to start encoding the name to. 
+ * @param len the maximum number of bytes to copy into the byte array. + * @throws UnsupportedEncodingException thrown when using an unsupported encoding. + */ + abstract void encode(String name, byte[] bytes, int offset, int len) throws UnsupportedEncodingException; + + /** + * @param encoding the encoding to use for en/decoding file names. + * @return the codec instance configured for the specified encoding. + */ + static TarArchiveCodec getArchiveCodec(String encoding) { + return new TarDefaultArchiveCodec(encoding); + } +} +/** + * archive codec with standard 8bit naming functionality. + */ +class Tar8BitArchiveCodec extends TarArchiveCodec { + + /** + * {@inheritDoc} + * @see org.apache.commons.compress.archivers.tar.TarArchiveCodec#decode(byte[], int, int) + */ + @Override + public String decode(byte[] bytes, int offset, int len) { + + StringBuffer result = new StringBuffer(len); + int end = offset + len; + + for (int i = offset; i < end; ++i) { + byte b = bytes[i]; + result.append((char) (b & 0xFF)); // Allow for sign-extension + } + return result.toString(); + } + + /** + * {@inheritDoc} + * @see org.apache.commons.compress.archivers.tar.TarArchiveCodec#encode(java.lang.String, byte[], int, int) + */ + @Override + void encode(String name, byte[] bytes, int offset, int len) + throws UnsupportedEncodingException { + + for (int i = 0; i < len && i < name.length(); ++i) { + bytes[offset + i] = (byte) name.charAt(i); + } + } +} + +/** + * archive codec which uses standard string encoding + * mechanisms for en/decoding names. + */ +class TarDefaultArchiveCodec extends TarArchiveCodec { + + String encoding; + + /** + * @param encoding the encoding to use for de/coding the + * file names. 
+ */ + TarDefaultArchiveCodec(String encoding) { + this.encoding = encoding; + } + + /** + * {@inheritDoc} + * @see org.apache.commons.compress.archivers.tar.TarArchiveCodec#decode(byte[], int, int) + */ + @Override + public String decode(byte[] bytes, int offset, int len) + throws UnsupportedEncodingException { + + return new String(bytes, offset, len, encoding); + } + + /** + * {@inheritDoc} + * @see org.apache.commons.compress.archivers.tar.TarArchiveCodec#encode(java.lang.String, byte[], int, int) + */ + @Override + void encode(String name, byte[] bytes, int offset, int len) + throws UnsupportedEncodingException { + + byte []encoded = name.getBytes(encoding); + System.arraycopy(encoded, 0, bytes, offset, Math.min(len, encoded.length)); + } +} Index: src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java =================================================================== --- src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java (revision 1297936) +++ src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java (working copy) @@ -18,7 +18,9 @@ */ package org.apache.commons.compress.archivers.tar; +import java.io.UnsupportedEncodingException; import java.math.BigInteger; +import java.util.Arrays; /** * This class provides static utility methods to work with byte streams. @@ -208,21 +210,23 @@ * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @param length The maximum number of bytes to parse. + * @param codec The codec to use for de/coding the file names. + * @throws IllegalArgumentException if the codec is misconfigured. * @return The entry name.
*/ - public static String parseName(byte[] buffer, final int offset, final int length) { - StringBuffer result = new StringBuffer(length); - int end = offset + length; - - for (int i = offset; i < end; ++i) { - byte b = buffer[i]; - if (b == 0) { // Trailing null - break; - } - result.append((char) (b & 0xFF)); // Allow for sign-extension - } - - return result.toString(); + public static String parseName(byte[] buffer, final int offset, + final int length, TarArchiveCodec codec) { + + int len = 0; + while (len < length && buffer[offset + len] != '\0') { + len++; + } + + try { + return codec.decode(buffer, offset, len); + } catch (UnsupportedEncodingException ne) { + throw new IllegalArgumentException(ne); + } } /** @@ -238,21 +242,21 @@ * @param buf The buffer where the name is to be stored. * @param offset The starting offset into the buffer * @param length The maximum number of header bytes to copy. + * @param codec The codec to use for encoding the name. * @return The updated offset, i.e. offset + length + * @throws IllegalArgumentException thrown when the codec is + * misconfigured. */ - public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) { - int i; + public static int formatNameBytes(String name, byte[] buf, final int offset, + final int length, TarArchiveCodec codec) { - // copy until end of input or output is reached. 
- for (i = 0; i < length && i < name.length(); ++i) { - buf[offset + i] = (byte) name.charAt(i); + Arrays.fill(buf, offset, offset + length, (byte)0); + try { + codec.encode(name, buf, offset, length); + } catch (UnsupportedEncodingException u) { + throw new IllegalArgumentException("Unsupported encoding", u); } - // Pad any remaining output bytes with NUL - for (; i < length; ++i) { - buf[offset + i] = 0; - } - return offset + length; } Index: src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java =================================================================== --- src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java (revision 1297936) +++ src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java (working copy) @@ -311,10 +311,24 @@ */ public TarArchiveEntry(byte[] headerBuf) { this(); - parseTarHeader(headerBuf); + parseTarHeader(headerBuf, TarArchiveCodec.CODEC_8BIT); } + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @param codec The codec to use when parsing the file name. + * @throws IllegalArgumentException if any of the numeric fields have an invalid format + */ + public TarArchiveEntry(byte[] headerBuf, TarArchiveCodec codec) { + this(); + parseTarHeader(headerBuf, codec); + } + + /** * Determine if the two entries are equal. Equality is determined * by the header names being equal. * @@ -747,22 +761,35 @@ * @param outbuf The tar entry header buffer to fill in. */ public void writeEntryHeader(byte[] outbuf) { - writeEntryHeader(outbuf, false); + writeEntryHeader(outbuf, false, TarArchiveCodec.CODEC_8BIT); } /** * Write an entry's header information to a header buffer. * + *
This method does not use the star/GNU tar/BSD tar extensions.
+ * * @param outbuf The tar entry header buffer to fill in. + * @param encoding The encoding to use when converting name to bytes. + */ + public void writeEntryHeader(byte[] outbuf, String encoding) { + writeEntryHeader(outbuf, false, TarArchiveCodec.getArchiveCodec(encoding)); + } + + /** + * Write an entry's header information to a header buffer. + * + * @param outbuf The tar entry header buffer to fill in. * @param starMode whether to use the star/GNU tar/BSD tar * extension for numeric fields if their value doesn't fit in the * maximum size of standard tar archives + * @param codec The codec to use for encoding names to bytes. * @since Apache Commons Compress 1.4 */ - public void writeEntryHeader(byte[] outbuf, boolean starMode) { + public void writeEntryHeader(byte[] outbuf, boolean starMode, TarArchiveCodec codec) { int offset = 0; - offset = TarUtils.formatNameBytes(name, outbuf, offset, NAMELEN); + offset = TarUtils.formatNameBytes(name, outbuf, offset, NAMELEN, codec); offset = writeEntryHeaderField(mode, outbuf, offset, MODELEN, starMode); offset = writeEntryHeaderField(userId, outbuf, offset, UIDLEN, starMode); @@ -779,11 +806,11 @@ } outbuf[offset++] = linkFlag; - offset = TarUtils.formatNameBytes(linkName, outbuf, offset, NAMELEN); - offset = TarUtils.formatNameBytes(magic, outbuf, offset, MAGICLEN); - offset = TarUtils.formatNameBytes(version, outbuf, offset, VERSIONLEN); - offset = TarUtils.formatNameBytes(userName, outbuf, offset, UNAMELEN); - offset = TarUtils.formatNameBytes(groupName, outbuf, offset, GNAMELEN); + offset = TarUtils.formatNameBytes(linkName, outbuf, offset, NAMELEN, codec); + offset = TarUtils.formatNameBytes(magic, outbuf, offset, MAGICLEN, codec); + offset = TarUtils.formatNameBytes(version, outbuf, offset, VERSIONLEN, codec); + offset = TarUtils.formatNameBytes(userName, outbuf, offset, UNAMELEN, codec); + offset = TarUtils.formatNameBytes(groupName, outbuf, offset, GNAMELEN, codec); offset = writeEntryHeaderField(devMajor, 
outbuf, offset, DEVLEN, starMode); offset = writeEntryHeaderField(devMinor, outbuf, offset, DEVLEN, @@ -815,12 +842,13 @@ * Parse an entry's header information from a header buffer. * * @param header The tar entry header buffer to get information from. + * @param codec the encoder/decoder to use when parsing file name's. * @throws IllegalArgumentException if any of the numeric fields have an invalid format */ - public void parseTarHeader(byte[] header) { + public void parseTarHeader(byte[] header, TarArchiveCodec codec) { int offset = 0; - name = TarUtils.parseName(header, offset, NAMELEN); + name = TarUtils.parseName(header, offset, NAMELEN, codec); offset += NAMELEN; mode = (int) TarUtils.parseOctalOrBinary(header, offset, MODELEN); offset += MODELEN; @@ -834,15 +862,15 @@ offset += MODTIMELEN; offset += CHKSUMLEN; linkFlag = header[offset++]; - linkName = TarUtils.parseName(header, offset, NAMELEN); + linkName = TarUtils.parseName(header, offset, NAMELEN, codec); offset += NAMELEN; - magic = TarUtils.parseName(header, offset, MAGICLEN); + magic = TarUtils.parseName(header, offset, MAGICLEN, codec); offset += MAGICLEN; - version = TarUtils.parseName(header, offset, VERSIONLEN); + version = TarUtils.parseName(header, offset, VERSIONLEN, codec); offset += VERSIONLEN; - userName = TarUtils.parseName(header, offset, UNAMELEN); + userName = TarUtils.parseName(header, offset, UNAMELEN, codec); offset += UNAMELEN; - groupName = TarUtils.parseName(header, offset, GNAMELEN); + groupName = TarUtils.parseName(header, offset, GNAMELEN, codec); offset += GNAMELEN; devMajor = (int) TarUtils.parseOctalOrBinary(header, offset, DEVLEN); offset += DEVLEN; @@ -866,7 +894,7 @@ } case FORMAT_POSIX: default: { - String prefix = TarUtils.parseName(header, offset, PREFIXLEN); + String prefix = TarUtils.parseName(header, offset, PREFIXLEN, codec); // SunOS tar -E does not add / to directory names, so fix // up to be consistent if (isDirectory() && !name.endsWith("/")){ Index: pom.xml 
=================================================================== --- pom.xml (revision 1297936) +++ pom.xml (working copy) @@ -129,17 +129,6 @@