Index: src/java/org/apache/nutch/util/DeflateUtils.java
===================================================================
--- src/java/org/apache/nutch/util/DeflateUtils.java	(revision 0)
+++ src/java/org/apache/nutch/util/DeflateUtils.java	(revision 0)
@@ -0,0 +1,143 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.zip.Inflater;
+import java.util.zip.InflaterInputStream;
+import java.util.zip.DeflaterOutputStream;
+
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ *  A collection of utility methods for working on deflated data.
+ */
+public class DeflateUtils {
+  
+  private static final Log LOG = LogFactory.getLog(DeflateUtils.class);
+  private static final int EXPECTED_COMPRESSION_RATIO= 5;
+  private static final int BUF_SIZE= 4096;
+
+  /**
+   * Inflates <code>in</code> without any limit on the output size.
+   * Equivalent to <code>inflateBestEffort(in, Integer.MAX_VALUE)</code>,
+   * so truncated or corrupted input yields whatever could be decoded.
+   */
+  public static final byte[] inflateBestEffort(byte[] in) {
+    final int noLimit = Integer.MAX_VALUE;
+    return inflateBestEffort(in, noLimit);
+  }
+
+  /**
+   * Returns an inflated copy of the input array, truncated to
+   * <code>sizeLimit</code> bytes, if necessary.  The input is read as a
+   * raw deflate stream (no zlib header).  If it has been truncated or
+   * corrupted, the failure is logged and whatever was inflated up to
+   * that point is returned, which may be an empty array.
+   */
+  public static final byte[] inflateBestEffort(byte[] in, int sizeLimit) {
+    ByteArrayOutputStream outStream =
+      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+
+    // "true" because HTTP does not provide zlib headers
+    Inflater inflater = new Inflater(true);
+    InflaterInputStream inStream =
+      new InflaterInputStream(new ByteArrayInputStream(in), inflater);
+
+    byte[] buf = new byte[BUF_SIZE];
+    int written = 0;
+    try {
+      while (true) {
+        int size = inStream.read(buf);
+        if (size <= 0)
+          break;
+        // compare against the remaining room so the sum cannot overflow
+        if (size > sizeLimit - written) {
+          outStream.write(buf, 0, sizeLimit - written);
+          break;
+        }
+        outStream.write(buf, 0, size);
+        written += size;
+      }
+    } catch (Exception e) {
+      // best effort: keep what was inflated before the failure
+      LOG.info( "Caught Exception in inflateBestEffort" );
+      e.printStackTrace(LogUtil.getWarnStream(LOG));
+    } finally {
+      // a caller-supplied Inflater is not released by the stream, so
+      // free its native zlib memory here instead of waiting for GC
+      inflater.end();
+    }
+
+    return outStream.toByteArray();
+  }
+
+
+  /**
+   * Returns an inflated copy of the input array.  The input must be in
+   * zlib format (with header), as produced by {@link #deflate(byte[])}.
+   * @throws IOException if the input cannot be properly decompressed
+   */
+  public static final byte[] inflate(byte[] in) throws IOException {
+    ByteArrayOutputStream outStream =
+      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    InflaterInputStream inStream =
+      new InflaterInputStream(new ByteArrayInputStream(in));
+
+    try {
+      byte[] buf = new byte[BUF_SIZE];
+      int size;
+      while ((size = inStream.read(buf)) > 0) {
+        outStream.write(buf, 0, size);
+      }
+    } finally {
+      // closing the stream releases the Inflater it created internally
+      inStream.close();
+    }
+    return outStream.toByteArray();
+  }
+
+  /**
+   * Returns a deflated (zlib-format) copy of the input array.  Failures
+   * while compressing are logged rather than thrown, in which case the
+   * bytes produced so far are returned.
+   */
+  public static final byte[] deflate(byte[] in) {
+    int initialCapacity = in.length / EXPECTED_COMPRESSION_RATIO;
+    ByteArrayOutputStream sink = new ByteArrayOutputStream(initialCapacity);
+    DeflaterOutputStream deflating = new DeflaterOutputStream(sink);
+
+    try {
+      deflating.write(in);
+    } catch (Exception writeFailure) {
+      writeFailure.printStackTrace(LogUtil.getWarnStream(LOG));
+    }
+    // close() finishes the stream, so it runs even if the write failed
+    try {
+      deflating.close();
+    } catch (IOException closeFailure) {
+      closeFailure.printStackTrace(LogUtil.getWarnStream(LOG));
+    }
+
+    return sink.toByteArray();
+  }
+    
+}

Property changes on: src/java/org/apache/nutch/util/DeflateUtils.java
___________________________________________________________________
Name: svn:executable
   + *

Index: src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
===================================================================
--- src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java	(revision 429630)
+++ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java	(working copy)
@@ -36,6 +36,7 @@
 import org.apache.nutch.protocol.ProtocolOutput;
 import org.apache.nutch.protocol.ProtocolStatus;
 import org.apache.nutch.util.GZIPUtils;
+import org.apache.nutch.util.DeflateUtils;
 import org.apache.nutch.util.LogUtil;
 
 // Hadoop imports
@@ -461,7 +462,24 @@
     }
     return content;
   }
-  
+
+  /** Inflates a deflate-encoded body, passing getMaxContent() as the size limit. */
+  public byte[] processDeflateEncoded(byte[] compressed, URL url) throws IOException {
+    if (LOGGER.isTraceEnabled()) {
+      LOGGER.trace("inflating....");
+    }
+    byte[] inflated = DeflateUtils.inflateBestEffort(compressed, getMaxContent());
+    // fail the fetch rather than hand back a null body
+    if (inflated == null) {
+      throw new IOException("inflateBestEffort returned null");
+    }
+    if (LOGGER.isTraceEnabled()) {
+      LOGGER.trace("fetched " + compressed.length + " bytes of compressed content (expanded to "
+          + inflated.length + " bytes) from " + url);
+    }
+    return inflated;
+  }
+
   protected static void main(HttpBase http, String[] args) throws Exception {
     boolean verbose = false;
     String url = null;
Index: src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
===================================================================
--- src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java	(revision 429630)
+++ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java	(working copy)
@@ -110,7 +110,7 @@
       reqStr.append(portString);
       reqStr.append("\r\n");
 
-      reqStr.append("Accept-Encoding: x-gzip, gzip\r\n");
+      reqStr.append("Accept-Encoding: x-gzip, gzip, deflate\r\n");
 
       String userAgent = http.getUserAgent();
       if ((userAgent == null) || (userAgent.length() == 0)) {
@@ -148,6 +148,8 @@
       String contentEncoding = getHeader(Response.CONTENT_ENCODING);
       if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
         content = http.processGzipEncoded(content, url);
+      } else if ("deflate".equals(contentEncoding)) {
+       content = http.processDeflateEncoded(content, url);
       } else {
         if (Http.LOG.isTraceEnabled()) {
           Http.LOG.trace("fetched " + content.length + " bytes from " + url);
