Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision 575912)
+++ conf/nutch-default.xml	(working copy)
@@ -155,6 +155,88 @@
 </property>
 
 <property>
+  <name>http.proxy.username</name>
+  <value></value>
+  <description>Username for proxy. This will be used by protocol-http11,
+  if the proxy server requests basic, digest and/or NTLM authentication.
+  To use this, protocol-http11 must be added in 'plugin.includes'
+  property.
+  NOTE: For NTLM authentication, do not prefix the username with the
+  domain, i.e. 'susam' is correct whereas 'DOMAIN/susam' is incorrect.
+  </description>
+</property>
+
+<property>
+  <name>http.proxy.password</name>
+  <value></value>
+  <description>Password for proxy. This will be used by protocol-http11,
+  if the proxy server requests basic, digest and/or NTLM authentication.
+  To use this, protocol-http11 must be added in 'plugin.includes'
+  property.
+  </description>
+</property>
+
+<property>
+  <name>http.proxy.realm</name>
+  <value></value>
+  <description>Authentication realm for proxy. Do not define a value
+  if realm is not required or authentication should take place for any
+  realm. NTLM does not use the notion of realms. Specify the domain name
+  of NTLM authentication as the value for this property. To use this,
+  protocol-http11 must be added in 'plugin.includes' property.
+  </description>
+</property>
+
+<property>
+  <name>http.auth.username</name>
+  <value></value>
+  <description>Username for web server authentication, e.g. on an
+  intranet where HTTP clients must log in to the web servers to access
+  pages. This will be used by protocol-http11, if a web server
+  requests basic, digest and/or NTLM authentication. To use this,
+  protocol-http11 must be added in 'plugin.includes' property.
+  NOTE: For NTLM authentication, do not prefix the username with the
+  domain, i.e. 'susam' is correct whereas 'DOMAIN/susam' is incorrect.
+  </description>
+</property>
+
+<property>
+  <name>http.auth.password</name>
+  <value></value>
+  <description>Password for web server authentication, e.g. on an
+  intranet where HTTP clients must log in to the web servers to access
+  pages. This will be used by protocol-http11, if a web server
+  requests basic, digest and/or NTLM authentication. To use this,
+  protocol-http11 must be added in 'plugin.includes' property.
+  </description>
+</property>
+
+<property>
+  <name>http.auth.realm</name>
+  <value></value>
+  <description>This is the authentication realm. Do not define a value
+  if realm is not required or authentication should take place for any
+  realm. NTLM does not use the notion of realms. Specify the domain name
+  of NTLM authentication as the value for this property. To use this,
+  protocol-http11 must be added in 'plugin.includes' property.
+  </description>
+</property>
+
+<property>
+  <name>http.auth.host</name>
+  <value></value>
+  <description>The host the authentication request is originating from.
+  So, this should be the name or IP address of the host on which the
+  Nutch crawler will be running. This should be specified if NTLM
+  authentication is required by the proxy server and/or the web servers.
+  This is not required for basic and digest authentications. So, this
+  can be left undefined (though not recommended) if NTLM authentication
+  is not going to be used. To use this, protocol-http11 must be added
+  in 'plugin.includes' property.
+  </description>
+</property>
+
+<property>
   <name>http.verbose</name>
   <value>false</value>
   <description>If true, HTTP will log more verbosely.</description>
@@ -172,8 +254,11 @@
 <property>
   <name>http.useHttp11</name>
   <value>false</value>
-  <description>NOTE: at the moment this works only for protocol-httpclient.
-  If true, use HTTP 1.1, if false use HTTP 1.0 .
+  <description>NOTE: At the moment this works only for protocol-http11
+  and protocol-httpclient. If true, use HTTP 1.1, if false use HTTP 1.0.
+  To use this, protocol-http11 or protocol-httpclient must be added in
+  'plugin.includes'
+  property.
   </description>
 </property>
 
@@ -822,9 +907,9 @@
   include.  Any plugin not matching this expression is excluded.
   In any case you need at least include the nutch-extensionpoints plugin. By
   default Nutch includes crawling just HTML and plain text via HTTP,
-  and basic indexing and search plugins. In order to use HTTPS please enable 
-  protocol-httpclient, but be aware of possible intermittent problems with the 
-  underlying commons-httpclient library.
+  and basic indexing and search plugins. In order to use HTTPS and
+  authentication schemes for web servers and proxy server, please enable
+  protocol-http11.
   </description>
 </property>
 
Index: src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/Http.java
===================================================================
--- src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/Http.java	(revision 0)
+++ src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/Http.java	(revision 0)
@@ -0,0 +1,200 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.http11;
+
+import java.net.URL;
+import java.io.IOException;
+
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+// HttpClient imports
+import org.apache.commons.httpclient.auth.AuthScope;
+import org.apache.commons.httpclient.NTCredentials;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HostConfiguration;
+import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
+
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.http.api.HttpBase;
+import org.apache.nutch.protocol.ProtocolException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+
+/**
+ * This class is a protocol plugin that configures an HTTP client for
+ * Basic, Digest and NTLM authentication schemes for web server as well
+ * as proxy server. HTTP authentication for proxy server or web server
+ * is enabled only if the corresponding user-name property
+ * (<code>http.proxy.username</code> or <code>http.auth.username</code>)
+ * is set in the <code>conf/nutch-site.xml</code> file.
+ * <p>
+ * The realm properties (<code>http.proxy.realm</code> and
+ * <code>http.auth.realm</code>) are used as domain names for NTLM
+ * authentication.
+ *
+ * @author Susam Pal
+ */
+public class Http extends HttpBase {
+
+  static final Log LOG = LogFactory.getLog(Http.class);
+  // NOTE(review): one client shared by all fetches - confirm thread use
+  private HttpClient client = new HttpClient();
+
+  private String proxyUser;
+  private String proxyPassword;
+  private String proxyRealm;
+
+  private String authUser;
+  private String authPassword;
+  private String authHost;
+  private String authRealm;
+
+  /**
+   * Constructs this plugin.
+   */
+  public Http() {
+    super(LOG);
+  }
+
+  /**
+   * Reads the proxy and web server authentication settings from the
+   * Nutch configuration; an undefined property is read as "".
+   *
+   * @param conf Configuration
+   */
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    this.proxyUser = conf.get("http.proxy.username", "");
+    this.proxyPassword = conf.get("http.proxy.password", "");
+    this.proxyRealm = conf.get("http.proxy.realm", "");
+    this.authUser = conf.get("http.auth.username", "");
+    this.authPassword = conf.get("http.auth.password", "");
+    this.authHost = conf.get("http.auth.host", "");
+    this.authRealm = conf.get("http.auth.realm", "");
+  }
+
+  /**
+   * Returns an authentication scope for the given host and port.
+   *
+   * @param host   Host
+   * @param port   Port number
+   * @param realm  Authentication realm; "" if no realm is configured
+   * @return       Authentication scope
+   */
+  private AuthScope getAuthScope(String host, int port, String realm) {
+    if (realm.length() == 0) {
+      return new AuthScope(host, port);
+    } else {
+      return new AuthScope(host, port, realm);
+    }
+  }
+
+  /**
+   * Configures the HTTP client: proxy, credentials and timeouts.
+   *
+   * @param url URL to be fetched
+   */
+  private void configureClient(URL url) {
+
+    HostConfiguration hostConf = this.client.getHostConfiguration();
+
+    // HTTP proxy server details
+    if (useProxy) {
+      hostConf.setProxy(this.proxyHost, this.proxyPort);
+
+      if (proxyUser.length() > 0) {
+
+        AuthScope proxyAuthScope = getAuthScope(
+            this.proxyHost, this.proxyPort, this.proxyRealm);
+        // authHost is the host the request originates from (for NTLM)
+        NTCredentials proxyCredentials = new NTCredentials(
+            this.proxyUser, this.proxyPassword,
+            this.authHost, this.proxyRealm);
+
+        this.client.getState().setProxyCredentials(
+            proxyAuthScope, proxyCredentials);
+      }
+
+    }
+
+    // Web server authentication details
+    if (authUser.length() > 0) {
+      // No explicit port: use the scheme default (80 http, 443 https)
+      int port = url.getPort();
+      if (port == -1) {
+        port = url.getDefaultPort();
+      }
+
+      AuthScope serverAuthScope = getAuthScope(
+          url.getHost(), port, this.authRealm);
+
+      NTCredentials serverCredentials = new NTCredentials(
+          this.authUser, this.authPassword,
+          this.authHost, this.authRealm);
+
+      this.client.getState().setCredentials(
+          serverAuthScope, serverCredentials);
+    }
+
+    // Connection parameters
+    HttpConnectionManagerParams params =
+        this.client.getHttpConnectionManager().getParams();
+    params.setConnectionTimeout(timeout);
+    params.setSoTimeout(timeout);
+
+  }
+
+  /**
+   * Returns the configured HTTP client.
+   *
+   * @return HTTP client
+   */
+  HttpClient getClient() {
+    return this.client;
+  }
+
+  /**
+   * Fetches the <code>url</code> with a configured HTTP client and
+   * gets the response.
+   *
+   * @param url       URL to be fetched
+   * @param datum     Crawl data
+   * @param redirect  Follow redirects if and only if true
+   * @return          HTTP response
+   */
+  protected Response getResponse(URL url, CrawlDatum datum,
+      boolean redirect) throws ProtocolException, IOException {
+    configureClient(url);
+    return new HttpResponse(this, url, datum, redirect);
+  }
+
+  /**
+   * Method for unit testing this plugin.
+   *
+   * @param args Command line arguments
+   */
+  public static void main(String[] args) throws Exception {
+    Http http = new Http();
+    http.setConf(NutchConfiguration.create());
+    main(http, args);
+  }
+}
Index: src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/HttpResponse.java
===================================================================
--- src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/HttpResponse.java	(revision 0)
+++ src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/HttpResponse.java	(revision 0)
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.http11;
+
+// JDK imports
+import java.io.InputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.net.URL;
+
+// HttpClient imports
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.httpclient.Header;
+import org.apache.commons.httpclient.params.HttpMethodParams;
+import org.apache.commons.httpclient.HttpVersion;
+import org.apache.commons.httpclient.HttpException;
+
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.ProtocolException;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.metadata.SpellCheckedMetadata;
+
+/**
+ * An HTTP response: status code, headers and body of a single GET
+ * request issued through the HTTP client of the {@link Http} plugin.
+ *
+ * @author Susam Pal
+ */
+public class HttpResponse implements Response {
+
+  /** Size in bytes of the chunks used to read the response body. */
+  private static final int BUFFER_SIZE = 4096;
+
+  private byte[] content;
+  private URL url;
+  private int code;
+  private Metadata headers = new SpellCheckedMetadata();
+
+  /**
+   * Fetches the given <code>url</code> and prepares HTTP response.
+   * At most <code>http.getMaxContent()</code> bytes of the body are
+   * kept; a negative limit disables truncation. A non-empty gzip or
+   * x-gzip encoded body is decompressed.
+   *
+   * @param http                An instance of the implementation class
+   *                            of this plugin
+   * @param url                 URL to be fetched
+   * @param datum               Crawl data
+   * @param followRedirects     Whether to follow redirects; follows
+   *                            redirect if and only if this is true
+   * @throws ProtocolException  When an HTTP error occurs
+   * @throws IOException        When a non recoverable error occurs
+   */
+  public HttpResponse(Http http, URL url, CrawlDatum datum,
+      boolean followRedirects) throws ProtocolException, IOException {
+
+    // Prepare GET method for HTTP request
+    this.url = url;
+    GetMethod get = new GetMethod(url.toString());
+    get.setFollowRedirects(followRedirects);
+    get.setDoAuthentication(true);
+    get.setRequestHeader("Accept-Encoding", "x-gzip, gzip");
+    get.setRequestHeader("User-Agent", http.getUserAgent());
+    if (datum.getModifiedTime() > 0) {
+      get.setRequestHeader("If-Modified-Since",
+          HttpDateFormat.toString(datum.getModifiedTime()));
+    }
+
+    // Set HTTP parameters
+    HttpMethodParams params = get.getParams();
+    if (http.getUseHttp11()) {
+      params.setVersion(HttpVersion.HTTP_1_1);
+    } else {
+      params.setVersion(HttpVersion.HTTP_1_0);
+    }
+
+    try {
+
+      this.code = http.getClient().executeMethod(get);
+
+      // Read headers
+      Header[] headerArray = get.getResponseHeaders();
+      for (int i=0; i < headerArray.length; i++) {
+        headers.set(headerArray[i].getName(), headerArray[i].getValue());
+      }
+
+      // Read content, honouring the configured size limit
+      int limit = http.getMaxContent();
+      String contentLength = getHeader(Response.CONTENT_LENGTH);
+      content = readContent(get.getResponseBodyAsStream(),
+          initialCapacity(contentLength, limit), limit);
+
+      String contentEncoding = getHeader(Response.CONTENT_ENCODING);
+
+      // Trace message
+      StringBuffer fetchTrace = new StringBuffer("url: " + url +
+        "; status code: " + code +
+        "; bytes received: " + content.length);
+      if (contentLength != null)
+        fetchTrace.append("; Content-Length: " + contentLength);
+      if (contentEncoding != null)
+        fetchTrace.append("; Content-Encoding: " + contentEncoding);
+      if (getHeader(Response.LOCATION) != null)
+        fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
+
+      // Handle gzip and x-gzip files; an empty body has nothing to inflate
+      if (content.length > 0 && ("gzip".equals(contentEncoding) ||
+          "x-gzip".equals(contentEncoding))) {
+        content = http.processGzipEncoded(content, url);
+        fetchTrace.append("; gzip extracted to " + content.length +
+                          " bytes");
+      }
+
+      // Log trace message
+      if (Http.LOG.isInfoEnabled()) {
+        Http.LOG.info(fetchTrace);
+      }
+
+    } catch (HttpException ex) {
+      // Keep the original exception as the cause for diagnosis
+      throw (ProtocolException) new ProtocolException(
+          "ProtocolException while fetching: " + url + " - " + ex)
+          .initCause(ex);
+    } catch (IOException ex) {
+      throw (IOException) new IOException(
+          "IOException while fetching: " + url + " - " + ex)
+          .initCause(ex);
+    } finally {
+      get.releaseConnection();
+    }
+  }
+
+  /**
+   * Computes the initial size of the content buffer from the
+   * <code>Content-Length</code> header.
+   *
+   * @param contentLength  Header value; may be null or malformed
+   * @param limit          Maximum number of bytes that will be kept;
+   *                       negative if there is no limit
+   * @return               Non-negative capacity, never above a
+   *                       non-negative <code>limit</code>
+   */
+  private static int initialCapacity(String contentLength, int limit) {
+    int capacity = BUFFER_SIZE;
+    if (contentLength != null) {
+      try {
+        capacity = Math.max(0, Integer.parseInt(contentLength.trim()));
+      } catch (NumberFormatException ex) {
+        // Malformed header: fall back to the default capacity
+      }
+    }
+    if (limit >= 0 && capacity > limit) {
+      capacity = limit;
+    }
+    return capacity;
+  }
+
+  /**
+   * Reads the response body in chunks, stopping at end of stream or
+   * once <code>limit</code> bytes have been read.
+   *
+   * @param in        Body stream; null if the response has no body
+   * @param capacity  Initial size of the buffer collecting the body
+   * @param limit     Maximum number of bytes to read; negative if
+   *                  there is no limit
+   * @return          Bytes read, an empty array if there is no body
+   * @throws IOException  When reading from the stream fails
+   */
+  private static byte[] readContent(InputStream in, int capacity,
+      int limit) throws IOException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream(capacity);
+    if (in != null) {
+      byte[] buffer = new byte[BUFFER_SIZE];
+      while (limit < 0 || out.size() < limit) {
+        int wanted = buffer.length;
+        if (limit >= 0 && limit - out.size() < wanted) {
+          wanted = limit - out.size();
+        }
+        int read = in.read(buffer, 0, wanted);
+        if (read == -1) {
+          break;
+        }
+        out.write(buffer, 0, read);
+      }
+    }
+    return out.toByteArray();
+  }
+
+  /* ------------------------- *
+   * <implementation:Response> *
+   * ------------------------- */
+
+  public URL getUrl() {
+    return url;
+  }
+
+  public int getCode() {
+    return code;
+  }
+
+  public String getHeader(String name) {
+    return headers.get(name);
+  }
+
+  public Metadata getHeaders() {
+    return headers;
+  }
+
+  public byte[] getContent() {
+    return content;
+  }
+
+  /* -------------------------- *
+   * </implementation:Response> *
+   * -------------------------- */
+
+}
Index: src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/package.html
===================================================================
--- src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/package.html	(revision 0)
+++ src/plugin/protocol-http11/src/java/org/apache/nutch/protocol/http11/package.html	(revision 0)
@@ -0,0 +1,7 @@
+<html>
+<body>
+<p>Protocol plugin which supports retrieving documents via the HTTP 1.0,
+HTTP 1.1 and HTTPS protocols, optionally with Basic, Digest and NTLM
+authentication schemes for web server as well as proxy server.</p>
+</body>
+</html>
Index: src/plugin/protocol-http11/plugin.xml
===================================================================
--- src/plugin/protocol-http11/plugin.xml	(revision 0)
+++ src/plugin/protocol-http11/plugin.xml	(revision 0)
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<plugin
+   id="protocol-http11"
+   name="Http 1.1 Protocol Plug-in"
+   version="1.0.0"
+   provider-name="nutch.org">
+
+   <runtime>
+      <library name="protocol-http11.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+      <import plugin="lib-http"/>
+   </requires>
+
+   <!-- HTTP 1.1 extension -->
+   <extension id="org.apache.nutch.protocol.http11"
+              name="Http11Protocol"
+              point="org.apache.nutch.protocol.Protocol">
+
+      <implementation id="org.apache.nutch.protocol.http11.Http"
+                      class="org.apache.nutch.protocol.http11.Http">
+        <parameter name="protocolName" value="http"/>
+      </implementation>
+
+   </extension>
+
+   <!-- HTTPS extension -->
+   <extension id="org.apache.nutch.protocol.https"
+              name="HttpsProtocol"
+              point="org.apache.nutch.protocol.Protocol">
+
+      <implementation id="org.apache.nutch.protocol.http11.Http"
+                      class="org.apache.nutch.protocol.http11.Http">
+        <parameter name="protocolName" value="https"/>
+      </implementation>
+
+   </extension>
+
+</plugin>
+
Index: src/plugin/protocol-http11/build.xml
===================================================================
--- src/plugin/protocol-http11/build.xml	(revision 0)
+++ src/plugin/protocol-http11/build.xml	(revision 0)
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="protocol-http11" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+  <target name="deps-jar">
+    <ant target="jar" inheritall="false" dir="../lib-http"/>
+  </target>
+
+  <path id="plugin.deps">
+    <fileset dir="${nutch.root}/build">
+      <include name="**/lib-http/*.jar" />
+    </fileset>
+  </path>
+
+</project>
Index: src/plugin/build.xml
===================================================================
--- src/plugin/build.xml	(revision 575912)
+++ src/plugin/build.xml	(working copy)
@@ -45,6 +45,7 @@
      <ant dir="protocol-file" target="deploy"/>
      <ant dir="protocol-ftp" target="deploy"/>
      <ant dir="protocol-http" target="deploy"/>
+     <ant dir="protocol-http11" target="deploy"/>
      <ant dir="protocol-httpclient" target="deploy"/>
      <ant dir="parse-ext" target="deploy"/>
      <ant dir="parse-html" target="deploy"/>
Index: build.xml
===================================================================
--- build.xml	(revision 575912)
+++ build.xml	(working copy)
@@ -332,6 +332,7 @@
       <packageset dir="${plugins.dir}/protocol-file/src/java"/>
       <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
       <packageset dir="${plugins.dir}/protocol-http/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-http11/src/java"/>
       <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
       <packageset dir="${plugins.dir}/parse-ext/src/java"/>
       <packageset dir="${plugins.dir}/parse-html/src/java"/>
