diff --git conf/nutch-default.xml conf/nutch-default.xml
index 76d070c..0bbe857 100644
--- conf/nutch-default.xml
+++ conf/nutch-default.xml
@@ -118,6 +118,14 @@
 </property>
 
 <property>
+  <name>http.robot.rules.whitelist</name>
+  <value></value>
+  <description>Comma separated list of hostnames or IP addresses for which
+  robots.txt rules are ignored (no robots.txt is fetched for these hosts).
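+  Example: "www.example.com,192.168.1.1".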
+  </description>
+</property>
+
+<property>
   <name>http.robots.403.allow</name>
   <value>true</value>
   <description>Some servers return HTTP status 403 (Forbidden) if
diff --git src/java/org/apache/nutch/protocol/RobotRules.java src/java/org/apache/nutch/protocol/RobotRules.java
deleted file mode 100644
index dbf72ba..0000000
--- src/java/org/apache/nutch/protocol/RobotRules.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol;
-
-import java.net.URL;
-
-/**
- * This class holds the rules which were parsed from a robots.txt file, and can
- * test paths against those rules.
- */
-public interface RobotRules {
-  /**
-   * Get expire time
-   */
-  public long getExpireTime();
-
-  /**
-   * Get Crawl-Delay, in milliseconds. This returns -1 if not set.
-   */
-  public long getCrawlDelay();
-
-  /**
-   * Returns <code>false</code> if the <code>robots.txt</code> file prohibits us
-   * from accessing the given <code>url</code>, or <code>true</code> otherwise.
-   */
-  public boolean isAllowed(URL url);
-
-}
diff --git src/java/org/apache/nutch/protocol/RobotRulesParser.java src/java/org/apache/nutch/protocol/RobotRulesParser.java
index d6b2364..6fe3bf4 100644
--- src/java/org/apache/nutch/protocol/RobotRulesParser.java
+++ src/java/org/apache/nutch/protocol/RobotRulesParser.java
@@ -20,10 +20,14 @@ package org.apache.nutch.protocol;
 // JDK imports
 import java.io.File;
 import java.io.FileReader;
+import java.io.IOException;
 import java.io.LineNumberReader;
+import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Hashtable;
+import java.util.Set;
 import java.util.StringTokenizer;
 
 // Commons Logging imports
@@ -34,6 +38,8 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.nutch.util.NutchConfiguration;
 
 import com.google.common.io.Files;
 
@@ -73,6 +79,10 @@ public abstract class RobotRulesParser implements Configurable {
   private Configuration conf;
   protected String agentNames;
 
+  /** set of host names or IPs to be explicitly excluded from robots.txt checking */
+  protected Set<String> whiteList = new HashSet<String>();
+
   public RobotRulesParser() {
   }
 
@@ -112,6 +122,12 @@ public abstract class RobotRulesParser implements Configurable {
 
       agentNames = sb.toString();
     }
+
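+    // conf.getStrings() splits the comma-separated property value into single entries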
+    String[] confWhiteList = conf.getStrings("http.robot.rules.whitelist");
+    if (confWhiteList != null && confWhiteList.length > 0) {
+      whiteList.addAll(Arrays.asList(confWhiteList));
+      LOG.info("Whitelisted hosts: " + whiteList);
+    }
   }
 
   /**
@@ -121,6 +137,14 @@ public abstract class RobotRulesParser implements Configurable {
     return conf;
   }
 
+
+  /**
+   * Check whether a URL belongs to a whitelisted host.
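+   * Matching is case-sensitive and done against the exact host name or IP
+   * string returned by {@link URL#getHost()}.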
+   */
+  public boolean isWhiteListed(URL url) {
+    return whiteList.contains(url.getHost());
+  }
+
   /**
    * Parses the robots content using the {@link SimpleRobotRulesParser} from
    * crawler commons
@@ -171,16 +195,49 @@ public abstract class RobotRulesParser implements Configurable {
       System.exit(-1);
     }
 
+    final File robotsFile = new File(argv[0]);
+    final File urlFile = new File(argv[1]);
+    final String agents = argv[2];
+
     try {
-      byte[] robotsBytes = Files.toByteArray(new File(argv[0]));
-      BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes,
-          "text/plain", argv[2]);
+      Configuration conf = NutchConfiguration.create();
+      conf.set("http.agent.name", agents);
+      // instantiate parser object which does not fetch a robots.txt
+      // but uses the one passed as command-line argument
+      RobotRulesParser parser = new RobotRulesParser(conf) {
+        public BaseRobotRules getRobotRulesSet(Protocol protocol, URL url) {
+          BaseRobotRules rules;
+          try {
+            byte[] robotsBytes = Files.toByteArray(robotsFile);
+            rules = robotParser.parseContent(url.toString(), robotsBytes,
+                "text/plain", agents);
+          } catch (IOException e) {
+            LOG.error("Failed to open robots.txt file " + robotsFile + ": "
+                + StringUtils.stringifyException(e));
+            rules = EMPTY_RULES;
+          }
+          return rules;
+        }
+      };
+      BaseRobotRules rules = parser.getRobotRulesSet(null, robotsFile.toURI()
+          .toURL());
 
-      LineNumberReader testsIn = new LineNumberReader(new FileReader(argv[1]));
+      LineNumberReader testsIn = new LineNumberReader(new FileReader(urlFile));
       String testPath = testsIn.readLine().trim();
       while (testPath != null) {
-        System.out.println((rules.isAllowed(testPath) ? "allowed"
-            : "not allowed") + ":\t" + testPath);
+        try {
+          // testPath can be just a path or a complete URL
+          URL url = new URL(testPath);
+          String status;
+          if (parser.isWhiteListed(url)) {
+            status = "whitelisted";
+          } else if (rules.isAllowed(testPath)) {
+            status = "allowed";
+          } else {
+            status = "not allowed";
+          }
+          System.out.println(status + ":\t" + testPath);
+        } catch (MalformedURLException e) {
+          // testPath is a bare path without scheme and host: the whitelist
+          // cannot be checked, so fall back to the robots.txt rules only
+          System.out.println((rules.isAllowed(testPath) ? "allowed"
+              : "not allowed") + ":\t" + testPath);
+        }
         testPath = testsIn.readLine();
       }
       testsIn.close();
diff --git src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java
index 63c8d55..2a657a9 100644
--- src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java
+++ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java
@@ -88,15 +88,23 @@ public class HttpRobotRulesParser extends RobotRulesParser {
   public BaseRobotRules getRobotRulesSet(Protocol http, URL url) {
 
     String cacheKey = getCacheKey(url);
-    BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(cacheKey);
+    BaseRobotRules robotRules = CACHE.get(cacheKey);
+
+    if (robotRules != null) {
+      return robotRules; // cached rule
+    } else if (LOG.isTraceEnabled()) {
+      LOG.trace("cache miss " + url);
+    }
 
     boolean cacheRule = true;
+    URL redir = null;
 
-    if (robotRules == null) { // cache miss
-      URL redir = null;
-      if (LOG.isTraceEnabled()) {
-        LOG.trace("cache miss " + url);
-      }
+    if (isWhiteListed(url)) {
+      // check in advance whether a host is whitelisted
+      // (we do not need to fetch robots.txt)
+      robotRules = EMPTY_RULES;
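+      // EMPTY_RULES allows all paths; cacheRule stays true, so the
+      // whitelisted host is cached like any other host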
+
+    } else {
       try {
         Response response = ((HttpBase) http).getResponse(new URL(url,
             "/robots.txt"), new CrawlDatum(), true);
@@ -127,7 +135,7 @@ public class HttpRobotRulesParser extends RobotRulesParser {
         else if ((response.getCode() == 403) && (!allowForbidden))
           robotRules = FORBID_ALL_RULES; // use forbid all
         else if (response.getCode() >= 500) {
-          cacheRule = false;
+          cacheRule = false; // try again later to fetch robots.txt
           robotRules = EMPTY_RULES;
         } else
           robotRules = EMPTY_RULES; // use default rules
@@ -135,18 +143,19 @@ public class HttpRobotRulesParser extends RobotRulesParser {
         if (LOG.isInfoEnabled()) {
           LOG.info("Couldn't get robots.txt for " + url + ": " + t.toString());
         }
-        cacheRule = false;
+        cacheRule = false; // try again later to fetch robots.txt
         robotRules = EMPTY_RULES;
       }
+    }
 
-      if (cacheRule) {
-        CACHE.put(cacheKey, robotRules); // cache rules for host
-        if (redir != null && !redir.getHost().equalsIgnoreCase(url.getHost())) {
-          // cache also for the redirected host
-          CACHE.put(getCacheKey(redir), robotRules);
-        }
+    if (cacheRule) {
+      CACHE.put(cacheKey, robotRules); // cache rules for host
+      if (redir != null && !redir.getHost().equalsIgnoreCase(url.getHost())) {
+        // cache also for the redirected host
+        CACHE.put(getCacheKey(redir), robotRules);
       }
     }
+
     return robotRules;
   }
 }
diff --git src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
index 93e3752..7dbfbad 100644
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
@@ -69,15 +69,22 @@ public class FtpRobotRulesParser extends RobotRulesParser {
                                                        // case
     String host = url.getHost().toLowerCase(); // normalize to lower case
 
-    BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(protocol + ":"
-        + host);
+    BaseRobotRules robotRules = CACHE.get(protocol + ":" + host);
+
+    if (robotRules != null) {
+      return robotRules; // cached rule
+    } else if (LOG.isTraceEnabled()) {
+      LOG.trace("cache miss " + url);
+    }
 
     boolean cacheRule = true;
 
-    if (robotRules == null) { // cache miss
-      if (LOG.isTraceEnabled())
-        LOG.trace("cache miss " + url);
+    if (isWhiteListed(url)) {
+      // check in advance whether a host is whitelisted
+      // (we do not need to fetch robots.txt)
+      robotRules = EMPTY_RULES;
 
+    } else {
       try {
         Text robotsUrl = new Text(new URL(url, "/robots.txt").toString());
         ProtocolOutput output = ((Ftp) ftp).getProtocolOutput(robotsUrl,
@@ -94,13 +101,15 @@ public class FtpRobotRulesParser extends RobotRulesParser {
         if (LOG.isInfoEnabled()) {
           LOG.info("Couldn't get robots.txt for " + url + ": " + t.toString());
         }
-        cacheRule = false;
+        cacheRule = false; // try again later to fetch robots.txt
         robotRules = EMPTY_RULES;
       }
 
-      if (cacheRule)
-        CACHE.put(protocol + ":" + host, robotRules); // cache rules for host
     }
+
+    if (cacheRule)
+      CACHE.put(protocol + ":" + host, robotRules); // cache rules for host
+
     return robotRules;
   }
 }
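
Usage sketch (placeholder hosts www.example.com and 192.168.1.1; assumes the
usual nutch-site.xml override mechanism): the new property takes a plain
comma-separated list, e.g.

  <property>
    <name>http.robot.rules.whitelist</name>
    <value>www.example.com,192.168.1.1</value>
  </property>

The behaviour can be checked with the rewritten RobotRulesParser command-line
tool, which takes a robots.txt file, a file of test URLs, and the agent names
as arguments and now prints "whitelisted" for URLs whose host is on the list.
During a crawl, robots.txt is neither fetched nor parsed for whitelisted hosts;
the allow-all EMPTY_RULES is used and cached instead.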
