Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision 1680953)
+++ conf/nutch-default.xml	(working copy)
@@ -118,15 +118,6 @@
 </property>
 
 <property>
-   <name>http.robot.rules.whitelist</name> 	 
-   <value></value> 	 
-   <description>Comma separated list of hostnames or IP addresses to ignore 	 
-   robot rules parsing for. Use with care and only if you are explicitly 	 
-   allowed by the site owner to ignore the site's robots.txt! 	 
-   </description> 	 
-</property> 	 
-   	   	   	   	   	   	 	  	 
-<property>
   <name>http.robot.rules.whitelist</name>
   <value></value>
   <description>Comma separated list of hostnames or IP addresses to ignore 
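Note: the surviving http.robot.rules.whitelist property above is read through Hadoop's Configuration API in RobotRulesParser. Below is a minimal sketch of how that comma-separated value is consumed, assuming it is overridden (normally in conf/nutch-site.xml); the class name and host values are illustrative only, not part of this patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;

/** Hedged sketch: reads the whitelist property; host values are made up. */
public class WhitelistConfigSketch {
  public static void main(String[] args) {
    Configuration conf = NutchConfiguration.create();
    // Normally set in conf/nutch-site.xml; set programmatically here only for illustration.
    conf.set("http.robot.rules.whitelist", "example.org,192.0.2.10");

    // getStrings() splits the comma-separated value; it may return null when the
    // property is unset or empty, hence the null guard in RobotRulesParser below.
    String[] hosts = conf.getStrings("http.robot.rules.whitelist");
    if (hosts != null) {
      for (String host : hosts) {
        System.out.println(host);
      }
    }
  }
}
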
Index: src/java/org/apache/nutch/protocol/RobotRulesParser.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRulesParser.java	(revision 1680953)
+++ src/java/org/apache/nutch/protocol/RobotRulesParser.java	(working copy)
@@ -25,7 +25,6 @@
 import java.io.LineNumberReader;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Set;
@@ -42,6 +41,7 @@
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.SuffixStringMatcher;
 
 import crawlercommons.robots.BaseRobotRules;
 import crawlercommons.robots.SimpleRobotRules;
@@ -83,9 +83,11 @@
   protected String agentNames;
 
   /** set of host names or IPs to be explicitly excluded from robots.txt checking */
-  protected Set<String> whiteList = new HashSet<String>();;
+  protected Set<String> whiteList = new HashSet<String>();
+
+  /** Matcher used to efficiently match host names against the whitelisted suffixes. */
+  private SuffixStringMatcher matcher = null;
 
-
   public RobotRulesParser() {
   }
 
@@ -127,8 +129,19 @@
     }
 
     String[] confWhiteList = conf.getStrings("http.robot.rules.whitelist");
-    if (confWhiteList != null && confWhiteList.length > 0) {
-      whiteList.addAll(Arrays.asList(confWhiteList));
+
+    if (confWhiteList != null) {
+      for (String host : confWhiteList) {
+        if (host.isEmpty()) {
+          LOG.info("Empty whitelisted URL skipped!");
+          continue;
+        }
+        whiteList.add(host);
+      }
+    }
+
+    if (!whiteList.isEmpty()) {
+      matcher = new SuffixStringMatcher(whiteList);
       LOG.info("Whitelisted hosts: " + whiteList);
     }
   }
@@ -140,12 +153,18 @@
     return conf;
   }
 
-
   /**
    * Check whether a URL belongs to a whitelisted host.
    */
   public boolean isWhiteListed(URL url) {
-    return whiteList.contains(url.getHost());
+    boolean match = false;
+    String hostName = url.getHost();
+
+    if (matcher != null) {
+      match = matcher.matches(hostName);
+    }
+
+    return match;
   }
 
   /**

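For reference, a minimal sketch of the suffix-based whitelist lookup this patch introduces, using only the SuffixStringMatcher calls that appear in the diff (the collection constructor and matches()); the class name and host entries below are illustrative only.

import java.util.HashSet;
import java.util.Set;

import org.apache.nutch.util.SuffixStringMatcher;

/** Hedged sketch of the suffix-based whitelist lookup; entries are illustrative. */
public class WhitelistMatchSketch {
  public static void main(String[] args) {
    Set<String> whiteList = new HashSet<String>();
    whiteList.add("example.org");
    whiteList.add("192.0.2.10");

    // Build a matcher over the whitelisted suffixes, mirroring setConf() above.
    SuffixStringMatcher matcher = new SuffixStringMatcher(whiteList);

    // matches() asks whether the host ends with any whitelisted suffix, so a
    // subdomain such as "www.example.org" is also treated as whitelisted.
    System.out.println(matcher.matches("www.example.org")); // true
    System.out.println(matcher.matches("example.com"));     // false
  }
}

Because matching is by suffix, subdomains of a whitelisted host also match, which appears to be the main behavioral change from the previous exact-host whiteList.contains(url.getHost()) lookup.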