Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision 165178)
+++ conf/nutch-default.xml	(working copy)
@@ -101,6 +101,15 @@
     trying to fetch a page.</description>
 </property>
 
+<!-- robots.txt properties -->
+
+<property>
+  <name>robot.rules.allow.forbidden</name>
+  <value>false</value>
+  <description>Sites that return a 403 (Forbidden) error when robots.txt is
+    requested will still be crawled if this property is set to true.</description>
+</property>
+
 <!-- FILE properties -->
 
 <property>
Index: src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/RobotRulesParser.java
===================================================================
--- src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/RobotRulesParser.java	(revision 165178)
+++ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/RobotRulesParser.java	(working copy)
@@ -50,6 +50,11 @@
   public static final Logger LOG=
     LogFormatter.getLogger("org.apache.nutch.fetcher.RobotRulesParser");
 
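+  // If true, a 403 response for robots.txt is treated like a missing
+  // robots.txt instead of forbidding all access to the site.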
+  private static final boolean ALLOW_FORBIDDEN =
+    NutchConf.get().getBoolean("robot.rules.allow.forbidden", false);
+
   private static final String[] AGENTS = getAgents();
   private static final Hashtable CACHE = new Hashtable();
   
@@ -378,7 +383,8 @@
 
       if (response.getCode() == 200)               // found rules: parse them
         robotRules = new RobotRulesParser().parseRules(response.getContent());
-      else if (response.getCode() == 403)
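+      // Forbid the site on 403 unless robot.rules.allow.forbidden is true.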
+      else if ((response.getCode() == 403) && !ALLOW_FORBIDDEN)
         robotRules = FORBID_ALL_RULES;            // use forbid all
       else                                        
         robotRules = EMPTY_RULES;                 // use default rules
