Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision )
+++ conf/nutch-default.xml	(revision )
@@ -921,6 +921,13 @@
 </property>
 
 <property>
+  <name>parser.html.outlinks.max.target.length</name>
+  <value>3000</value>
+  <description>The maximum number of characters permitted in an outlink's target URL; outlinks with a longer target are skipped.
+  </description>
+</property>
+
+<property>
   <name>htmlparsefilter.order</name>
   <value></value>
   <description>The order by which HTMLParse filters are applied.
Index: src/java/org/apache/nutch/parse/ParseUtil.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseUtil.java	(revision )
+++ src/java/org/apache/nutch/parse/ParseUtil.java	(revision )


@@ -71,6 +69,7 @@
   private ParserFactory parserFactory;
   /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
   private int maxParseTime;
+  private int maxTargetLength;
   private ExecutorService executorService;
   
   /**
@@ -92,6 +91,7 @@
     this.conf = conf;
     parserFactory = new ParserFactory(conf);
     maxParseTime=conf.getInt("parser.timeout", DEFAULT_MAX_PARSE_TIME);
+    maxTargetLength = conf.getInt("parser.html.outlinks.max.target.length", 3000);
     sig = SignatureFactory.getSignature(conf);
     filters = new URLFilters(conf);
     normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_OUTLINK);
@@ -252,6 +252,9 @@
         }
         for (int i = 0; count < maxOutlinks && i < outlinks.length; i++) {
           String toUrl = outlinks[i].getToUrl();
+          if (toUrl.length() > maxTargetLength) {
+             continue;
+          }
           try {
             toUrl = normalizers.normalize(toUrl, URLNormalizers.SCOPE_OUTLINK);
             toUrl = filters.filter(toUrl);
