Index: src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseOutputFormat.java	
+++ src/java/org/apache/nutch/parse/ParseOutputFormat.java	
@@ -89,6 +89,7 @@
     final int interval = job.getInt("db.fetch.interval.default", 2592000);
     final boolean ignoreExternalLinks = job.getBoolean("db.ignore.external.links", false);
     int maxOutlinksPerPage = job.getInt("db.max.outlinks.per.page", 100);
+    final int maxOutlinkLength = job.getInt("parser.outlink.length", 4000);
     final int maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
                                                      : maxOutlinksPerPage;
     final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);

@@ -180,6 +182,11 @@
           List<Outlink> outlinkList = new ArrayList<Outlink>(outlinksToStore);
           for (int i = 0; i < links.length && validCount < outlinksToStore; i++) {
             String toUrl = links[i].getToUrl();
+            
+            if ((toUrl.length()>maxOutlinkLength)&&(maxOutlinkLength>0)) {
+                continue;
+            }            
+
             // ignore links to self (or anchors within the page)
             if (fromUrl.equals(toUrl)) {
               continue;
Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	mié mar 10 16:22:59 2010
+++ conf/nutch-default.xml	jue mar 18 11:06:09 2010
@@ -966,7 +966,14 @@
   for most people would be "img,script,link".</description>
 </property>
 
+<property>
+  <name>parser.outlink.length</name>
+  <value>4000</value>
+  <description>The maximum length of an outlink URL, in characters.
+  If this value is positive (>0), outlinks whose URL is longer than it will be ignored; a value of zero or less disables the check.</description>
+</property>
 
 <!-- urlfilter plugin properties -->
 
 <property>