Index: nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java	(révision 367546)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java	(copie de travail)
@@ -27,6 +27,7 @@
 
 import java.io.*;
 import java.util.*;
+import java.net.*;
 
 /* Parse content in a segment. */
 public class ParseOutputFormat implements OutputFormat {
@@ -38,6 +39,7 @@
 
     final float interval = job.getFloat("db.default.fetch.interval", 30f);
     final float extscore = job.getFloat("db.score.link.external", 1.0f);
+    final boolean ignoreExternalLinks = job.getBoolean("crawl.ignore.external.links", false); // off by default; when true, outlinks whose host differs from the source page's host are not written
     
     File text =
       new File(new File(job.getOutputDir(), ParseText.DIR_NAME), name);
@@ -62,7 +64,10 @@
           throws IOException {
           
           Parse parse = (Parse)value;
-          
+          String fromUrl = key.toString(); // string form of the record key; parsed further down as the source page URL
+          String fromHost = null; // lower-cased host of fromUrl; remains null when fromUrl is malformed
+          String toHost = null; // host of the outlink under test; only assigned when external links are being ignored
+                             
           textOut.append(key, new ParseText(parse.getText()));
           dataOut.append(key, parse.getData());
           
@@ -88,6 +93,11 @@
          if (scoreString != null) score = Float.parseFloat(scoreString);
           score /= links.length;
                           
+          try { // derive the source host once, ahead of the outlink loop
+			fromHost = new URL(fromUrl).getHost().toLowerCase();
+          } catch (MalformedURLException e) {
+        	  fromHost = null; // no usable source host: with external-link filtering on, no outlink can match it
+          }
           for (int i = 0; i < links.length; i++) {
             String toUrl = links[i].getToUrl();
             try {
@@ -96,10 +106,21 @@
             } catch (Exception e) {
               toUrl = null;
             }
-            if (toUrl != null)
+            if (toUrl != null) {
+              if (ignoreExternalLinks) {
+                try {
+                  toHost = new URL(toUrl).getHost(); // case is folded at comparison time, independent of the JVM default locale
+                } catch (MalformedURLException e) {
+                  toHost = null; // unparsable outlink: treated as external below
+                }
+                if (toHost == null || !toHost.equalsIgnoreCase(fromHost)) { // external link, or source host unknown (fromHost == null)
+                  continue; // skip it: no CrawlDatum is emitted for this outlink
+                }
+              }
               crawlOut.append(new UTF8(toUrl),
                               new CrawlDatum(CrawlDatum.STATUS_LINKED,
                                              interval, score));
+            }
           }
         }
         
