Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision 1730536)
+++ conf/nutch-default.xml	(working copy)
@@ -539,11 +539,11 @@
 
 <property>
   <name>db.ignore.internal.links</name>
-  <value>true</value>
-  <description>If true, when adding new links to a page, links from
-  the same host are ignored.  This is an effective way to limit the
-  size of the link database, keeping only the highest quality
-  links.
+  <value>false</value>
+  <description>If true, outlinks leading from a page to the same host or
+  domain will be ignored, effectively restricting the crawl to external
+  links only, without creating complex URLFilters.
+  See 'db.ignore.external.links.mode'.
   </description>
 </property>
 
@@ -563,6 +563,14 @@
   <description>Alternative value is byDomain</description>
 </property>
 
+<property>
+  <name>db.ignore.treat.redirects.as.links</name>
+  <value>true</value>
+  <description>If true, redirects are treated as links when ignoring internal
+  or external links. Set to false to follow redirects despite the values for
+  db.ignore.external.links and db.ignore.internal.links.</description>
+</property>
+
  <property>
   <name>db.injector.overwrite</name>
   <value>false</value>
@@ -616,15 +624,6 @@
 </property>
 
 <property>
-  <name>db.max.inlinks</name>
-  <value>10000</value>
-  <description>Maximum number of Inlinks per URL to be kept in LinkDb.
-  If "invertlinks" finds more inlinks than this number, only the first
-  N inlinks will be stored, and the rest will be discarded.
-  </description>
-</property>
-
-<property>
   <name>db.max.outlinks.per.page</name>
   <value>100</value>
   <description>The maximum number of outlinks that we'll process for a page.
@@ -681,6 +680,35 @@
   </description>
 </property>
 
+<!-- linkdb properties -->
+
+<property>
+  <name>linkdb.max.inlinks</name>
+  <value>10000</value>
+  <description>Maximum number of Inlinks per URL to be kept in LinkDb.
+  If "invertlinks" finds more inlinks than this number, only the first
+  N inlinks will be stored, and the rest will be discarded.
+  </description>
+</property>
+
+<property>
+  <name>linkdb.ignore.internal.links</name>
+  <value>true</value>
+  <description>If true, when adding new links to a page, links from
+  the same host are ignored.  This is an effective way to limit the
+  size of the link database, keeping only the highest quality
+  links.
+  </description>
+</property>
+
+<property>
+  <name>linkdb.ignore.external.links</name>
+  <value>false</value>
+  <description>If true, when adding new links to a page, links from
+  a different host are ignored.
+  </description>
+</property>
+
 <!-- generate properties -->
 
 <property>
Index: src/java/org/apache/nutch/crawl/LinkDb.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDb.java	(revision 1730536)
+++ src/java/org/apache/nutch/crawl/LinkDb.java	(working copy)
@@ -48,8 +48,8 @@
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkDb.class);
 
-  public static final String IGNORE_INTERNAL_LINKS = "db.ignore.internal.links";
-  public static final String IGNORE_EXTERNAL_LINKS = "db.ignore.external.links";
+  public static final String IGNORE_INTERNAL_LINKS = "linkdb.ignore.internal.links";
+  public static final String IGNORE_EXTERNAL_LINKS = "linkdb.ignore.external.links";
 
   public static final String CURRENT_NAME = "current";
   public static final String LOCK_NAME = ".locked";
@@ -68,7 +68,7 @@
   }
 
   public void configure(JobConf job) {
-    maxAnchorLength = job.getInt("db.max.anchor.length", 100);
+    maxAnchorLength = job.getInt("linkdb.max.anchor.length", 100);
     ignoreInternalLinks = job.getBoolean(IGNORE_INTERNAL_LINKS, true);
     ignoreExternalLinks = job.getBoolean(IGNORE_EXTERNAL_LINKS, false);
 
Index: src/java/org/apache/nutch/crawl/LinkDbMerger.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbMerger.java	(revision 1730536)
+++ src/java/org/apache/nutch/crawl/LinkDbMerger.java	(working copy)
@@ -55,7 +55,7 @@
  * </p>
  * <p>
  * If more than one LinkDb contains information about the same URL, all inlinks
- * are accumulated, but only at most <code>db.max.inlinks</code> inlinks will
+ * are accumulated, but only at most <code>linkdb.max.inlinks</code> inlinks will
  * ever be added.
  * </p>
  * <p>
@@ -104,7 +104,7 @@
   }
 
   public void configure(JobConf job) {
-    maxInlinks = job.getInt("db.max.inlinks", 10000);
+    maxInlinks = job.getInt("linkdb.max.inlinks", 10000);
   }
 
   public void close() throws IOException {
Index: src/java/org/apache/nutch/fetcher/FetcherThread.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherThread.java	(revision 1730536)
+++ src/java/org/apache/nutch/fetcher/FetcherThread.java	(working copy)
@@ -84,8 +84,10 @@
   private String reprUrl;
   private boolean redirecting;
   private int redirectCount;
+  private boolean ignoreInternalLinks;
   private boolean ignoreExternalLinks;
   private String ignoreExternalLinksMode;
+  private boolean ignoreTreatRedirectsAsLinks;
 
   // Used by fetcher.follow.outlinks.depth in parse
   private int maxOutlinksPerPage;
@@ -174,8 +176,10 @@
     maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
         : maxOutlinksPerPage;
     interval = conf.getInt("db.fetch.interval.default", 2592000);
+    ignoreInternalLinks = conf.getBoolean("db.ignore.internal.links", false);
     ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
     ignoreExternalLinksMode = conf.get("db.ignore.external.links.mode", "byHost");
+    ignoreTreatRedirectsAsLinks = conf.getBoolean("db.ignore.treat.redirects.as.links", true);
     maxOutlinkDepth = conf.getInt("fetcher.follow.outlinks.depth", -1);
     outlinksIgnoreExternal = conf.getBoolean(
         "fetcher.follow.outlinks.ignore.external", false);
@@ -428,22 +432,34 @@
     newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
     newUrl = urlFilters.filter(newUrl);
 
-    if (ignoreExternalLinks) {
+    if (ignoreTreatRedirectsAsLinks) {
       try {
         String origHost = new URL(urlString).getHost().toLowerCase();
         String newHost = new URL(newUrl).getHost().toLowerCase();
-        if (!origHost.equals(newHost)) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug(" - ignoring redirect " + redirType + " from "
-                + urlString + " to " + newUrl
-                + " because external links are ignored");
+        if (ignoreExternalLinks) {
+          if (!origHost.equals(newHost)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug(" - ignoring redirect " + redirType + " from "
+                  + urlString + " to " + newUrl
+                  + " because external links are ignored");
+            }
+            return null;
           }
-          return null;
         }
-      } catch (MalformedURLException e) {
-      }
+
+        if (ignoreInternalLinks) {
+          if (origHost.equals(newHost)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug(" - ignoring redirect " + redirType + " from "
+                  + urlString + " to " + newUrl
+                  + " because internal links are ignored");
+            }
+            return null;
+          }
+        }
+      } catch (MalformedURLException e) { /* ignore: host comparison impossible, follow the redirect as-is */ }
     }
-
+
     if (newUrl != null && !newUrl.equals(urlString)) {
       reprUrl = URLUtil.chooseRepr(reprUrl, newUrl, temp);
       url = new Text(newUrl);
@@ -621,7 +637,7 @@
           // collect outlinks for subsequent db update
           Outlink[] links = parseData.getOutlinks();
           int outlinksToStore = Math.min(maxOutlinks, links.length);
-          if (ignoreExternalLinks) {
+          if (ignoreExternalLinks || ignoreInternalLinks) {
             URL originURL = new URL(url.toString());
             // based on domain?
             if ("bydomain".equalsIgnoreCase(ignoreExternalLinksMode)) {
@@ -648,7 +664,7 @@
             String toUrl = links[i].getToUrl();
 
             toUrl = ParseOutputFormat.filterNormalize(url.toString(), toUrl,
-                origin, ignoreExternalLinks, ignoreExternalLinksMode, urlFilters, normalizers);
+                origin, ignoreInternalLinks, ignoreExternalLinks, ignoreExternalLinksMode, urlFilters, normalizers);
             if (toUrl == null) {
               continue;
             }
Index: src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseOutputFormat.java	(revision 1730536)
+++ src/java/org/apache/nutch/parse/ParseOutputFormat.java	(working copy)
@@ -102,6 +102,8 @@
 
     this.scfilters = new ScoringFilters(job);
     final int interval = job.getInt("db.fetch.interval.default", 2592000);
+    final boolean ignoreInternalLinks = job.getBoolean(
+        "db.ignore.internal.links", false);
     final boolean ignoreExternalLinks = job.getBoolean(
         "db.ignore.external.links", false);
     final String ignoreExternalLinksMode = job.get(
@@ -189,7 +191,7 @@
           crawlOut.append(key, parseMDCrawlDatum);
 
         // need to determine origin (once for all outlinks)
-        if (ignoreExternalLinks) {
+        if (ignoreExternalLinks || ignoreInternalLinks) {
           URL originURL = new URL(fromUrl.toString());
           // based on domain?
           if ("bydomain".equalsIgnoreCase(ignoreExternalLinksMode)) {
@@ -207,7 +209,7 @@
           String newUrl = pstatus.getMessage();
           int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
           newUrl = filterNormalize(fromUrl, newUrl, origin,
-              ignoreExternalLinks, ignoreExternalLinksMode, filters, normalizers,
+              ignoreInternalLinks, ignoreExternalLinks, ignoreExternalLinksMode, filters, normalizers,
               URLNormalizers.SCOPE_FETCHER);
 
           if (newUrl != null) {
@@ -238,7 +240,7 @@
           // Only normalize and filter if fetcher.parse = false
           if (!isParsing) {
             toUrl = ParseOutputFormat.filterNormalize(fromUrl, toUrl, origin,
-                ignoreExternalLinks, ignoreExternalLinksMode, filters, normalizers);
+                ignoreInternalLinks, ignoreExternalLinks, ignoreExternalLinksMode, filters, normalizers);
             if (toUrl == null) {
               continue;
             }
@@ -316,22 +318,22 @@
   }
 
   public static String filterNormalize(String fromUrl, String toUrl,
-      String fromHost, boolean ignoreExternalLinks,
+      String fromHost, boolean ignoreInternalLinks, boolean ignoreExternalLinks,
       String ignoreExternalLinksMode, URLFilters filters,
       URLNormalizers normalizers) {
-    return filterNormalize(fromUrl, toUrl, fromHost, ignoreExternalLinks,
+    return filterNormalize(fromUrl, toUrl, fromHost, ignoreInternalLinks, ignoreExternalLinks,
         ignoreExternalLinksMode, filters, normalizers,
         URLNormalizers.SCOPE_OUTLINK);
   }
 
   public static String filterNormalize(String fromUrl, String toUrl,
-      String origin, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters,
+      String origin, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters,
       URLNormalizers normalizers, String urlNormalizerScope) {
     // ignore links to self (or anchors within the page)
     if (fromUrl.equals(toUrl)) {
       return null;
     }
-    if (ignoreExternalLinks) {
+    if (ignoreExternalLinks || ignoreInternalLinks) {
       URL targetURL = null;
       try {
         targetURL = new URL(toUrl);
@@ -338,15 +340,30 @@
       } catch (MalformedURLException e1) {
         return null; // skip it
       }
-      if ("bydomain".equalsIgnoreCase(ignoreExternalLinksMode)) {
-        String toDomain = URLUtil.getDomainName(targetURL).toLowerCase();
-        if (toDomain == null || !toDomain.equals(origin)) {
-          return null; // skip it
+      if (ignoreExternalLinks) {
+        if ("bydomain".equalsIgnoreCase(ignoreExternalLinksMode)) {
+          String toDomain = URLUtil.getDomainName(targetURL).toLowerCase();
+          if (toDomain == null || !toDomain.equals(origin)) {
+            return null; // skip it
+          }
+        } else {
+          String toHost = targetURL.getHost().toLowerCase();
+          if (toHost == null || !toHost.equals(origin)) {
+            return null; // skip it
+          }
         }
-      } else {
-        String toHost = targetURL.getHost().toLowerCase();
-        if (toHost == null || !toHost.equals(origin)) {
-          return null; // skip it
+      }
+      if (ignoreInternalLinks) {
+        if ("bydomain".equalsIgnoreCase(ignoreExternalLinksMode)) {
+          String toDomain = URLUtil.getDomainName(targetURL).toLowerCase();
+          if (toDomain == null || toDomain.equals(origin)) {
+            return null; // skip it
+          }
+        } else {
+          String toHost = targetURL.getHost().toLowerCase();
+          if (toHost == null || toHost.equals(origin)) {
+            return null; // skip it
+          }
         }
       }
     }
