Index: conf/log4j.properties
===================================================================
--- conf/log4j.properties	(revision 1333878)
+++ conf/log4j.properties	(working copy)
@@ -26,15 +26,17 @@
 #special logging requirements for some commandline tools
 log4j.logger.org.apache.nutch.crawl.Crawl=INFO,cmdstdout
 log4j.logger.org.apache.nutch.crawl.InjectorJob=INFO,cmdstdout
+log4j.logger.org.apache.nutch.host.HostInjectorJob=INFO,cmdstdout
 log4j.logger.org.apache.nutch.crawl.GeneratorJob=INFO,cmdstdout
 log4j.logger.org.apache.nutch.crawl.DbUpdaterJob=INFO,cmdstdout
+log4j.logger.org.apache.nutch.host.HostDbUpdateJob=INFO,cmdstdout
 log4j.logger.org.apache.nutch.fetcher.FetcherJob=INFO,cmdstdout
 log4j.logger.org.apache.nutch.parse.ParserJob=INFO,cmdstdout
 log4j.logger.org.apache.nutch.indexer.IndexerJob=INFO,cmdstdout
 log4j.logger.org.apache.nutch.indexer.solr.SolrIndexerJob=INFO,cmdstdout
 log4j.logger.org.apache.nutch.indexer.DeleteDuplicates=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.IndexMerger=INFO,cmdstdout
 log4j.logger.org.apache.nutch.crawl.WebTableReader=INFO,cmdstdout
+log4j.logger.org.apache.nutch.host.HostDbReader=INFO,cmdstdout
 
 log4j.logger.org.apache.nutch=INFO
 log4j.logger.org.apache.hadoop=WARN
Index: src/java/org/apache/nutch/fetcher/FetcherJob.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherJob.java	(revision 1333878)
+++ src/java/org/apache/nutch/fetcher/FetcherJob.java	(working copy)
@@ -110,7 +110,7 @@
       Utf8 mark = Mark.GENERATE_MARK.checkMark(page);
       if (!NutchJob.shouldProcess(mark, batchId)) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; different batch id");
+          LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; different batch id (" + batchId + ")");
         }
         return;
       }
@@ -265,12 +265,13 @@
     String batchId;
 
     String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] " +
-      "[-threads N] [-parse] [-resume] [-numTasks N]\n" +
-      "\tbatchId\tcrawl identifier returned by Generator, or -all for all generated batchId-s\n" +
-      "\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)\n" +
-      "\t-threads N\tnumber of fetching threads per task\n" +
-      "\t-resume\tresume interrupted job\n" +
-      "\t-numTasks N\tif N > 0 then use this many reduce tasks for fetching (default: mapred.map.tasks)";
+      "[-threads N] [-parse] \n \t \t  [-resume] [-numTasks N]\n" +
+      "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n" +
+      "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n" +
+      "    -threads N    - number of fetching threads per task\n" +
+      "    -parse        - if specified then fetcher will immediately parse fetched content\n" +
+      "    -resume       - resume interrupted job\n" +
+      "    -numTasks N   - if N > 0 then use this many reduce tasks for fetching \n \t \t    (default: mapred.map.tasks)";
 
     if (args.length == 0) {
       System.err.println(usage);
Index: src/java/org/apache/nutch/indexer/IndexerJob.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerJob.java	(revision 1333878)
+++ src/java/org/apache/nutch/indexer/IndexerJob.java	(working copy)
@@ -80,7 +80,7 @@
       if (!batchId.equals(REINDEX)) {
         if (!NutchJob.shouldProcess(mark, batchId)) {
           if (LOG.isDebugEnabled()) {
-            LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; different batch id");
+            LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; different batch id (" + batchId + ")");
           }
           return;
         }
Index: src/java/org/apache/nutch/crawl/WebTableReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/WebTableReader.java	(revision 1333878)
+++ src/java/org/apache/nutch/crawl/WebTableReader.java	(working copy)
@@ -419,21 +419,17 @@
   public int run(String[] args) throws Exception {
     if (args.length < 1) {
       System.err
-          .println("Usage: WebTableReader (-stats | -url [url] | -dump <out_dir> [-regex regex]) [-crawlId <id>] [-content] [-headers] [-links] [-text]");
-      System.err.println("\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
-      System.err
-          .println("\t-stats [-sort] \tprint overall statistics to System.out");
-      System.err.println("\t\t[-sort]\tlist status sorted by host");
-      System.err
-          .println("\t-url <url>\tprint information on <url> to System.out");
-      System.err
-          .println("\t-dump <out_dir> [-regex regex]\tdump the webtable to a text file in <out_dir>");
-      System.err.println("\t\t-content\tdump also raw content");
-      System.err.println("\t\t-headers\tdump protocol headers");
-      System.err.println("\t\t-links\tdump links");
-      System.err.println("\t\t-text\tdump extracted text");
-      System.err
-          .println("\t\t[-regex]\tfilter on the URL of the webtable entry");
+          .println("Usage: WebTableReader (-stats | -url [url] | -dump <out_dir> [-regex regex]) \n \t \t      [-crawlId <id>] [-content] [-headers] [-links] [-text]");
+      System.err.println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t     (default: storage.crawl.id)");
+      System.err.println("    -stats [-sort] - print overall statistics to System.out");
+      System.err.println("    [-sort]        - list status sorted by host");
+      System.err.println("    -url <url>     - print information on <url> to System.out");
+      System.err.println("    -dump <out_dir> [-regex regex] - dump the webtable to a text file in \n \t \t     <out_dir>");
+      System.err.println("    -content       - dump also raw content");
+      System.err.println("    -headers       - dump protocol headers");
+      System.err.println("    -links         - dump links");
+      System.err.println("    -text          - dump extracted text");
+      System.err.println("    [-regex]       - filter on the URL of the webtable entry");
       return -1;
     }
     String param = null;
Index: src/java/org/apache/nutch/parse/ParserJob.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserJob.java	(revision 1333878)
+++ src/java/org/apache/nutch/parse/ParserJob.java	(working copy)
@@ -269,11 +269,11 @@
 
     if (args.length < 1) {
       System.err.println("Usage: ParserJob (<batchId> | -all) [-crawlId <id>] [-resume] [-force]");
-      System.err.println("\tbatchId\tsymbolic batch ID created by Generator");
-      System.err.println("\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
-      System.err.println("\t-all\tconsider pages from all crawl jobs");
-      System.err.println("-resume\tresume a previous incomplete job");
-      System.err.println("-force\tforce re-parsing even if a page is already parsed");
+      System.err.println("    <batchId>     - symbolic batch ID created by Generator");
+      System.err.println("    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)");
+      System.err.println("    -all          - consider pages from all crawl jobs");
+      System.err.println("    -resume       - resume a previous incomplete job");
+      System.err.println("    -force        - force re-parsing even if a page is already parsed");
       return -1;
     }
     for (int i = 0; i < args.length; i++) {
Index: src/bin/nutch
===================================================================
--- src/bin/nutch	(revision 1333878)
+++ src/bin/nutch	(working copy)
@@ -49,11 +49,14 @@
   echo "where COMMAND is one of:"
 # echo " crawl one-step crawler for intranets"
   echo " inject		inject new urls into the database"
+  echo " hostinject     creates a new or updates an existing host table from a text file"
   echo " generate 	generate new segments to fetch from crawl db"
   echo " fetch 		fetch URLs marked during generate"
   echo " parse 		parse URLs marked during fetch"
   echo " updatedb 	update web table after parsing"
+  echo " updatehostdb   update host table after parsing"
   echo " readdb 	read/dump records from page database"
+  echo " readhostdb     display entries from the hostDB"
   echo " solrindex 	run the solr indexer on parsed segments and linkdb"
   echo " solrdedup 	remove duplicates from solr"
   echo " plugin 	load a plugin and run one of its classes main()"
@@ -184,6 +187,8 @@
 CLASS=org.apache.nutch.crawl.Crawler
 elif [ "$COMMAND" = "inject" ] ; then
 CLASS=org.apache.nutch.crawl.InjectorJob
+elif [ "$COMMAND" = "hostinject" ] ; then
+CLASS=org.apache.nutch.host.HostInjectorJob
 elif [ "$COMMAND" = "generate" ] ; then
 CLASS=org.apache.nutch.crawl.GeneratorJob
 elif [ "$COMMAND" = "fetch" ] ; then
@@ -192,8 +197,12 @@
 CLASS=org.apache.nutch.parse.ParserJob
 elif [ "$COMMAND" = "updatedb" ] ; then
 CLASS=org.apache.nutch.crawl.DbUpdaterJob
+elif [ "$COMMAND" = "updatehostdb" ] ; then
+CLASS=org.apache.nutch.host.HostDbUpdateJob
 elif [ "$COMMAND" = "readdb" ] ; then
 CLASS=org.apache.nutch.crawl.WebTableReader
+elif [ "$COMMAND" = "readhostdb" ] ; then
+CLASS=org.apache.nutch.host.HostDbReader
 elif [ "$COMMAND" = "solrindex" ] ; then
 CLASS=org.apache.nutch.indexer.solr.SolrIndexerJob
 elif [ "$COMMAND" = "solrdedup" ] ; then
