Index: src/bin/crawl
===================================================================
--- src/bin/crawl	(revision 1465241)
+++ src/bin/crawl	(working copy)
@@ -19,7 +19,7 @@
 #
 # 
 # UNLIKE THE NUTCH ALL-IN-ONE-CRAWL COMMAND THIS SCRIPT DOES THE LINK INVERSION AND 
-# INDEXING FOR EACH SEGMENT
+# INDEXING FOR EACH BATCH
 
 SEEDDIR="$1"
 CRAWL_ID="$2"
@@ -111,33 +111,35 @@
 
   echo `date` ": Iteration $a of $LIMIT"
 
+  echo "Generating batchId"
+  batchId=`date +%s`-$RANDOM
+
   echo "Generating a new fetchlist"
-  $bin/nutch generate $commonOptions $CRAWL_ID/crawldb $CRAWL_ID/segments -topN $sizeFetchlist -numFetchers $numSlaves -noFilter -adddays $addDays
+  $bin/nutch generate $commonOptions -topN $sizeFetchlist -noNorm -noFilter -adddays $addDays -crawlId $CRAWL_ID -batchId $batchId
   
   if [ $? -ne 0 ] 
   then exit $? 
   fi
 
-  # TODO capture the batchID
   echo "Fetching : "
-  $bin/nutch fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch -all -crawlId $CRAWL_ID -threads 50
+  $bin/nutch fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $batchId -crawlId $CRAWL_ID -threads 50
 
   if [ $? -ne 0 ] 
   then exit $? 
   fi
 
-  # parsing the segment
+  # parsing the batch
   echo "Parsing : "
   # enable the skipping of records for the parsing so that a dodgy document 
   # so that it does not fail the full task
   skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
-  $bin/nutch parse $commonOptions $skipRecordsOptions -all -crawlId $CRAWL_ID
+  $bin/nutch parse $commonOptions $skipRecordsOptions $batchId -crawlId $CRAWL_ID
 
   if [ $? -ne 0 ] 
   then exit $? 
   fi
 
-  # updatedb with this segment
+  # updatedb with this batch
   echo "CrawlDB update"
   $bin/nutch updatedb $commonOptions
 
Index: src/java/org/apache/nutch/crawl/GeneratorJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorJob.java	(revision 1465241)
+++ src/java/org/apache/nutch/crawl/GeneratorJob.java	(working copy)
@@ -167,10 +167,13 @@
       getConf().setLong(GENERATOR_TOP_N, topN);
     if (filter != null)
       getConf().setBoolean(GENERATOR_FILTER, filter);
-    int randomSeed = Math.abs(new Random().nextInt());
-    batchId = (curTime / 1000) + "-" + randomSeed;
-    getConf().setInt(GENERATOR_RANDOM_SEED, randomSeed);
-    getConf().set(BATCH_ID, batchId);
+    // Fall back to a generated batchId for programmatic callers that
+    // bypass run(String[]) and therefore never set BATCH_ID.
+    if (getConf().get(BATCH_ID) == null) {
+      int randomSeed = new Random().nextInt(Integer.MAX_VALUE);
+      getConf().setInt(GENERATOR_RANDOM_SEED, randomSeed);
+      getConf().set(BATCH_ID, (curTime / 1000) + "-" + randomSeed);
+    }
     getConf().setLong(Nutch.GENERATE_TIME_KEY, System.currentTimeMillis());
     if (norm != null)
       getConf().setBoolean(GENERATOR_NORMALISE, norm);
@@ -186,18 +183,16 @@
     }
     numJobs = 1;
     currentJobNum = 0;
-    currentJob = new NutchJob(getConf(), "generate: " + batchId);
+    currentJob = new NutchJob(getConf(), "generate: " + getConf().get(BATCH_ID));
     StorageUtils.initMapperJob(currentJob, FIELDS, SelectorEntry.class,
         WebPage.class, GeneratorMapper.class, SelectorEntryPartitioner.class, true);
     StorageUtils.initReducerJob(currentJob, GeneratorReducer.class);
     currentJob.waitForCompletion(true);
     ToolUtil.recordJobStatus(null, currentJob, results);
-    results.put(BATCH_ID, batchId);
+    results.put(BATCH_ID, getConf().get(BATCH_ID));
     return results;
   }
   
-  private String batchId;
-  
   /**
    * Mark URLs ready for fetching.
    * @throws ClassNotFoundException
@@ -221,7 +216,7 @@
         Nutch.ARG_CURTIME, curTime,
         Nutch.ARG_FILTER, filter,
         Nutch.ARG_NORMALIZE, norm));
-    batchId =  getConf().get(BATCH_ID);
+    String batchId =  getConf().get(BATCH_ID);
     long finish = System.currentTimeMillis();
     LOG.info("GeneratorJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
     LOG.info("GeneratorJob: generated batch id: " + batchId);
@@ -237,6 +232,7 @@
       System.out.println("    -noNorm        - do not activate the normalizer plugin to normalize the url, default is true ");
       System.out.println("    -adddays       - Adds numDays to the current time to facilitate crawling urls already");
       System.out.println("                     fetched sooner then db.default.fetch.interval. Default value is 0.");
+      System.out.println("    -batchId       - the batch id ");
       System.out.println("----------------------");
       System.out.println("Please set the params.");
       return -1;
@@ -245,6 +241,12 @@
     long curTime = System.currentTimeMillis(), topN = Long.MAX_VALUE;
     boolean filter = true, norm = true;
 
+    // generate a default batchId up front; an explicit -batchId below overrides it
+    int randomSeed = new Random().nextInt(Integer.MAX_VALUE);
+    String batchId = (curTime / 1000) + "-" + randomSeed;
+    getConf().setInt(GENERATOR_RANDOM_SEED, randomSeed);
+    getConf().set(BATCH_ID, batchId);
+
     for (int i = 0; i < args.length; i++) {
       if ("-topN".equals(args[i])) {
         topN = Long.parseLong(args[++i]);
@@ -253,10 +254,15 @@
       } else if ("-noNorm".equals(args[i])) {
         norm = false;
       } else if ("-crawlId".equals(args[i])) {
-        getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
+        getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
       } else if ("-adddays".equals(args[i])) {
         long numDays = Integer.parseInt(args[++i]);
         curTime += numDays * 1000L * 60 * 60 * 24;
+      } else if ("-batchId".equals(args[i])) {
+        getConf().set(BATCH_ID, args[++i]);
+      } else {
+        System.err.println("Unrecognized arg " + args[i]);
+        return -1;
       }
     }
 
