Index: src/java/org/apache/nutch/tools/FreeGenerator.java
===================================================================
--- src/java/org/apache/nutch/tools/FreeGenerator.java	(revision 612144)
+++ src/java/org/apache/nutch/tools/FreeGenerator.java	(working copy)
@@ -18,7 +18,9 @@
 package org.apache.nutch.tools;
 
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -77,6 +79,8 @@
         normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
       }
     }
+    
+    Generator.SelectorEntry entry = new Generator.SelectorEntry();
 
     public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException {
       // value is a line of text
@@ -102,12 +106,26 @@
         }
         return;
       }
-      output.collect(url, datum);
+      entry.datum = datum;
+      entry.url = url;
+      output.collect(url, entry);
     }
 
    public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException {
-      // pick just one (discard duplicates)
-      output.collect(key, (Writable)values.next());
+      // pick unique urls from values - discard the reduce key due to hash collisions
+      HashMap<Text, CrawlDatum> unique = new HashMap<Text, CrawlDatum>();
+      while (values.hasNext()) {
+        Generator.SelectorEntry entry = (Generator.SelectorEntry)values.next();
+        // NOTE: Hadoop's reduce-values iterator reuses the Writable instance
+        // it hands out, so entry.url / entry.datum are clobbered on the next
+        // call to next(). Deep-copy both before storing them in the map.
+        CrawlDatum datumCopy = new CrawlDatum();
+        datumCopy.set(entry.datum);
+        unique.put(new Text(entry.url), datumCopy);
+      }
+      // output unique urls
+      for (Entry<Text, CrawlDatum> e : unique.entrySet()) {
+        output.collect(e.getKey(), e.getValue());
+      }
     }
   }
   
@@ -142,6 +156,8 @@
     job.addInputPath(new Path(args[0]));
     job.setInputFormat(TextInputFormat.class);
     job.setMapperClass(FG.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Generator.SelectorEntry.class);
     job.setPartitionerClass(PartitionUrlByHost.class);
     job.setReducerClass(FG.class);
     String segName = Generator.generateSegmentName();
Index: src/java/org/apache/nutch/crawl/Generator.java
===================================================================
--- src/java/org/apache/nutch/crawl/Generator.java	(revision 612144)
+++ src/java/org/apache/nutch/crawl/Generator.java	(working copy)
@@ -267,10 +267,24 @@
 
     public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException {
       SelectorEntry entry = (SelectorEntry)value;
-      output.collect(entry.url, entry.datum);
+      output.collect(entry.url, entry);
     }
   }
+  
+  public static class PartitionReducer extends MapReduceBase implements Reducer {
 
+    public void reduce(WritableComparable key, Iterator values,
+        OutputCollector output, Reporter reporter) throws IOException {
+      // if using HashComparator, we get only one input key in case of hash collision
+      // so use only URLs from values
+      while (values.hasNext()) {
+        SelectorEntry entry = (SelectorEntry)values.next();
+        output.collect(entry.url, entry.datum);
+      }
+    }
+    
+  }
+
   /** Sort fetch lists by hash of URL. */
   public static class HashComparator extends WritableComparator {
     public HashComparator() {
@@ -286,6 +300,7 @@
     }
 
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      // compare by hash only: distinct URLs may collide, which is handled downstream
      int hash1 = hash(b1, s1, l1);
      int hash2 = hash(b2, s2, l2);
      return (hash1 < hash2 ? -1 : (hash1 == hash2 ? 0 : 1));
@@ -457,7 +472,10 @@
     job.setInputFormat(SequenceFileInputFormat.class);
 
     job.setMapperClass(SelectorInverseMapper.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(SelectorEntry.class);
     job.setPartitionerClass(PartitionUrlByHost.class);
+    job.setReducerClass(PartitionReducer.class);
     job.setNumReduceTasks(numLists);
 
     job.setOutputPath(output);
