Index: src/java/org/apache/nutch/crawl/IpAddressResolver.java
===================================================================
--- src/java/org/apache/nutch/crawl/IpAddressResolver.java	(revision 0)
+++ src/java/org/apache/nutch/crawl/IpAddressResolver.java	(revision 0)
@@ -0,0 +1,218 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.crawl;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.URL;
+import java.util.Random;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapFileOutputFormat;
+import org.apache.hadoop.mapred.MapRunnable;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.nutch.util.NutchConfiguration;
+
+/**
+ * Multithreaded IP resolver: looks up the IP address of every URL in a
+ * crawl db and stores it in the corresponding {@link CrawlDatum}.
+ */
+public class IpAddressResolver implements MapRunnable {
+
+  public static final Logger LOG =
+      LogFormatter.getLogger(IpAddressResolver.class.getName());
+
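+  /** Job configuration key holding the number of resolver threads. */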
+  private static final String NUM_OF_THREADS = "numOfThreads";
+
+  private JobConf job;
+
+
+  private RecordReader input;
+  private OutputCollector output;
+  private Reporter reporter;
+
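+  // run-state and status counters; updated by the resolver threads and
+  // read by the main loop and reportStatus()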
+  private int activeThreads = 0;
+  private long start = System.currentTimeMillis();
+  // written by resolver threads, read by the hang detector in run();
+  // initialized to now so an empty or slow input does not trigger an abort
+  private volatile long lastRequestStart = System.currentTimeMillis();
+  private long processedUrls;
+  private long resolvedIps;
+  private long errors;
+
+  public void configure(JobConf job) {
+    this.job = job;
+  }
+
+  public void close() throws IOException {}
+
+  public void run(RecordReader input, OutputCollector output, Reporter reporter) throws IOException {
+    this.input = input;
+    this.output = output;
+    this.reporter = reporter;
+
+    int numOfThreads = this.job.getInt(NUM_OF_THREADS, 200);
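+    // treat the job as hung when no record has been processed for half
+    // the task timeout, so we can abort before the framework kills the task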
+    long timeout = this.job.getInt("mapred.task.timeout", 10 * 60 * 1000) / 2;
+
+    LOG.info("Resolver: threads: " + numOfThreads);
+    for (int i = 0; i < numOfThreads; i++) {
+      new ResolveThread().start();
+    }
+
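+    // poll once per second until every resolver thread has finished,
+    // aborting early if no thread has made progress within the timeout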
+    do {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {}
+      reportStatus();
+      synchronized (this) {
+        if ((System.currentTimeMillis() - lastRequestStart) > timeout) {
+          LOG.warning("Aborting with " + activeThreads + " hung threads.");
+          return;
+        }
+      }
+
+    } while (activeThreads > 0);
+    reportStatus(); // a final status report
+  }
+
+  class ResolveThread extends Thread {
+
+    public void run() {
+      synchronized (IpAddressResolver.this) {
+        activeThreads++;
+      }
+      try {
+        UTF8 key = new UTF8();
+        CrawlDatum datum = new CrawlDatum();
+        while (true) {
+          try {
+            // serialize reads defensively; the shared RecordReader may not
+            // be thread-safe
+            synchronized (input) {
+              if (!input.next(key, datum)) {
+                break;
+              }
+            }
+          } catch (IOException e) {
+            LOG.severe("resolver caught: " + StringUtils.stringifyException(e));
+            break;
+          }
+
+          lastRequestStart = System.currentTimeMillis();
+
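+          // skip hosts whose datum already carries an address, so that
+          // interrupted runs can be resumed cheaply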
+          if (datum.getIpAddress() == null) {
+            try {
+              String host = new URL(key.toString()).getHost();
+              InetAddress byName = InetAddress.getByName(host);
+              datum.setIpAddress(byName.getAddress());
+              synchronized (IpAddressResolver.this) {
+                resolvedIps++;
+              }
+            } catch (Exception e) {
+              LOG.info(StringUtils.stringifyException(e));
+              synchronized (IpAddressResolver.this) {
+                errors++;
+              }
+            }
+          }
+          // collect the url/datum pair even when resolution fails, so that
+          // no record is lost from the new crawl db
+          synchronized (IpAddressResolver.this) {
+            processedUrls++;
+          }
+          synchronized (output) {
+            output.collect(key, datum);
+          }
+        }
+      } catch (Exception e) {
+        LOG.info(StringUtils.stringifyException(e));
+      } finally {
+        synchronized (IpAddressResolver.this) {
+          activeThreads--;
+        }
+      }
+    }
+  }
+
+  private void reportStatus() throws IOException {
+    String status;
+    synchronized (this) {
+      long elapsed = (System.currentTimeMillis() - start) / 1000;
+      if (elapsed == 0) elapsed = 1; // guard against division by zero
+      status =
+          processedUrls
+              + " processed urls, "
+              + resolvedIps
+              + " resolved Ips, "
+              + errors
+              + " errors, "
+              + (resolvedIps / elapsed)
+              + " IP/sec";
+    }
+    reporter.setStatus(status);
+  }
+
+  private static void resolve(
+      Path crawlDb,
+      Configuration config,
+      int numOfThreads) throws IOException {
+    LOG.info("IpAddress Resolver: starting");
+
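+    // build the updated db in a randomly named temporary directory under
+    // the crawl db, then swap it in once the job has succeeded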
+    Path newCrawlDb =
+        new Path(
+            crawlDb,
+            Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+
+    JobConf job = new JobConf(config);
+    job.setInt(NUM_OF_THREADS, numOfThreads);
+    job.addInputPath(new Path(crawlDb, CrawlDatum.DB_DIR_NAME));
+    job.setInputFormat(SequenceFileInputFormat.class);
+    job.setInputKeyClass(UTF8.class);
+    job.setInputValueClass(CrawlDatum.class);
+
+    job.setMapRunnerClass(IpAddressResolver.class);
+
+    job.setOutputPath(newCrawlDb);
+    job.setOutputFormat(MapFileOutputFormat.class);
+    job.setOutputKeyClass(UTF8.class);
+    job.setOutputValueClass(CrawlDatum.class);
+
+    JobClient.runJob(job);
+
+    // install the new crawl db in place of the existing one
+    FileSystem fs = new JobClient(job).getFs();
+    Path old = new Path(crawlDb, "old");
+    Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME);
+    fs.delete(old);          // remove leftovers of a previous failed run
+    fs.rename(current, old); // keep the old db until the swap completes
+    fs.rename(newCrawlDb, current);
+    fs.delete(old);
+    LOG.info("IpAddress Resolver: done");
+  }
+
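+  /**
+   * Command-line entry point; for example (assuming the class is run via
+   * the Nutch launcher script):
+   * <pre>bin/nutch org.apache.nutch.crawl.IpAddressResolver crawldb 100</pre>
+   */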
+  public static void main(String[] args) throws IOException {
+    String usage = "Usage: IpAddressResolver <crawldb> <numOfThreads>";
+    if (args.length < 2) {
+      System.err.println(usage);
+      return;
+    }
+    try {
+      resolve(
+          new Path(args[0]),
+          NutchConfiguration.create(),
+          Integer.parseInt(args[1]));
+    } catch (Exception e) {
+      System.err.println(e.toString());
+      System.err.println(usage);
+    }
+  }
+}
\ No newline at end of file
Index: src/java/org/apache/nutch/crawl/CrawlDatum.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDatum.java	(revision 411768)
+++ src/java/org/apache/nutch/crawl/CrawlDatum.java	(working copy)
@@ -17,6 +17,8 @@
 package org.apache.nutch.crawl;
 
 import java.io.*;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
 import java.util.*;
 
 import org.apache.hadoop.io.*;
@@ -30,7 +32,7 @@
   public static final String FETCH_DIR_NAME = "crawl_fetch";
   public static final String PARSE_DIR_NAME = "crawl_parse";
 
-  private final static byte CUR_VERSION = 4;
+  private final static byte CUR_VERSION = 5;
 
   public static final byte STATUS_SIGNATURE = 0;
   public static final byte STATUS_DB_UNFETCHED = 1;
@@ -61,6 +63,7 @@
   private float score = 1.0f;
   private byte[] signature = null;
   private long modifiedTime;
+  private byte[] ipAddress = null;
   private MapWritable metaData;
 
   public CrawlDatum() {}
@@ -129,6 +132,13 @@
     return this.metaData;
   }
   
+  public void setIpAddress(byte[] ipAddress) {
+    this.ipAddress = ipAddress;
+  }
+  
+  public byte[] getIpAddress() {
+    return ipAddress;
+  }
 
   //
   // writable methods
@@ -173,6 +183,13 @@
         }
       }
     }
+    ipAddress = null; // reset it in any case
+    if (version > 4) {
+      if (in.readBoolean()) {
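+        // only a 4-byte IPv4 address is serialized (see write())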
+        ipAddress = new byte[4];
+        in.readFully(ipAddress);
+      }
+    }
   }
 
   /** The number of bytes into a CrawlDatum that the score is stored. */
@@ -199,6 +216,12 @@
     } else {
       out.writeBoolean(false);
     }
+    if (ipAddress != null) {
+      out.writeBoolean(true);
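+      // note: readFields() expects exactly 4 bytes, i.e. an IPv4 address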
+      out.write(ipAddress);
+    } else {
+      out.writeBoolean(false);
+    }
   }
 
   /** Copy the contents of another instance into this instance. */
@@ -211,6 +234,7 @@
     this.modifiedTime = that.modifiedTime;
     this.signature = that.signature;
     this.metaData = that.metaData;
+    this.ipAddress = that.ipAddress;
   }
 
 
@@ -291,6 +315,13 @@
     buf.append("Retry interval: " + getFetchInterval() + " days\n");
     buf.append("Score: " + getScore() + "\n");
     buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n");
+    if (this.ipAddress != null && this.ipAddress.length != 0) {
+      try {
+        buf.append("Ip: " + InetAddress.getByAddress(this.ipAddress).toString() + "\n");
+      } catch (UnknownHostException e) {
+        // ignore an unparseable address; the field is informational here
+      }
+    }
     buf.append("Metadata: " + (metaData != null ? metaData.toString() : "null") + "\n");
     return buf.toString();
   }
@@ -306,7 +337,8 @@
       (this.retries == other.retries) &&
       (this.fetchInterval == other.fetchInterval) &&
       (SignatureComparator._compare(this.signature, other.signature) == 0) &&
-      (this.score == other.score);
+      (this.score == other.score) &&
+      Arrays.equals(this.ipAddress, other.ipAddress);
     if (!res) return res;
     // allow zero-sized metadata to be equal to null metadata
     if (this.metaData == null) {
Index: src/java/org/apache/nutch/crawl/Generator.java
===================================================================
--- src/java/org/apache/nutch/crawl/Generator.java	(revision 411768)
+++ src/java/org/apache/nutch/crawl/Generator.java	(working copy)
@@ -136,7 +136,12 @@
           host = host.toLowerCase();
           if (byIP) {
             try {
-              InetAddress ia = InetAddress.getByName(host);
+              InetAddress ia;
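+              // prefer the address stored by IpAddressResolver and fall
+              // back to a DNS lookup only when it is missing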
+              if (entry.datum.getIpAddress() != null) {
+                ia = InetAddress.getByAddress(entry.datum.getIpAddress());
+              } else {
+                ia = InetAddress.getByName(host);
+              }
               host = ia.getHostAddress();
             } catch (UnknownHostException uhe) {
               LOG.fine("DNS lookup failed: " + host + ", skipping.");
