Index: src/java/org/apache/nutch/api/NutchServer.java
===================================================================
--- src/java/org/apache/nutch/api/NutchServer.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/NutchServer.java	(working copy)
@@ -165,7 +165,8 @@
    * Safety and convenience method to determine whether or not it is safe to
    * shut down the server. We make this assertion by consulting the
    * {@link org.apache.nutch.api.NutchApp#jobManager} for a list of jobs with
-   * {@link org.apache.nutch.api.model.response.JobInfo#state} equal to 'RUNNING'.
+   * {@link org.apache.nutch.api.model.response.JobInfo#state} equal to
+   * 'RUNNING'.
    * 
    * @param force
    *          ignore running tasks
Index: src/java/org/apache/nutch/api/impl/NutchServerPoolExecutor.java
===================================================================
--- src/java/org/apache/nutch/api/impl/NutchServerPoolExecutor.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/NutchServerPoolExecutor.java	(working copy)
@@ -103,7 +103,7 @@
 
   public JobInfo getInfo(String jobId) {
     for (JobInfo jobInfo : getAllJobs()) {
-      if(StringUtils.equals(jobId, jobInfo.getId())){
+      if (StringUtils.equals(jobId, jobInfo.getId())) {
         return jobInfo;
       }
     }
Index: src/java/org/apache/nutch/api/impl/RAMConfManager.java
===================================================================
--- src/java/org/apache/nutch/api/impl/RAMConfManager.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/RAMConfManager.java	(working copy)
@@ -89,7 +89,7 @@
     if (!canCreate(nutchConfig)) {
       throw new IllegalArgumentException("Config already exists.");
     }
-    
+
     createHadoopConfig(nutchConfig);
     return nutchConfig.getConfigId();
   }
Index: src/java/org/apache/nutch/api/impl/RAMJobManager.java
===================================================================
--- src/java/org/apache/nutch/api/impl/RAMJobManager.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/RAMJobManager.java	(working copy)
@@ -80,9 +80,10 @@
 
   private NutchTool createTool(JobConfig jobConfig, Configuration conf) {
     if (StringUtils.isNotBlank(jobConfig.getJobClassName())) {
-      return jobFactory.createToolByClassName(jobConfig.getJobClassName(), conf);
+      return jobFactory
+          .createToolByClassName(jobConfig.getJobClassName(), conf);
     }
-    
+
     return jobFactory.createToolByType(jobConfig.getType(), conf);
   }
 
Index: src/java/org/apache/nutch/api/impl/db/DbIterator.java
===================================================================
--- src/java/org/apache/nutch/api/impl/db/DbIterator.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/db/DbIterator.java	(working copy)
@@ -100,7 +100,8 @@
   }
 
   private Map<String, Object> pageAsMap(String url, WebPage page) {
-    Map<String, Object> result = DbPageConverter.convertPage(page, commonFields);
+    Map<String, Object> result = DbPageConverter
+        .convertPage(page, commonFields);
 
     if (CollectionUtils.isEmpty(commonFields) || commonFields.contains("url")) {
       result.put("url", TableUtil.unreverseUrl(url));
Index: src/java/org/apache/nutch/api/impl/db/DbPageConverter.java
===================================================================
--- src/java/org/apache/nutch/api/impl/db/DbPageConverter.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/db/DbPageConverter.java	(working copy)
@@ -103,7 +103,7 @@
     if (CollectionUtils.isEmpty(queryFields)) {
       return Sets.newHashSet(pageFields);
     }
-    
+
     Set<Field> filteredFields = Sets.newLinkedHashSet();
     for (Field field : pageFields) {
       if (queryFields.contains(field.name())) {
Index: src/java/org/apache/nutch/api/impl/package-info.java
===================================================================
--- src/java/org/apache/nutch/api/impl/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Implementations of REST API interfaces.
  */
 package org.apache.nutch.api.impl;
+
Index: src/java/org/apache/nutch/api/model/response/NutchStatus.java
===================================================================
--- src/java/org/apache/nutch/api/model/response/NutchStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/model/response/NutchStatus.java	(working copy)
@@ -54,18 +54,16 @@
     this.jobs = jobs;
   }
 
-  public Collection<JobInfo> getRunningJobs()
-  {
+  public Collection<JobInfo> getRunningJobs() {
     return purgeFinishedFailedJobs(runningJobs);
   }
 
-
   public void setRunningJobs(Collection<JobInfo> runningJobs) {
     this.runningJobs = runningJobs;
   }
 
-  private Collection<JobInfo> purgeFinishedFailedJobs(Collection<JobInfo> runningJobColl)
-  {
+  private Collection<JobInfo> purgeFinishedFailedJobs(
+      Collection<JobInfo> runningJobColl) {
     if (CollectionUtils.isNotEmpty(runningJobColl)) {
       Iterator<JobInfo> runningJobsIterator = runningJobColl.iterator();
       while (runningJobsIterator.hasNext()) {
@@ -73,8 +71,7 @@
 
         if (jobInfo.getState().equals(State.FINISHED)) {
           runningJobsIterator.remove();
-        }
-        else if (jobInfo.getState().equals(State.FAILED)) {
+        } else if (jobInfo.getState().equals(State.FAILED)) {
           runningJobsIterator.remove();
         }
 
Index: src/java/org/apache/nutch/api/package-info.java
===================================================================
--- src/java/org/apache/nutch/api/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * REST API to run and control crawl jobs.
  */
 package org.apache.nutch.api;
+
Index: src/java/org/apache/nutch/api/resources/SeedResource.java
===================================================================
--- src/java/org/apache/nutch/api/resources/SeedResource.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/resources/SeedResource.java	(working copy)
@@ -43,7 +43,8 @@
 
 @Path("/seed")
 public class SeedResource extends AbstractResource {
-  private static final Logger log = LoggerFactory.getLogger(AdminResource.class);
+  private static final Logger log = LoggerFactory
+      .getLogger(AdminResource.class);
 
   @POST
   @Path("/create")
@@ -101,8 +102,8 @@
 
   private RuntimeException handleException(Exception e) {
     log.error("Cannot create seed file!", e);
-    return new WebApplicationException(status(Status.INTERNAL_SERVER_ERROR).entity(
-        "Cannot create seed file!").build());
+    return new WebApplicationException(status(Status.INTERNAL_SERVER_ERROR)
+        .entity("Cannot create seed file!").build());
   }
 
 }
Index: src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(working copy)
@@ -29,13 +29,13 @@
 /**
  * This class provides common methods for implementations of
  * {@link FetchSchedule}.
- *
+ * 
  * @author Andrzej Bialecki
  */
-public abstract class AbstractFetchSchedule
-extends Configured
-implements FetchSchedule {
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractFetchSchedule.class);
+public abstract class AbstractFetchSchedule extends Configured implements
+    FetchSchedule {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(AbstractFetchSchedule.class);
 
   protected int defaultInterval;
   protected int maxInterval;
@@ -59,20 +59,22 @@
   @Override
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     defaultInterval = conf.getInt("db.fetch.interval.default", 0);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
     LOG.info("defaultInterval=" + defaultInterval);
     LOG.info("maxInterval=" + maxInterval);
   }
-  
+
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation sets the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
-   *
-   * @param url URL of the page.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation sets the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
+   * 
+   * @param url
+   *          URL of the page.
    * @param page
    */
   @Override
@@ -84,27 +86,31 @@
 
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page. NOTE: this implementation resets the
-   * retry counter - extending classes should call super.setFetchSchedule() to
+   * successfully fetched page. NOTE: this implementation resets the retry
+   * counter - extending classes should call super.setFetchSchedule() to
    * preserve this behavior.
    */
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     page.setRetriesSinceFetch(0);
   }
 
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%
-   * but the value may never exceed <code>maxInterval</code>.
-   * @param url URL of the page
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50% but the value may
+   * never exceed <code>maxInterval</code>.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than the
+   *         {@code datum} argument, but implementations should
+   *         make sure that the returned value contains at least
+   *         all information from {@code datum}. (The former
+   *         inline "{@param datum}" references were not valid
+   *         Javadoc syntax.)
    */
   @Override
   public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
@@ -121,19 +127,23 @@
   }
 
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases
-   * the retry counter.
-   * @param url URL of the page
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    */
   @Override
   public void setPageRetrySchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime) {
     page.setFetchTime(fetchTime + SECONDS_PER_DAY * 1000L);
     page.setRetriesSinceFetch(page.getRetriesSinceFetch() + 1);
   }
@@ -140,6 +150,7 @@
 
   /**
    * This method return the last fetch time of the CrawlDatum
+   * 
    * @return the date as a long.
    */
   @Override
@@ -148,20 +159,24 @@
   }
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
+   * This method provides information whether the page is suitable for selection
+   * in the current fetchlist. NOTE: a true return value does not guarantee that
+   * the page will be fetched, it just allows it to be included in the further
+   * selection process based on scores. The default implementation checks
+   * <code>fetchTime</code>; if it is higher than
+   * <code>curTime</code> it returns false, and true otherwise. It will
+   * also check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   @Override
   public boolean shouldFetch(String url, WebPage page, long curTime) {
@@ -181,11 +196,14 @@
   /**
    * This method resets fetchTime, fetchInterval, modifiedTime,
    * retriesSinceFetch and page signature, so that it forces refetching.
-   * @param url URL of the page
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
   @Override
   public void forceRefetch(String url, WebPage page, boolean asap) {
@@ -196,10 +214,10 @@
     page.setRetriesSinceFetch(0);
     // TODO: row.setSignature(null) ??
     page.setModifiedTime(0L);
-    if (asap) page.setFetchTime(System.currentTimeMillis());
+    if (asap)
+      page.setFetchTime(System.currentTimeMillis());
   }
 
-
   public Set<WebPage.Field> getFields() {
     return FIELDS;
   }
Index: src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(working copy)
@@ -30,11 +30,12 @@
  * If SYNC_DELTA property is true, then:
  * <ul>
  * <li>calculate a <code>delta = fetchTime - modifiedTime</code></li>
- * <li>try to synchronize with the time of change, by shifting the next fetchTime
- * by a fraction of the difference between the last modification time and the last
- * fetch time. I.e. the next fetch time will be set to
+ * <li>try to synchronize with the time of change, by shifting the next
+ * fetchTime by a fraction of the difference between the last modification time
+ * and the last fetch time. I.e. the next fetch time will be set to
  * <code>fetchTime + fetchInterval - delta * SYNC_DELTA_RATE</code></li>
- * <li>if the adjusted fetch interval is bigger than the delta, then <code>fetchInterval = delta</code>.</li>
+ * <li>if the adjusted fetch interval is bigger than the delta, then
+ * <code>fetchInterval = delta</code>.</li>
  * </ul>
  * </li>
  * <li>the minimum value of fetchInterval may not be smaller than MIN_INTERVAL
@@ -42,10 +43,13 @@
  * <li>the maximum value of fetchInterval may not be bigger than MAX_INTERVAL
  * (default is 365 days).</li>
  * </ul>
- * <p>NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize the algorithm,
- * so that the fetch interval either increases or decreases infinitely, with little
- * relevance to the page changes. Please use {@link #main(String[])} method to
- * test the values before applying them in a production system.</p>
+ * <p>
+ * NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize
+ * the algorithm, so that the fetch interval either increases or decreases
+ * infinitely, with little relevance to the page changes. Please use
+ * {@link #main(String[])} method to test the values before applying them in a
+ * production system.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
@@ -58,51 +62,57 @@
   private int MAX_INTERVAL;
 
   private int MIN_INTERVAL;
-  
+
   private boolean SYNC_DELTA;
 
   private double SYNC_DELTA_RATE;
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     INC_RATE = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f);
     DEC_RATE = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f);
     MIN_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.min_interval", 60);
-    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval", SECONDS_PER_DAY * 365 ); // 1 year
+    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval",
+        SECONDS_PER_DAY * 365); // 1 year
     SYNC_DELTA = conf.getBoolean("db.fetch.schedule.adaptive.sync_delta", true);
-    SYNC_DELTA_RATE = conf.getFloat("db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
+    SYNC_DELTA_RATE = conf.getFloat(
+        "db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
   }
 
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     long refTime = fetchTime;
-    if (modifiedTime <= 0) modifiedTime = fetchTime;
+    if (modifiedTime <= 0)
+      modifiedTime = fetchTime;
     int interval = page.getFetchInterval();
     switch (state) {
-      case FetchSchedule.STATUS_MODIFIED:
-        interval *= (1.0f - DEC_RATE);
-        break;
-      case FetchSchedule.STATUS_NOTMODIFIED:
-        interval *= (1.0f + INC_RATE);
-        break;
-      case FetchSchedule.STATUS_UNKNOWN:
-        break;
+    case FetchSchedule.STATUS_MODIFIED:
+      interval *= (1.0f - DEC_RATE);
+      break;
+    case FetchSchedule.STATUS_NOTMODIFIED:
+      interval *= (1.0f + INC_RATE);
+      break;
+    case FetchSchedule.STATUS_UNKNOWN:
+      break;
     }
     if (SYNC_DELTA) {
       // try to synchronize with the time of change
       // TODO: different from normal class (is delta in seconds)?
-      int delta = (int) ((fetchTime - modifiedTime) / 1000L) ;
-      if (delta > interval) interval = delta;
+      int delta = (int) ((fetchTime - modifiedTime) / 1000L);
+      if (delta > interval)
+        interval = delta;
       refTime = fetchTime - Math.round(delta * SYNC_DELTA_RATE);
     }
-    if (interval < MIN_INTERVAL) interval = MIN_INTERVAL;
-    if (interval > MAX_INTERVAL) interval = MAX_INTERVAL;
-   
+    if (interval < MIN_INTERVAL)
+      interval = MIN_INTERVAL;
+    if (interval > MAX_INTERVAL)
+      interval = MAX_INTERVAL;
+
     page.setFetchInterval(interval);
     page.setFetchTime(refTime + interval * 1000L);
     page.setModifiedTime(modifiedTime);
@@ -109,5 +119,4 @@
     page.setPrevModifiedTime(prevModifiedTime);
   }
 
-
 }
Index: src/java/org/apache/nutch/crawl/CrawlStatus.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/CrawlStatus.java	(working copy)
@@ -21,22 +21,22 @@
 
 public class CrawlStatus {
   /** Page was not fetched yet. */
-  public static final byte STATUS_UNFETCHED      = 0x01;
+  public static final byte STATUS_UNFETCHED = 0x01;
   /** Page was successfully fetched. */
-  public static final byte STATUS_FETCHED        = 0x02;
+  public static final byte STATUS_FETCHED = 0x02;
   /** Page no longer exists. */
-  public static final byte STATUS_GONE           = 0x03;
+  public static final byte STATUS_GONE = 0x03;
   /** Page temporarily redirects to other page. */
-  public static final byte STATUS_REDIR_TEMP     = 0x04;
+  public static final byte STATUS_REDIR_TEMP = 0x04;
   /** Page permanently redirects to other page. */
-  public static final byte STATUS_REDIR_PERM     = 0x05;
+  public static final byte STATUS_REDIR_PERM = 0x05;
   /** Fetching unsuccessful, needs to be retried (transient errors). */
-  public static final byte STATUS_RETRY          = 0x22;
+  public static final byte STATUS_RETRY = 0x22;
   /** Fetching successful - page is not modified. */
-  public static final byte STATUS_NOTMODIFIED    = 0x26;
-  
+  public static final byte STATUS_NOTMODIFIED = 0x26;
+
   private static final Map<Byte, String> NAMES = new HashMap<Byte, String>();
-  
+
   static {
     NAMES.put(STATUS_UNFETCHED, "status_unfetched");
     NAMES.put(STATUS_FETCHED, "status_fetched");
@@ -46,9 +46,9 @@
     NAMES.put(STATUS_RETRY, "status_retry");
     NAMES.put(STATUS_NOTMODIFIED, "status_notmodified");
   }
-  
+
   public static String getName(byte status) {
     return NAMES.get(status);
   }
- 
+
 }
Index: src/java/org/apache/nutch/crawl/DbUpdateMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdateMapper.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DbUpdateMapper.java	(working copy)
@@ -37,8 +37,8 @@
 import org.apache.nutch.util.WebPageWritable;
 import org.apache.gora.mapreduce.GoraMapper;
 
-public class DbUpdateMapper
-extends GoraMapper<String, WebPage, UrlWithScore, NutchWritable> {
+public class DbUpdateMapper extends
+    GoraMapper<String, WebPage, UrlWithScore, NutchWritable> {
   public static final Logger LOG = DbUpdaterJob.LOG;
 
   private ScoringFilters scoringFilters;
@@ -46,8 +46,8 @@
   private final List<ScoreDatum> scoreData = new ArrayList<ScoreDatum>();
 
   private Utf8 batchId;
-  
-  //reuse writables
+
+  // reuse writables
   private UrlWithScore urlWithScore = new UrlWithScore();
   private NutchWritable nutchWritable = new NutchWritable();
   private WebPageWritable pageWritable;
@@ -54,14 +54,15 @@
 
   @Override
   public void map(String key, WebPage page, Context context)
-  throws IOException, InterruptedException {
-   if(Mark.GENERATE_MARK.checkMark(page) == null) {
+      throws IOException, InterruptedException {
+    if (Mark.GENERATE_MARK.checkMark(page) == null) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; not generated yet");
+        LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+            + "; not generated yet");
       }
       return;
     }
-  
+
     String url = TableUtil.unreverseUrl(key);
 
     scoreData.clear();
@@ -68,20 +69,22 @@
     Map<CharSequence, CharSequence> outlinks = page.getOutlinks();
     if (outlinks != null) {
       for (Entry<CharSequence, CharSequence> e : outlinks.entrySet()) {
-                int depth=Integer.MAX_VALUE;
+        int depth = Integer.MAX_VALUE;
         CharSequence depthUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
-        if (depthUtf8 != null) depth=Integer.parseInt(depthUtf8.toString());
-        scoreData.add(new ScoreDatum(0.0f, e.getKey().toString(), 
-            e.getValue().toString(), depth));
+        if (depthUtf8 != null)
+          depth = Integer.parseInt(depthUtf8.toString());
+        scoreData.add(new ScoreDatum(0.0f, e.getKey().toString(), e.getValue()
+            .toString(), depth));
       }
     }
 
     // TODO: Outlink filtering (i.e. "only keep the first n outlinks")
     try {
-      scoringFilters.distributeScoreToOutlinks(url, page, scoreData, (outlinks == null ? 0 : outlinks.size()));
+      scoringFilters.distributeScoreToOutlinks(url, page, scoreData,
+          (outlinks == null ? 0 : outlinks.size()));
     } catch (ScoringFilterException e) {
-      LOG.warn("Distributing score failed for URL: " + key +
-          " exception:" + StringUtils.stringifyException(e));
+      LOG.warn("Distributing score failed for URL: " + key + " exception:"
+          + StringUtils.stringifyException(e));
     }
 
     urlWithScore.setUrl(key);
@@ -104,7 +107,8 @@
   public void setup(Context context) {
     scoringFilters = new ScoringFilters(context.getConfiguration());
     pageWritable = new WebPageWritable(context.getConfiguration(), null);
-    batchId = new Utf8(context.getConfiguration().get(Nutch.BATCH_NAME_KEY,Nutch.ALL_BATCH_ID_STR));
+    batchId = new Utf8(context.getConfiguration().get(Nutch.BATCH_NAME_KEY,
+        Nutch.ALL_BATCH_ID_STR));
   }
 
 }
Index: src/java/org/apache/nutch/crawl/DbUpdateReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdateReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DbUpdateReducer.java	(working copy)
@@ -37,11 +37,11 @@
 import org.apache.nutch.util.WebPageWritable;
 import org.slf4j.Logger;
 
-public class DbUpdateReducer
-extends GoraReducer<UrlWithScore, NutchWritable, String, WebPage> {
+public class DbUpdateReducer extends
+    GoraReducer<UrlWithScore, NutchWritable, String, WebPage> {
 
-  public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";	
-	
+  public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";
+
   public static final Logger LOG = DbUpdaterJob.LOG;
 
   private int retryMax;
@@ -53,11 +53,12 @@
   private int maxLinks;
 
   @Override
-  protected void setup(Context context) throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+      InterruptedException {
     Configuration conf = context.getConfiguration();
     retryMax = conf.getInt("db.fetch.retry.max", 3);
     additionsAllowed = conf.getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
     schedule = FetchScheduleFactory.getFetchSchedule(conf);
     scoringFilters = new ScoringFilters(conf);
     maxLinks = conf.getInt("db.update.max.inlinks", 10000);
@@ -70,7 +71,7 @@
 
     WebPage page = null;
     inlinkedScoreData.clear();
-    
+
     for (NutchWritable nutchWritable : values) {
       Writable val = nutchWritable.get();
       if (val instanceof WebPageWritable) {
@@ -108,10 +109,10 @@
     } else {
       byte status = page.getStatus().byteValue();
       switch (status) {
-      case CrawlStatus.STATUS_FETCHED:         // succesful fetch
-      case CrawlStatus.STATUS_REDIR_TEMP:      // successful fetch, redirected
+      case CrawlStatus.STATUS_FETCHED: // successful fetch
+      case CrawlStatus.STATUS_REDIR_TEMP: // successful fetch, redirected
       case CrawlStatus.STATUS_REDIR_PERM:
-      case CrawlStatus.STATUS_NOTMODIFIED:     // successful fetch, notmodified
+      case CrawlStatus.STATUS_NOTMODIFIED: // successful fetch, notmodified
         int modified = FetchSchedule.STATUS_UNKNOWN;
         if (status == CrawlStatus.STATUS_NOTMODIFIED) {
           modified = FetchSchedule.STATUS_NOTMODIFIED;
@@ -129,8 +130,9 @@
         long prevFetchTime = page.getPrevFetchTime();
         long modifiedTime = page.getModifiedTime();
         long prevModifiedTime = page.getPrevModifiedTime();
-        CharSequence lastModified = page.getHeaders().get(new Utf8("Last-Modified"));
-        if ( lastModified != null ){
+        CharSequence lastModified = page.getHeaders().get(
+            new Utf8("Last-Modified"));
+        if (lastModified != null) {
           try {
             modifiedTime = HttpDateFormat.toLong(lastModified.toString());
             prevModifiedTime = page.getModifiedTime();
@@ -143,15 +145,17 @@
           schedule.forceRefetch(url, page, false);
         break;
       case CrawlStatus.STATUS_RETRY:
-        schedule.setPageRetrySchedule(url, page, 0L, page.getPrevModifiedTime(), page.getFetchTime());
+        schedule.setPageRetrySchedule(url, page, 0L,
+            page.getPrevModifiedTime(), page.getFetchTime());
         if (page.getRetriesSinceFetch() < retryMax) {
-          page.setStatus((int)CrawlStatus.STATUS_UNFETCHED);
+          page.setStatus((int) CrawlStatus.STATUS_UNFETCHED);
         } else {
-          page.setStatus((int)CrawlStatus.STATUS_GONE);
+          page.setStatus((int) CrawlStatus.STATUS_GONE);
         }
         break;
       case CrawlStatus.STATUS_GONE:
-        schedule.setPageGoneSchedule(url, page, 0L, page.getPrevModifiedTime(), page.getFetchTime());
+        schedule.setPageGoneSchedule(url, page, 0L, page.getPrevModifiedTime(),
+            page.getFetchTime());
         break;
       }
     }
@@ -159,27 +163,31 @@
     if (page.getInlinks() != null) {
       page.getInlinks().clear();
     }
-    
+
     // Distance calculation.
     // Retrieve smallest distance from all inlinks distances
     // Calculate new distance for current page: smallest inlink distance plus 1.
-    // If the new distance is smaller than old one (or if old did not exist yet),
+    // If the new distance is smaller than old one (or if old did not exist
+    // yet),
     // write it to the page.
-    int smallestDist=Integer.MAX_VALUE;
+    int smallestDist = Integer.MAX_VALUE;
     for (ScoreDatum inlink : inlinkedScoreData) {
       int inlinkDist = inlink.getDistance();
       if (inlinkDist < smallestDist) {
-        smallestDist=inlinkDist;
+        smallestDist = inlinkDist;
       }
-      page.getInlinks().put(new Utf8(inlink.getUrl()), new Utf8(inlink.getAnchor()));
+      page.getInlinks().put(new Utf8(inlink.getUrl()),
+          new Utf8(inlink.getAnchor()));
     }
     if (smallestDist != Integer.MAX_VALUE) {
-      int oldDistance=Integer.MAX_VALUE;
+      int oldDistance = Integer.MAX_VALUE;
       CharSequence oldDistUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
-      if (oldDistUtf8 != null)oldDistance=Integer.parseInt(oldDistUtf8.toString());
-      int newDistance = smallestDist+1;
+      if (oldDistUtf8 != null)
+        oldDistance = Integer.parseInt(oldDistUtf8.toString());
+      int newDistance = smallestDist + 1;
       if (newDistance < oldDistance) {
-        page.getMarkers().put(DbUpdaterJob.DISTANCE, new Utf8(Integer.toString(newDistance)));
+        page.getMarkers().put(DbUpdaterJob.DISTANCE,
+            new Utf8(Integer.toString(newDistance)));
       }
     }
 
@@ -186,8 +194,8 @@
     try {
       scoringFilters.updateScore(url, page, inlinkedScoreData);
     } catch (ScoringFilterException e) {
-      LOG.warn("Scoring filters failed with exception " +
-                StringUtils.stringifyException(e));
+      LOG.warn("Scoring filters failed with exception "
+          + StringUtils.stringifyException(e));
     }
 
     // clear markers
Index: src/java/org/apache/nutch/crawl/DbUpdaterJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdaterJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DbUpdaterJob.java	(working copy)
@@ -48,10 +48,8 @@
 
   public static final Logger LOG = LoggerFactory.getLogger(DbUpdaterJob.class);
 
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-  private static final Collection<WebPage.Field> FIELDS =
-    new HashSet<WebPage.Field>();
-
   static {
     FIELDS.add(WebPage.Field.OUTLINKS);
     FIELDS.add(WebPage.Field.INLINKS);
@@ -78,35 +76,35 @@
   public DbUpdaterJob(Configuration conf) {
     setConf(conf);
   }
-    
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String crawlId = (String)args.get(Nutch.ARG_CRAWL);
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
+
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String crawlId = (String) args.get(Nutch.ARG_CRAWL);
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
     numJobs = 1;
     currentJobNum = 0;
-    
+
     if (batchId == null) {
       batchId = Nutch.ALL_BATCH_ID_STR;
     }
     getConf().set(Nutch.BATCH_NAME_KEY, batchId);
-    //job.setBoolean(ALL, updateAll);
+    // job.setBoolean(ALL, updateAll);
     ScoringFilters scoringFilters = new ScoringFilters(getConf());
     HashSet<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
     fields.addAll(scoringFilters.getFields());
-    
+
     currentJob = new NutchJob(getConf(), "update-table");
     if (crawlId != null) {
       currentJob.getConfiguration().set(Nutch.CRAWL_ID_KEY, crawlId);
     }
-    
+
     // Partition by {url}, sort by {url,score} and group by {url}.
     // This ensures that the inlinks are sorted by score when they enter
     // the reducer.
-    
+
     currentJob.setPartitionerClass(UrlOnlyPartitioner.class);
     currentJob.setSortComparatorClass(UrlScoreComparator.class);
     currentJob.setGroupingComparatorClass(UrlOnlyComparator.class);
-    
+
     MapFieldValueFilter<String, WebPage> batchIdFilter = getBatchIdFilter(batchId);
     StorageUtils.initMapperJob(currentJob, fields, UrlWithScore.class,
         NutchWritable.class, DbUpdateMapper.class, batchIdFilter);
@@ -129,22 +127,22 @@
     return filter;
   }
 
-  private int updateTable(String crawlId,String batchId) throws Exception {
-    
+  private int updateTable(String crawlId, String batchId) throws Exception {
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("DbUpdaterJob: starting at " + sdf.format(start));
-    
+
     if (batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
       LOG.info("DbUpdaterJob: updatinging all");
     } else {
       LOG.info("DbUpdaterJob: batchId: " + batchId);
     }
-    run(ToolUtil.toArgMap(Nutch.ARG_CRAWL, crawlId,
-            Nutch.ARG_BATCH, batchId));
-    
+    run(ToolUtil.toArgMap(Nutch.ARG_CRAWL, crawlId, Nutch.ARG_BATCH, batchId));
+
     long finish = System.currentTimeMillis();
-    LOG.info("DbUpdaterJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+    LOG.info("DbUpdaterJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
     return 0;
   }
 
@@ -152,9 +150,9 @@
     String crawlId = null;
     String batchId;
 
-    String usage = "Usage: DbUpdaterJob (<batchId> | -all) [-crawlId <id>] " +
-            "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n" +
-            "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n";
+    String usage = "Usage: DbUpdaterJob (<batchId> | -all) [-crawlId <id>] "
+        + "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n"
+        + "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n";
 
     if (args.length == 0) {
       System.err.println(usage);
@@ -171,14 +169,15 @@
       if ("-crawlId".equals(args[i])) {
         getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
       } else {
-        throw new IllegalArgumentException("arg " +args[i]+ " not recognized");
+        throw new IllegalArgumentException("arg " + args[i] + " not recognized");
       }
     }
-    return updateTable(crawlId,batchId);
+    return updateTable(crawlId, batchId);
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new DbUpdaterJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new DbUpdaterJob(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(working copy)
@@ -20,19 +20,18 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * This class implements the default re-fetch schedule. That is, no matter
- * if the page was changed or not, the <code>fetchInterval</code> remains
+ * This class implements the default re-fetch schedule. That is, no matter if
+ * the page was changed or not, the <code>fetchInterval</code> remains
  * unchanged, and the updated page fetchTime will always be set to
  * <code>fetchTime + fetchInterval * 1000</code>.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class DefaultFetchSchedule extends AbstractFetchSchedule {
 
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     page.setFetchTime(fetchTime + page.getFetchInterval() * 1000L);
Index: src/java/org/apache/nutch/crawl/FetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/FetchSchedule.java	(working copy)
@@ -24,29 +24,30 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * This interface defines the contract for implementations that manipulate
- * fetch times and re-fetch intervals.
- *
+ * This interface defines the contract for implementations that manipulate fetch
+ * times and re-fetch intervals.
+ * 
  * @author Andrzej Bialecki
  */
 public interface FetchSchedule extends Configurable {
 
   /** It is unknown whether page was changed since our last visit. */
-  public static final int STATUS_UNKNOWN       = 0;
+  public static final int STATUS_UNKNOWN = 0;
   /** Page is known to have been modified since our last visit. */
-  public static final int STATUS_MODIFIED      = 1;
+  public static final int STATUS_MODIFIED = 1;
   /** Page is known to remain unmodified since our last visit. */
-  public static final int STATUS_NOTMODIFIED    = 2;
+  public static final int STATUS_NOTMODIFIED = 2;
 
   public static final int SECONDS_PER_DAY = 3600 * 24;
 
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation set the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
-   *
-   * @param url URL of the page.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation set the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
+   * 
+   * @param url
+   *          URL of the page.
    * @param page
    */
   public void initializeSchedule(String url, WebPage page);
@@ -53,50 +54,67 @@
 
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page.
-   * Implementations may use supplied arguments to support different re-fetching
-   * schedules.
-   *
-   * @param url url of the page
+   * successfully fetched page. Implementations may use supplied arguments to
+   * support different re-fetching schedules.
+   * 
+   * @param url
+   *          url of the page
    * @param page
-   * @param prevFetchTime previous value of fetch time, or -1 if not available
-   * @param prevModifiedTime previous value of modifiedTime, or -1 if not available
-   * @param fetchTime the latest time, when the page was recently re-fetched. Most FetchSchedule
-   * implementations should update the value in {@param datum} to something greater than this value.
-   * @param modifiedTime last time the content was modified. This information comes from
-   * the protocol implementations, or is set to < 0 if not available. Most FetchSchedule
-   * implementations should update the value in {@param datum} to this value.
-   * @param state if {@link #STATUS_MODIFIED}, then the content is considered to be "changed" before the
-   * <code>fetchTime</code>, if {@link #STATUS_NOTMODIFIED} then the content is known to be unchanged.
-   * This information may be obtained by comparing page signatures before and after fetching. If this
-   * is set to {@link #STATUS_UNKNOWN}, then it is unknown whether the page was changed; implementations
-   * are free to follow a sensible default behavior.
+   * @param prevFetchTime
+   *          previous value of fetch time, or -1 if not available
+   * @param prevModifiedTime
+   *          previous value of modifiedTime, or -1 if not available
+   * @param fetchTime
+   *          the latest time, when the page was recently
+   *          re-fetched. Most FetchSchedule implementations
+   *          should update the value in {@code page} to
+   *          something greater than this value.
+   * @param modifiedTime
+   *          last time the content was modified. This information
+   *          comes from the protocol implementations, or is set
+   *          to &lt; 0 if not available. Most FetchSchedule
+   *          implementations should update the value in
+   *          {@code page} to this value.
+   * @param state
+   *          if {@link #STATUS_MODIFIED}, then the content is considered to be
+   *          "changed" before the <code>fetchTime</code>, if
+   *          {@link #STATUS_NOTMODIFIED} then the content is known to be
+   *          unchanged. This information may be obtained by comparing page
+   *          signatures before and after fetching. If this is set to
+   *          {@link #STATUS_UNKNOWN}, then it is unknown whether the page was
+   *          changed; implementations are free to follow a sensible default
+   *          behavior.
    */
-  public void setFetchSchedule(String url, WebPage page,
-      long prevFetchTime, long prevModifiedTime,
-      long fetchTime, long modifiedTime, int state);
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state);
 
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
+   * 
+   * @param url
+   *          URL of the page
    * @param page
    */
-  public void setPageGoneSchedule(String url, WebPage page,
-      long prevFetchTime, long prevModifiedTime, long fetchTime);
+  public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime);
 
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases the
-   * retry counter.Set
-   * @param url URL of the page
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    */
   public void setPageRetrySchedule(String url, WebPage page,
       long prevFetchTime, long prevModifiedTime, long fetchTime);
@@ -103,36 +121,45 @@
 
   /**
    * Calculates last fetch time of the given CrawlDatum.
+   * 
    * @return the date as a long.
    */
   public long calculateLastFetchTime(WebPage page);
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param row url's row
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * This method provides information whether the page is suitable for
+   * selection in the current fetchlist. NOTE: a true return value does not
+   * guarantee that the page will be fetched, it just allows it to be
+   * included in the further selection process based on scores. The default
+   * implementation checks <code>fetchTime</code>: if it is higher than
+   * <code>curTime</code> it returns false, and true otherwise. It will
+   * also check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
+   * @param row
+   *          url's row
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   public boolean shouldFetch(String url, WebPage page, long curTime);
 
   /**
-   * This method resets fetchTime, fetchInterval, modifiedTime and
-   * page signature, so that it forces refetching.
-   * @param url URL of the page
+   * This method resets fetchTime, fetchInterval, modifiedTime and page
+   * signature, so that it forces refetching.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
   public void forceRefetch(String url, WebPage row, boolean asap);
 
Index: src/java/org/apache/nutch/crawl/FetchScheduleFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(working copy)
@@ -25,20 +25,23 @@
 /** Creates and caches a {@link FetchSchedule} implementation. */
 public class FetchScheduleFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(FetchScheduleFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(FetchScheduleFactory.class);
 
-  private FetchScheduleFactory() {}                   // no public ctor
+  private FetchScheduleFactory() {
+  } // no public ctor
 
   /** Return the FetchSchedule implementation. */
   public static FetchSchedule getFetchSchedule(Configuration conf) {
-    String clazz = conf.get("db.fetch.schedule.class", DefaultFetchSchedule.class.getName());
+    String clazz = conf.get("db.fetch.schedule.class",
+        DefaultFetchSchedule.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    FetchSchedule impl = (FetchSchedule)objectCache.getObject(clazz);
+    FetchSchedule impl = (FetchSchedule) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using FetchSchedule impl: " + clazz);
         Class<?> implClass = Class.forName(clazz);
-        impl = (FetchSchedule)implClass.newInstance();
+        impl = (FetchSchedule) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/crawl/GeneratorJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/GeneratorJob.java	(working copy)
@@ -74,13 +74,14 @@
 
   public static final Logger LOG = LoggerFactory.getLogger(GeneratorJob.class);
 
-  public static class SelectorEntry
-  implements WritableComparable<SelectorEntry> {
+  public static class SelectorEntry implements
+      WritableComparable<SelectorEntry> {
 
     String url;
     float score;
 
-    public SelectorEntry() {  }
+    public SelectorEntry() {
+    }
 
     public SelectorEntry(String url, float score) {
       this.url = url;
@@ -109,7 +110,7 @@
     public int hashCode() {
       final int prime = 31;
       int result = 1;
-      result = prime * result +  url.hashCode();
+      result = prime * result + url.hashCode();
       result = prime * result + Float.floatToIntBits(score);
       return result;
     }
@@ -126,13 +127,13 @@
 
     /**
      * Sets url with score on this writable. Allows for writable reusing.
-     *
+     * 
      * @param url
      * @param score
      */
     public void set(String url, float score) {
-      this.url=url;
-      this.score=score;
+      this.url = url;
+      this.score = score;
     }
   }
 
@@ -144,7 +145,7 @@
 
   static {
     WritableComparator.define(SelectorEntry.class,
-                              new SelectorEntryComparator());
+        new SelectorEntryComparator());
   }
 
   public GeneratorJob() {
@@ -157,24 +158,25 @@
 
   public Collection<WebPage.Field> getFields(Job job) {
     Collection<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
-    fields.addAll(FetchScheduleFactory.getFetchSchedule(job.getConfiguration()).getFields());
+    fields.addAll(FetchScheduleFactory.getFetchSchedule(job.getConfiguration())
+        .getFields());
     return fields;
   }
 
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
     if (batchId != null) {
       getConf().set(GeneratorJob.BATCH_ID, batchId);
     }
-    
+
     // map to inverted subset due for fetch, sort by score
-    Long topN = (Long)args.get(Nutch.ARG_TOPN);
-    Long curTime = (Long)args.get(Nutch.ARG_CURTIME);
+    Long topN = (Long) args.get(Nutch.ARG_TOPN);
+    Long curTime = (Long) args.get(Nutch.ARG_CURTIME);
     if (curTime == null) {
       curTime = System.currentTimeMillis();
     }
-    Boolean filter = (Boolean)args.get(Nutch.ARG_FILTER);
-    Boolean norm = (Boolean)args.get(Nutch.ARG_NORMALIZE);
+    Boolean filter = (Boolean) args.get(Nutch.ARG_FILTER);
+    Boolean norm = (Boolean) args.get(Nutch.ARG_NORMALIZE);
     // map to inverted subset due for fetch, sort by score
     getConf().setLong(GENERATOR_CUR_TIME, curTime);
     if (topN != null)
@@ -185,15 +187,20 @@
     getConf().setLong(Nutch.GENERATE_TIME_KEY, System.currentTimeMillis());
     if (norm != null)
       getConf().setBoolean(GENERATOR_NORMALISE, norm);
-    String mode = getConf().get(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST);
+    String mode = getConf().get(GENERATOR_COUNT_MODE,
+        GENERATOR_COUNT_VALUE_HOST);
     if (GENERATOR_COUNT_VALUE_HOST.equalsIgnoreCase(mode)) {
-      getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_HOST);
     } else if (GENERATOR_COUNT_VALUE_DOMAIN.equalsIgnoreCase(mode)) {
-        getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_DOMAIN);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_DOMAIN);
     } else {
-      LOG.warn("Unknown generator.max.count mode '" + mode + "', using mode=" + GENERATOR_COUNT_VALUE_HOST);
+      LOG.warn("Unknown generator.max.count mode '" + mode + "', using mode="
+          + GENERATOR_COUNT_VALUE_HOST);
       getConf().set(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST);
-      getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_HOST);
     }
     numJobs = 1;
     currentJobNum = 0;
@@ -200,7 +207,8 @@
     currentJob = new NutchJob(getConf(), "generate: " + getConf().get(BATCH_ID));
     Collection<WebPage.Field> fields = getFields(currentJob);
     StorageUtils.initMapperJob(currentJob, fields, SelectorEntry.class,
-        WebPage.class, GeneratorMapper.class, SelectorEntryPartitioner.class, true);
+        WebPage.class, GeneratorMapper.class, SelectorEntryPartitioner.class,
+        true);
     StorageUtils.initReducerJob(currentJob, GeneratorReducer.class);
     currentJob.waitForCompletion(true);
     ToolUtil.recordJobStatus(null, currentJob, results);
@@ -213,6 +221,7 @@
 
   /**
    * Mark URLs ready for fetching.
+   * 
    * @throws ClassNotFoundException
    * @throws InterruptedException
    * */
@@ -229,16 +238,16 @@
     if (topN != Long.MAX_VALUE) {
       LOG.info("GeneratorJob: topN: " + topN);
     }
-    Map<String,Object> results = run(ToolUtil.toArgMap(
-        Nutch.ARG_TOPN, topN,
-        Nutch.ARG_CURTIME, curTime,
-        Nutch.ARG_FILTER, filter,
+    Map<String, Object> results = run(ToolUtil.toArgMap(Nutch.ARG_TOPN, topN,
+        Nutch.ARG_CURTIME, curTime, Nutch.ARG_FILTER, filter,
         Nutch.ARG_NORMALIZE, norm));
-    String batchId =  getConf().get(BATCH_ID);
+    String batchId = getConf().get(BATCH_ID);
     long finish = System.currentTimeMillis();
     long generateCount = (Long) results.get(GENERATE_COUNT);
-    LOG.info("GeneratorJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
-    LOG.info("GeneratorJob: generated batch id: " + batchId + " containing " + generateCount + " URLs");
+    LOG.info("GeneratorJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+    LOG.info("GeneratorJob: generated batch id: " + batchId + " containing "
+        + generateCount + " URLs");
     if (generateCount == 0) {
       return null;
     }
@@ -247,13 +256,20 @@
 
   public int run(String[] args) throws Exception {
     if (args.length <= 0) {
-      System.out.println("Usage: GeneratorJob [-topN N] [-crawlId id] [-noFilter] [-noNorm] [-adddays numDays]");
-      System.out.println("    -topN <N>      - number of top URLs to be selected, default is Long.MAX_VALUE ");
-      System.out.println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\");");
-      System.out.println("    -noFilter      - do not activate the filter plugin to filter the url, default is true ");
-      System.out.println("    -noNorm        - do not activate the normalizer plugin to normalize the url, default is true ");
-      System.out.println("    -adddays       - Adds numDays to the current time to facilitate crawling urls already");
-      System.out.println("                     fetched sooner then db.fetch.interval.default. Default value is 0.");
+      System.out
+          .println("Usage: GeneratorJob [-topN N] [-crawlId id] [-noFilter] [-noNorm] [-adddays numDays]");
+      System.out
+          .println("    -topN <N>      - number of top URLs to be selected, default is Long.MAX_VALUE ");
+      System.out
+          .println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\");");
+      System.out
+          .println("    -noFilter      - do not activate the filter plugin to filter the url, default is true ");
+      System.out
+          .println("    -noNorm        - do not activate the normalizer plugin to normalize the url, default is true ");
+      System.out
+          .println("    -adddays       - Adds numDays to the current time to facilitate crawling urls already");
+      System.out
+          .println("                     fetched sooner then db.fetch.interval.default. Default value is 0.");
       System.out.println("    -batchId       - the batch id ");
       System.out.println("----------------------");
       System.out.println("Please set the params.");
@@ -280,8 +296,8 @@
       } else if ("-adddays".equals(args[i])) {
         long numDays = Integer.parseInt(args[++i]);
         curTime += numDays * 1000L * 60 * 60 * 24;
-      }else if ("-batchId".equals(args[i]))
-        getConf().set(BATCH_ID,args[++i]);
+      } else if ("-batchId".equals(args[i]))
+        getConf().set(BATCH_ID, args[++i]);
       else {
         System.err.println("Unrecognized arg " + args[i]);
         return -1;
@@ -297,7 +313,8 @@
   }
 
   public static void main(String args[]) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new GeneratorJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new GeneratorJob(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/crawl/GeneratorMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorMapper.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/GeneratorMapper.java	(working copy)
@@ -34,8 +34,8 @@
 import java.nio.ByteBuffer;
 import java.util.HashMap;
 
-public class GeneratorMapper
-extends GoraMapper<String, WebPage, SelectorEntry, WebPage> {
+public class GeneratorMapper extends
+    GoraMapper<String, WebPage, SelectorEntry, WebPage> {
 
   private URLFilters filters;
   private URLNormalizers normalizers;
@@ -48,8 +48,8 @@
   private int maxDistance;
 
   @Override
-  public void map(String reversedUrl, WebPage page,
-      Context context) throws IOException, InterruptedException {
+  public void map(String reversedUrl, WebPage page, Context context)
+      throws IOException, InterruptedException {
     String url = TableUtil.unreverseUrl(reversedUrl);
 
     if (Mark.GENERATE_MARK.checkMark(page) != null) {
@@ -57,11 +57,11 @@
       return;
     }
 
-    //filter on distance
+    // filter on distance
     if (maxDistance > -1) {
       CharSequence distanceUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
       if (distanceUtf8 != null) {
-        int distance=Integer.parseInt(distanceUtf8.toString());
+        int distance = Integer.parseInt(distanceUtf8.toString());
         if (distance > maxDistance) {
           return;
         }
@@ -71,15 +71,18 @@
     // If filtering is on don't generate URLs that don't pass URLFilters
     try {
       if (normalise) {
-        url = normalizers.normalize(url, URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+        url = normalizers.normalize(url,
+            URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
       }
       if (filter && filters.filter(url) == null)
         return;
     } catch (URLFilterException e) {
-      GeneratorJob.LOG.warn("Couldn't filter url: {} ({})", url, e.getMessage());
+      GeneratorJob.LOG
+          .warn("Couldn't filter url: {} ({})", url, e.getMessage());
       return;
     } catch (MalformedURLException e) {
-      GeneratorJob.LOG.warn("Couldn't filter url: {} ({})", url, e.getMessage());
+      GeneratorJob.LOG
+          .warn("Couldn't filter url: {} ({})", url, e.getMessage());
       return;
     }
 
@@ -86,8 +89,8 @@
     // check fetch schedule
     if (!schedule.shouldFetch(url, page, curTime)) {
       if (GeneratorJob.LOG.isDebugEnabled()) {
-        GeneratorJob.LOG.debug("-shouldFetch rejected '" + url + "', fetchTime=" +
-            page.getFetchTime() + ", curTime=" + curTime);
+        GeneratorJob.LOG.debug("-shouldFetch rejected '" + url
+            + "', fetchTime=" + page.getFetchTime() + ", curTime=" + curTime);
       }
       return;
     }
@@ -95,7 +98,7 @@
     try {
       score = scoringFilters.generatorSortValue(url, page, score);
     } catch (ScoringFilterException e) {
-      //ignore
+      // ignore
     }
     entry.set(url, score);
     context.write(entry, page);
@@ -110,10 +113,12 @@
       filters = new URLFilters(conf);
     }
     if (normalise) {
-      normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+      normalizers = new URLNormalizers(conf,
+          URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
     }
-    maxDistance=conf.getInt("generate.max.distance", -1);
-    curTime = conf.getLong(GeneratorJob.GENERATOR_CUR_TIME, System.currentTimeMillis());
+    maxDistance = conf.getInt("generate.max.distance", -1);
+    curTime = conf.getLong(GeneratorJob.GENERATOR_CUR_TIME,
+        System.currentTimeMillis());
     schedule = FetchScheduleFactory.getFetchSchedule(conf);
     scoringFilters = new ScoringFilters(conf);
   }
Index: src/java/org/apache/nutch/crawl/GeneratorReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/GeneratorReducer.java	(working copy)
@@ -34,14 +34,15 @@
 import org.apache.nutch.util.TableUtil;
 import org.apache.nutch.util.URLUtil;
 
-/** Reduce class for generate
- *
- * The #reduce() method write a random integer to all generated URLs. This random
- * number is then used by {@link FetcherMapper}.
- *
+/**
+ * Reduce class for generate
+ * 
+ * The #reduce() method write a random integer to all generated URLs. This
+ * random number is then used by {@link FetcherMapper}.
+ * 
  */
-public class GeneratorReducer
-extends GoraReducer<SelectorEntry, WebPage, String, WebPage> {
+public class GeneratorReducer extends
+    GoraReducer<SelectorEntry, WebPage, String, WebPage> {
 
   private long limit;
   private long maxCount;
@@ -81,7 +82,7 @@
       try {
         context.write(TableUtil.reverseUrl(key.url), page);
       } catch (MalformedURLException e) {
-    	context.getCounter("Generator", "MALFORMED_URL").increment(1);
+        context.getCounter("Generator", "MALFORMED_URL").increment(1);
         continue;
       }
       context.getCounter("Generator", "GENERATE_MARK").increment(1);
@@ -90,10 +91,11 @@
   }
 
   @Override
-  protected void setup(Context context)
-      throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+      InterruptedException {
     Configuration conf = context.getConfiguration();
-    long totalLimit = conf.getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
+    long totalLimit = conf
+        .getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
     if (totalLimit == Long.MAX_VALUE) {
       limit = Long.MAX_VALUE;
     } else {
@@ -101,8 +103,8 @@
     }
     maxCount = conf.getLong(GeneratorJob.GENERATOR_MAX_COUNT, -2);
     batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID));
-    String countMode =
-      conf.get(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    String countMode = conf.get(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     if (countMode.equals(GeneratorJob.GENERATOR_COUNT_VALUE_DOMAIN)) {
       byDomain = true;
     }
Index: src/java/org/apache/nutch/crawl/InjectorJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/InjectorJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/InjectorJob.java	(working copy)
@@ -47,14 +47,17 @@
 import java.text.SimpleDateFormat;
 import java.util.*;
 
-/** This class takes a flat file of URLs and adds them to the of pages to be
- * crawled.  Useful for bootstrapping the system.
- * The URL files contain one URL per line, optionally followed by custom metadata
- * separated by tabs with the metadata key separated from the corresponding value by '='. <br>
+/**
+ * This class takes a flat file of URLs and adds them to the list of pages to
+ * be crawled. Useful for bootstrapping the system. The URL files contain one
+ * URL per line, optionally followed by custom metadata separated by tabs with
+ * the metadata key separated from the corresponding value by '='. <br>
  * Note that some metadata keys are reserved : <br>
  * - <i>nutch.score</i> : allows to set a custom score for a specific URL <br>
- * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a specific URL <br>
- * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000 \t userType=open_source
+ * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a
+ * specific URL <br>
+ * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000
+ * \t userType=open_source
  **/
 public class InjectorJob extends NutchTool implements Tool {
 
@@ -63,7 +66,7 @@
   private static final Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
   private static final Utf8 YES_STRING = new Utf8("y");
-  
+
   static {
     FIELDS.add(WebPage.Field.MARKERS);
     FIELDS.add(WebPage.Field.STATUS);
@@ -75,7 +78,7 @@
    * metadata key reserved for setting a custom fetchInterval for a specific URL
    */
   public static String nutchFetchIntervalMDName = "nutch.fetchInterval";
-  
+
   public static class UrlMapper extends
       Mapper<LongWritable, Text, String, WebPage> {
     private URLNormalizers urlNormalizers;
@@ -86,24 +89,25 @@
     private long curTime;
 
     @Override
-    protected void setup(Context context) throws IOException, InterruptedException {
+    protected void setup(Context context) throws IOException,
+        InterruptedException {
       urlNormalizers = new URLNormalizers(context.getConfiguration(),
-        URLNormalizers.SCOPE_INJECT);
+          URLNormalizers.SCOPE_INJECT);
       interval = context.getConfiguration().getInt("db.fetch.interval.default",
-        2592000);
+          2592000);
       filters = new URLFilters(context.getConfiguration());
       scfilters = new ScoringFilters(context.getConfiguration());
       scoreInjected = context.getConfiguration().getFloat("db.score.injected",
-        1.0f);
+          1.0f);
       curTime = context.getConfiguration().getLong("injector.current.time",
-        System.currentTimeMillis());
+          System.currentTimeMillis());
     }
 
     protected void map(LongWritable key, Text value, Context context)
         throws IOException, InterruptedException {
       String url = value.toString().trim(); // value is line of text
-      
-      if (url != null && ( url.length() == 0 || url.startsWith("#") ) ) {
+
+      if (url != null && (url.length() == 0 || url.startsWith("#"))) {
         /* Ignore line that start with # */
         return;
       }
@@ -149,41 +153,43 @@
       if (url == null) {
         context.getCounter("injector", "urls_filtered").increment(1);
         return;
-      } else {                                         // if it passes
-      String reversedUrl = TableUtil.reverseUrl(url);  // collect it
-      WebPage row = WebPage.newBuilder().build();
-      row.setFetchTime(curTime);
-      row.setFetchInterval(customInterval);
+      } else { // if it passes
+        String reversedUrl = TableUtil.reverseUrl(url); // collect it
+        WebPage row = WebPage.newBuilder().build();
+        row.setFetchTime(curTime);
+        row.setFetchInterval(customInterval);
 
-      // now add the metadata
-      Iterator<String> keysIter = metadata.keySet().iterator();
-      while (keysIter.hasNext()) {
-        String keymd = keysIter.next();
-        String valuemd = metadata.get(keymd);
-        row.getMetadata().put(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
-      }
+        // now add the metadata
+        Iterator<String> keysIter = metadata.keySet().iterator();
+        while (keysIter.hasNext()) {
+          String keymd = keysIter.next();
+          String valuemd = metadata.get(keymd);
+          row.getMetadata().put(new Utf8(keymd),
+              ByteBuffer.wrap(valuemd.getBytes()));
+        }
 
-      if (customScore != -1)
-        row.setScore(customScore);
-      else
-        row.setScore(scoreInjected);
+        if (customScore != -1)
+          row.setScore(customScore);
+        else
+          row.setScore(scoreInjected);
 
-      try {
-        scfilters.injectedScore(url, row);
-      } catch (ScoringFilterException e) {
-        if (LOG.isWarnEnabled()) {
-          LOG.warn("Cannot filter injected score for url " + url
-          + ", using default (" + e.getMessage() + ")");
+        try {
+          scfilters.injectedScore(url, row);
+        } catch (ScoringFilterException e) {
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Cannot filter injected score for url " + url
+                + ", using default (" + e.getMessage() + ")");
+          }
         }
+        context.getCounter("injector", "urls_injected").increment(1);
+        row.getMarkers()
+            .put(DbUpdaterJob.DISTANCE, new Utf8(String.valueOf(0)));
+        Mark.INJECT_MARK.putMark(row, YES_STRING);
+        context.write(reversedUrl, row);
       }
-      context.getCounter("injector", "urls_injected").increment(1);
-      row.getMarkers().put(DbUpdaterJob.DISTANCE, new Utf8(String.valueOf(0)));
-      Mark.INJECT_MARK.putMark(row, YES_STRING);
-      context.write(reversedUrl, row);
     }
-    }
   }
-  
+
   public InjectorJob() {
   }
 
@@ -191,12 +197,12 @@
     setConf(conf);
   }
 
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     getConf().setLong("injector.current.time", System.currentTimeMillis());
     Path input;
     Object path = args.get(Nutch.ARG_SEEDDIR);
     if (path instanceof Path) {
-      input = (Path)path;
+      input = (Path) path;
     } else {
       input = new Path(path.toString());
     }
@@ -208,26 +214,30 @@
     currentJob.setMapOutputKeyClass(String.class);
     currentJob.setMapOutputValueClass(WebPage.class);
     currentJob.setOutputFormatClass(GoraOutputFormat.class);
-    
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(),
-      String.class, WebPage.class);
+
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        currentJob.getConfiguration(), String.class, WebPage.class);
     GoraOutputFormat.setOutput(currentJob, store, true);
-    
+
     // NUTCH-1471 Make explicit which datastore class we use
-    Class<? extends DataStore<Object, Persistent>> dataStoreClass = 
-      StorageUtils.getDataStoreClass(currentJob.getConfiguration());
-    LOG.info("InjectorJob: Using " + dataStoreClass + " as the Gora storage class.");
-    
+    Class<? extends DataStore<Object, Persistent>> dataStoreClass = StorageUtils
+        .getDataStoreClass(currentJob.getConfiguration());
+    LOG.info("InjectorJob: Using " + dataStoreClass
+        + " as the Gora storage class.");
+
     currentJob.setReducerClass(Reducer.class);
     currentJob.setNumReduceTasks(0);
-    
+
     currentJob.waitForCompletion(true);
     ToolUtil.recordJobStatus(null, currentJob, results);
 
     // NUTCH-1370 Make explicit #URLs injected @runtime
-    long urlsInjected = currentJob.getCounters().findCounter("injector", "urls_injected").getValue();
-    long urlsFiltered = currentJob.getCounters().findCounter("injector", "urls_filtered").getValue();
-    LOG.info("InjectorJob: total number of urls rejected by filters: " + urlsFiltered);
+    long urlsInjected = currentJob.getCounters()
+        .findCounter("injector", "urls_injected").getValue();
+    long urlsFiltered = currentJob.getCounters()
+        .findCounter("injector", "urls_filtered").getValue();
+    LOG.info("InjectorJob: total number of urls rejected by filters: "
+        + urlsFiltered);
     LOG.info("InjectorJob: total number of urls injected after normalization and filtering: "
         + urlsInjected);
 
@@ -238,10 +248,11 @@
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("InjectorJob: starting at " + sdf.format(start));
-    LOG.info("InjectorJob: Injecting urlDir: " + urlDir); 
+    LOG.info("InjectorJob: Injecting urlDir: " + urlDir);
     run(ToolUtil.toArgMap(Nutch.ARG_SEEDDIR, urlDir));
     long end = System.currentTimeMillis();
-    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   @Override
@@ -252,7 +263,7 @@
     }
     for (int i = 1; i < args.length; i++) {
       if ("-crawlId".equals(args[i])) {
-        getConf().set(Nutch.CRAWL_ID_KEY, args[i+1]);
+        getConf().set(Nutch.CRAWL_ID_KEY, args[i + 1]);
         i++;
       } else {
         System.err.println("Unrecognized arg " + args[i]);
@@ -270,7 +281,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new InjectorJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new InjectorJob(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/crawl/MD5Signature.java
===================================================================
--- src/java/org/apache/nutch/crawl/MD5Signature.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/MD5Signature.java	(working copy)
@@ -26,10 +26,10 @@
 import java.util.HashSet;
 
 /**
- * Default implementation of a page signature. It calculates an MD5 hash
- * of the raw binary content of a page. In case there is no content, it
- * calculates a hash from the page's URL.
- *
+ * Default implementation of a page signature. It calculates an MD5 hash of the
+ * raw binary content of a page. In case there is no content, it calculates a
+ * hash from the page's URL.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class MD5Signature extends Signature {
@@ -52,8 +52,7 @@
         data = null;
         of = 0;
         cb = 0;
-      }
-      else {
+      } else {
         data = baseUrl.getBytes();
         of = 0;
         cb = baseUrl.length();
Index: src/java/org/apache/nutch/crawl/NutchWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/NutchWritable.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/NutchWritable.java	(working copy)
@@ -26,12 +26,12 @@
 
   static {
     CLASSES = (Class<? extends Writable>[]) new Class<?>[] {
-      org.apache.nutch.scoring.ScoreDatum.class,
-      org.apache.nutch.util.WebPageWritable.class
-    };
+        org.apache.nutch.scoring.ScoreDatum.class,
+        org.apache.nutch.util.WebPageWritable.class };
   }
 
-  public NutchWritable() { }
+  public NutchWritable() {
+  }
 
   public NutchWritable(Writable instance) {
     set(instance);
Index: src/java/org/apache/nutch/crawl/SignatureComparator.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureComparator.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/SignatureComparator.java	(working copy)
@@ -21,27 +21,38 @@
 
 public class SignatureComparator {
   public static int compare(byte[] data1, byte[] data2) {
-    if (data1 == null && data2 == null) return 0;
-    if (data1 == null) return -1;
-    if (data2 == null) return 1;
+    if (data1 == null && data2 == null)
+      return 0;
+    if (data1 == null)
+      return -1;
+    if (data2 == null)
+      return 1;
     return _compare(data1, 0, data1.length, data2, 0, data2.length);
   }
 
   public static int compare(ByteBuffer buf1, ByteBuffer buf2) {
-    if (buf1 == null && buf2 == null) return 0;
-    if (buf1 == null) return -1;
-    if (buf2 == null) return 1;
-    return _compare(buf1.array(), buf1.arrayOffset() + buf1.position(), buf1.remaining(),
-                    buf2.array(), buf2.arrayOffset() + buf2.position(), buf2.remaining());
+    if (buf1 == null && buf2 == null)
+      return 0;
+    if (buf1 == null)
+      return -1;
+    if (buf2 == null)
+      return 1;
+    return _compare(buf1.array(), buf1.arrayOffset() + buf1.position(),
+        buf1.remaining(), buf2.array(), buf2.arrayOffset() + buf2.position(),
+        buf2.remaining());
   }
-  
-  public static int _compare(byte[] data1, int s1, int l1, byte[] data2, int s2, int l2) {
-    if (l2 > l1) return -1;
-    if (l2 < l1) return 1;
+
+  public static int _compare(byte[] data1, int s1, int l1, byte[] data2,
+      int s2, int l2) {
+    if (l2 > l1)
+      return -1;
+    if (l2 < l1)
+      return 1;
     int res = 0;
     for (int i = 0; i < l1; i++) {
       res = (data1[s1 + i] - data2[s2 + i]);
-      if (res != 0) return res;
+      if (res != 0)
+        return res;
     }
     return 0;
   }
Index: src/java/org/apache/nutch/crawl/SignatureFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/SignatureFactory.java	(working copy)
@@ -28,26 +28,28 @@
 
 /**
  * Factory class, which instantiates a Signature implementation according to the
- * current Configuration configuration. This newly created instance is cached in the
- * Configuration instance, so that it could be later retrieved.
- *
+ * current Configuration configuration. This newly created instance is cached in
+ * the Configuration instance, so that it could be later retrieved.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class SignatureFactory {
-  private static final Logger LOG = LoggerFactory.getLogger(SignatureFactory.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SignatureFactory.class);
 
-  private SignatureFactory() {}                   // no public ctor
+  private SignatureFactory() {
+  } // no public ctor
 
   /** Return the default Signature implementation. */
   public static Signature getSignature(Configuration conf) {
     String clazz = conf.get("db.signature.class", MD5Signature.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    Signature impl = (Signature)objectCache.getObject(clazz);
+    Signature impl = (Signature) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using Signature impl: " + clazz);
         Class<?> implClass = Class.forName(clazz);
-        impl = (Signature)implClass.newInstance();
+        impl = (Signature) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/crawl/TextProfileSignature.java
===================================================================
--- src/java/org/apache/nutch/crawl/TextProfileSignature.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/TextProfileSignature.java	(working copy)
@@ -29,28 +29,33 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * <p>An implementation of a page signature. It calculates an MD5 hash
- * of a plain text "profile" of a page. In case there is no text, it
- * calculates a hash using the {@link MD5Signature}.</p>
- * <p>The algorithm to calculate a page "profile" takes the plain text version of
- * a page and performs the following steps:
+ * <p>
+ * An implementation of a page signature. It calculates an MD5 hash of a plain
+ * text "profile" of a page. In case there is no text, it calculates a hash
+ * using the {@link MD5Signature}.
+ * </p>
+ * <p>
+ * The algorithm to calculate a page "profile" takes the plain text version of a
+ * page and performs the following steps:
  * <ul>
  * <li>remove all characters except letters and digits, and bring all characters
  * to lower case,</li>
  * <li>split the text into tokens (all consecutive non-whitespace characters),</li>
- * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2 characters),</li>
+ * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2
+ * characters),</li>
  * <li>sort the list of tokens by decreasing frequency,</li>
- * <li>round down the counts of tokens to the nearest multiple of QUANT
- * (<code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is 0.01f
- * by default, and <code>maxFreq</code> is the maximum token frequency). If
- * <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2 (which
- * means that tokens with frequency 1 are always discarded).</li>
- * <li>tokens, which frequency after quantization falls below QUANT, are discarded.</li>
- * <li>create a list of tokens and their quantized frequency, separated by spaces,
- * in the order of decreasing frequency.</li>
+ * <li>round down the counts of tokens to the nearest multiple of QUANT (
+ * <code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is
+ * 0.01f by default, and <code>maxFreq</code> is the maximum token frequency).
+ * If <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2
+ * (which means that tokens with frequency 1 are always discarded).</li>
+ * <li>tokens, whose frequency after quantization falls below QUANT, are
+ * discarded.</li>
+ * <li>create a list of tokens and their quantized frequency, separated by
+ * spaces, in the order of decreasing frequency.</li>
  * </ul>
  * This list is then submitted to an MD5 hash calculation.
- *
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class TextProfileSignature extends Signature {
@@ -65,12 +70,16 @@
 
   @Override
   public byte[] calculate(WebPage page) {
-    int MIN_TOKEN_LEN = getConf().getInt("db.signature.text_profile.min_token_len", 2);
-    float QUANT_RATE = getConf().getFloat("db.signature.text_profile.quant_rate", 0.01f);
+    int MIN_TOKEN_LEN = getConf().getInt(
+        "db.signature.text_profile.min_token_len", 2);
+    float QUANT_RATE = getConf().getFloat(
+        "db.signature.text_profile.quant_rate", 0.01f);
     HashMap<String, Token> tokens = new HashMap<String, Token>();
     String text = null;
-    if (page.getText() != null) text = page.getText().toString();
-    if (text == null || text.length() == 0) return fallback.calculate(page);
+    if (page.getText() != null)
+      text = page.getText().toString();
+    if (text == null || text.length() == 0)
+      return fallback.calculate(page);
     StringBuffer curToken = new StringBuffer();
     int maxFreq = 0;
     for (int i = 0; i < text.length(); i++) {
@@ -88,7 +97,8 @@
               tokens.put(s, tok);
             }
             tok.cnt++;
-            if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+            if (tok.cnt > maxFreq)
+              maxFreq = tok.cnt;
           }
           curToken.setLength(0);
         }
@@ -104,7 +114,8 @@
         tokens.put(s, tok);
       }
       tok.cnt++;
-      if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+      if (tok.cnt > maxFreq)
+        maxFreq = tok.cnt;
     }
     Iterator<Token> it = tokens.values().iterator();
     ArrayList<Token> profile = new ArrayList<Token>();
@@ -111,10 +122,12 @@
     // calculate the QUANT value
     int QUANT = Math.round(maxFreq * QUANT_RATE);
     if (QUANT < 2) {
-      if (maxFreq > 1) QUANT = 2;
-      else QUANT = 1;
+      if (maxFreq > 1)
+        QUANT = 2;
+      else
+        QUANT = 1;
     }
-    while(it.hasNext()) {
+    while (it.hasNext()) {
       Token t = it.next();
       // round down to the nearest QUANT
       t.cnt = (t.cnt / QUANT) * QUANT;
@@ -129,7 +142,8 @@
     it = profile.iterator();
     while (it.hasNext()) {
       Token t = it.next();
-      if (newText.length() > 0) newText.append("\n");
+      if (newText.length() > 0)
+        newText.append("\n");
       newText.append(t.toString());
     }
     return MD5Hash.digest(newText.toString()).getDigest();
Index: src/java/org/apache/nutch/crawl/URLPartitioner.java
===================================================================
--- src/java/org/apache/nutch/crawl/URLPartitioner.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/URLPartitioner.java	(working copy)
@@ -40,7 +40,8 @@
  * parameter 'partition.url.mode' which can be 'byHost', 'byDomain' or 'byIP'
  */
 public class URLPartitioner implements Configurable {
-  private static final Logger LOG = LoggerFactory.getLogger(URLPartitioner.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(URLPartitioner.class);
 
   public static final String PARTITION_MODE_KEY = "partition.url.mode";
 
@@ -47,7 +48,7 @@
   public static final String PARTITION_MODE_HOST = "byHost";
   public static final String PARTITION_MODE_DOMAIN = "byDomain";
   public static final String PARTITION_MODE_IP = "byIP";
-  
+
   public static final String PARTITION_URL_SEED = "partition.url.seed";
 
   private Configuration conf;
@@ -77,14 +78,15 @@
 
   public int getPartition(String urlString, int numReduceTasks) {
     if (numReduceTasks == 1) {
-      //this check can be removed when we use Hadoop with MAPREDUCE-1287
+      // this check can be removed when we use Hadoop with MAPREDUCE-1287
       return 0;
     }
-    
+
     int hashCode;
     URL url = null;
     try {
-      urlString = normalizers.normalize(urlString, URLNormalizers.SCOPE_PARTITION);
+      urlString = normalizers.normalize(urlString,
+          URLNormalizers.SCOPE_PARTITION);
       hashCode = urlString.hashCode();
       url = new URL(urlString);
     } catch (MalformedURLException e) {
@@ -91,7 +93,7 @@
       LOG.warn("Malformed URL: '" + urlString + "'");
       hashCode = urlString.hashCode();
     }
-    
+
     if (url != null) {
       if (mode.equals(PARTITION_MODE_HOST)) {
         hashCode = url.getHost().hashCode();
@@ -106,20 +108,20 @@
         }
       }
     }
-    
+
     // make hosts wind up in different partitions on different runs
     hashCode ^= seed;
     return (hashCode & Integer.MAX_VALUE) % numReduceTasks;
   }
-  
-  
-  public static class SelectorEntryPartitioner 
-      extends Partitioner<SelectorEntry, WebPage> implements Configurable {
+
+  public static class SelectorEntryPartitioner extends
+      Partitioner<SelectorEntry, WebPage> implements Configurable {
     private URLPartitioner partitioner = new URLPartitioner();
     private Configuration conf;
-    
+
     @Override
-    public int getPartition(SelectorEntry selectorEntry, WebPage page, int numReduces) {
+    public int getPartition(SelectorEntry selectorEntry, WebPage page,
+        int numReduces) {
       return partitioner.getPartition(selectorEntry.url, numReduces);
     }
 
@@ -130,23 +132,24 @@
 
     @Override
     public void setConf(Configuration conf) {
-      this.conf=conf;
+      this.conf = conf;
       partitioner.setConf(conf);
     }
   }
-  
-  public static class FetchEntryPartitioner
-      extends Partitioner<IntWritable, FetchEntry> implements Configurable {
+
+  public static class FetchEntryPartitioner extends
+      Partitioner<IntWritable, FetchEntry> implements Configurable {
     private URLPartitioner partitioner = new URLPartitioner();
     private Configuration conf;
-    
+
     @Override
-    public int getPartition(IntWritable intWritable, FetchEntry fetchEntry, int numReduces) {
+    public int getPartition(IntWritable intWritable, FetchEntry fetchEntry,
+        int numReduces) {
       String key = fetchEntry.getKey();
       String url = TableUtil.unreverseUrl(key);
       return partitioner.getPartition(url, numReduces);
     }
-    
+
     @Override
     public Configuration getConf() {
       return conf;
@@ -154,9 +157,9 @@
 
     @Override
     public void setConf(Configuration conf) {
-      this.conf=conf;
+      this.conf = conf;
       partitioner.setConf(conf);
     }
   }
-  
+
 }
Index: src/java/org/apache/nutch/crawl/UrlWithScore.java
===================================================================
--- src/java/org/apache/nutch/crawl/UrlWithScore.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/UrlWithScore.java	(working copy)
@@ -90,7 +90,7 @@
   public void setUrl(Text url) {
     this.url = url;
   }
-  
+
   public void setUrl(String url) {
     this.url.set(url);
   }
@@ -102,7 +102,7 @@
   public void setScore(FloatWritable score) {
     this.score = score;
   }
-  
+
   public void setScore(float score) {
     this.score.set(score);
   }
@@ -111,13 +111,12 @@
   public int compareTo(UrlWithScore other) {
     return comp.compare(this, other);
   }
-  
+
   @Override
   public String toString() {
     return "UrlWithScore [url=" + url + ", score=" + score + "]";
   }
 
-
   /**
    * A partitioner by {url}.
    */
@@ -144,7 +143,7 @@
       if (cmp != 0) {
         return cmp;
       }
-      //reverse order
+      // reverse order
       return -o1.getScore().compareTo(o2.getScore());
     }
 
@@ -159,9 +158,9 @@
         if (cmp != 0) {
           return cmp;
         }
-        //reverse order
-        return -floatComp.compare(b1, s1 + deptLen1, l1 - deptLen1, 
-                                  b2, s2 + deptLen2, l2 - deptLen2);
+        // reverse order
+        return -floatComp.compare(b1, s1 + deptLen1, l1 - deptLen1, b2, s2
+            + deptLen2, l2 - deptLen2);
       } catch (IOException e) {
         throw new IllegalArgumentException(e);
       }
Index: src/java/org/apache/nutch/crawl/WebTableReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/WebTableReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/WebTableReader.java	(working copy)
@@ -59,7 +59,8 @@
 
 public class WebTableReader extends NutchTool implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(WebTableReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(WebTableReader.class);
 
   public static class WebTableStatMapper extends
       GoraMapper<String, WebPage, Text, LongWritable> {
@@ -209,12 +210,12 @@
     if (LOG.isInfoEnabled()) {
       LOG.info("WebTable statistics start");
     }
-    
+
     run(ToolUtil.toArgMap(Nutch.ARG_SORT, sort));
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("Statistics for WebTable: ");
-      for (Entry<String,Object> e : results.entrySet()) {
+      for (Entry<String, Object> e : results.entrySet()) {
         LOG.info(e.getKey() + ":\t" + e.getValue());
       }
       LOG.info("WebTable statistics: done");
@@ -223,9 +224,10 @@
 
   /** Prints out the entry to the standard out **/
   private void read(String key, boolean dumpContent, boolean dumpHeaders,
-      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException, IOException, Exception {
-    DataStore<String, WebPage> datastore = StorageUtils.createWebStore(getConf(),
-        String.class, WebPage.class);
+      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException,
+      IOException, Exception {
+    DataStore<String, WebPage> datastore = StorageUtils.createWebStore(
+        getConf(), String.class, WebPage.class);
 
     Query<String, WebPage> query = datastore.newQuery();
     String reversedUrl = TableUtil.reverseUrl(key);
@@ -245,7 +247,7 @@
         String url = TableUtil.unreverseUrl(skey);
         System.out.println(getPageRepresentation(url, page, dumpContent,
             dumpHeaders, dumpLinks, dumpText));
-      }catch (Exception e) {
+      } catch (Exception e) {
         e.printStackTrace();
       }
     }
@@ -280,9 +282,10 @@
       // checks whether the Key passes the regex
       String url = TableUtil.unreverseUrl(key.toString());
       if (regex.matcher(url).matches()) {
-        context.write(new Text(url),
-            new Text(getPageRepresentation(key, value, dumpContent, dumpHeaders,
-                dumpLinks, dumpText)));
+        context.write(
+            new Text(url),
+            new Text(getPageRepresentation(key, value, dumpContent,
+                dumpHeaders, dumpLinks, dumpText)));
       }
     }
 
@@ -292,8 +295,10 @@
         throws IOException, InterruptedException {
       regex = Pattern.compile(context.getConfiguration().get(regexParamName,
           ".+"));
-      dumpContent = context.getConfiguration().getBoolean(contentParamName, false);
-      dumpHeaders = context.getConfiguration().getBoolean(headersParamName, false);
+      dumpContent = context.getConfiguration().getBoolean(contentParamName,
+          false);
+      dumpHeaders = context.getConfiguration().getBoolean(headersParamName,
+          false);
       dumpLinks = context.getConfiguration().getBoolean(linksParamName, false);
       dumpText = context.getConfiguration().getBoolean(textParamName, false);
     }
@@ -317,10 +322,10 @@
     cfg.setBoolean(WebTableRegexMapper.linksParamName, links);
     cfg.setBoolean(WebTableRegexMapper.textParamName, text);
 
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(job
-        .getConfiguration(), String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        job.getConfiguration(), String.class, WebPage.class);
     Query<String, WebPage> query = store.newQuery();
-    //remove the __g__dirty field since it is not stored
+    // remove the __g__dirty field since it is not stored
     String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1,
         WebPage._ALL_FIELDS.length);
     query.setFields(fields);
@@ -342,30 +347,37 @@
   }
 
   private static String getPageRepresentation(String key, WebPage page,
-      boolean dumpContent, boolean dumpHeaders, boolean dumpLinks, boolean dumpText) {
+      boolean dumpContent, boolean dumpHeaders, boolean dumpLinks,
+      boolean dumpText) {
     StringBuffer sb = new StringBuffer();
     sb.append("key:\t" + key).append("\n");
     sb.append("baseUrl:\t" + page.getBaseUrl()).append("\n");
-    sb.append("status:\t").append(page.getStatus()).append(" (").append(
-        CrawlStatus.getName(page.getStatus().byteValue())).append(")\n");
+    sb.append("status:\t").append(page.getStatus()).append(" (")
+        .append(CrawlStatus.getName(page.getStatus().byteValue()))
+        .append(")\n");
     sb.append("fetchTime:\t" + page.getFetchTime()).append("\n");
     sb.append("prevFetchTime:\t" + page.getPrevFetchTime()).append("\n");
-    sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n"); 
-    sb.append("retriesSinceFetch:\t" + page.getRetriesSinceFetch()).append("\n");
+    sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n");
+    sb.append("retriesSinceFetch:\t" + page.getRetriesSinceFetch())
+        .append("\n");
     sb.append("modifiedTime:\t" + page.getModifiedTime()).append("\n");
     sb.append("prevModifiedTime:\t" + page.getPrevModifiedTime()).append("\n");
-    sb.append("protocolStatus:\t" +
-        ProtocolStatusUtils.toString(page.getProtocolStatus())).append("\n");
+    sb.append(
+        "protocolStatus:\t"
+            + ProtocolStatusUtils.toString(page.getProtocolStatus())).append(
+        "\n");
     ByteBuffer prevSig = page.getPrevSignature();
-        if (prevSig != null) {
-      sb.append("prevSignature:\t" + StringUtil.toHexString(prevSig)).append("\n");
+    if (prevSig != null) {
+      sb.append("prevSignature:\t" + StringUtil.toHexString(prevSig)).append(
+          "\n");
     }
     ByteBuffer sig = page.getSignature();
     if (sig != null) {
       sb.append("signature:\t" + StringUtil.toHexString(sig)).append("\n");
     }
-    sb.append("parseStatus:\t" +
-        ParseStatusUtils.toString(page.getParseStatus())).append("\n");
+    sb.append(
+        "parseStatus:\t" + ParseStatusUtils.toString(page.getParseStatus()))
+        .append("\n");
     sb.append("title:\t" + page.getTitle()).append("\n");
     sb.append("score:\t" + page.getScore()).append("\n");
 
@@ -439,22 +451,29 @@
     System.exit(res);
   }
 
-  private static enum Op {READ, STAT, DUMP};
+  private static enum Op {
+    READ, STAT, DUMP
+  };
 
   public int run(String[] args) throws Exception {
     if (args.length < 1) {
       System.err
           .println("Usage: WebTableReader (-stats | -url [url] | -dump <out_dir> [-regex regex]) \n \t \t      [-crawlId <id>] [-content] [-headers] [-links] [-text]");
-      System.err.println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t     (default: storage.crawl.id)");
-      System.err.println("    -stats [-sort] - print overall statistics to System.out");
+      System.err
+          .println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t     (default: storage.crawl.id)");
+      System.err
+          .println("    -stats [-sort] - print overall statistics to System.out");
       System.err.println("    [-sort]        - list status sorted by host");
-      System.err.println("    -url <url>     - print information on <url> to System.out");
-      System.err.println("    -dump <out_dir> [-regex regex] - dump the webtable to a text file in \n \t \t     <out_dir>");
+      System.err
+          .println("    -url <url>     - print information on <url> to System.out");
+      System.err
+          .println("    -dump <out_dir> [-regex regex] - dump the webtable to a text file in \n \t \t     <out_dir>");
       System.err.println("    -content       - dump also raw content");
       System.err.println("    -headers       - dump protocol headers");
       System.err.println("    -links         - dump links");
       System.err.println("    -text          - dump extracted text");
-      System.err.println("    [-regex]       - filter on the URL of the webtable entry");
+      System.err
+          .println("    [-regex]       - filter on the URL of the webtable entry");
       return -1;
     }
     String param = null;
@@ -470,8 +489,8 @@
         if (args[i].equals("-url")) {
           param = args[++i];
           op = Op.READ;
-          //read(param);
-          //return 0;
+          // read(param);
+          // return 0;
         } else if (args[i].equals("-stats")) {
           op = Op.STAT;
         } else if (args[i].equals("-sort")) {
@@ -516,7 +535,7 @@
 
   // for now handles only -stat
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".")
         + "stat_tmp" + System.currentTimeMillis());
 
@@ -523,23 +542,25 @@
     numJobs = 1;
     currentJob = new NutchJob(getConf(), "db_stats");
 
-    currentJob.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
-    
-    Boolean sort = (Boolean)args.get(Nutch.ARG_SORT);
-    if (sort == null) sort = Boolean.FALSE;
+    currentJob.getConfiguration().setBoolean(
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+
+    Boolean sort = (Boolean) args.get(Nutch.ARG_SORT);
+    if (sort == null)
+      sort = Boolean.FALSE;
     currentJob.getConfiguration().setBoolean("db.reader.stats.sort", sort);
 
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob
-        .getConfiguration(), String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        currentJob.getConfiguration(), String.class, WebPage.class);
     Query<String, WebPage> query = store.newQuery();
 
-    //remove the __g__dirty field since it is not stored
+    // remove the __g__dirty field since it is not stored
     String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1,
-            WebPage._ALL_FIELDS.length);
+        WebPage._ALL_FIELDS.length);
     query.setFields(fields);
 
-    GoraMapper.initMapperJob(currentJob, query, store, Text.class, LongWritable.class,
-        WebTableStatMapper.class, null, true);
+    GoraMapper.initMapperJob(currentJob, query, store, Text.class,
+        LongWritable.class, WebTableStatMapper.class, null, true);
 
     currentJob.setCombinerClass(WebTableStatCombiner.class);
     currentJob.setReducerClass(WebTableStatReducer.class);
@@ -596,7 +617,8 @@
     }
 
     LongWritable totalCnt = stats.get("T");
-    if (totalCnt==null)totalCnt=new LongWritable(0);
+    if (totalCnt == null)
+      totalCnt = new LongWritable(0);
     stats.remove("T");
     results.put("TOTAL urls", totalCnt.get());
     for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
@@ -615,14 +637,15 @@
         if (st.length > 2)
           results.put(st[2], val.get());
         else
-          results.put(st[0] + " " + code + " ("
-              + CrawlStatus.getName((byte) code) + ")", val.get());
+          results.put(
+              st[0] + " " + code + " (" + CrawlStatus.getName((byte) code)
+                  + ")", val.get());
       } else
         results.put(k, val.get());
     }
     // removing the tmp folder
     fileSystem.delete(tmpFolder, true);
-    
+
     return results;
   }
 }
Index: src/java/org/apache/nutch/fetcher/FetchEntry.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetchEntry.java	(revision 1650444)
+++ src/java/org/apache/nutch/fetcher/FetchEntry.java	(working copy)
@@ -66,6 +66,5 @@
   public String toString() {
     return "FetchEntry [key=" + key + ", page=" + page + "]";
   }
-  
-  
+
 }
Index: src/java/org/apache/nutch/fetcher/FetcherJob.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/fetcher/FetcherJob.java	(working copy)
@@ -53,7 +53,7 @@
 
 /**
  * Multi-threaded fetcher.
- *
+ * 
  */
 public class FetcherJob extends NutchTool implements Tool {
 
@@ -80,8 +80,8 @@
    * Mapper class for Fetcher.
    * </p>
    * <p>
-   * This class reads the random integer written by {@link GeneratorJob} as its key
-   * while outputting the actual key and value arguments through a
+   * This class reads the random integer written by {@link GeneratorJob} as its
+   * key while outputting the actual key and value arguments through a
    * {@link FetchEntry} instance.
    * </p>
    * <p>
@@ -92,8 +92,8 @@
    * from other hosts as well.
    * </p>
    */
-  public static class FetcherMapper
-  extends GoraMapper<String, WebPage, IntWritable, FetchEntry> {
+  public static class FetcherMapper extends
+      GoraMapper<String, WebPage, IntWritable, FetchEntry> {
 
     private boolean shouldContinue;
 
@@ -105,7 +105,8 @@
     protected void setup(Context context) {
       Configuration conf = context.getConfiguration();
       shouldContinue = conf.getBoolean(RESUME_KEY, false);
-      batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
+      batchId = new Utf8(
+          conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
     }
 
     @Override
@@ -120,12 +121,13 @@
       }
       if (shouldContinue && Mark.FETCH_MARK.checkMark(page) != null) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; already fetched");
+          LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+              + "; already fetched");
         }
         return;
       }
-      context.write(new IntWritable(random.nextInt(65536)), new FetchEntry(context
-          .getConfiguration(), key, page));
+      context.write(new IntWritable(random.nextInt(65536)), new FetchEntry(
+          context.getConfiguration(), key, page));
     }
   }
 
@@ -145,7 +147,8 @@
       ParserJob parserJob = new ParserJob();
       fields.addAll(parserJob.getFields(job));
     }
-    ProtocolFactory protocolFactory = new ProtocolFactory(job.getConfiguration());
+    ProtocolFactory protocolFactory = new ProtocolFactory(
+        job.getConfiguration());
     fields.addAll(protocolFactory.getFields());
 
     return fields;
@@ -152,13 +155,13 @@
   }
 
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     checkConfiguration();
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
-    Integer threads = (Integer)args.get(Nutch.ARG_THREADS);
-    Boolean shouldResume = (Boolean)args.get(Nutch.ARG_RESUME);
-    Integer numTasks = (Integer)args.get(Nutch.ARG_NUMTASKS);
- 
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
+    Integer threads = (Integer) args.get(Nutch.ARG_THREADS);
+    Boolean shouldResume = (Boolean) args.get(Nutch.ARG_RESUME);
+    Integer numTasks = (Integer) args.get(Nutch.ARG_NUMTASKS);
+
     if (threads != null && threads > 0) {
       getConf().setInt(THREADS_KEY, threads);
     }
@@ -169,7 +172,7 @@
     if (shouldResume != null) {
       getConf().setBoolean(RESUME_KEY, shouldResume);
     }
-    
+
     LOG.info("FetcherJob: threads: " + getConf().getInt(THREADS_KEY, 10));
     LOG.info("FetcherJob: parsing: " + getConf().getBoolean(PARSE_KEY, false));
     LOG.info("FetcherJob: resuming: " + getConf().getBoolean(RESUME_KEY, false));
@@ -182,13 +185,14 @@
       timelimit = System.currentTimeMillis() + (timelimit * 60 * 1000);
       getConf().setLong("fetcher.timelimit", timelimit);
     }
-    LOG.info("FetcherJob : timelimit set for : " + getConf().getLong("fetcher.timelimit", -1));
+    LOG.info("FetcherJob : timelimit set for : "
+        + getConf().getLong("fetcher.timelimit", -1));
     numJobs = 1;
     currentJob = new NutchJob(getConf(), "fetch");
-    
+
     // for politeness, don't permit parallel execution of a single task
     currentJob.setReduceSpeculativeExecution(false);
-    
+
     Collection<WebPage.Field> fields = getFields(currentJob);
     MapFieldValueFilter<String, WebPage> batchIdFilter = getBatchIdFilter(batchId);
     StorageUtils.initMapperJob(currentJob, fields, IntWritable.class,
@@ -196,8 +200,8 @@
         batchIdFilter, false);
     StorageUtils.initReducerJob(currentJob, FetcherReducer.class);
     if (numTasks == null || numTasks < 1) {
-      currentJob.setNumReduceTasks(currentJob.getConfiguration().getInt("mapred.map.tasks",
-          currentJob.getNumReduceTasks()));
+      currentJob.setNumReduceTasks(currentJob.getConfiguration().getInt(
+          "mapred.map.tasks", currentJob.getNumReduceTasks()));
     } else {
       currentJob.setNumReduceTasks(numTasks);
     }
@@ -219,19 +223,24 @@
     return filter;
   }
 
-    /**
+  /**
    * Run fetcher.
-   * @param batchId batchId (obtained from Generator) or null to fetch all generated fetchlists
-   * @param threads number of threads per map task
+   * 
+   * @param batchId
+   *          batchId (obtained from Generator) or null to fetch all generated
+   *          fetchlists
+   * @param threads
+   *          number of threads per map task
    * @param shouldResume
-   * @param numTasks number of fetching tasks (reducers). If set to < 1 then use the default,
-   * which is mapred.map.tasks.
+   * @param numTasks
+   *          number of fetching tasks (reducers). If set to < 1 then use the
+   *          default, which is mapred.map.tasks.
    * @return 0 on success
    * @throws Exception
    */
-  public int fetch(String batchId, int threads, boolean shouldResume, int numTasks)
-      throws Exception {
-    
+  public int fetch(String batchId, int threads, boolean shouldResume,
+      int numTasks) throws Exception {
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("FetcherJob: starting at " + sdf.format(start));
@@ -242,15 +251,13 @@
       LOG.info("FetcherJob: batchId: " + batchId);
     }
 
-    run(ToolUtil.toArgMap(
-        Nutch.ARG_BATCH, batchId,
-        Nutch.ARG_THREADS, threads,
-        Nutch.ARG_RESUME, shouldResume,
-        Nutch.ARG_NUMTASKS, numTasks));
-    
+    run(ToolUtil.toArgMap(Nutch.ARG_BATCH, batchId, Nutch.ARG_THREADS, threads,
+        Nutch.ARG_RESUME, shouldResume, Nutch.ARG_NUMTASKS, numTasks));
+
     long finish = System.currentTimeMillis();
-    LOG.info("FetcherJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
-    
+    LOG.info("FetcherJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+
     return 0;
   }
 
@@ -273,13 +280,13 @@
     boolean shouldResume = false;
     String batchId;
 
-    String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] " +
-      "[-threads N] \n \t \t  [-resume] [-numTasks N]\n" +
-      "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n" +
-      "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n" +
-      "    -threads N    - number of fetching threads per task\n" +
-      "    -resume       - resume interrupted job\n" +
-      "    -numTasks N   - if N > 0 then use this many reduce tasks for fetching \n \t \t    (default: mapred.map.tasks)";
+    String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] "
+        + "[-threads N] \n \t \t  [-resume] [-numTasks N]\n"
+        + "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n"
+        + "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n"
+        + "    -threads N    - number of fetching threads per task\n"
+        + "    -resume       - resume interrupted job\n"
+        + "    -numTasks N   - if N > 0 then use this many reduce tasks for fetching \n \t \t    (default: mapred.map.tasks)";
 
     if (args.length == 0) {
       System.err.println(usage);
@@ -303,17 +310,19 @@
       } else if ("-crawlId".equals(args[i])) {
         getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
       } else {
-        throw new IllegalArgumentException("arg " +args[i]+ " not recognized");
+        throw new IllegalArgumentException("arg " + args[i] + " not recognized");
       }
     }
 
-    int fetchcode = fetch(batchId, threads, shouldResume, numTasks); // run the Fetcher
+    int fetchcode = fetch(batchId, threads, shouldResume, numTasks); // run the
+                                                                     // Fetcher
 
     return fetchcode;
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new FetcherJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new FetcherJob(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/fetcher/FetcherReducer.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/fetcher/FetcherReducer.java	(working copy)
@@ -46,8 +46,8 @@
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
-public class FetcherReducer
-extends GoraReducer<IntWritable, FetchEntry, String, WebPage> {
+public class FetcherReducer extends
+    GoraReducer<IntWritable, FetchEntry, String, WebPage> {
 
   public static final Logger LOG = FetcherJob.LOG;
 
@@ -54,12 +54,15 @@
   private final AtomicInteger activeThreads = new AtomicInteger(0);
   private final AtomicInteger spinWaiting = new AtomicInteger(0);
 
-  private final long start = System.currentTimeMillis(); // start time of fetcher run
+  private final long start = System.currentTimeMillis(); // start time of
+                                                         // fetcher run
   private final AtomicLong lastRequestStart = new AtomicLong(start);
 
-  private final AtomicLong bytes = new AtomicLong(0);        // total bytes fetched
-  private final AtomicInteger pages = new AtomicInteger(0);  // total pages fetched
-  private final AtomicInteger errors = new AtomicInteger(0); // total pages errored
+  private final AtomicLong bytes = new AtomicLong(0); // total bytes fetched
+  private final AtomicInteger pages = new AtomicInteger(0); // total pages
+                                                            // fetched
+  private final AtomicInteger errors = new AtomicInteger(0); // total pages
+                                                             // errored
 
   private QueueFeeder feeder;
 
@@ -89,9 +92,10 @@
       this.queueID = queueID;
     }
 
-    /** Create an item. Queue id will be created based on <code>queueMode</code>
-     * argument, either as a protocol + hostname pair, protocol + IP
-     * address pair or protocol+domain pair.
+    /**
+     * Create an item. Queue id will be created based on <code>queueMode</code>
+     * argument, either as a protocol + hostname pair, protocol + IP address
+     * pair or protocol+domain pair.
      */
     public static FetchItem create(String url, WebPage page, String queueMode) {
       String queueID;
@@ -113,19 +117,18 @@
           LOG.warn("Unable to resolve: " + u.getHost() + ", skipping.");
           return null;
         }
-      }
-      else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)){
+      } else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)) {
         host = URLUtil.getDomainName(u);
         if (host == null) {
-          LOG.warn("Unknown domain for url: " + url + ", using URL string as key");
-          host=u.toExternalForm();
+          LOG.warn("Unknown domain for url: " + url
+              + ", using URL string as key");
+          host = u.toExternalForm();
         }
-      }
-      else {
+      } else {
         host = u.getHost();
         if (host == null) {
           LOG.warn("Unknown host for url: " + url + ", using URL string as key");
-          host=u.toExternalForm();
+          host = u.toExternalForm();
         }
       }
       queueID = proto + "://" + host.toLowerCase();
@@ -140,19 +143,22 @@
   }
 
   /**
-   * This class handles FetchItems which come from the same host ID (be it
-   * a proto/hostname or proto/IP pair). It also keeps track of requests in
+   * This class handles FetchItems which come from the same host ID (be it a
+   * proto/hostname or proto/IP pair). It also keeps track of requests in
    * progress and elapsed time between requests.
    */
   private static class FetchItemQueue {
-    List<FetchItem> queue = Collections.synchronizedList(new LinkedList<FetchItem>());
-    Set<FetchItem>  inProgress = Collections.synchronizedSet(new HashSet<FetchItem>());
+    List<FetchItem> queue = Collections
+        .synchronizedList(new LinkedList<FetchItem>());
+    Set<FetchItem> inProgress = Collections
+        .synchronizedSet(new HashSet<FetchItem>());
     AtomicLong nextFetchTime = new AtomicLong();
     long crawlDelay;
     long minCrawlDelay;
     int maxThreads;
 
-    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay, long minCrawlDelay) {
+    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay,
+        long minCrawlDelay) {
       this.maxThreads = maxThreads;
       this.crawlDelay = crawlDelay;
       this.minCrawlDelay = minCrawlDelay;
@@ -176,27 +182,34 @@
     }
 
     public void addFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       queue.add(it);
     }
 
     @SuppressWarnings("unused")
     public void addInProgressFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       inProgress.add(it);
     }
 
     public FetchItem getFetchItem() {
-      if (inProgress.size() >= maxThreads) return null;
+      if (inProgress.size() >= maxThreads)
+        return null;
       final long now = System.currentTimeMillis();
-      if (nextFetchTime.get() > now) return null;
+      if (nextFetchTime.get() > now)
+        return null;
       FetchItem it = null;
-      if (queue.size() == 0) return null;
+      if (queue.size() == 0)
+        return null;
       try {
         it = queue.remove(0);
         inProgress.add(it);
       } catch (final Exception e) {
-        LOG.error("Cannot remove FetchItem from queue or cannot add it to inProgress queue", e);
+        LOG.error(
+            "Cannot remove FetchItem from queue or cannot add it to inProgress queue",
+            e);
       }
       return it;
     }
@@ -220,11 +233,12 @@
 
     private void setEndTime(long endTime, boolean asap) {
       if (!asap)
-        nextFetchTime.set(endTime + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
+        nextFetchTime.set(endTime
+            + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
       else
         nextFetchTime.set(endTime);
     }
-    
+
     public synchronized int emptyQueue() {
       int presize = queue.size();
       queue.clear();
@@ -247,7 +261,7 @@
     long minCrawlDelay;
     Configuration conf;
     long timelimit = -1;
-    
+
     boolean useHostSettings = false;
     HostDb hostDb = null;
 
@@ -260,16 +274,19 @@
       this.maxThreads = conf.getInt("fetcher.threads.per.queue", 1);
       queueMode = conf.get("fetcher.queue.mode", QUEUE_MODE_HOST);
       // check that the mode is known
-      if (!queueMode.equals(QUEUE_MODE_IP) && !queueMode.equals(QUEUE_MODE_DOMAIN)
+      if (!queueMode.equals(QUEUE_MODE_IP)
+          && !queueMode.equals(QUEUE_MODE_DOMAIN)
           && !queueMode.equals(QUEUE_MODE_HOST)) {
-        LOG.error("Unknown partition mode : " + queueMode + " - forcing to byHost");
+        LOG.error("Unknown partition mode : " + queueMode
+            + " - forcing to byHost");
         queueMode = QUEUE_MODE_HOST;
       }
-      LOG.info("Using queue mode : "+queueMode);
-      
-      // Optionally enable host specific queue behavior 
+      LOG.info("Using queue mode : " + queueMode);
+
+      // Optionally enable host specific queue behavior
       if (queueMode.equals(QUEUE_MODE_HOST)) {
-        useHostSettings = conf.getBoolean("fetcher.queue.use.host.settings", false);
+        useHostSettings = conf.getBoolean("fetcher.queue.use.host.settings",
+            false);
         if (useHostSettings) {
           LOG.info("Host specific queue settings enabled.");
           // Initialize the HostDb if we need it.
@@ -276,9 +293,10 @@
           hostDb = new HostDb(conf);
         }
       }
-      
+
       this.crawlDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
-      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay", 0.0f) * 1000);
+      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay",
+          0.0f) * 1000);
       this.timelimit = conf.getLong("fetcher.timelimit", -1);
     }
 
@@ -292,7 +310,8 @@
 
     public void addFetchItem(String url, WebPage page) {
       final FetchItem it = FetchItem.create(url, page, queueMode);
-      if (it != null) addFetchItem(it);
+      if (it != null)
+        addFetchItem(it);
     }
 
     public synchronized void addFetchItem(FetchItem it) {
@@ -321,19 +340,18 @@
         if (useHostSettings) {
           // Use host specific queue settings (if defined in the host table)
           try {
-            String hostname = id.substring(id.indexOf("://")+3);
+            String hostname = id.substring(id.indexOf("://") + 3);
             Host host = hostDb.getByHostName(hostname);
             if (host != null) {
-              fiq = new FetchItemQueue(conf,
-                                       host.getInt("q_mt", maxThreads),
-                                       host.getLong("q_cd", crawlDelay),
-                                       host.getLong("q_mcd", minCrawlDelay));
+              fiq = new FetchItemQueue(conf, host.getInt("q_mt", maxThreads),
+                  host.getLong("q_cd", crawlDelay), host.getLong("q_mcd",
+                      minCrawlDelay));
             }
-            
+
           } catch (IOException e) {
             LOG.error("Error while trying to access host settings", e);
           }
-        } 
+        }
         if (fiq == null) {
           // Use queue defaults
           fiq = new FetchItemQueue(conf, maxThreads, crawlDelay, minCrawlDelay);
@@ -344,8 +362,8 @@
     }
 
     public synchronized FetchItem getFetchItem() {
-      final Iterator<Map.Entry<String, FetchItemQueue>> it =
-        queues.entrySet().iterator();
+      final Iterator<Map.Entry<String, FetchItemQueue>> it = queues.entrySet()
+          .iterator();
       while (it.hasNext()) {
         final FetchItemQueue fiq = it.next().getValue();
         // reap empty queues
@@ -362,7 +380,7 @@
       }
       return null;
     }
-    
+
     public synchronized int checkTimelimit() {
       if (System.currentTimeMillis() >= timelimit && timelimit != -1) {
         return emptyQueues();
@@ -369,12 +387,12 @@
       }
       return 0;
     }
-    
 
     public synchronized void dump() {
       for (final String id : queues.keySet()) {
         final FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id);
         fiq.dump();
       }
@@ -383,11 +401,12 @@
     // empties the queues (used by timebomb and throughput threshold)
     public synchronized int emptyQueues() {
       int count = 0;
-      
+
       // emptying the queues
       for (String id : queues.keySet()) {
         FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id + " >> dropping! ");
         int deleted = fiq.emptyQueue();
         for (int i = 0; i < deleted; i++) {
@@ -398,7 +417,8 @@
       // there might also be a case where totalsize !=0 but number of queues
       // == 0
       // in which case we simply force it to 0 to avoid blocking
-      if (totalSize.get() != 0 && queues.size() == 0) totalSize.set(0);
+      if (totalSize.get() != 0 && queues.size() == 0)
+        totalSize.set(0);
 
       return count;
     }
@@ -420,8 +440,8 @@
     private final boolean ignoreExternalLinks;
 
     public FetcherThread(Context context, int num) {
-      this.setDaemon(true);                       // don't hang JVM on exit
-      this.setName("FetcherThread" + num);        // use an informative name
+      this.setDaemon(true); // don't hang JVM on exit
+      this.setName("FetcherThread" + num); // use an informative name
       this.context = context;
       Configuration conf = context.getConfiguration();
       this.urlFilters = new URLFilters(conf);
@@ -430,7 +450,8 @@
       this.maxCrawlDelay = conf.getInt("fetcher.max.crawl.delay", 30) * 1000;
       // backward-compatible default setting
       this.byIP = conf.getBoolean("fetcher.threads.per.host.by.ip", true);
-      this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
+      this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links",
+          false);
     }
 
     @Override
@@ -446,13 +467,15 @@
           if (fit == null) {
             if (feeder.isAlive() || fetchQueues.getTotalSize() > 0) {
               if (LOG.isDebugEnabled()) {
-                LOG.debug(getName() + " fetchQueues.getFetchItem() was null, spin-waiting ...");
+                LOG.debug(getName()
+                    + " fetchQueues.getFetchItem() was null, spin-waiting ...");
               }
               // spin-wait.
               spinWaiting.incrementAndGet();
               try {
                 Thread.sleep(500);
-              } catch (final Exception e) {}
+              } catch (final Exception e) {
+              }
               spinWaiting.decrementAndGet();
               continue;
             } else {
@@ -467,12 +490,13 @@
             reprUrl = TableUtil.toString(fit.page.getReprUrl());
           }
           try {
-            LOG.info("fetching " + fit.url + " (queue crawl delay=" + 
-                      fetchQueues.getFetchItemQueue(fit.queueID).crawlDelay + "ms)"); 
+            LOG.info("fetching " + fit.url + " (queue crawl delay="
+                + fetchQueues.getFetchItemQueue(fit.queueID).crawlDelay + "ms)");
 
             // fetch the page
             final Protocol protocol = this.protocolFactory.getProtocol(fit.url);
-            final BaseRobotRules rules = protocol.getRobotRules(fit.url, fit.page);
+            final BaseRobotRules rules = protocol.getRobotRules(fit.url,
+                fit.page);
             if (!rules.isAllowed(fit.u.toString())) {
               // unblock
               fetchQueues.finishFetchItem(fit, true);
@@ -487,30 +511,38 @@
               if (rules.getCrawlDelay() > maxCrawlDelay && maxCrawlDelay >= 0) {
                 // unblock
                 fetchQueues.finishFetchItem(fit, true);
-                LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
-                output(fit, null, ProtocolStatusUtils.STATUS_ROBOTS_DENIED, CrawlStatus.STATUS_GONE);
+                LOG.debug("Crawl-Delay for " + fit.url + " too long ("
+                    + rules.getCrawlDelay() + "), skipping");
+                output(fit, null, ProtocolStatusUtils.STATUS_ROBOTS_DENIED,
+                    CrawlStatus.STATUS_GONE);
                 continue;
               } else {
-                final FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
+                final FetchItemQueue fiq = fetchQueues
+                    .getFetchItemQueue(fit.queueID);
                 fiq.crawlDelay = rules.getCrawlDelay();
                 if (LOG.isDebugEnabled()) {
-                  LOG.info("Crawl delay for queue: " + fit.queueID + " is set to " + fiq.crawlDelay + " as per robots.txt. url: " + fit.url);
+                  LOG.info("Crawl delay for queue: " + fit.queueID
+                      + " is set to " + fiq.crawlDelay
+                      + " as per robots.txt. url: " + fit.url);
                 }
               }
             }
-            final ProtocolOutput output = protocol.getProtocolOutput(fit.url, fit.page);
+            final ProtocolOutput output = protocol.getProtocolOutput(fit.url,
+                fit.page);
             final ProtocolStatus status = output.getStatus();
             final Content content = output.getContent();
             // unblock queue
             fetchQueues.finishFetchItem(fit);
 
-            context.getCounter("FetcherStatus", ProtocolStatusUtils.getName(status.getCode())).increment(1);
+            context.getCounter("FetcherStatus",
+                ProtocolStatusUtils.getName(status.getCode())).increment(1);
 
             int length = 0;
-            if (content!=null && content.getContent()!=null) length= content.getContent().length;
+            if (content != null && content.getContent() != null)
+              length = content.getContent().length;
             updateStatus(length);
 
-            switch(status.getCode()) {
+            switch (status.getCode()) {
 
             case ProtocolStatusCodes.WOULDBLOCK:
               // retry ?
@@ -517,11 +549,11 @@
               fetchQueues.addFetchItem(fit);
               break;
 
-            case ProtocolStatusCodes.SUCCESS:        // got a page
+            case ProtocolStatusCodes.SUCCESS: // got a page
               output(fit, content, status, CrawlStatus.STATUS_FETCHED);
               break;
 
-            case ProtocolStatusCodes.MOVED:         // redirect
+            case ProtocolStatusCodes.MOVED: // redirect
             case ProtocolStatusCodes.TEMP_MOVED:
               byte code;
               boolean temp;
@@ -533,18 +565,19 @@
                 temp = true;
               }
               final String newUrl = ProtocolStatusUtils.getMessage(status);
-              handleRedirect(fit.url, newUrl, temp,  FetcherJob.PROTOCOL_REDIR, fit.page);
+              handleRedirect(fit.url, newUrl, temp, FetcherJob.PROTOCOL_REDIR,
+                  fit.page);
               output(fit, content, status, code);
               break;
             case ProtocolStatusCodes.EXCEPTION:
               logFetchFailure(fit.url, ProtocolStatusUtils.getMessage(status));
               /* FALLTHROUGH */
-            case ProtocolStatusCodes.RETRY:          // retry
+            case ProtocolStatusCodes.RETRY: // retry
             case ProtocolStatusCodes.BLOCKED:
               output(fit, null, status, CrawlStatus.STATUS_RETRY);
               break;
 
-            case ProtocolStatusCodes.GONE:           // gone
+            case ProtocolStatusCodes.GONE: // gone
             case ProtocolStatusCodes.NOTFOUND:
             case ProtocolStatusCodes.ACCESS_DENIED:
             case ProtocolStatusCodes.ROBOTS_DENIED:
@@ -562,7 +595,7 @@
               output(fit, null, status, CrawlStatus.STATUS_RETRY);
             }
 
-          } catch (final Throwable t) {                 // unexpected exception
+          } catch (final Throwable t) { // unexpected exception
             // unblock
             fetchQueues.finishFetchItem(fit);
             LOG.error("Unexpected error for " + fit.url, t);
@@ -574,15 +607,17 @@
       } catch (final Throwable e) {
         LOG.error("fetcher throwable caught", e);
       } finally {
-        if (fit != null) fetchQueues.finishFetchItem(fit);
+        if (fit != null)
+          fetchQueues.finishFetchItem(fit);
         activeThreads.decrementAndGet(); // count threads
-        LOG.info("-finishing thread " + getName() + ", activeThreads=" + activeThreads);
+        LOG.info("-finishing thread " + getName() + ", activeThreads="
+            + activeThreads);
       }
     }
 
-    private void handleRedirect(String url, String newUrl,
-        boolean temp, String redirType, WebPage page)
-    throws URLFilterException, IOException, InterruptedException {
+    private void handleRedirect(String url, String newUrl, boolean temp,
+        String redirType, WebPage page) throws URLFilterException, IOException,
+        InterruptedException {
       newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
       newUrl = urlFilters.filter(newUrl);
       if (newUrl == null || newUrl.equals(url)) {
@@ -590,7 +625,7 @@
       }
 
       if (ignoreExternalLinks) {
-        String toHost   = new URL(newUrl).getHost().toLowerCase();
+        String toHost = new URL(newUrl).getHost().toLowerCase();
         String fromHost = new URL(url).getHost().toLowerCase();
         if (toHost == null || !toHost.equals(fromHost)) {
           // external links
@@ -606,12 +641,11 @@
       } else {
         page.setReprUrl(new Utf8(reprUrl));
         if (LOG.isDebugEnabled()) {
-          LOG.debug(" - " + redirType + " redirect to " +
-              reprUrl + " (fetching later)");
+          LOG.debug(" - " + redirType + " redirect to " + reprUrl
+              + " (fetching later)");
         }
       }
     }
-    
 
     private void updateStatus(int bytesInPage) throws IOException {
       pages.incrementAndGet();
@@ -618,10 +652,9 @@
       bytes.addAndGet(bytesInPage);
     }
 
-    private void output(FetchItem fit, Content content,
-        ProtocolStatus pstatus, byte status)
-    throws IOException, InterruptedException {
-      fit.page.setStatus((int)status);
+    private void output(FetchItem fit, Content content, ProtocolStatus pstatus,
+        byte status) throws IOException, InterruptedException {
+      fit.page.setStatus((int) status);
       final long prevFetchTime = fit.page.getFetchTime();
       fit.page.setPrevFetchTime(prevFetchTime);
       fit.page.setFetchTime(System.currentTimeMillis());
@@ -638,13 +671,15 @@
       String key = TableUtil.reverseUrl(fit.url);
 
       if (parse) {
-        if (!skipTruncated || (skipTruncated && !ParserJob.isTruncated(fit.url, fit.page))) {
+        if (!skipTruncated
+            || (skipTruncated && !ParserJob.isTruncated(fit.url, fit.page))) {
           parseUtil.process(key, fit.page);
         }
       }
-      //remove content if storingContent is false. Content is added to fit.page above 
-      //for ParseUtil be able to parse it. 
-      if(content != null && !storingContent){
+      // remove content if storingContent is false. Content is added to fit.page
+      // above
+      // for ParseUtil to be able to parse it.
+      if (content != null && !storingContent) {
         fit.page.setContent(ByteBuffer.wrap(new byte[0]));
       }
       context.write(key, fit.page);
@@ -656,10 +691,9 @@
     }
   }
 
-
   /**
-   * This class feeds the queues with input items, and re-fills them as
-   * items are consumed by FetcherThread-s.
+   * This class feeds the queues with input items, and re-fills them as items
+   * are consumed by FetcherThread-s.
    */
   private static class QueueFeeder extends Thread {
     private final Context context;
@@ -669,9 +703,8 @@
     boolean hasMore;
     private long timelimit = -1;
 
-    public QueueFeeder(Context context,
-        FetchItemQueues queues, int size)
-    throws IOException, InterruptedException {
+    public QueueFeeder(Context context, FetchItemQueues queues, int size)
+        throws IOException, InterruptedException {
       this.context = context;
       this.queues = queues;
       this.size = size;
@@ -681,8 +714,9 @@
       if (hasMore) {
         currentIter = context.getValues().iterator();
       }
-      // the value of the time limit is either -1 or the time where it should finish
-      timelimit = context.getConfiguration().getLong("fetcher.timelimit", -1); 
+      // the value of the time limit is either -1 or the time at which it
+      // should finish
+      timelimit = context.getConfiguration().getLong("fetcher.timelimit", -1);
     }
 
     @Override
@@ -709,7 +743,9 @@
             // queues are full - spin-wait until they have some free space
             try {
               Thread.sleep(1000);
-            } catch (final Exception e) {};
+            } catch (final Exception e) {
+            }
+            ;
             continue;
           }
           if (LOG.isDebugEnabled()) {
@@ -717,8 +753,7 @@
           }
           while (feed > 0 && currentIter.hasNext()) {
             FetchEntry entry = currentIter.next();
-            final String url =
-              TableUtil.unreverseUrl(entry.getKey());
+            final String url = TableUtil.unreverseUrl(entry.getKey());
             queues.addFetchItem(url, entry.getWebPage());
             feed--;
             cnt++;
@@ -735,22 +770,27 @@
         LOG.error("QueueFeeder error reading input, record " + cnt, e);
         return;
       }
-      LOG.info("QueueFeeder finished: total " + cnt + " records. Hit by time limit :"
-          + timelimitcount);
-      context.getCounter("FetcherStatus","HitByTimeLimit-QueueFeeder").increment(timelimitcount);
+      LOG.info("QueueFeeder finished: total " + cnt
+          + " records. Hit by time limit :" + timelimitcount);
+      context.getCounter("FetcherStatus", "HitByTimeLimit-QueueFeeder")
+          .increment(timelimitcount);
     }
   }
 
-  private void reportAndLogStatus(Context context, float actualPages, 
+  private void reportAndLogStatus(Context context, float actualPages,
       int actualBytes, int totalSize) throws IOException {
     StringBuilder status = new StringBuilder();
-    long elapsed = (System.currentTimeMillis() - start)/1000;
-    status.append(spinWaiting).append("/").append(activeThreads).append(" spinwaiting/active, ");
+    long elapsed = (System.currentTimeMillis() - start) / 1000;
+    status.append(spinWaiting).append("/").append(activeThreads)
+        .append(" spinwaiting/active, ");
     status.append(pages).append(" pages, ").append(errors).append(" errors, ");
-    status.append(Math.round((((float)pages.get())*10)/elapsed)/10.0).append(" ");
-    status.append(Math.round((actualPages*10)/10.0)).append(" pages/s, ");
-    status.append(Math.round((((float)bytes.get())*8)/1024)/elapsed).append(" ");
-    status.append(Math.round(((float)actualBytes)*8)/1024).append(" kb/s, ");
+    status.append(Math.round((((float) pages.get()) * 10) / elapsed) / 10.0)
+        .append(" ");
+    status.append(Math.round((actualPages * 10) / 10.0)).append(" pages/s, ");
+    status.append(Math.round((((float) bytes.get()) * 8) / 1024) / elapsed)
+        .append(" ");
+    status.append(Math.round(((float) actualBytes) * 8) / 1024).append(
+        " kb/s, ");
     status.append(totalSize).append(" URLs in ");
     status.append(this.fetchQueues.getQueueCount()).append(" queues");
     String toString = status.toString();
@@ -759,30 +799,30 @@
   }
 
   @Override
-  public void run(Context context)
-  throws IOException, InterruptedException {
+  public void run(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
     this.fetchQueues = new FetchItemQueues(conf);
     int threadCount = conf.getInt("fetcher.threads.fetch", 10);
     parse = conf.getBoolean(FetcherJob.PARSE_KEY, false);
-    storingContent=conf.getBoolean("fetcher.store.content", true);
+    storingContent = conf.getBoolean("fetcher.store.content", true);
     if (parse) {
-      skipTruncated=conf.getBoolean(ParserJob.SKIP_TRUNCATED, true);
+      skipTruncated = conf.getBoolean(ParserJob.SKIP_TRUNCATED, true);
       parseUtil = new ParseUtil(conf);
     }
     LOG.info("Fetcher: threads: " + threadCount);
 
     int maxFeedPerThread = conf.getInt("fetcher.queue.depth.multiplier", 50);
-    feeder = new QueueFeeder(context, fetchQueues, threadCount * maxFeedPerThread);
+    feeder = new QueueFeeder(context, fetchQueues, threadCount
+        * maxFeedPerThread);
     feeder.start();
 
-    for (int i = 0; i < threadCount; i++) {       // spawn threads
+    for (int i = 0; i < threadCount; i++) { // spawn threads
       FetcherThread ft = new FetcherThread(context, i);
       fetcherThreads.add(ft);
       ft.start();
     }
     // select a timeout that avoids a task timeout
-    final long timeout = conf.getInt("mapred.task.timeout", 10*60*1000)/2;
+    final long timeout = conf.getInt("mapred.task.timeout", 10 * 60 * 1000) / 2;
 
     // Used for threshold check, holds pages and bytes processed in the last sec
     float pagesLastSec;
@@ -790,48 +830,59 @@
 
     int throughputThresholdCurrentSequence = 0;
 
-    int throughputThresholdPages = conf.getInt("fetcher.throughput.threshold.pages", -1);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages); }
-    int throughputThresholdSequence = conf.getInt("fetcher.throughput.threshold.sequence", 5);
-    if (LOG.isInfoEnabled()) { 
-      LOG.info("Fetcher: throughput threshold sequence: " + throughputThresholdSequence); 
+    int throughputThresholdPages = conf.getInt(
+        "fetcher.throughput.threshold.pages", -1);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages);
     }
-    long throughputThresholdTimeLimit = conf.getLong("fetcher.throughput.threshold.check.after", -1);
-    
-    do {                                          // wait for threads to exit
+    int throughputThresholdSequence = conf.getInt(
+        "fetcher.throughput.threshold.sequence", 5);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold sequence: "
+          + throughputThresholdSequence);
+    }
+    long throughputThresholdTimeLimit = conf.getLong(
+        "fetcher.throughput.threshold.check.after", -1);
+
+    do { // wait for threads to exit
       pagesLastSec = pages.get();
-      bytesLastSec = (int)bytes.get();
+      bytesLastSec = (int) bytes.get();
       final int secondsToSleep = 5;
       try {
         Thread.sleep(secondsToSleep * 1000);
-      } catch (InterruptedException e) {}
+      } catch (InterruptedException e) {
+      }
 
-      pagesLastSec = (pages.get() - pagesLastSec)/secondsToSleep;
-      bytesLastSec = ((int)bytes.get() - bytesLastSec)/secondsToSleep;
+      pagesLastSec = (pages.get() - pagesLastSec) / secondsToSleep;
+      bytesLastSec = ((int) bytes.get() - bytesLastSec) / secondsToSleep;
 
       int fetchQueuesTotalSize = fetchQueues.getTotalSize();
-      reportAndLogStatus(context, pagesLastSec, bytesLastSec, fetchQueuesTotalSize);
-      
+      reportAndLogStatus(context, pagesLastSec, bytesLastSec,
+          fetchQueuesTotalSize);
+
       boolean feederAlive = feeder.isAlive();
       if (!feederAlive && fetchQueuesTotalSize < 5) {
         fetchQueues.dump();
       }
-      
+
       // check timelimit
       if (!feederAlive) {
         int hitByTimeLimit = fetchQueues.checkTimelimit();
         if (hitByTimeLimit != 0) {
-          context.getCounter("FetcherStatus","HitByTimeLimit-Queues").increment(hitByTimeLimit);
+          context.getCounter("FetcherStatus", "HitByTimeLimit-Queues")
+              .increment(hitByTimeLimit);
         }
       }
-      
+
       // if throughput threshold is enabled
-      if (throughputThresholdTimeLimit < System.currentTimeMillis() && throughputThresholdPages != -1) {
+      if (throughputThresholdTimeLimit < System.currentTimeMillis()
+          && throughputThresholdPages != -1) {
         // Check if we're dropping below the threshold
         if (pagesLastSec < throughputThresholdPages) {
           throughputThresholdCurrentSequence++;
-          LOG.warn(Integer.toString(throughputThresholdCurrentSequence) 
-              + ": dropping below configured threshold of " + Integer.toString(throughputThresholdPages) 
+          LOG.warn(Integer.toString(throughputThresholdCurrentSequence)
+              + ": dropping below configured threshold of "
+              + Integer.toString(throughputThresholdPages)
               + " pages per second");
 
           // Quit if we dropped below threshold too many times
@@ -841,17 +892,19 @@
             // Disable the threshold checker
             throughputThresholdPages = -1;
 
-            // Empty the queues cleanly and get number of items that were dropped
+            // Empty the queues cleanly and get the number of items that
+            // were dropped
             int hitByThrougputThreshold = fetchQueues.emptyQueues();
 
-            if (hitByThrougputThreshold != 0) context.getCounter("FetcherStatus", 
-                "hitByThrougputThreshold").increment(hitByThrougputThreshold);
+            if (hitByThrougputThreshold != 0)
+              context.getCounter("FetcherStatus", "hitByThrougputThreshold")
+                  .increment(hitByThrougputThreshold);
           }
         } else {
           throughputThresholdCurrentSequence = 0;
         }
       }
-      
+
       // some requests seem to hang, despite all intentions
       if ((System.currentTimeMillis() - lastRequestStart.get()) > timeout) {
         if (LOG.isWarnEnabled() && activeThreads.get() > 0) {
@@ -859,7 +912,8 @@
           for (int i = 0; i < fetcherThreads.size(); i++) {
             FetcherThread thread = fetcherThreads.get(i);
             if (thread.isAlive()) {
-              LOG.warn("Thread #" + i + " hung while processing " + thread.reprUrl);
+              LOG.warn("Thread #" + i + " hung while processing "
+                  + thread.reprUrl);
               if (LOG.isDebugEnabled()) {
                 StackTraceElement[] stack = thread.getStackTrace();
                 StringBuilder sb = new StringBuilder();
@@ -879,4 +933,3 @@
     LOG.info("-activeThreads=" + activeThreads);
   }
 }
-
Index: src/java/org/apache/nutch/host/HostDb.java
===================================================================
--- src/java/org/apache/nutch/host/HostDb.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostDb.java	(working copy)
@@ -37,22 +37,23 @@
 import com.google.common.cache.RemovalNotification;
 
 /**
- * A caching wrapper for the host datastore. 
+ * A caching wrapper for the host datastore.
  */
 public class HostDb implements Closeable {
   public static final Log LOG = LogFactory.getLog(HostDb.class);
-  
+
   private static final class CacheHost {
     private final Host host;
     private final long timestamp;
+
     public CacheHost(Host host, long timestamp) {
       this.host = host;
       this.timestamp = timestamp;
-    }   
+    }
   }
-  private final static CacheHost NULL_HOST = new CacheHost(null,0);
-  
 
+  private final static CacheHost NULL_HOST = new CacheHost(null, 0);
+
   private DataStore<String, Host> hostStore;
 
   public static final String HOSTDB_LRU_SIZE = "hostdb.lru.size";
@@ -61,7 +62,7 @@
   public static final int DEFAULT_HOSTDB_CONCURRENCY_LEVEL = 8;
 
   private Cache<String, CacheHost> cache;
-  
+
   private AtomicLong lastFlush;
 
   public HostDb(Configuration conf) throws GoraException {
@@ -73,47 +74,43 @@
 
     // Create a cache.
     // We add a removal listener to see if we need to flush the store,
-    // in order to adhere to the put-flush-get semantic 
+    // in order to adhere to the put-flush-get semantic
     // ("read your own write") of DataStore.
-    
+
     long lruSize = conf.getLong(HOSTDB_LRU_SIZE, DEFAULT_LRU_SIZE);
-    int concurrencyLevel = conf.getInt(HOSTDB_CONCURRENCY_LEVEL, 
+    int concurrencyLevel = conf.getInt(HOSTDB_CONCURRENCY_LEVEL,
         DEFAULT_HOSTDB_CONCURRENCY_LEVEL);
-    RemovalListener<String, CacheHost> listener = 
-        new RemovalListener<String, CacheHost>() {
-          @Override
-          public void onRemoval(
-              RemovalNotification<String, CacheHost> notification) {
-            CacheHost removeFromCacheHost = notification.getValue();
-            if (removeFromCacheHost != NULL_HOST) {
-              if (removeFromCacheHost.timestamp < lastFlush.get()) {
-                try {
-                  hostStore.flush();
-                } catch (Exception e) {
-                  throw new RuntimeException(e);
-                }
-                lastFlush.set(System.currentTimeMillis());
-              }
+    RemovalListener<String, CacheHost> listener = new RemovalListener<String, CacheHost>() {
+      @Override
+      public void onRemoval(RemovalNotification<String, CacheHost> notification) {
+        CacheHost removeFromCacheHost = notification.getValue();
+        if (removeFromCacheHost != NULL_HOST) {
+          if (removeFromCacheHost.timestamp < lastFlush.get()) {
+            try {
+              hostStore.flush();
+            } catch (Exception e) {
+              throw new RuntimeException(e);
             }
+            lastFlush.set(System.currentTimeMillis());
           }
+        }
+      }
     };
-    
-    cache=CacheBuilder.newBuilder().maximumSize(lruSize)
-        .removalListener(listener).concurrencyLevel(concurrencyLevel)
-        .build();
+
+    cache = CacheBuilder.newBuilder().maximumSize(lruSize)
+        .removalListener(listener).concurrencyLevel(concurrencyLevel).build();
     lastFlush = new AtomicLong(System.currentTimeMillis());
   }
 
-  
-  
   public Host get(final String key) throws IOException {
     Callable<CacheHost> valueLoader = new Callable<CacheHost>() {
       @Override
       public CacheHost call() throws Exception {
         Host host = hostStore.get(key);
-        if (host == null) return NULL_HOST;
+        if (host == null)
+          return NULL_HOST;
         return new CacheHost(host, System.currentTimeMillis());
-      }  
+      }
     };
     CacheHost cachedHost;
     try {
@@ -127,14 +124,11 @@
       return null;
     }
   }
- 
 
-
   public Host getByHostName(String hostName) throws IOException {
-   return get(TableUtil.reverseHost(hostName));
+    return get(TableUtil.reverseHost(hostName));
   }
-  
-  
+
   public void put(String key, Host host) throws IOException {
     cache.put(key, new CacheHost(host, System.currentTimeMillis()));
     hostStore.put(key, host);
Index: src/java/org/apache/nutch/host/HostDbReader.java
===================================================================
--- src/java/org/apache/nutch/host/HostDbReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostDbReader.java	(working copy)
@@ -39,7 +39,8 @@
 public class HostDbReader extends Configured implements Tool {
   public static final Log LOG = LogFactory.getLog(HostDbReader.class);
 
-  private void read(String key) throws ClassNotFoundException, IOException, Exception {
+  private void read(String key) throws ClassNotFoundException, IOException,
+      Exception {
 
     DataStore<String, Host> datastore = StorageUtils.createWebStore(getConf(),
         String.class, Host.class);
Index: src/java/org/apache/nutch/host/HostDbUpdateJob.java
===================================================================
--- src/java/org/apache/nutch/host/HostDbUpdateJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostDbUpdateJob.java	(working copy)
@@ -116,15 +116,14 @@
 
   @Override
   public int run(String[] args) throws Exception {
-    boolean linkDb=false;
+    boolean linkDb = false;
     for (int i = 0; i < args.length; i++) {
       if ("-linkDb".equals(args[i])) {
         linkDb = true;
       } else if ("-crawlId".equals(args[i])) {
         getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
-      }
-      else {
-        throw new IllegalArgumentException("unrecognized arg " + args[i] 
+      } else {
+        throw new IllegalArgumentException("unrecognized arg " + args[i]
             + " usage: (-linkDb) (-crawlId <crawlId>)");
       }
     }
Index: src/java/org/apache/nutch/host/HostDbUpdateReducer.java
===================================================================
--- src/java/org/apache/nutch/host/HostDbUpdateReducer.java	(revision 1650446)
+++ src/java/org/apache/nutch/host/HostDbUpdateReducer.java	(working copy)
@@ -30,36 +30,37 @@
 import java.util.Set;
 
 /**
- * Combines all WebPages with the same host key to create a Host object, 
- * with some statistics.
+ * Combines all WebPages with the same host key to create a Host object, with
+ * some statistics.
  */
-public class HostDbUpdateReducer extends GoraReducer<Text, WebPage, String, Host> {
-  
+public class HostDbUpdateReducer extends
+    GoraReducer<Text, WebPage, String, Host> {
+
   @Override
   protected void reduce(Text key, Iterable<WebPage> values, Context context)
-    throws IOException, InterruptedException {
-    
+      throws IOException, InterruptedException {
+
     int numPages = 0;
     int numFetched = 0;
     boolean buildLinkDb = true;
-    
+
     Histogram<String> inlinkCount = new Histogram<String>();
     Histogram<String> outlinkCount = new Histogram<String>();
-    
-    for (WebPage page: values) {
+
+    for (WebPage page : values) {
       // count number of pages
-      numPages++;     
+      numPages++;
       // count number of fetched pages
       if (page.getStatus() == CrawlStatus.STATUS_FETCHED) {
         numFetched++;
       }
-      
+
       // build host link db
       // TODO: limit number of links
       if (buildLinkDb) {
         if (page.getInlinks() != null) {
           Set<CharSequence> inlinks = page.getInlinks().keySet();
-          for (CharSequence inlink: inlinks) {
+          for (CharSequence inlink : inlinks) {
             String host = URLUtil.getHost(inlink.toString());
             inlinkCount.add(host);
           }
@@ -66,7 +67,7 @@
         }
         if (page.getOutlinks() != null) {
           Set<CharSequence> outlinks = page.getOutlinks().keySet();
-          for (CharSequence outlink: outlinks) {
+          for (CharSequence outlink : outlinks) {
             String host = URLUtil.getHost(outlink.toString());
             outlinkCount.add(host);
           }
@@ -73,20 +74,24 @@
         }
       }
     }
-    
+
     // output host data
     Host host = new Host();
-    host.getMetadata().put(new Utf8("p"),ByteBuffer.wrap(Integer.toString(numPages).getBytes()));
+    host.getMetadata().put(new Utf8("p"),
+        ByteBuffer.wrap(Integer.toString(numPages).getBytes()));
     if (numFetched > 0) {
-      host.getMetadata().put(new Utf8("f"),ByteBuffer.wrap(Integer.toString(numFetched).getBytes()));
+      host.getMetadata().put(new Utf8("f"),
+          ByteBuffer.wrap(Integer.toString(numFetched).getBytes()));
     }
-    for (String inlink: inlinkCount.getKeys()) {
-      host.getInlinks().put(new Utf8(inlink), new Utf8(Integer.toString(inlinkCount.getCount(inlink))));
+    for (String inlink : inlinkCount.getKeys()) {
+      host.getInlinks().put(new Utf8(inlink),
+          new Utf8(Integer.toString(inlinkCount.getCount(inlink))));
     }
-    for (String outlink: outlinkCount.getKeys()) {
-      host.getOutlinks().put(new Utf8(outlink), new Utf8(Integer.toString(outlinkCount.getCount(outlink))));
+    for (String outlink : outlinkCount.getKeys()) {
+      host.getOutlinks().put(new Utf8(outlink),
+          new Utf8(Integer.toString(outlinkCount.getCount(outlink))));
     }
-    
+
     context.write(key.toString(), host);
   }
 }
Index: src/java/org/apache/nutch/host/HostInjectorJob.java
===================================================================
--- src/java/org/apache/nutch/host/HostInjectorJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostInjectorJob.java	(working copy)
@@ -123,13 +123,14 @@
       while (keysIter.hasNext()) {
         String keymd = keysIter.next();
         String valuemd = metadata.get(keymd);
-        host.getMetadata().put(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
+        host.getMetadata().put(new Utf8(keymd),
+            ByteBuffer.wrap(valuemd.getBytes()));
       }
       String hostname;
-      if (url.indexOf("://")> -1) {
-        hostname=new URL(url).getHost();
+      if (url.indexOf("://") > -1) {
+        hostname = new URL(url).getHost();
       } else {
-        hostname=new URL("http://"+url).getHost();
+        hostname = new URL("http://" + url).getHost();
       }
       String hostkey = TableUtil.reverseHost(hostname);
       context.write(hostkey, host);
@@ -145,8 +146,8 @@
     job.setMapOutputKeyClass(String.class);
     job.setMapOutputValueClass(Host.class);
     job.setOutputFormatClass(GoraOutputFormat.class);
-    GoraOutputFormat.setOutput(job,
-        StorageUtils.createWebStore(job.getConfiguration(), String.class, Host.class), true);
+    GoraOutputFormat.setOutput(job, StorageUtils.createWebStore(
+        job.getConfiguration(), String.class, Host.class), true);
     job.setReducerClass(Reducer.class);
     job.setNumReduceTasks(0);
     return job.waitForCompletion(true);
Index: src/java/org/apache/nutch/host/package-info.java
===================================================================
--- src/java/org/apache/nutch/host/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Host database to store metadata per host.
  */
 package org.apache.nutch.host;
+
Index: src/java/org/apache/nutch/indexer/CleaningJob.java
===================================================================
--- src/java/org/apache/nutch/indexer/CleaningJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/CleaningJob.java	(working copy)
@@ -44,26 +44,25 @@
 public class CleaningJob extends NutchTool implements Tool {
 
   public static final String ARG_COMMIT = "commit";
-  public static final Logger LOG = LoggerFactory
-      .getLogger(CleaningJob.class);
+  public static final Logger LOG = LoggerFactory.getLogger(CleaningJob.class);
   private Configuration conf;
 
   private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
-  
+
   static {
     FIELDS.add(WebPage.Field.STATUS);
   }
-  
+
   @Override
   public Configuration getConf() {
     return conf;
   }
-  
+
   @Override
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   public Collection<WebPage.Field> getFields(Job job) {
     Configuration conf = job.getConfiguration();
     Collection<WebPage.Field> columns = new HashSet<WebPage.Field>(FIELDS);
@@ -96,7 +95,7 @@
       }
     }
   }
-  
+
   public static class CleanReducer extends
       Reducer<String, WebPage, NullWritable, NullWritable> {
     private int numDeletes = 0;
@@ -128,12 +127,11 @@
       writers.close();
       if (numDeletes > 0 && commit) {
         writers.commit();
-      }   
+      }
       LOG.info("CleaningJob: deleted a total of " + numDeletes + " documents");
     }
   }
 
-
   @Override
   public Map<String, Object> run(Map<String, Object> args) throws Exception {
     getConf().setBoolean(ARG_COMMIT, (Boolean) args.get(ARG_COMMIT));
Index: src/java/org/apache/nutch/indexer/IndexCleaningFilter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexCleaningFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexCleaningFilter.java	(working copy)
@@ -22,9 +22,9 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-
-/** Extension point for indexing.  Permits one to add metadata to the indexed
- * fields.  All plugins found which implement this extension point are run
+/**
+ * Extension point for indexing. Permits one to add metadata to the indexed
+ * fields. All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
 public interface IndexCleaningFilter extends FieldPluggable, Configurable {
@@ -31,12 +31,12 @@
   /** The name of the extension point. */
   final static String X_POINT_ID = IndexCleaningFilter.class.getName();
 
-  /**   
-   * @param url page url
+  /**
+   * @param url
+   *          page url
    * @param page
    * @return true == remove false == keep
    * @throws IndexingException
    */
-  boolean remove(String url, WebPage page)
-  throws IndexingException;
+  boolean remove(String url, WebPage page) throws IndexingException;
 }
Index: src/java/org/apache/nutch/indexer/IndexCleaningFilters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexCleaningFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexCleaningFilters.java	(working copy)
@@ -32,12 +32,13 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.ObjectCache;
 
-/** Creates and caches {@link IndexCleaningFilter} implementing plugins.*/
+/** Creates and caches {@link IndexCleaningFilter} implementing plugins. */
 public class IndexCleaningFilters {
 
   public static final String IndexCleaningFilter_ORDER = "IndexCleaningFilterhbase.order";
 
-  public final static Logger LOG = LoggerFactory.getLogger(IndexCleaningFilters.class);
+  public final static Logger LOG = LoggerFactory
+      .getLogger(IndexCleaningFilters.class);
 
   private IndexCleaningFilter[] indexcleaningFilters;
 
@@ -60,10 +61,10 @@
         ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
             IndexCleaningFilter.X_POINT_ID);
         if (point == null)
-          throw new RuntimeException(IndexCleaningFilter.X_POINT_ID + " not found.");
+          throw new RuntimeException(IndexCleaningFilter.X_POINT_ID
+              + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, IndexCleaningFilter> filterMap =
-          new HashMap<String, IndexCleaningFilter>();
+        HashMap<String, IndexCleaningFilter> filterMap = new HashMap<String, IndexCleaningFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
           IndexCleaningFilter filter = (IndexCleaningFilter) extension
@@ -78,20 +79,19 @@
          * indeterminate order
          */
         if (orderedFilters == null) {
-          objectCache.setObject(IndexCleaningFilter.class.getName(),
-              filterMap.values().toArray(
-                  new IndexCleaningFilter[0]));
+          objectCache.setObject(IndexCleaningFilter.class.getName(), filterMap
+              .values().toArray(new IndexCleaningFilter[0]));
           /* Otherwise run the filters in the required order */
         } else {
           ArrayList<IndexCleaningFilter> filters = new ArrayList<IndexCleaningFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
-        	  IndexCleaningFilter filter = filterMap.get(orderedFilters[i]);
+            IndexCleaningFilter filter = filterMap.get(orderedFilters[i]);
             if (filter != null) {
               filters.add(filter);
             }
           }
-          objectCache.setObject(IndexCleaningFilter.class.getName(), filters
-              .toArray(new IndexCleaningFilter[filters.size()]));
+          objectCache.setObject(IndexCleaningFilter.class.getName(),
+              filters.toArray(new IndexCleaningFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
@@ -100,13 +100,13 @@
           .getObject(IndexCleaningFilter.class.getName());
     }
   }
+
   /** Run all defined filters. */
-  public boolean remove(String url, WebPage page)
-  throws IndexingException {
+  public boolean remove(String url, WebPage page) throws IndexingException {
     for (IndexCleaningFilter indexcleaningFilter : indexcleaningFilters) {
-    	if(indexcleaningFilter.remove(url,page)){
-    		return true;
-    	}
+      if (indexcleaningFilter.remove(url, page)) {
+        return true;
+      }
     }
     return false;
   }
Index: src/java/org/apache/nutch/indexer/IndexUtil.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexUtil.java	(working copy)
@@ -26,37 +26,41 @@
 import org.apache.nutch.util.TableUtil;
 
 /**
- * Utility to create an indexed document from a webpage.  
- *
+ * Utility to create an indexed document from a webpage.
+ * 
  */
 public class IndexUtil {
   private static final Log LOG = LogFactory.getLog(new Object() {
   }.getClass().getEnclosingClass());
-  
-  
+
   private IndexingFilters filters;
   private ScoringFilters scoringFilters;
-  
+
   public IndexUtil(Configuration conf) {
     filters = new IndexingFilters(conf);
     scoringFilters = new ScoringFilters(conf);
   }
-  
+
   /**
    * Index a {@link Webpage}, here we add the following fields:
    * <ol>
    * <li><tt>id</tt>: default uniqueKey for the {@link NutchDocument}.</li>
-   * <li><tt>digest</tt>: Digest is used to identify pages (like unique ID) and is used to remove
-   * duplicates during the dedup procedure. It is calculated using {@link org.apache.nutch.crawl.MD5Signature} or
+   * <li><tt>digest</tt>: Digest is used to identify pages (like unique ID) and
+   * is used to remove duplicates during the dedup procedure. It is calculated
+   * using {@link org.apache.nutch.crawl.MD5Signature} or
    * {@link org.apache.nutch.crawl.TextProfileSignature}.</li>
-   * <li><tt>batchId</tt>: The page belongs to a unique batchId, this is its identifier.</li>
-   * <li><tt>boost</tt>: Boost is used to calculate document (field) score which can be used within
-   * queries submitted to the underlying indexing library to find the best results. It's part of the scoring algorithms. 
-   * See scoring.link, scoring.opic, scoring.tld, etc.</li>
+   * <li><tt>batchId</tt>: The page belongs to a unique batchId, this is its
+   * identifier.</li>
+   * <li><tt>boost</tt>: Boost is used to calculate document (field) score which
+   * can be used within queries submitted to the underlying indexing library to
+   * find the best results. It's part of the scoring algorithms. See
+   * scoring.link, scoring.opic, scoring.tld, etc.</li>
    * </ol>
    * 
-   * @param key The key of the page (reversed url).
-   * @param page The {@link Webpage}.
+   * @param key
+   *          The key of the page (reversed url).
+   * @param page
+   *          The {@link Webpage}.
    * @return The indexed document, or null if skipped by index filters.
    */
   public NutchDocument index(String key, WebPage page) {
@@ -66,7 +70,7 @@
     if (page.getBatchId() != null) {
       doc.add("batchId", page.getBatchId().toString());
     }
-    
+
     String url = TableUtil.unreverseUrl(key);
 
     if (LOG.isDebugEnabled()) {
@@ -76,12 +80,13 @@
     try {
       doc = filters.filter(doc, url, page);
     } catch (IndexingException e) {
-      LOG.warn("Error indexing "+key+": "+e);
+      LOG.warn("Error indexing " + key + ": " + e);
       return null;
     }
 
     // skip documents discarded by indexing filters
-    if (doc == null) return null;
+    if (doc == null)
+      return null;
 
     float boost = 1.0f;
     // run scoring filters
@@ -98,5 +103,5 @@
 
     return doc;
   }
-  
+
 }
Index: src/java/org/apache/nutch/indexer/IndexWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexWriter.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexWriter.java	(working copy)
@@ -26,19 +26,22 @@
 public interface IndexWriter extends Configurable, Pluggable {
   /** The name of the extension point. */
   final static String X_POINT_ID = IndexWriter.class.getName();
-  
+
   public void open(Configuration job) throws IOException;
 
   public void write(NutchDocument doc) throws IOException;
-  
+
   public void delete(String key) throws IOException;
-  
+
   public void update(NutchDocument doc) throws IOException;
-  
+
   public void commit() throws IOException;
 
   public void close() throws IOException;
-  
-  /** Returns a String describing the IndexWriter instance and the specific parameters it can take */
+
+  /**
+   * Returns a String describing the IndexWriter instance and the specific
+   * parameters it can take
+   */
   public String describe();
 }
Index: src/java/org/apache/nutch/indexer/IndexWriters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexWriters.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexWriters.java	(working copy)
@@ -32,8 +32,7 @@
 /** Creates and caches {@link IndexWriter} implementing plugins. */
 public class IndexWriters {
 
-  public final static Logger LOG = LoggerFactory
-      .getLogger(IndexWriters.class);
+  public final static Logger LOG = LoggerFactory.getLogger(IndexWriters.class);
 
   private IndexWriter[] indexWriters;
 
@@ -44,17 +43,15 @@
           .getObject(IndexWriter.class.getName());
       if (this.indexWriters == null) {
         try {
-          ExtensionPoint point = PluginRepository.get(conf)
-              .getExtensionPoint(IndexWriter.X_POINT_ID);
+          ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+              IndexWriter.X_POINT_ID);
           if (point == null)
-            throw new RuntimeException(IndexWriter.X_POINT_ID
-                + " not found.");
+            throw new RuntimeException(IndexWriter.X_POINT_ID + " not found.");
           Extension[] extensions = point.getExtensions();
           HashMap<String, IndexWriter> indexerMap = new HashMap<String, IndexWriter>();
           for (int i = 0; i < extensions.length; i++) {
             Extension extension = extensions[i];
-            IndexWriter writer = (IndexWriter) extension
-                .getExtensionInstance();
+            IndexWriter writer = (IndexWriter) extension.getExtensionInstance();
             LOG.info("Adding " + writer.getClass().getName());
             if (!indexerMap.containsKey(writer.getClass().getName())) {
               indexerMap.put(writer.getClass().getName(), writer);
Index: src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(working copy)
@@ -30,29 +30,29 @@
   public RecordWriter<String, NutchDocument> getRecordWriter(
       TaskAttemptContext job) throws IOException, InterruptedException {
 
-    //final IndexWriter[] writers =
-    //  NutchIndexWriterFactory.getNutchIndexWriters(job.getConfiguration());
+    // final IndexWriter[] writers =
+    // NutchIndexWriterFactory.getNutchIndexWriters(job.getConfiguration());
 
     final IndexWriters writers = new IndexWriters(job.getConfiguration());
-    
-//    for (final IndexWriter writer : writers) {
-//      writer.open(job);
-//    }
+
+    // for (final IndexWriter writer : writers) {
+    // writer.open(job);
+    // }
     writers.open(job.getConfiguration());
-    
+
     return new RecordWriter<String, NutchDocument>() {
 
       @Override
       public void write(String key, NutchDocument doc) throws IOException {
-        // TODO: Check Write Status for delete or write.  
+        // TODO: Check Write Status for delete or write.
         writers.write(doc);
       }
 
       @Override
       public void close(TaskAttemptContext context) throws IOException,
-      InterruptedException {
-          writers.close();
-        }
+          InterruptedException {
+        writers.close();
+      }
     };
   }
 
@@ -64,21 +64,26 @@
   @Override
   public OutputCommitter getOutputCommitter(TaskAttemptContext arg0)
       throws IOException, InterruptedException {
-    //return an empty outputcommitter
+    // return an empty outputcommitter
     return new OutputCommitter() {
       @Override
       public void setupTask(TaskAttemptContext arg0) throws IOException {
       }
+
       @Override
       public void setupJob(JobContext arg0) throws IOException {
       }
+
       @Override
-      public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+      public boolean needsTaskCommit(TaskAttemptContext arg0)
+          throws IOException {
         return false;
       }
+
       @Override
       public void commitTask(TaskAttemptContext arg0) throws IOException {
       }
+
       @Override
       public void abortTask(TaskAttemptContext arg0) throws IOException {
       }
Index: src/java/org/apache/nutch/indexer/IndexingFilter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingFilter.java	(working copy)
@@ -22,9 +22,9 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-
-/** Extension point for indexing.  Permits one to add metadata to the indexed
- * fields.  All plugins found which implement this extension point are run
+/**
+ * Extension point for indexing. Permits one to add metadata to the indexed
+ * fields. All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
 public interface IndexingFilter extends FieldPluggable, Configurable {
@@ -33,15 +33,18 @@
 
   /**
    * Adds fields or otherwise modifies the document that will be indexed for a
-   * parse. Unwanted documents can be removed from indexing by returning a null value.
-   *
-   * @param doc document instance for collecting fields
-   * @param url page url
+   * parse. Unwanted documents can be removed from indexing by returning a null
+   * value.
+   * 
+   * @param doc
+   *          document instance for collecting fields
+   * @param url
+   *          page url
    * @param page
-   * @return modified (or a new) document instance, or null (meaning the document
-   * should be discarded)
+   * @return modified (or a new) document instance, or null (meaning the
+   *         document should be discarded)
    * @throws IndexingException
    */
   NutchDocument filter(NutchDocument doc, String url, WebPage page)
-  throws IndexingException;
+      throws IndexingException;
 }
Index: src/java/org/apache/nutch/indexer/IndexingFilters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingFilters.java	(working copy)
@@ -32,12 +32,13 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.ObjectCache;
 
-/** Creates and caches {@link IndexingFilter} implementing plugins.*/
+/** Creates and caches {@link IndexingFilter} implementing plugins. */
 public class IndexingFilters {
 
   public static final String INDEXINGFILTER_ORDER = "indexingfilter.order";
 
-  public final static Logger LOG = LoggerFactory.getLogger(IndexingFilters.class);
+  public final static Logger LOG = LoggerFactory
+      .getLogger(IndexingFilters.class);
 
   private IndexingFilter[] indexingFilters;
 
@@ -62,8 +63,7 @@
         if (point == null)
           throw new RuntimeException(IndexingFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, IndexingFilter> filterMap =
-          new HashMap<String, IndexingFilter>();
+        HashMap<String, IndexingFilter> filterMap = new HashMap<String, IndexingFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
           IndexingFilter filter = (IndexingFilter) extension
@@ -78,9 +78,8 @@
          * indeterminate order
          */
         if (orderedFilters == null) {
-          objectCache.setObject(IndexingFilter.class.getName(),
-              filterMap.values().toArray(
-                  new IndexingFilter[0]));
+          objectCache.setObject(IndexingFilter.class.getName(), filterMap
+              .values().toArray(new IndexingFilter[0]));
           /* Otherwise run the filters in the required order */
         } else {
           ArrayList<IndexingFilter> filters = new ArrayList<IndexingFilter>();
@@ -90,8 +89,8 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(IndexingFilter.class.getName(), filters
-              .toArray(new IndexingFilter[filters.size()]));
+          objectCache.setObject(IndexingFilter.class.getName(),
+              filters.toArray(new IndexingFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
@@ -100,13 +99,15 @@
           .getObject(IndexingFilter.class.getName());
     }
   }
+
   /** Run all defined filters. */
   public NutchDocument filter(NutchDocument doc, String url, WebPage page)
-  throws IndexingException {
+      throws IndexingException {
     for (IndexingFilter indexingFilter : indexingFilters) {
       doc = indexingFilter.filter(doc, url, page);
       // break the loop if an indexing filter discards the doc
-      if (doc == null) return null;
+      if (doc == null)
+        return null;
     }
 
     return doc;
@@ -113,10 +114,9 @@
   }
 
   /**
-   * Gets all the fields for a given {@link WebPage}
-   * Many datastores need to setup the mapreduce job by specifying the fields
-   * needed. All extensions that work on WebPage are able to specify what fields
-   * they need.
+   * Gets all the fields for a given {@link WebPage} Many datastores need to
+   * setup the mapreduce job by specifying the fields needed. All extensions
+   * that work on WebPage are able to specify what fields they need.
    */
   public Collection<WebPage.Field> getFields() {
     Collection<WebPage.Field> columns = new HashSet<WebPage.Field>();
Index: src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(working copy)
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.nutch.indexer;
 
 import java.nio.ByteBuffer;
@@ -43,16 +43,19 @@
 import org.slf4j.LoggerFactory;
 
 /**
- * Reads and parses a URL and run the indexers on it. Displays the fields obtained and the first
- * 100 characters of their value
- *
- * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker http://www.lemonde.fr
+ * Reads and parses a URL and run the indexers on it. Displays the fields
+ * obtained and the first 100 characters of their value
+ * 
+ * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker
+ * http://www.lemonde.fr
+ * 
  * @author Julien Nioche
  **/
 
 public class IndexingFiltersChecker extends Configured implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(IndexingFiltersChecker.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(IndexingFiltersChecker.class);
 
   public IndexingFiltersChecker() {
 
@@ -85,7 +88,7 @@
     ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, page);
     page.setProtocolStatus(protocolOutput.getStatus());
     if (protocolOutput.getStatus().getCode() == ProtocolStatusCodes.SUCCESS) {
-      page.setStatus((int)CrawlStatus.STATUS_FETCHED);
+      page.setStatus((int) CrawlStatus.STATUS_FETCHED);
       page.setFetchTime(System.currentTimeMillis());
     } else {
       LOG.error("Fetch failed with protocol status: "
@@ -93,7 +96,7 @@
           + ": " + ProtocolStatusUtils.getMessage(protocolOutput.getStatus()));
       return -1;
     }
-    
+
     Content content = protocolOutput.getContent();
     if (content == null) {
       LOG.warn("No content for " + url);
@@ -106,7 +109,7 @@
       return -1;
     }
     page.setContentType(new Utf8(contentType));
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("parsing: " + url);
       LOG.info("contentType: " + contentType);
@@ -136,7 +139,7 @@
       LOG.info("Document discarded by indexing filter");
       return 0;
     }
-    
+
     for (String fname : doc.getFieldNames()) {
       List<String> values = doc.getFieldValues(fname);
       if (values != null) {
Index: src/java/org/apache/nutch/indexer/IndexingJob.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingJob.java	(working copy)
@@ -180,7 +180,7 @@
 
     IndexWriters writers = new IndexWriters(getConf());
     LOG.info(writers.describe());
-    
+
     writers.open(getConf());
     if (getConf().getBoolean(SolrConstants.COMMIT_INDEX, true)) {
       writers.commit();
Index: src/java/org/apache/nutch/indexer/NutchDocument.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchDocument.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/NutchDocument.java	(working copy)
@@ -33,9 +33,9 @@
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.nutch.metadata.Metadata;
 
-/** A {@link NutchDocument} is the unit of indexing.*/
-public class NutchDocument
-implements Writable, Iterable<Entry<String, List<String>>> {
+/** A {@link NutchDocument} is the unit of indexing. */
+public class NutchDocument implements Writable,
+    Iterable<Entry<String, List<String>>> {
 
   public static final byte VERSION = 1;
 
@@ -139,11 +139,11 @@
   }
 
   /**
-   * A utility-like method which can easily be used to write
-   * any {@link org.apache.nutch.indexer.NutchDocument} object
-   * to string for simple debugging.
+   * A utility-like method which can easily be used to write any
+   * {@link org.apache.nutch.indexer.NutchDocument} object to string for simple
+   * debugging.
    */
-  public String toString() { 
+  public String toString() {
     StringBuilder sb = new StringBuilder();
     sb.append("doc {\n");
     for (Entry<String, List<String>> entry : fields.entrySet()) {
Index: src/java/org/apache/nutch/indexer/solr/SolrConstants.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(working copy)
@@ -22,7 +22,7 @@
   public static final String SERVER_URL = SOLR_PREFIX + "server.url";
 
   public static final String COMMIT_SIZE = SOLR_PREFIX + "commit.size";
-  
+
   public static final String COMMIT_INDEX = SOLR_PREFIX + "commit.index";
 
   public static final String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
@@ -32,15 +32,15 @@
   public static final String USERNAME = SOLR_PREFIX + "auth.username";
 
   public static final String PASSWORD = SOLR_PREFIX + "auth.password";
-  
+
   public static final String ID_FIELD = "id";
-  
+
   public static final String URL_FIELD = "url";
-  
+
   public static final String BOOST_FIELD = "boost";
-  
+
   public static final String TIMESTAMP_FIELD = "tstamp";
-  
+
   public static final String DIGEST_FIELD = "digest";
 
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(working copy)
@@ -51,42 +51,44 @@
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 
-/** 
+/**
  * Utility class for deleting duplicate documents from a solr index.
- *
+ * 
  * The algorithm goes like follows:
  * 
  * Preparation:
  * <ol>
  * <li>Query the solr server for the number of documents (say, N)</li>
- * <li>Partition N among M map tasks. For example, if we have two map tasks
- * the first map task will deal with solr documents from 0 - (N / 2 - 1) and
- * the second will deal with documents from (N / 2) to (N - 1).</li>
+ * <li>Partition N among M map tasks. For example, if we have two map tasks the
+ * first map task will deal with solr documents from 0 - (N / 2 - 1) and the
+ * second will deal with documents from (N / 2) to (N - 1).</li>
  * </ol>
  * 
  * MapReduce:
  * <ul>
- * <li>Map: Identity map where keys are digests and values are {@link SolrRecord}
- * instances(which contain id, boost and timestamp)</li>
+ * <li>Map: Identity map where keys are digests and values are
+ * {@link SolrRecord} instances(which contain id, boost and timestamp)</li>
  * <li>Reduce: After map, {@link SolrRecord}s with the same digest will be
- * grouped together. Now, of these documents with the same digests, delete
- * all of them except the one with the highest score (boost field). If two
- * (or more) documents have the same score, then the document with the latest
- * timestamp is kept. Again, every other is deleted from solr index.
- * </li>
+ * grouped together. Now, of these documents with the same digests, delete all
+ * of them except the one with the highest score (boost field). If two (or more)
+ * documents have the same score, then the document with the latest timestamp is
+ * kept. Again, every other is deleted from solr index.</li>
  * </ul>
  * 
- * Note that we assume that two documents in
- * a solr index will never have the same URL. So this class only deals with
- * documents with <b>different</b> URLs but the same digest. 
+ * Note that we assume that two documents in a solr index will never have the
+ * same URL. So this class only deals with documents with <b>different</b> URLs
+ * but the same digest.
  */
 public class SolrDeleteDuplicates
-extends Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>
-implements Tool {
+    extends
+    Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>
+    implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(SolrDeleteDuplicates.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(SolrDeleteDuplicates.class);
 
-  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD + ":[* TO *]";
+  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD
+      + ":[* TO *]";
 
   private static final int NUM_MAX_DELETE_REQUEST = 1000;
 
@@ -96,7 +98,8 @@
     private long tstamp;
     private String id;
 
-    public SolrRecord() { }
+    public SolrRecord() {
+    }
 
     public SolrRecord(String id, float boost, long tstamp) {
       this.id = id;
@@ -117,10 +120,10 @@
     }
 
     public void readSolrDocument(SolrDocument doc) {
-      id = (String)doc.getFieldValue(SolrConstants.ID_FIELD);
-      boost = (Float)doc.getFieldValue(SolrConstants.BOOST_FIELD);
+      id = (String) doc.getFieldValue(SolrConstants.ID_FIELD);
+      boost = (Float) doc.getFieldValue(SolrConstants.BOOST_FIELD);
 
-      Date buffer = (Date)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
+      Date buffer = (Date) doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
       tstamp = buffer.getTime();
     }
 
@@ -136,7 +139,7 @@
       Text.writeString(out, id);
       out.writeFloat(boost);
       out.writeLong(tstamp);
-    } 
+    }
   }
 
   public static class SolrInputSplit extends InputSplit implements Writable {
@@ -144,7 +147,8 @@
     private int docBegin;
     private int numDocs;
 
-    public SolrInputSplit() { }
+    public SolrInputSplit() {
+    }
 
     public SolrInputSplit(int docBegin, int numDocs) {
       this.docBegin = docBegin;
@@ -162,7 +166,7 @@
 
     @Override
     public String[] getLocations() throws IOException {
-      return new String[] {} ;
+      return new String[] {};
     }
 
     @Override
@@ -175,9 +179,9 @@
     public void write(DataOutput out) throws IOException {
       out.writeInt(docBegin);
       out.writeInt(numDocs);
-    } 
+    }
   }
-  
+
   public static class SolrRecordReader extends RecordReader<Text, SolrRecord> {
 
     private int currentDoc = 0;
@@ -185,21 +189,22 @@
     private Text text;
     private SolrRecord record;
     private SolrDocumentList solrDocs;
-    
+
     public SolrRecordReader(SolrDocumentList solrDocs, int numDocs) {
       this.solrDocs = solrDocs;
       this.numDocs = numDocs;
     }
-    
+
     @Override
     public void initialize(InputSplit split, TaskAttemptContext context)
         throws IOException, InterruptedException {
       text = new Text();
-      record = new SolrRecord();   
+      record = new SolrRecord();
     }
 
     @Override
-    public void close() throws IOException { }
+    public void close() throws IOException {
+    }
 
     @Override
     public float getProgress() throws IOException {
@@ -231,14 +236,14 @@
       currentDoc++;
       return true;
     }
-   
+
   };
 
   public static class SolrInputFormat extends InputFormat<Text, SolrRecord> {
-    
+
     @Override
-    public List<InputSplit> getSplits(JobContext context)
-    throws IOException, InterruptedException {
+    public List<InputSplit> getSplits(JobContext context) throws IOException,
+        InterruptedException {
       Configuration conf = context.getConfiguration();
       int numSplits = context.getNumReduceTasks();
       SolrServer solr = SolrUtils.getHttpSolrServer(conf);
@@ -254,8 +259,8 @@
         throw new IOException(e);
       }
 
-      int numResults = (int)response.getResults().getNumFound();
-      int numDocsPerSplit = (numResults / numSplits); 
+      int numResults = (int) response.getResults().getNumFound();
+      int numDocsPerSplit = (numResults / numSplits);
       int currentDoc = 0;
       List<InputSplit> splits = new ArrayList<InputSplit>();
       for (int i = 0; i < numSplits - 1; i++) {
@@ -274,11 +279,10 @@
       SolrServer solr = SolrUtils.getHttpSolrServer(conf);
       SolrInputSplit solrSplit = (SolrInputSplit) split;
       final int numDocs = (int) solrSplit.getLength();
-      
+
       SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
       solrQuery.setFields(SolrConstants.ID_FIELD, SolrConstants.BOOST_FIELD,
-                          SolrConstants.TIMESTAMP_FIELD,
-                          SolrConstants.DIGEST_FIELD);
+          SolrConstants.TIMESTAMP_FIELD, SolrConstants.DIGEST_FIELD);
       solrQuery.setStart(solrSplit.getDocBegin());
       solrQuery.setRows(numDocs);
 
@@ -318,7 +322,6 @@
     solr = SolrUtils.getHttpSolrServer(conf);
   }
 
-
   @Override
   public void cleanup(Context context) throws IOException {
     try {
@@ -334,14 +337,14 @@
 
   @Override
   public void reduce(Text key, Iterable<SolrRecord> values, Context context)
-  throws IOException {
+      throws IOException {
     Iterator<SolrRecord> iterator = values.iterator();
     SolrRecord recordToKeep = iterator.next();
     while (iterator.hasNext()) {
       SolrRecord solrRecord = iterator.next();
-      if (solrRecord.getBoost() > recordToKeep.getBoost() ||
-          (solrRecord.getBoost() == recordToKeep.getBoost() && 
-              solrRecord.getTstamp() > recordToKeep.getTstamp())) {
+      if (solrRecord.getBoost() > recordToKeep.getBoost()
+          || (solrRecord.getBoost() == recordToKeep.getBoost() && solrRecord
+              .getTstamp() > recordToKeep.getTstamp())) {
         updateRequest.deleteById(recordToKeep.id);
         recordToKeep = solrRecord;
       } else {
@@ -360,13 +363,13 @@
     }
   }
 
-  public boolean dedup(String solrUrl)
-  throws IOException, InterruptedException, ClassNotFoundException {
+  public boolean dedup(String solrUrl) throws IOException,
+      InterruptedException, ClassNotFoundException {
     LOG.info("SolrDeleteDuplicates: starting...");
     LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
-    
+
     getConf().set(SolrConstants.SERVER_URL, solrUrl);
-    
+
     Job job = new Job(getConf(), "solrdedup");
 
     job.setInputFormatClass(SolrInputFormat.class);
@@ -376,11 +379,11 @@
     job.setMapperClass(Mapper.class);
     job.setReducerClass(SolrDeleteDuplicates.class);
 
-    return job.waitForCompletion(true);    
+    return job.waitForCompletion(true);
   }
 
-  public int run(String[] args)
-  throws IOException, InterruptedException, ClassNotFoundException {
+  public int run(String[] args) throws IOException, InterruptedException,
+      ClassNotFoundException {
     if (args.length != 1) {
       System.err.println("Usage: SolrDeleteDuplicates <solr url>");
       return 1;
Index: src/java/org/apache/nutch/indexer/solr/SolrUtils.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(working copy)
@@ -1,6 +1,5 @@
 package org.apache.nutch.indexer.solr;
 
-
 import org.apache.http.impl.client.DefaultHttpClient;
 import org.apache.http.auth.AuthScope;
 import org.apache.http.auth.UsernamePasswordCredentials;
@@ -18,7 +17,8 @@
 
   public static Logger LOG = LoggerFactory.getLogger(SolrUtils.class);
 
-  public static HttpSolrServer getHttpSolrServer(Configuration job) throws MalformedURLException {
+  public static HttpSolrServer getHttpSolrServer(Configuration job)
+      throws MalformedURLException {
     DefaultHttpClient client = new DefaultHttpClient();
 
     // Check for username/password
@@ -27,10 +27,13 @@
 
       LOG.info("Authenticating as: " + username);
 
-      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT,
+          AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
 
-      client.getCredentialsProvider().setCredentials(scope, 
-          new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+      client.getCredentialsProvider().setCredentials(
+          scope,
+          new UsernamePasswordCredentials(username, job
+              .get(SolrConstants.PASSWORD)));
 
       HttpParams params = client.getParams();
       HttpClientParams.setAuthenticating(params, true);
@@ -48,12 +51,14 @@
     for (int i = 0; i < input.length(); i++) {
       ch = input.charAt(i);
 
-      // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
-      // and non-printable control characters except tabulator, new line and carriage return
+      // Strip all non-characters
+      // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
+      // and non-printable control characters except tabulator, new line and
+      // carriage return
       if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000
-              ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
-              (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
-              (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
+          ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
+          (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
+          (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
 
         retval.append(ch);
       }
Index: src/java/org/apache/nutch/metadata/CreativeCommons.java
===================================================================
--- src/java/org/apache/nutch/metadata/CreativeCommons.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/CreativeCommons.java	(working copy)
@@ -16,21 +16,20 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Creative Commons properties names.
- *
+ * 
  * @see <a href="http://www.creativecommons.org/">creativecommons.org</a>
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface CreativeCommons {
-  
+
   public final static String LICENSE_URL = "License-Url";
-  
+
   public final static String LICENSE_LOCATION = "License-Location";
-  
+
   public final static String WORK_TYPE = "Work-Type";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/DublinCore.java
===================================================================
--- src/java/org/apache/nutch/metadata/DublinCore.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/DublinCore.java	(working copy)
@@ -16,62 +16,60 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Dublin Core metadata names.
- *
- * @see <a href="http://dublincore.org">dublincore.org</a> 
- *
+ * 
+ * @see <a href="http://dublincore.org">dublincore.org</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface DublinCore {
-  
-    
+
   /**
-   * Typically, Format may include the media-type or dimensions of the
-   * resource. Format may be used to determine the software, hardware or other
-   * equipment needed to display or operate the resource. Examples of
-   * dimensions include size and duration. Recommended best practice is to
-   * select a value from a controlled vocabulary (for example, the list of
-   * Internet Media Types [MIME] defining computer media formats).
+   * Typically, Format may include the media-type or dimensions of the resource.
+   * Format may be used to determine the software, hardware or other equipment
+   * needed to display or operate the resource. Examples of dimensions include
+   * size and duration. Recommended best practice is to select a value from a
+   * controlled vocabulary (for example, the list of Internet Media Types [MIME]
+   * defining computer media formats).
    */
   public static final String FORMAT = "format";
-  
+
   /**
-   * Recommended best practice is to identify the resource by means of a
-   * string or number conforming to a formal identification system. Example
-   * formal identification systems include the Uniform Resource Identifier
-   * (URI) (including the Uniform Resource Locator (URL)), the Digital Object
+   * Recommended best practice is to identify the resource by means of a string
+   * or number conforming to a formal identification system. Example formal
+   * identification systems include the Uniform Resource Identifier (URI)
+   * (including the Uniform Resource Locator (URL)), the Digital Object
    * Identifier (DOI) and the International Standard Book Number (ISBN).
    */
   public static final String IDENTIFIER = "identifier";
-  
+
   /**
    * Date on which the resource was changed.
    */
   public static final String MODIFIED = "modified";
-  
+
   /**
    * An entity responsible for making contributions to the content of the
-   * resource. Examples of a Contributor include a person, an organisation, or
-   * a service. Typically, the name of a Contributor should be used to
-   * indicate the entity.
+   * resource. Examples of a Contributor include a person, an organisation, or a
+   * service. Typically, the name of a Contributor should be used to indicate
+   * the entity.
    */
   public static final String CONTRIBUTOR = "contributor";
-  
+
   /**
-   * The extent or scope of the content of the resource. Coverage will
-   * typically include spatial location (a place name or geographic
-   * coordinates), temporal period (a period label, date, or date range) or
-   * jurisdiction (such as a named administrative entity). Recommended best
-   * practice is to select a value from a controlled vocabulary (for example,
-   * the Thesaurus of Geographic Names [TGN]) and that, where appropriate,
-   * named places or time periods be used in preference to numeric identifiers
-   * such as sets of coordinates or date ranges.
+   * The extent or scope of the content of the resource. Coverage will typically
+   * include spatial location (a place name or geographic coordinates), temporal
+   * period (a period label, date, or date range) or jurisdiction (such as a
+   * named administrative entity). Recommended best practice is to select a
+   * value from a controlled vocabulary (for example, the Thesaurus of
+   * Geographic Names [TGN]) and that, where appropriate, named places or time
+   * periods be used in preference to numeric identifiers such as sets of
+   * coordinates or date ranges.
    */
   public static final String COVERAGE = "coverage";
-  
+
   /**
    * An entity primarily responsible for making the content of the resource.
    * Examples of a Creator include a person, an organisation, or a service.
@@ -78,16 +76,15 @@
    * Typically, the name of a Creator should be used to indicate the entity.
    */
   public static final String CREATOR = "creator";
-  
+
   /**
    * A date associated with an event in the life cycle of the resource.
-   * Typically, Date will be associated with the creation or availability of
-   * the resource. Recommended best practice for encoding the date value is
-   * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
-   * format.
+   * Typically, Date will be associated with the creation or availability of the
+   * resource. Recommended best practice for encoding the date value is defined
+   * in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD format.
    */
   public static final String DATE = "date";
-  
+
   /**
    * An account of the content of the resource. Description may include but is
    * not limited to: an abstract, table of contents, reference to a graphical
@@ -94,16 +91,16 @@
    * representation of content or a free-text account of the content.
    */
   public static final String DESCRIPTION = "description";
-  
+
   /**
    * A language of the intellectual content of the resource. Recommended best
    * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
-   * [ISO639], defines two- and three-letter primary language tags with
-   * optional subtags. Examples include "en" or "eng" for English, "akk" for
-   * Akkadian, and "en-GB" for English used in the United Kingdom.
+   * [ISO639], defines two- and three-letter primary language tags with optional
+   * subtags. Examples include "en" or "eng" for English, "akk" for Akkadian,
+   * and "en-GB" for English used in the United Kingdom.
    */
   public static final String LANGUAGE = "language";
-  
+
   /**
    * An entity responsible for making the resource available. Examples of a
    * Publisher include a person, an organisation, or a service. Typically, the
@@ -110,7 +107,7 @@
    * name of a Publisher should be used to indicate the entity.
    */
   public static final String PUBLISHER = "publisher";
-  
+
   /**
    * A reference to a related resource. Recommended best practice is to
    * reference the resource by means of a string or number conforming to a
@@ -117,48 +114,48 @@
    * formal identification system.
    */
   public static final String RELATION = "relation";
-  
+
   /**
-   * Information about rights held in and over the resource. Typically, a
-   * Rights element will contain a rights management statement for the
-   * resource, or reference a service providing such information. Rights
-   * information often encompasses Intellectual Property Rights (IPR),
-   * Copyright, and various Property Rights. If the Rights element is absent,
-   * no assumptions can be made about the status of these and other rights
-   * with respect to the resource.
+   * Information about rights held in and over the resource. Typically, a Rights
+   * element will contain a rights management statement for the resource, or
+   * reference a service providing such information. Rights information often
+   * encompasses Intellectual Property Rights (IPR), Copyright, and various
+   * Property Rights. If the Rights element is absent, no assumptions can be
+   * made about the status of these and other rights with respect to the
+   * resource.
    */
   public static final String RIGHTS = "rights";
-  
+
   /**
    * A reference to a resource from which the present resource is derived. The
    * present resource may be derived from the Source resource in whole or in
-   * part. Recommended best practice is to reference the resource by means of
-   * a string or number conforming to a formal identification system.
+   * part. Recommended best practice is to reference the resource by means of a
+   * string or number conforming to a formal identification system.
    */
   public static final String SOURCE = "source";
-  
+
   /**
    * The topic of the content of the resource. Typically, a Subject will be
-   * expressed as keywords, key phrases or classification codes that describe
-   * a topic of the resource. Recommended best practice is to select a value
-   * from a controlled vocabulary or formal classification scheme.
+   * expressed as keywords, key phrases or classification codes that describe a
+   * topic of the resource. Recommended best practice is to select a value from
+   * a controlled vocabulary or formal classification scheme.
    */
   public static final String SUBJECT = "subject";
-  
+
   /**
    * A name given to the resource. Typically, a Title will be a name by which
    * the resource is formally known.
    */
   public static final String TITLE = "title";
-  
+
   /**
    * The nature or genre of the content of the resource. Type includes terms
-   * describing general categories, functions, genres, or aggregation levels
-   * for content. Recommended best practice is to select a value from a
-   * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]).
-   * To describe the physical or digital manifestation of the resource, use
-   * the Format element.
+   * describing general categories, functions, genres, or aggregation levels for
+   * content. Recommended best practice is to select a value from a controlled
+   * vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). To describe
+   * the physical or digital manifestation of the resource, use the Format
+   * element.
    */
   public static final String TYPE = "type";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/HttpHeaders.java
===================================================================
--- src/java/org/apache/nutch/metadata/HttpHeaders.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/HttpHeaders.java	(working copy)
@@ -16,14 +16,12 @@
  */
 package org.apache.nutch.metadata;
 
-
-
 /**
  * A collection of HTTP header names.
- *
- * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer
- *      Protocol -- HTTP/1.1 (RFC 2616)</a>
- *
+ * 
+ * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer Protocol
+ *      -- HTTP/1.1 (RFC 2616)</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/metadata/MetaWrapper.java
===================================================================
--- src/java/org/apache/nutch/metadata/MetaWrapper.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/MetaWrapper.java	(working copy)
@@ -28,28 +28,29 @@
 /**
  * This is a simple decorator that adds metadata to any Writable-s that can be
  * serialized by <tt>NutchWritable</tt>. This is useful when data needs to be
- * temporarily enriched during processing, but this
- * temporary metadata doesn't need to be permanently stored after the job is done.
+ * temporarily enriched during processing, but this temporary metadata doesn't
+ * need to be permanently stored after the job is done.
  * 
  * @author Andrzej Bialecki
  */
 public class MetaWrapper extends NutchWritable {
   private Metadata metadata;
-  
+
   public MetaWrapper() {
     super();
     metadata = new Metadata();
   }
-  
+
   public MetaWrapper(Writable instance, Configuration conf) {
     super(instance);
     metadata = new Metadata();
     setConf(conf);
   }
-  
+
   public MetaWrapper(Metadata metadata, Writable instance, Configuration conf) {
     super(instance);
-    if (metadata == null) metadata = new Metadata();
+    if (metadata == null)
+      metadata = new Metadata();
     this.metadata = metadata;
     setConf(conf);
   }
@@ -60,18 +61,24 @@
   public Metadata getMetadata() {
     return metadata;
   }
-  
+
   /**
-   * Add metadata. See {@link Metadata#add(String, String)} for more information.
-   * @param name metadata name
-   * @param value metadata value
+   * Add metadata. See {@link Metadata#add(String, String)} for more
+   * information.
+   * 
+   * @param name
+   *          metadata name
+   * @param value
+   *          metadata value
    */
   public void addMeta(String name, String value) {
     metadata.add(name, value);
   }
-  
+
   /**
-   * Set metadata. See {@link Metadata#set(String, String)} for more information.
+   * Set metadata. See {@link Metadata#set(String, String)} for more
+   * information.
+   * 
    * @param name
    * @param value
    */
@@ -78,9 +85,10 @@
   public void setMeta(String name, String value) {
     metadata.set(name, value);
   }
-  
+
   /**
    * Get metadata. See {@link Metadata#get(String)} for more information.
+   * 
    * @param name
    * @return metadata value
    */
@@ -87,9 +95,11 @@
   public String getMeta(String name) {
     return metadata.get(name);
   }
-  
+
   /**
-   * Get multiple metadata. See {@link Metadata#getValues(String)} for more information.
+   * Get multiple metadata. See {@link Metadata#getValues(String)} for more
+   * information.
+   * 
    * @param name
    * @return multiple values
    */
@@ -96,7 +106,7 @@
   public String[] getMetaValues(String name) {
     return metadata.getValues(name);
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
     metadata = new Metadata();
Index: src/java/org/apache/nutch/metadata/Metadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/Metadata.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/Metadata.java	(working copy)
@@ -27,16 +27,15 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
-
 /**
  * A multi-valued metadata container.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  */
-public class Metadata implements Writable, CreativeCommons,
-DublinCore, HttpHeaders, Nutch, Feed {
+public class Metadata implements Writable, CreativeCommons, DublinCore,
+    HttpHeaders, Nutch, Feed {
 
   /**
    * A map of all metadata attributes.
@@ -43,7 +42,6 @@
    */
   private Map<String, String[]> metadata = null;
 
-
   /**
    * Constructs a new, empty metadata.
    */
@@ -53,9 +51,10 @@
 
   /**
    * Returns true if named value is multivalued.
-   * @param name name of metadata
-   * @return true is named value is multivalued, false if single
-   * value or null
+   * 
+   * @param name
+   *          name of metadata
+   * @return true if named value is multivalued, false if single value or null
    */
   public boolean isMultiValued(final String name) {
     return metadata.get(name) != null && metadata.get(name).length > 1;
@@ -63,6 +62,7 @@
 
   /**
    * Returns an array of the names contained in the metadata.
+   * 
    * @return Metadata names
    */
   public String[] names() {
@@ -70,11 +70,11 @@
   }
 
   /**
-   * Get the value associated to a metadata name.
-   * If many values are assiociated to the specified name, then the first
-   * one is returned.
-   *
-   * @param name of the metadata.
+   * Get the value associated to a metadata name. If many values are associated
+   * to the specified name, then the first one is returned.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the value associated to the specified metadata name.
    */
   public String get(final String name) {
@@ -88,13 +88,15 @@
 
   /**
    * Get the values associated to a metadata name.
-   * @param name of the metadata.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the values associated to a metadata name.
    */
   public String[] getValues(final String name) {
     return _getValues(name);
   }
-  
+
   private String[] _getValues(final String name) {
     String[] values = metadata.get(name);
     if (values == null) {
@@ -104,12 +106,13 @@
   }
 
   /**
-   * Add a metadata name/value mapping.
-   * Add the specified value to the list of values associated to the
-   * specified metadata name.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Add a metadata name/value mapping. Add the specified value to the list of
+   * values associated to the specified metadata name.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void add(final String name, final String value) {
     String[] values = metadata.get(name);
@@ -125,31 +128,37 @@
 
   /**
    * Copy All key-value pairs from properties.
-   * @param properties properties to copy from
+   * 
+   * @param properties
+   *          properties to copy from
    */
   public void setAll(Properties properties) {
     Enumeration<?> names = properties.propertyNames();
     while (names.hasMoreElements()) {
       String name = (String) names.nextElement();
-      metadata.put(name, new String[]{properties.getProperty(name)});
+      metadata.put(name, new String[] { properties.getProperty(name) });
     }
   }
 
   /**
-   * Set metadata name/value.
-   * Associate the specified value to the specified metadata name. If some
-   * previous values were associated to this name, they are removed.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Set metadata name/value. Associate the specified value to the specified
+   * metadata name. If some previous values were associated to this name, they
+   * are removed.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void set(String name, String value) {
-    metadata.put(name, new String[]{value});
+    metadata.put(name, new String[] { value });
   }
 
   /**
    * Remove a metadata and all its associated values.
-   * @param name metadata name to remove
+   * 
+   * @param name
+   *          metadata name to remove
    */
   public void remove(String name) {
     metadata.remove(name);
@@ -157,12 +166,13 @@
 
   /**
    * Returns the number of metadata names in this metadata.
+   * 
    * @return number of metadata names
    */
   public int size() {
     return metadata.size();
   }
-  
+
   /** Remove all mappings from metadata. */
   public void clear() {
     metadata.clear();
@@ -170,7 +180,9 @@
 
   public boolean equals(Object o) {
 
-    if (o == null) { return false; }
+    if (o == null) {
+      return false;
+    }
 
     Metadata other = null;
     try {
@@ -179,7 +191,9 @@
       return false;
     }
 
-    if (other.size() != size()) { return false; }
+    if (other.size() != size()) {
+      return false;
+    }
 
     String[] names = names();
     for (int i = 0; i < names.length; i++) {
@@ -203,10 +217,7 @@
     for (int i = 0; i < names.length; i++) {
       String[] values = _getValues(names[i]);
       for (int j = 0; j < values.length; j++) {
-        buf.append(names[i])
-           .append("=")
-           .append(values[j])
-           .append(" ");
+        buf.append(names[i]).append("=").append(values[j]).append(" ");
       }
     }
     return buf.toString();
Index: src/java/org/apache/nutch/metadata/Nutch.java
===================================================================
--- src/java/org/apache/nutch/metadata/Nutch.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/Nutch.java	(working copy)
@@ -19,20 +19,17 @@
 import org.apache.avro.util.Utf8;
 import org.apache.hadoop.io.Text;
 
-
 /**
  * A collection of Nutch internal metadata constants.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Nutch {
 
-  public static final String ORIGINAL_CHAR_ENCODING =
-          "OriginalCharEncoding";
+  public static final String ORIGINAL_CHAR_ENCODING = "OriginalCharEncoding";
 
-  public static final String CHAR_ENCODING_FOR_CONVERSION =
-          "CharEncodingForConversion";
+  public static final String CHAR_ENCODING_FOR_CONVERSION = "CharEncodingForConversion";
 
   public static final String SIGNATURE_KEY = "nutch.content.digest";
 
@@ -42,20 +39,26 @@
 
   public static final String GENERATE_TIME_KEY = "_ngt_";
 
-  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(GENERATE_TIME_KEY);
+  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(
+      GENERATE_TIME_KEY);
 
   public static final String PROTO_STATUS_KEY = "_pst_";
 
-  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(PROTO_STATUS_KEY);
+  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(
+      PROTO_STATUS_KEY);
 
   public static final String FETCH_TIME_KEY = "_ftk_";
 
   public static final String FETCH_STATUS_KEY = "_fst_";
 
-  /** Sites may request that search engines don't provide access to cached documents. */
+  /**
+   * Sites may request that search engines don't provide access to cached
+   * documents.
+   */
   public static final String CACHING_FORBIDDEN_KEY = "caching.forbidden";
 
-  public static final Utf8 CACHING_FORBIDDEN_KEY_UTF8 = new Utf8(CACHING_FORBIDDEN_KEY);
+  public static final Utf8 CACHING_FORBIDDEN_KEY_UTF8 = new Utf8(
+      CACHING_FORBIDDEN_KEY);
 
   /** Show both original forbidden content and summaries (default). */
   public static final String CACHING_FORBIDDEN_NONE = "none";
@@ -75,8 +78,7 @@
   public static final Utf8 ALL_CRAWL_ID = new Utf8(ALL_BATCH_ID_STR);
 
   public static final String CRAWL_ID_KEY = "storage.crawl.id";
-  
-  
+
   // short constants for cmd-line args
   /** Batch id to select. */
   public static final String ARG_BATCH = "batch";
@@ -110,7 +112,7 @@
   public static final String ARG_CLASS = "class";
   /** Depth (number of cycles) of a crawl. */
   public static final String ARG_DEPTH = "depth";
-  
+
   // short constants for status / results fields
   /** Status / result message. */
   public static final String STAT_MESSAGE = "msg";
Index: src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(working copy)
@@ -33,7 +33,7 @@
 
   /**
    * Treshold divider.
-   *
+   * 
    * <code>threshold = searched.length() / TRESHOLD_DIVIDER;</code>
    */
   private static final int TRESHOLD_DIVIDER = 3;
@@ -52,7 +52,7 @@
 
     // Uses following array to fill the metanames index and the
     // metanames list.
-    Class<?>[] spellthese = {HttpHeaders.class};
+    Class<?>[] spellthese = { HttpHeaders.class };
 
     for (Class<?> spellCheckedNames : spellthese) {
       for (Field field : spellCheckedNames.getFields()) {
@@ -73,7 +73,7 @@
 
   /**
    * Normalizes String.
-   *
+   * 
    * @param str
    *          the string to normalize
    * @return normalized String
@@ -102,7 +102,7 @@
    * </ul>
    * If no matching with a well-known metadata name is found, then the original
    * name is returned.
-   *
+   * 
    * @param name
    *          Name to normalize
    * @return normalized name
Index: src/java/org/apache/nutch/net/URLFilter.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLFilter.java	(working copy)
@@ -23,10 +23,9 @@
 // Nutch imports
 import org.apache.nutch.plugin.Pluggable;
 
-
 /**
- * Interface used to limit which URLs enter Nutch.
- * Used by the injector and the db updater.
+ * Interface used to limit which URLs enter Nutch. Used by the injector and the
+ * db updater.
  */
 
 public interface URLFilter extends Pluggable, Configurable {
@@ -33,7 +32,9 @@
   /** The name of the extension point. */
   public final static String X_POINT_ID = URLFilter.class.getName();
 
-  /* Interface for a filter that transforms a URL: it can pass the
-     original URL through or "delete" the URL by returning null */
+  /*
+   * Interface for a filter that transforms a URL: it can pass the original URL
+   * through or "delete" the URL by returning null
+   */
   public String filter(String urlString);
 }
Index: src/java/org/apache/nutch/net/URLFilterChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilterChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLFilterChecker.java	(working copy)
@@ -38,23 +38,23 @@
   private Configuration conf;
 
   public URLFilterChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String filterName) throws Exception {
     URLFilter filter = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLFilter.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLFilter.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLFilter.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLFilter.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      filter = (URLFilter)extension.getExtensionInstance();
+      filter = (URLFilter) extension.getExtensionInstance();
       if (filter.getClass().getName().equals(filterName)) {
         break;
       } else {
@@ -63,19 +63,19 @@
     }
 
     if (filter == null)
-      throw new RuntimeException("Filter "+filterName+" not found.");
+      throw new RuntimeException("Filter " + filterName + " not found.");
 
     // jerome : should we keep this behavior?
-    //if (LogFormatter.hasLoggedSevere())
-    //  throw new RuntimeException("Severe error encountered.");
+    // if (LogFormatter.hasLoggedSevere())
+    // throw new RuntimeException("Severe error encountered.");
 
-    System.out.println("Checking URLFilter "+filterName);
+    System.out.println("Checking URLFilter " + filterName);
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
-      String out=filter.filter(line);
-      if(out!=null) {
+    while ((line = in.readLine()) != null) {
+      String out = filter.filter(line);
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
@@ -90,10 +90,10 @@
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
+    while ((line = in.readLine()) != null) {
       URLFilters filters = new URLFilters(this.conf);
       String out = filters.filter(line);
-      if(out!=null) {
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
Index: src/java/org/apache/nutch/net/URLFilters.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLFilters.java	(working copy)
@@ -28,7 +28,8 @@
 import org.apache.nutch.util.ObjectCache;
 
 import org.apache.hadoop.conf.Configuration;
-/** Creates and caches {@link URLFilter} implementing plugins.*/
+
+/** Creates and caches {@link URLFilter} implementing plugins. */
 public class URLFilters {
 
   public static final String URLFILTER_ORDER = "urlfilter.order";
@@ -37,7 +38,8 @@
   public URLFilters(Configuration conf) {
     String order = conf.get(URLFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -60,8 +62,8 @@
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(URLFilter.class.getName(), filterMap.values().toArray(
-              new URLFilter[0]));
+          objectCache.setObject(URLFilter.class.getName(), filterMap.values()
+              .toArray(new URLFilter[0]));
         } else {
           ArrayList<URLFilter> filters = new ArrayList<URLFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -70,13 +72,14 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(URLFilter.class.getName(), filters
-              .toArray(new URLFilter[filters.size()]));
+          objectCache.setObject(URLFilter.class.getName(),
+              filters.toArray(new URLFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+          .getName());
     }
   }
 
Index: src/java/org/apache/nutch/net/URLNormalizer.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizer.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLNormalizer.java	(working copy)
@@ -21,13 +21,17 @@
 
 import org.apache.hadoop.conf.Configurable;
 
-/** Interface used to convert URLs to normal form and optionally perform substitutions */
+/**
+ * Interface used to convert URLs to normal form and optionally perform
+ * substitutions
+ */
 public interface URLNormalizer extends Configurable {
-  
+
   /* Extension ID */
   public static final String X_POINT_ID = URLNormalizer.class.getName();
-  
+
   /* Interface for URL normalization */
-  public String normalize(String urlString, String scope) throws MalformedURLException;
+  public String normalize(String urlString, String scope)
+      throws MalformedURLException;
 
 }
Index: src/java/org/apache/nutch/net/URLNormalizerChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizerChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLNormalizerChecker.java	(working copy)
@@ -36,23 +36,23 @@
   private Configuration conf;
 
   public URLNormalizerChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String normalizerName, String scope) throws Exception {
     URLNormalizer normalizer = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLNormalizer.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLNormalizer.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLNormalizer.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLNormalizer.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      normalizer = (URLNormalizer)extension.getExtensionInstance();
+      normalizer = (URLNormalizer) extension.getExtensionInstance();
       if (normalizer.getClass().getName().equals(normalizerName)) {
         break;
       } else {
@@ -61,7 +61,8 @@
     }
 
     if (normalizer == null)
-      throw new RuntimeException("URLNormalizer "+normalizerName+" not found.");
+      throw new RuntimeException("URLNormalizer " + normalizerName
+          + " not found.");
 
     System.out.println("Checking URLNormalizer " + normalizerName);
 
@@ -79,7 +80,7 @@
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
     URLNormalizers normalizers = new URLNormalizers(conf, scope);
-    while((line = in.readLine()) != null) {
+    while ((line = in.readLine()) != null) {
       String out = normalizers.normalize(line, scope);
       System.out.println(out);
     }
@@ -88,7 +89,7 @@
   public static void main(String[] args) throws Exception {
 
     String usage = "Usage: URLNormalizerChecker [-normalizer <normalizerName>] [-scope <scope>]"
-      + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
+        + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
 
     String normalizerName = null;
     String scope = URLNormalizers.SCOPE_DEFAULT;
@@ -103,7 +104,8 @@
       }
     }
 
-    URLNormalizerChecker checker = new URLNormalizerChecker(NutchConfiguration.create());
+    URLNormalizerChecker checker = new URLNormalizerChecker(
+        NutchConfiguration.create());
     if (normalizerName != null) {
       checker.checkOne(normalizerName, scope);
     } else {
Index: src/java/org/apache/nutch/net/URLNormalizers.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizers.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLNormalizers.java	(working copy)
@@ -43,40 +43,55 @@
  * contexts where they are used (note however that they need to be activated
  * first through <tt>plugin.include</tt> property).
  * 
- * <p>There is one global scope defined by default, which consists of all
- * active normalizers. The order in which these normalizers
- * are executed may be defined in "urlnormalizer.order" property, which lists
- * space-separated implementation classes (if this property is missing normalizers
- * will be run in random order). If there are more
- * normalizers activated than explicitly named on this list, the remaining ones
- * will be run in random order after the ones specified on the list are executed.</p>
- * <p>You can define a set of contexts (or scopes) in which normalizers may be
+ * <p>
+ * There is one global scope defined by default, which consists of all active
+ * normalizers. The order in which these normalizers are executed may be defined
+ * in "urlnormalizer.order" property, which lists space-separated implementation
+ * classes (if this property is missing normalizers will be run in random
+ * order). If there are more normalizers activated than explicitly named on this
+ * list, the remaining ones will be run in random order after the ones specified
+ * on the list are executed.
+ * </p>
+ * <p>
+ * You can define a set of contexts (or scopes) in which normalizers may be
  * called. Each scope can have its own list of normalizers (defined in
  * "urlnormalizer.scope.<scope_name>" property) and its own order (defined in
  * "urlnormalizer.order.<scope_name>" property). If any of these properties are
- * missing, default settings are used for the global scope.</p>
- * <p>In case no normalizers are required for any given scope, a
- * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should be used.</p>
- * <p>Each normalizer may further select among many configurations, depending on
- * the scope in which it is called, because the scope name is passed as a parameter
- * to each normalizer. You can also use the same normalizer for many scopes.</p>
- * <p>Several scopes have been defined, and various Nutch tools will attempt using
- * scope-specific normalizers first (and fall back to default config if scope-specific
- * configuration is missing).</p>
- * <p>Normalizers may be run several times, to ensure that modifications introduced
+ * missing, default settings are used for the global scope.
+ * </p>
+ * <p>
+ * In case no normalizers are required for any given scope, a
+ * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should
+ * be used.
+ * </p>
+ * <p>
+ * Each normalizer may further select among many configurations, depending on
+ * the scope in which it is called, because the scope name is passed as a
+ * parameter to each normalizer. You can also use the same normalizer for many
+ * scopes.
+ * </p>
+ * <p>
+ * Several scopes have been defined, and various Nutch tools will attempt using
+ * scope-specific normalizers first (and fall back to default config if
+ * scope-specific configuration is missing).
+ * </p>
+ * <p>
+ * Normalizers may be run several times, to ensure that modifications introduced
  * by normalizers at the end of the list can be further reduced by normalizers
- * executed at the beginning. By default this loop is executed just once - if you want
- * to ensure that all possible combinations have been applied you may want to run
- * this loop up to the number of activated normalizers. This loop count can be configured
- * through <tt>urlnormalizer.loop.count</tt> property. As soon as the url is
- * unchanged the loop will stop and return the result.</p>
+ * executed at the beginning. By default this loop is executed just once - if
+ * you want to ensure that all possible combinations have been applied you may
+ * want to run this loop up to the number of activated normalizers. This loop
+ * count can be configured through <tt>urlnormalizer.loop.count</tt> property.
+ * As soon as the url is unchanged the loop will stop and return the result.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
 public final class URLNormalizers {
-  
-  /** Default scope. If no scope properties are defined then the configuration for
-   * this scope will be used.
+
+  /**
+   * Default scope. If no scope properties are defined then the configuration
+   * for this scope will be used.
    */
   public static final String SCOPE_DEFAULT = "default";
   /** Scope used by {@link org.apache.nutch.crawl.URLPartitioner}. */
@@ -83,7 +98,8 @@
   public static final String SCOPE_PARTITION = "partition";
   /** Scope used by {@link org.apache.nutch.crawl.GeneratorJob}. */
   public static final String SCOPE_GENERATE_HOST_COUNT = "generate_host_count";
-  /** Scope used by {@link org.apache.nutch.fetcher.FetcherJob} when processing
+  /**
+   * Scope used by {@link org.apache.nutch.fetcher.FetcherJob} when processing
    * redirect URLs.
    */
   public static final String SCOPE_FETCHER = "fetcher";
@@ -93,15 +109,18 @@
   public static final String SCOPE_LINKDB = "linkdb";
   /** Scope used by {@link org.apache.nutch.crawl.InjectorJob}. */
   public static final String SCOPE_INJECT = "inject";
-  /** Scope used when constructing new {@link org.apache.nutch.parse.Outlink} instances. */
+  /**
+   * Scope used when constructing new {@link org.apache.nutch.parse.Outlink}
+   * instances.
+   */
   public static final String SCOPE_OUTLINK = "outlink";
-  
 
-  public static final Logger LOG = LoggerFactory.getLogger(URLNormalizers.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(URLNormalizers.class);
 
   /* Empty extension list for caching purposes. */
   private final List<Extension> EMPTY_EXTENSION_LIST = Collections.emptyList();
-  
+
   private final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0];
 
   private Configuration conf;
@@ -109,37 +128,39 @@
   private ExtensionPoint extensionPoint;
 
   private URLNormalizer[] normalizers;
-  
+
   private int loopCount;
 
   public URLNormalizers(Configuration conf, String scope) {
     this.conf = conf;
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
-            URLNormalizer.X_POINT_ID);
+        URLNormalizer.X_POINT_ID);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (this.extensionPoint == null) {
       throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID
-              + " not found.");
+          + " not found.");
     }
 
-    normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + scope);
+    normalizers = (URLNormalizer[]) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_" + scope);
     if (normalizers == null) {
       normalizers = getURLNormalizers(scope);
     }
     if (normalizers == EMPTY_NORMALIZERS) {
-      normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
+      normalizers = (URLNormalizer[]) objectCache
+          .getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
       if (normalizers == null) {
         normalizers = getURLNormalizers(SCOPE_DEFAULT);
       }
     }
-    
+
     loopCount = conf.getInt("urlnormalizer.loop.count", 1);
   }
 
   /**
-   * Function returns an array of {@link URLNormalizer}s for a given scope,
-   * with a specified order.
+   * Function returns an array of {@link URLNormalizer}s for a given scope, with
+   * a specified order.
    * 
    * @param scope
    *          The scope to return the <code>Array</code> of
@@ -151,13 +172,14 @@
   URLNormalizer[] getURLNormalizers(String scope) {
     List<Extension> extensions = getExtensions(scope);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (extensions == EMPTY_EXTENSION_LIST) {
       return EMPTY_NORMALIZERS;
     }
-    
-    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());
 
+    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(
+        extensions.size());
+
     Iterator<Extension> it = extensions.iterator();
     while (it.hasNext()) {
       Extension ext = it.next();
@@ -174,14 +196,13 @@
       } catch (PluginRuntimeException e) {
         e.printStackTrace();
         LOG.warn("URLNormalizers:PluginRuntimeException when "
-                + "initializing url normalizer plugin "
-                + ext.getDescriptor().getPluginId()
-                + " instance in getURLNormalizers "
-                + "function: attempting to continue instantiating plugins");
+            + "initializing url normalizer plugin "
+            + ext.getDescriptor().getPluginId()
+            + " instance in getURLNormalizers "
+            + "function: attempting to continue instantiating plugins");
       }
     }
-    return normalizers.toArray(new URLNormalizer[normalizers
-            .size()]);
+    return normalizers.toArray(new URLNormalizer[normalizers.size()]);
   }
 
   /**
@@ -196,9 +217,8 @@
   @SuppressWarnings("unchecked")
   private List<Extension> getExtensions(String scope) {
     ObjectCache objectCache = ObjectCache.get(conf);
-    List<Extension> extensions = 
-      (List<Extension>) objectCache.getObject(URLNormalizer.X_POINT_ID + "_x_"
-                                                + scope);
+    List<Extension> extensions = (List<Extension>) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_x_" + scope);
 
     // Just compare the reference:
     // if this is the empty list, we know we will find no extension.
@@ -209,11 +229,13 @@
     if (extensions == null) {
       extensions = findExtensions(scope);
       if (extensions != null) {
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, extensions);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            extensions);
       } else {
         // Put the empty extension list into cache
         // to remember we don't know any related extension.
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, EMPTY_EXTENSION_LIST);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            EMPTY_EXTENSION_LIST);
         extensions = EMPTY_EXTENSION_LIST;
       }
     }
@@ -233,7 +255,8 @@
 
     String[] orders = null;
     String orderlist = conf.get("urlnormalizer.order." + scope);
-    if (orderlist == null) orderlist = conf.get("urlnormalizer.order");
+    if (orderlist == null)
+      orderlist = conf.get("urlnormalizer.order");
     if (orderlist != null && !orderlist.trim().equals("")) {
       orders = orderlist.split("\\s+");
     }
@@ -271,13 +294,17 @@
 
   /**
    * Normalize
-   * @param urlString The URL string to normalize.
-   * @param scope The given scope.
+   * 
+   * @param urlString
+   *          The URL string to normalize.
+   * @param scope
+   *          The given scope.
    * @return A normalized String, using the given <code>scope</code>
-   * @throws MalformedURLException If the given URL string is malformed.
+   * @throws MalformedURLException
+   *           If the given URL string is malformed.
    */
   public String normalize(String urlString, String scope)
-          throws MalformedURLException {
+      throws MalformedURLException {
     // optionally loop several times, and break if no further changes
     String initialString = urlString;
     for (int k = 0; k < loopCount; k++) {
@@ -286,7 +313,8 @@
           return null;
         urlString = this.normalizers[i].normalize(urlString, scope);
       }
-      if (initialString.equals(urlString)) break;
+      if (initialString.equals(urlString))
+        break;
       initialString = urlString;
     }
     return urlString;
Index: src/java/org/apache/nutch/net/package-info.java
===================================================================
--- src/java/org/apache/nutch/net/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * and {@link org.apache.nutch.net.URLNormalizer normalizers}.
  */
 package org.apache.nutch.net;
+
Index: src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(working copy)
@@ -26,15 +26,15 @@
 
 /**
  * class to handle HTTP dates.
- *
+ * 
  * Modified from FastHttpDateFormat.java in jakarta-tomcat.
- *
+ * 
  * @author John Xing
  */
 public class HttpDateFormat {
 
-  protected static SimpleDateFormat format = 
-    new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
+  protected static SimpleDateFormat format = new SimpleDateFormat(
+      "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
 
   /**
    * HTTP date uses TimeZone GMT
@@ -43,29 +43,29 @@
     format.setTimeZone(TimeZone.getTimeZone("GMT"));
   }
 
-  //HttpDate (long t) {
-  //}
+  // HttpDate (long t) {
+  // }
 
-  //HttpDate (String s) {
-  //}
+  // HttpDate (String s) {
+  // }
 
-//  /**
-//   * Get the current date in HTTP format.
-//   */
-//  public static String getCurrentDate() {
-//
-//    long now = System.currentTimeMillis();
-//    if ((now - currentDateGenerated) > 1000) {
-//        synchronized (format) {
-//            if ((now - currentDateGenerated) > 1000) {
-//                currentDateGenerated = now;
-//                currentDate = format.format(new Date(now));
-//            }
-//        }
-//    }
-//    return currentDate;
-//
-//  }
+  // /**
+  // * Get the current date in HTTP format.
+  // */
+  // public static String getCurrentDate() {
+  //
+  // long now = System.currentTimeMillis();
+  // if ((now - currentDateGenerated) > 1000) {
+  // synchronized (format) {
+  // if ((now - currentDateGenerated) > 1000) {
+  // currentDateGenerated = now;
+  // currentDate = format.format(new Date(now));
+  // }
+  // }
+  // }
+  // return currentDate;
+  //
+  // }
 
   /**
    * Get the HTTP format of the specified date.
Index: src/java/org/apache/nutch/net/protocols/ProtocolException.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/ProtocolException.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/ProtocolException.java	(working copy)
@@ -21,6 +21,7 @@
 
 /**
  * Base exception for all protocol handlers
+ * 
  * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead.
  */
 @Deprecated
@@ -27,7 +28,6 @@
 @SuppressWarnings("serial")
 public class ProtocolException extends Exception implements Serializable {
 
-
   public ProtocolException() {
     super();
   }
Index: src/java/org/apache/nutch/net/protocols/Response.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/Response.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/Response.java	(working copy)
@@ -23,12 +23,11 @@
 import org.apache.nutch.metadata.HttpHeaders;
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
- * A response interface.  Makes all protocols model HTTP.
+ * A response interface. Makes all protocols model HTTP.
  */
 public interface Response extends HttpHeaders {
-  
+
   /** Returns the URL used to retrieve this response. */
   public URL getUrl();
 
@@ -40,7 +39,7 @@
 
   /** Returns all the headers. */
   public Metadata getHeaders();
-  
+
   /** Returns the full content of the response. */
   public byte[] getContent();
 
Index: src/java/org/apache/nutch/net/protocols/package-info.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * interface, sea also {@link org.apache.nutch.protocol}.
  */
 package org.apache.nutch.net.protocols;
+
Index: src/java/org/apache/nutch/parse/HTMLMetaTags.java
===================================================================
--- src/java/org/apache/nutch/parse/HTMLMetaTags.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/HTMLMetaTags.java	(working copy)
@@ -24,8 +24,8 @@
 import org.apache.nutch.metadata.Metadata;
 
 /**
- * This class holds the information about HTML "meta" tags extracted from 
- * a page. Some special tags have convenience methods for easy checking.
+ * This class holds the information about HTML "meta" tags extracted from a
+ * page. Some special tags have convenience methods for easy checking.
  */
 public class HTMLMetaTags {
   private boolean noIndex = false;
@@ -156,8 +156,8 @@
   }
 
   /**
-   * A convenience method. Returns the current value of <code>refreshTime</code>.
-   * The value may be invalid if {@link #getRefresh()}returns
+   * A convenience method. Returns the current value of <code>refreshTime</code>
+   * . The value may be invalid if {@link #getRefresh()}returns
    * <code>false</code>.
    */
   public int getRefreshTime() {
@@ -179,16 +179,12 @@
   public Properties getHttpEquivTags() {
     return httpEquivTags;
   }
-  
+
   public String toString() {
     StringBuffer sb = new StringBuffer();
-    sb.append("base=" + baseHref
-            + ", noCache=" + noCache
-            + ", noFollow=" + noFollow
-            + ", noIndex=" + noIndex
-            + ", refresh=" + refresh
-            + ", refreshHref=" + refreshHref + "\n"
-            );
+    sb.append("base=" + baseHref + ", noCache=" + noCache + ", noFollow="
+        + noFollow + ", noIndex=" + noIndex + ", refresh=" + refresh
+        + ", refreshHref=" + refreshHref + "\n");
     sb.append(" * general tags:\n");
     String[] names = generalTags.names();
     for (String name : names) {
@@ -198,7 +194,7 @@
     sb.append(" * http-equiv tags:\n");
     Iterator<Object> it = httpEquivTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + httpEquivTags.get(key) + "\n");
     }
     return sb.toString();
Index: src/java/org/apache/nutch/parse/Outlink.java
===================================================================
--- src/java/org/apache/nutch/parse/Outlink.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/Outlink.java	(working copy)
@@ -28,11 +28,13 @@
   private String toUrl;
   private String anchor;
 
-  public Outlink() {}
+  public Outlink() {
+  }
 
   public Outlink(String toUrl, String anchor) throws MalformedURLException {
     this.toUrl = toUrl;
-    if (anchor == null) anchor = "";
+    if (anchor == null)
+      anchor = "";
     this.anchor = anchor;
   }
 
@@ -43,8 +45,8 @@
 
   /** Skips over one Outlink in the input. */
   public static void skip(DataInput in) throws IOException {
-    Text.skip(in);                                // skip toUrl
-    Text.skip(in);                                // skip anchor
+    Text.skip(in); // skip toUrl
+    Text.skip(in); // skip anchor
   }
 
   public void write(DataOutput out) throws IOException {
@@ -58,21 +60,24 @@
     return outlink;
   }
 
-  public String getToUrl() { return toUrl; }
-  public String getAnchor() { return anchor; }
+  public String getToUrl() {
+    return toUrl;
+  }
 
+  public String getAnchor() {
+    return anchor;
+  }
 
   public boolean equals(Object o) {
     if (!(o instanceof Outlink))
       return false;
-    Outlink other = (Outlink)o;
-    return
-      this.toUrl.equals(other.toUrl) &&
-      this.anchor.equals(other.anchor);
+    Outlink other = (Outlink) o;
+    return this.toUrl.equals(other.toUrl) && this.anchor.equals(other.anchor);
   }
 
   public String toString() {
-    return "toUrl: " + toUrl + " anchor: " + anchor;  // removed "\n". toString, not printLine... WD.
+    return "toUrl: " + toUrl + " anchor: " + anchor; // removed "\n". toString,
+                                                     // not printLine... WD.
   }
 
 }
Index: src/java/org/apache/nutch/parse/OutlinkExtractor.java
===================================================================
--- src/java/org/apache/nutch/parse/OutlinkExtractor.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/OutlinkExtractor.java	(working copy)
@@ -34,8 +34,8 @@
 import org.apache.oro.text.regex.Perl5Matcher;
 
 /**
- * Extractor to extract {@link org.apache.nutch.parse.Outlink}s 
- * / URLs from plain text using Regular Expressions.
+ * Extractor to extract {@link org.apache.nutch.parse.Outlink}s / URLs from
+ * plain text using Regular Expressions.
  * 
  * @see <a
  *      href="http://wiki.java.net/bin/view/Javapedia/RegularExpressions">Comparison
@@ -48,7 +48,8 @@
  * @since 0.7
  */
 public class OutlinkExtractor {
-  private static final Logger LOG = LoggerFactory.getLogger(OutlinkExtractor.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(OutlinkExtractor.class);
 
   /**
    * Regex pattern to get URLs within a plain text.
@@ -55,17 +56,19 @@
    * 
    * @see <a
    *      href="http://www.truerwords.net/articles/ut/urlactivation.html">http://www.truerwords.net/articles/ut/urlactivation.html
+   *
    *      </a>
    */
-  private static final String URL_PATTERN = 
-    "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
+  private static final String URL_PATTERN = "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
 
   /**
-   * Extracts <code>Outlink</code> from given plain text.
-   * Applying this method to non-plain-text can result in extremely lengthy
-   * runtimes for parasitic cases (postscript is a known example).
-   * @param plainText  the plain text from wich URLs should be extracted.
+   * Extracts <code>Outlink</code> from given plain text. Applying this method
+   * to non-plain-text can result in extremely lengthy runtimes for parasitic
+   * cases (postscript is a known example).
    * 
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
   public static Outlink[] getOutlinks(final String plainText, Configuration conf) {
@@ -73,15 +76,18 @@
   }
 
   /**
-   * Extracts <code>Outlink</code> from given plain text and adds anchor
-   * to the extracted <code>Outlink</code>s
+   * Extracts <code>Outlink</code> from given plain text and adds anchor to the
+   * extracted <code>Outlink</code>s
    * 
-   * @param plainText the plain text from wich URLs should be extracted.
-   * @param anchor    the anchor of the url
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * @param anchor
+   *          the anchor of the url
    * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
-  public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
+  public static Outlink[] getOutlinks(final String plainText, String anchor,
+      Configuration conf) {
     long start = System.currentTimeMillis();
     final List<Outlink> outlinks = new ArrayList<Outlink>();
 
@@ -97,11 +103,11 @@
       MatchResult result;
       String url;
 
-      //loop the matches
+      // loop the matches
       while (matcher.contains(input, pattern)) {
         // if this is taking too long, stop matching
-        //   (SHOULD really check cpu time used so that heavily loaded systems
-        //   do not unnecessarily hit this limit.)
+        // (SHOULD really check cpu time used so that heavily loaded systems
+        // do not unnecessarily hit this limit.)
         if (System.currentTimeMillis() - start >= 60000L) {
           if (LOG.isWarnEnabled()) {
             LOG.warn("Time limit exceeded for getOutLinks");
@@ -117,13 +123,16 @@
         }
       }
     } catch (Exception ex) {
-      // if the matcher fails (perhaps a malformed URL) we just log it and move on
-      if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
+      // if the matcher fails (perhaps a malformed URL) we just log it and move
+      // on
+      if (LOG.isErrorEnabled()) {
+        LOG.error("getOutlinks", ex);
+      }
     }
 
     final Outlink[] retval;
 
-    //create array of the Outlinks
+    // create array of the Outlinks
     if (outlinks != null && outlinks.size() > 0) {
       retval = outlinks.toArray(new Outlink[0]);
     } else {
@@ -132,7 +141,6 @@
 
     return retval;
   }
-  
 
   /**
    * Extracts outlinks from a plain text. <br />
@@ -162,7 +170,7 @@
     // url = re.getParen(0);
     //
     // if (LOG.isTraceEnabled()) {
-    //   LOG.trace("Extracted url: " + url);
+    // LOG.trace("Extracted url: " + url);
     // }
     //
     // try {
@@ -192,9 +200,8 @@
   }
 
   /**
-   * Extracts outlinks from a plain text.
-   * </p>
-   * This Method takes the JDK5 Regexp API.
+   * Extracts outlinks from a plain text. This method takes the JDK5 Regexp
+   * API.
    * 
    * @param plainText
    * 
@@ -243,5 +250,5 @@
     //
     // return retval;
   }
- 
+
 }
Index: src/java/org/apache/nutch/parse/Parse.java
===================================================================
--- src/java/org/apache/nutch/parse/Parse.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/Parse.java	(working copy)
@@ -16,7 +16,6 @@
  ******************************************************************************/
 package org.apache.nutch.parse;
 
-
 public class Parse {
 
   private String text;
Index: src/java/org/apache/nutch/parse/ParseCallable.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseCallable.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseCallable.java	(working copy)
@@ -24,7 +24,7 @@
   private Parser p;
   private WebPage content;
   private String url;
-  
+
   public ParseCallable(Parser p, WebPage content, String url) {
     this.p = p;
     this.content = content;
@@ -34,5 +34,5 @@
   @Override
   public Parse call() throws Exception {
     return p.getParse(url, content);
-  }    
+  }
 }
Index: src/java/org/apache/nutch/parse/ParseFilter.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseFilter.java	(working copy)
@@ -22,18 +22,19 @@
 import org.apache.nutch.storage.WebPage;
 import org.w3c.dom.DocumentFragment;
 
-
-/** Extension point for DOM-based parsers.  Permits one to add additional
- * metadata to parses provided by the html or tika plugins.  All plugins found which implement this extension
- * point are run sequentially on the parse.
+/**
+ * Extension point for DOM-based parsers. Permits one to add additional metadata
+ * to parses provided by the html or tika plugins. All plugins found which
+ * implement this extension point are run sequentially on the parse.
  */
 public interface ParseFilter extends FieldPluggable, Configurable {
   /** The name of the extension point. */
   final static String X_POINT_ID = ParseFilter.class.getName();
 
-  /** Adds metadata or otherwise modifies a parse, given
-   * the DOM tree of a page. */
-  Parse filter(String url, WebPage page, Parse parse,
-                    HTMLMetaTags metaTags, DocumentFragment doc);
+  /**
+   * Adds metadata or otherwise modifies a parse, given the DOM tree of a page.
+   */
+  Parse filter(String url, WebPage page, Parse parse, HTMLMetaTags metaTags,
+      DocumentFragment doc);
 
 }
Index: src/java/org/apache/nutch/parse/ParseFilters.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseFilters.java	(working copy)
@@ -31,7 +31,7 @@
 import org.apache.nutch.util.ObjectCache;
 import org.w3c.dom.DocumentFragment;
 
-/** Creates and caches {@link ParseFilter} implementing plugins.*/
+/** Creates and caches {@link ParseFilter} implementing plugins. */
 public class ParseFilters {
 
   private ParseFilter[] parseFilters;
@@ -41,7 +41,8 @@
   public ParseFilters(Configuration conf) {
     String order = conf.get(HTMLPARSEFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class.getName());
+    this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class
+        .getName());
     if (parseFilters == null) {
       /*
        * If ordered filters are required, prepare array of filters based on
@@ -51,21 +52,23 @@
       if (order != null && !order.trim().equals("")) {
         orderedFilters = order.split("\\s+");
       }
-      HashMap<String, ParseFilter> filterMap =
-        new HashMap<String, ParseFilter>();
+      HashMap<String, ParseFilter> filterMap = new HashMap<String, ParseFilter>();
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ParseFilter.X_POINT_ID);
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ParseFilter.X_POINT_ID);
         if (point == null)
           throw new RuntimeException(ParseFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ParseFilter parseFilter = (ParseFilter) extension.getExtensionInstance();
+          ParseFilter parseFilter = (ParseFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(parseFilter.getClass().getName())) {
             filterMap.put(parseFilter.getClass().getName(), parseFilter);
           }
         }
-        ParseFilter[] htmlParseFilters = filterMap.values().toArray(new ParseFilter[filterMap.size()]);
+        ParseFilter[] htmlParseFilters = filterMap.values().toArray(
+            new ParseFilter[filterMap.size()]);
         /*
          * If no ordered filters required, just get the filters in an
          * indeterminate order
@@ -77,19 +80,19 @@
         else {
           ArrayList<ParseFilter> filters = new ArrayList<ParseFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
-            ParseFilter filter = filterMap
-            .get(orderedFilters[i]);
+            ParseFilter filter = filterMap.get(orderedFilters[i]);
             if (filter != null) {
               filters.add(filter);
             }
           }
-          objectCache.setObject(ParseFilter.class.getName(), filters
-              .toArray(new ParseFilter[filters.size()]));
+          objectCache.setObject(ParseFilter.class.getName(),
+              filters.toArray(new ParseFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class.getName());
+      this.parseFilters = (ParseFilter[]) objectCache
+          .getObject(ParseFilter.class.getName());
     }
   }
 
Index: src/java/org/apache/nutch/parse/ParsePluginList.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginList.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParsePluginList.java	(working copy)
@@ -22,25 +22,23 @@
 import java.util.List;
 import java.util.Map;
 
-
 /**
  * This class represents a natural ordering for which parsing plugin should get
  * called for a particular mimeType. It provides methods to store the
  * parse-plugins.xml data, and methods to retreive the name of the appropriate
  * parsing plugin for a contentType.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 public class ParsePluginList {
-  
+
   /* a map to link mimeType to an ordered list of parsing plugins */
   private Map<String, List<String>> fMimeTypeToPluginMap = null;
-  
+
   /* A list of aliases */
   private Map<String, String> aliases = null;
-  
-  
+
   /**
    * Constructs a new ParsePluginList
    */
@@ -48,7 +46,7 @@
     fMimeTypeToPluginMap = new HashMap<String, List<String>>();
     aliases = new HashMap<String, String>();
   }
-  
+
   public List<String> getPluginList(String mimeType) {
     return fMimeTypeToPluginMap.get(mimeType);
   }
@@ -56,18 +54,18 @@
   void setAliases(Map<String, String> aliases) {
     this.aliases = aliases;
   }
-  
+
   public Map<String, String> getAliases() {
     return aliases;
   }
-  
+
   void setPluginList(String mimeType, List<String> l) {
     fMimeTypeToPluginMap.put(mimeType, l);
   }
-  
+
   List<String> getSupportedMimeTypes() {
-    return Arrays.asList(fMimeTypeToPluginMap.keySet().toArray(
-            new String[] {}));
+    return Arrays
+        .asList(fMimeTypeToPluginMap.keySet().toArray(new String[] {}));
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/ParsePluginsReader.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginsReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParsePluginsReader.java	(working copy)
@@ -42,19 +42,19 @@
 // Nutch imports
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /**
  * A reader to load the information stored in the
  * <code>$NUTCH_HOME/conf/parse-plugins.xml</code> file.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 public class ParsePluginsReader {
-  
+
   /* our log stream */
-  public static final Logger LOG = LoggerFactory.getLogger(ParsePluginsReader.class);
-  
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ParsePluginsReader.class);
+
   /** The property name of the parse-plugins location */
   private static final String PP_FILE_PROP = "parse.plugin.file";
 
@@ -61,31 +61,31 @@
   /** the parse-plugins file */
   private String fParsePluginsFile = null;
 
-  
   /**
    * Constructs a new ParsePluginsReader
    */
-  public ParsePluginsReader() { }
-  
+  public ParsePluginsReader() {
+  }
+
   /**
    * Reads the <code>parse-plugins.xml</code> file and returns the
    * {@link #ParsePluginList} defined by it.
-   *
+   * 
    * @return A {@link #ParsePluginList} specified by the
    *         <code>parse-plugins.xml</code> file.
    * @throws Exception
-   *             If any parsing error occurs.
+   *           If any parsing error occurs.
    */
   public ParsePluginList parse(Configuration conf) {
-    
+
     ParsePluginList pList = new ParsePluginList();
-    
+
     // open up the XML file
     DocumentBuilderFactory factory = null;
     DocumentBuilder parser = null;
     Document document = null;
     InputSource inputSource = null;
-    
+
     InputStream ppInputStream = null;
     if (fParsePluginsFile != null) {
       URL parsePluginUrl = null;
@@ -94,18 +94,17 @@
         ppInputStream = parsePluginUrl.openStream();
       } catch (Exception e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Unable to load parse plugins file from URL " +
-                   "[" + fParsePluginsFile + "]. Reason is [" + e + "]");
+          LOG.warn("Unable to load parse plugins file from URL " + "["
+              + fParsePluginsFile + "]. Reason is [" + e + "]");
         }
         return pList;
       }
     } else {
-      ppInputStream = conf.getConfResourceAsInputStream(
-                          conf.get(PP_FILE_PROP));
+      ppInputStream = conf.getConfResourceAsInputStream(conf.get(PP_FILE_PROP));
     }
-    
+
     inputSource = new InputSource(ppInputStream);
-    
+
     try {
       factory = DocumentBuilderFactory.newInstance();
       parser = factory.newDocumentBuilder();
@@ -112,30 +111,30 @@
       document = parser.parse(inputSource);
     } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." +
-                 "Reason is [" + e + "]");
+        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." + "Reason is ["
+            + e + "]");
       }
       return null;
     }
-    
+
     Element parsePlugins = document.getDocumentElement();
-    
+
     // build up the alias hash map
     Map<String, String> aliases = getAliases(parsePlugins);
     // And store it on the parse plugin list
     pList.setAliases(aliases);
-     
+
     // get all the mime type nodes
     NodeList mimeTypes = parsePlugins.getElementsByTagName("mimeType");
-    
+
     // iterate through the mime types
     for (int i = 0; i < mimeTypes.getLength(); i++) {
       Element mimeType = (Element) mimeTypes.item(i);
       String mimeTypeStr = mimeType.getAttribute("name");
-      
+
       // for each mimeType, get the plugin list
       NodeList pluginList = mimeType.getElementsByTagName("plugin");
-      
+
       // iterate through the plugins, add them in order read
       // OR if they have a special order="" attribute, then hold those in
       // a separate list, and then insert them into the final list at the
@@ -142,8 +141,8 @@
       // order specified
       if (pluginList != null && pluginList.getLength() > 0) {
         List<String> plugList = new ArrayList<String>(pluginList.getLength());
-        
-        for (int j = 0; j<pluginList.getLength(); j++) {
+
+        for (int j = 0; j < pluginList.getLength(); j++) {
           Element plugin = (Element) pluginList.item(j);
           String pluginId = plugin.getAttribute("id");
           String extId = aliases.get(pluginId);
@@ -163,65 +162,65 @@
             plugList.add(extId);
           }
         }
-        
+
         // now add the plugin list and map it to this mimeType
         pList.setPluginList(mimeTypeStr, plugList);
-        
+
       } else if (LOG.isWarnEnabled()) {
         LOG.warn("ParsePluginsReader:ERROR:no plugins defined for mime type: "
-                 + mimeTypeStr + ", continuing parse");
+            + mimeTypeStr + ", continuing parse");
       }
     }
     return pList;
   }
-  
+
   /**
    * Tests parsing of the parse-plugins.xml file. An alternative name for the
-   * file can be specified via the <code>--file</code> option, although the
-   * file must be located in the <code>$NUTCH_HOME/conf</code> directory.
-   *
+   * file can be specified via the <code>--file</code> option, although the file
+   * must be located in the <code>$NUTCH_HOME/conf</code> directory.
+   * 
    * @param args
-   *            Currently only the --file argument to specify an alternative
-   *            name for the parse-plugins.xml file is supported.
+   *          Currently only the --file argument to specify an alternative name
+   *          for the parse-plugins.xml file is supported.
    */
   public static void main(String[] args) throws Exception {
     String parsePluginFile = null;
     String usage = "ParsePluginsReader [--file <parse plugin file location>]";
-    
-    if (( args.length != 0 && args.length != 2 )
+
+    if ((args.length != 0 && args.length != 2)
         || (args.length == 2 && !"--file".equals(args[0]))) {
       System.err.println(usage);
       System.exit(1);
     }
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("--file")) {
         parsePluginFile = args[++i];
       }
     }
-    
+
     ParsePluginsReader reader = new ParsePluginsReader();
-    
+
     if (parsePluginFile != null) {
       reader.setFParsePluginsFile(parsePluginFile);
     }
-    
+
     ParsePluginList prefs = reader.parse(NutchConfiguration.create());
-    
+
     for (String mimeType : prefs.getSupportedMimeTypes()) {
-      
+
       System.out.println("MIMETYPE: " + mimeType);
       List<String> plugList = prefs.getPluginList(mimeType);
-      
+
       System.out.println("EXTENSION IDs:");
-      
+
       for (String j : plugList) {
         System.out.println(j);
       }
     }
-    
+
   }
-  
+
   /**
    * @return Returns the fParsePluginsFile.
    */
@@ -228,20 +227,20 @@
   public String getFParsePluginsFile() {
     return fParsePluginsFile;
   }
-  
+
   /**
    * @param parsePluginsFile
-   *            The fParsePluginsFile to set.
+   *          The fParsePluginsFile to set.
    */
   public void setFParsePluginsFile(String parsePluginsFile) {
     fParsePluginsFile = parsePluginsFile;
   }
-  
+
   private Map<String, String> getAliases(Element parsePluginsRoot) {
 
     Map<String, String> aliases = new HashMap<String, String>();
     NodeList aliasRoot = parsePluginsRoot.getElementsByTagName("aliases");
-	  
+
     if (aliasRoot == null || (aliasRoot != null && aliasRoot.getLength() == 0)) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("No aliases defined in parse-plugins.xml!");
@@ -248,7 +247,7 @@
       }
       return aliases;
     }
-	  
+
     if (aliasRoot.getLength() > 1) {
       // log a warning, but try and continue processing
       if (LOG.isWarnEnabled()) {
@@ -255,18 +254,18 @@
         LOG.warn("There should only be one \"aliases\" tag in parse-plugins.xml");
       }
     }
-	  
-    Element aliasRootElem = (Element)aliasRoot.item(0);
+
+    Element aliasRootElem = (Element) aliasRoot.item(0);
     NodeList aliasElements = aliasRootElem.getElementsByTagName("alias");
-	  
+
     if (aliasElements != null && aliasElements.getLength() > 0) {
-      for (int i=0; i<aliasElements.getLength(); i++) {
-        Element aliasElem = (Element)aliasElements.item(i);
-	String parsePluginId = aliasElem.getAttribute("name");
-	String extensionId = aliasElem.getAttribute("extension-id");
+      for (int i = 0; i < aliasElements.getLength(); i++) {
+        Element aliasElem = (Element) aliasElements.item(i);
+        String parsePluginId = aliasElem.getAttribute("name");
+        String extensionId = aliasElem.getAttribute("extension-id");
         if (LOG.isTraceEnabled()) {
-          LOG.trace("Found alias: plugin-id: " + parsePluginId +
-                    ", extension-id: " + extensionId);
+          LOG.trace("Found alias: plugin-id: " + parsePluginId
+              + ", extension-id: " + extensionId);
         }
         if (parsePluginId != null && extensionId != null) {
           aliases.put(parsePluginId, extensionId);
@@ -275,5 +274,5 @@
     }
     return aliases;
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/ParseStatusCodes.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusCodes.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseStatusCodes.java	(working copy)
@@ -22,44 +22,52 @@
   // Primary status codes:
 
   /** Parsing was not performed. */
-  public static final byte NOTPARSED       = 0;
+  public static final byte NOTPARSED = 0;
   /** Parsing succeeded. */
-  public static final byte SUCCESS         = 1;
+  public static final byte SUCCESS = 1;
   /** General failure. There may be a more specific error message in arguments. */
-  public static final byte FAILED          = 2;
+  public static final byte FAILED = 2;
 
-  public static final String[] majorCodes = {
-    "notparsed",
-    "success",
-    "failed"
-  };
+  public static final String[] majorCodes = { "notparsed", "success", "failed" };
 
   // Secondary success codes go here:
 
-  public static final short SUCCESS_OK                = 0;
+  public static final short SUCCESS_OK = 0;
 
-  /** Parsed content contains a directive to redirect to another URL.
-   * The target URL can be retrieved from the arguments.
+  /**
+   * Parsed content contains a directive to redirect to another URL. The target
+   * URL can be retrieved from the arguments.
    */
-  public static final short SUCCESS_REDIRECT          = 100;
+  public static final short SUCCESS_REDIRECT = 100;
 
   // Secondary failure codes go here:
 
-  /** Parsing failed. An Exception occured (which may be retrieved from the arguments). */
-  public static final short FAILED_EXCEPTION          = 200;
-  /** Parsing failed. Content was truncated, but the parser cannot handle incomplete content. */
-  public static final short FAILED_TRUNCATED          = 202;
-  /** Parsing failed. Invalid format - the content may be corrupted or of wrong type. */
-  public static final short FAILED_INVALID_FORMAT     = 203;
-  /** Parsing failed. Other related parts of the content are needed to complete
+  /**
+   * Parsing failed. An Exception occurred (which may be retrieved from the
+   * arguments).
+   */
+  public static final short FAILED_EXCEPTION = 200;
+  /**
+   * Parsing failed. Content was truncated, but the parser cannot handle
+   * incomplete content.
+   */
+  public static final short FAILED_TRUNCATED = 202;
+  /**
+   * Parsing failed. Invalid format - the content may be corrupted or of wrong
+   * type.
+   */
+  public static final short FAILED_INVALID_FORMAT = 203;
+  /**
+   * Parsing failed. Other related parts of the content are needed to complete
    * parsing. The list of URLs to missing parts may be provided in arguments.
    * The Fetcher may decide to fetch these parts at once, then put them into
    * Content.metadata, and supply them for re-parsing.
    */
-  public static final short FAILED_MISSING_PARTS      = 204;
-  /** Parsing failed. There was no content to be parsed - probably caused
-   * by errors at protocol stage.
+  public static final short FAILED_MISSING_PARTS = 204;
+  /**
+   * Parsing failed. There was no content to be parsed - probably caused by
+   * errors at protocol stage.
    */
-  public static final short FAILED_MISSING_CONTENT    = 205;
-  
+  public static final short FAILED_MISSING_CONTENT = 205;
+
 }
Index: src/java/org/apache/nutch/parse/ParseStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseStatusUtils.java	(working copy)
@@ -29,10 +29,10 @@
 public class ParseStatusUtils {
 
   public static ParseStatus STATUS_SUCCESS = ParseStatus.newBuilder().build();
-  public static final HashMap<Short,String> minorCodes = new HashMap<Short,String>();
+  public static final HashMap<Short, String> minorCodes = new HashMap<Short, String>();
 
   static {
-    STATUS_SUCCESS.setMajorCode((int)ParseStatusCodes.SUCCESS);
+    STATUS_SUCCESS.setMajorCode((int) ParseStatusCodes.SUCCESS);
     minorCodes.put(ParseStatusCodes.SUCCESS_OK, "ok");
     minorCodes.put(ParseStatusCodes.SUCCESS_REDIRECT, "redirect");
     minorCodes.put(ParseStatusCodes.FAILED_EXCEPTION, "exception");
@@ -49,8 +49,9 @@
     return status.getMajorCode() == ParseStatusCodes.SUCCESS;
   }
 
-  /** A convenience method. Return a String representation of the first
-   * argument, or null.
+  /**
+   * A convenience method. Return a String representation of the first argument,
+   * or null.
    */
   public static String getMessage(ParseStatus status) {
     List<CharSequence> args = status.getArgs();
@@ -77,29 +78,30 @@
 
   public static Parse getEmptyParse(Exception e, Configuration conf) {
     ParseStatus status = ParseStatus.newBuilder().build();
-    status.setMajorCode((int)ParseStatusCodes.FAILED);
-    status.setMinorCode((int)ParseStatusCodes.FAILED_EXCEPTION);
+    status.setMajorCode((int) ParseStatusCodes.FAILED);
+    status.setMinorCode((int) ParseStatusCodes.FAILED_EXCEPTION);
     status.getArgs().add(new Utf8(e.toString()));
 
     return new Parse("", "", new Outlink[0], status);
   }
 
-  public static Parse getEmptyParse(int minorCode, String message, Configuration conf) {
+  public static Parse getEmptyParse(int minorCode, String message,
+      Configuration conf) {
     ParseStatus status = ParseStatus.newBuilder().build();
-    status.setMajorCode((int)ParseStatusCodes.FAILED);
+    status.setMajorCode((int) ParseStatusCodes.FAILED);
     status.setMinorCode(minorCode);
     status.getArgs().add(new Utf8(message));
 
     return new Parse("", "", new Outlink[0], status);
   }
-  
+
   public static String toString(ParseStatus status) {
     if (status == null) {
       return "(null)";
     }
     StringBuilder sb = new StringBuilder();
-    sb.append(ParseStatusCodes.majorCodes[status.getMajorCode()] +
-        "/" + minorCodes.get(status.getMinorCode().shortValue()));
+    sb.append(ParseStatusCodes.majorCodes[status.getMajorCode()] + "/"
+        + minorCodes.get(status.getMinorCode().shortValue()));
     sb.append(" (" + status.getMajorCode() + "/" + status.getMinorCode() + ")");
     sb.append(", args=[");
     List<CharSequence> args = status.getArgs();
@@ -107,7 +109,8 @@
       int i = 0;
       Iterator<CharSequence> it = args.iterator();
       while (it.hasNext()) {
-        if (i > 0) sb.append(',');
+        if (i > 0)
+          sb.append(',');
         sb.append(it.next());
         i++;
       }
Index: src/java/org/apache/nutch/parse/ParseUtil.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseUtil.java	(working copy)
@@ -49,7 +49,7 @@
  * A Utility class containing methods to simply perform parsing utilities such
  * as iterating through a preferred list of {@link Parser}s to obtain
  * {@link Parse} objects.
- *
+ * 
  * @author mattmann
  * @author J&eacute;r&ocirc;me Charron
  * @author S&eacute;bastien Le Callonnec
@@ -60,7 +60,7 @@
   public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);
 
   private static final int DEFAULT_MAX_PARSE_TIME = 30;
-  
+
   private Configuration conf;
   private Signature sig;
   private URLFilters filters;
@@ -71,9 +71,9 @@
   /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
   private int maxParseTime;
   private ExecutorService executorService;
-  
+
   /**
-   *
+   * 
    * @param conf
    */
   public ParseUtil(Configuration conf) {
@@ -90,15 +90,16 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
     parserFactory = new ParserFactory(conf);
-    maxParseTime=conf.getInt("parser.timeout", DEFAULT_MAX_PARSE_TIME);
+    maxParseTime = conf.getInt("parser.timeout", DEFAULT_MAX_PARSE_TIME);
     sig = SignatureFactory.getSignature(conf);
     filters = new URLFilters(conf);
     normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_OUTLINK);
     int maxOutlinksPerPage = conf.getInt("db.max.outlinks.per.page", 100);
-    maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE : maxOutlinksPerPage;
+    maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
+        : maxOutlinksPerPage;
     ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
     executorService = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
-      .setNameFormat("parse-%d").setDaemon(true).build());
+        .setNameFormat("parse-%d").setDaemon(true).build());
   }
 
   /**
@@ -106,11 +107,13 @@
    * until a successful parse is performed and a {@link Parse} object is
    * returned. If the parse is unsuccessful, a message is logged to the
    * <code>WARNING</code> level, and an empty parse is returned.
-   *
-   * @throws ParserNotFound If there is no suitable parser found. 
-   * @throws ParseException If there is an error parsing.
+   * 
+   * @throws ParserNotFound
+   *           If there is no suitable parser found.
+   * @throws ParseException
+   *           If there is an error parsing.
    */
-  public Parse parse(String url, WebPage page) throws ParserNotFound, 
+  public Parse parse(String url, WebPage page) throws ParserNotFound,
       ParseException {
     Parser[] parsers = null;
 
@@ -118,28 +121,29 @@
 
     parsers = this.parserFactory.getParsers(contentType, url);
 
-    for (int i=0; i<parsers.length; i++) {
+    for (int i = 0; i < parsers.length; i++) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Parsing [" + url + "] with [" + parsers[i] + "]");
       }
       Parse parse = null;
-      
-      if (maxParseTime!=-1)
-    	  parse = runParser(parsers[i], url, page);
-      else 
-    	  parse = parsers[i].getParse(url, page);
-      
-      if (parse!=null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
+
+      if (maxParseTime != -1)
+        parse = runParser(parsers[i], url, page);
+      else
+        parse = parsers[i].getParse(url, page);
+
+      if (parse != null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
         return parse;
       }
     }
 
-    LOG.warn("Unable to successfully parse content " + url +
-        " of type " + contentType);
-    return ParseStatusUtils.getEmptyParse(new ParseException("Unable to successfully parse content"), null);
+    LOG.warn("Unable to successfully parse content " + url + " of type "
+        + contentType);
+    return ParseStatusUtils.getEmptyParse(new ParseException(
+        "Unable to successfully parse content"), null);
   }
-  
-  private Parse runParser(Parser p, String url, WebPage page) {    
+
+  private Parse runParser(Parser p, String url, WebPage page) {
     ParseCallable pc = new ParseCallable(p, page, url);
     Future<Parse> task = executorService.submit(pc);
     Parse res = null;
@@ -155,8 +159,9 @@
   }
 
   /**
-   * Parses given web page and stores parsed content within page. Puts
-   * a meta-redirect to outlinks.
+   * Parses given web page and stores parsed content within page. Puts a
+   * meta-redirect to outlinks.
+   * 
    * @param key
    * @param page
    */
@@ -165,7 +170,8 @@
     byte status = page.getStatus().byteValue();
     if (status != CrawlStatus.STATUS_FETCHED) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Skipping " + url + " as status is: " + CrawlStatus.getName(status));
+        LOG.debug("Skipping " + url + " as status is: "
+            + CrawlStatus.getName(status));
       }
       return;
     }
@@ -213,7 +219,8 @@
           return;
         }
         page.getOutlinks().put(new Utf8(newUrl), new Utf8());
-        page.getMetadata().put(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
+        page.getMetadata().put(FetcherJob.REDIRECT_DISCOVERED,
+            TableUtil.YES_VAL);
         if (newUrl == null || newUrl.equals(url)) {
           String reprUrl = URLUtil.chooseRepr(url, newUrl,
               refreshTime < FetcherJob.PERM_REFRESH_TIME);
Index: src/java/org/apache/nutch/parse/Parser.java
===================================================================
--- src/java/org/apache/nutch/parse/Parser.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/Parser.java	(working copy)
@@ -22,9 +22,10 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-/** A parser for content generated by a {@link org.apache.nutch.protocol.Protocol}
- * implementation.  This interface is implemented by extensions.  Nutch's core
- * contains no page parsing code.
+/**
+ * A parser for content generated by a
+ * {@link org.apache.nutch.protocol.Protocol} implementation. This interface is
+ * implemented by extensions. Nutch's core contains no page parsing code.
  */
 public interface Parser extends FieldPluggable, Configurable {
   /** The name of the extension point. */
@@ -34,8 +35,9 @@
    * <p>
    * This method parses content in WebPage instance
    * </p>
-   *
-   * @param url Page's URL
+   * 
+   * @param url
+   *          Page's URL
    * @param page
    */
   Parse getParse(String url, WebPage page);
Index: src/java/org/apache/nutch/parse/ParserChecker.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserChecker.java	(working copy)
@@ -37,16 +37,16 @@
 import java.util.Map.Entry;
 
 /**
- * Parser checker, useful for testing parser.
- * It also accurately reports possible fetching and 
- * parsing failures and presents protocol status signals to aid 
- * debugging. The tool enables us to retrieve the following data from 
- * any url:
+ * Parser checker, useful for testing parser. It also accurately reports
+ * possible fetching and parsing failures and presents protocol status signals
+ * to aid debugging. The tool enables us to retrieve the following data from any
+ * url:
  * <ol>
- * <li><tt>contentType</tt>: The URL {@link org.apache.nutch.protocol.Content} type.</li>
- * <li><tt>signature</tt>: Digest is used to identify pages (like unique ID) and is used to remove
- * duplicates during the dedup procedure. 
- * It is calculated using {@link org.apache.nutch.crawl.MD5Signature} or
+ * <li><tt>contentType</tt>: The URL {@link org.apache.nutch.protocol.Content}
+ * type.</li>
+ * <li><tt>signature</tt>: Digest is used to identify pages (like unique ID) and
+ * is used to remove duplicates during the dedup procedure. It is calculated
+ * using {@link org.apache.nutch.crawl.MD5Signature} or
  * {@link org.apache.nutch.crawl.TextProfileSignature}.</li>
  * <li><tt>Version</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
  * <li><tt>Status</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
@@ -53,12 +53,14 @@
  * <li><tt>Title</tt>: of the URL</li>
  * <li><tt>Outlinks</tt>: associated with the URL</li>
  * <li><tt>Content Metadata</tt>: such as <i>X-AspNet-Version</i>, <i>Date</i>,
- * <i>Content-length</i>, <i>servedBy</i>, <i>Content-Type</i>, <i>Cache-Control</>, etc.</li>
+ * <i>Content-length</i>, <i>servedBy</i>, <i>Content-Type</i>,
+ * <i>Cache-Control</i>, etc.</li>
  * <li><tt>Parse Metadata</tt>: such as <i>CharEncodingForConversion</i>,
  * <i>OriginalCharEncoding</i>, <i>language</i>, etc.</li>
- * <li><tt>ParseText</tt>: The page parse text which varies in length depdnecing on 
- * <code>content.length</code> configuration.</li>
+ * <li><tt>ParseText</tt>: The page parse text which varies in length depending
+ * on <code>content.length</code> configuration.</li>
  * </ol>
+ * 
  * @author John Xing
  */
 
@@ -107,7 +109,7 @@
 
     ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, page);
 
-    if(!protocolOutput.getStatus().isSuccess()) {
+    if (!protocolOutput.getStatus().isSuccess()) {
       LOG.error("Fetch failed with protocol status: "
           + ProtocolStatusUtils.getName(protocolOutput.getStatus().getCode())
           + ": " + ProtocolStatusUtils.getMessage(protocolOutput.getStatus()));
@@ -155,7 +157,6 @@
       LOG.info("signature: " + StringUtil.toHexString(signature));
     }
 
-
     LOG.info("---------\nUrl\n---------------\n");
     System.out.print(url + "\n");
     LOG.info("---------\nMetadata\n---------\n");
@@ -167,7 +168,7 @@
       while (iterator.hasNext()) {
         Entry<CharSequence, ByteBuffer> entry = iterator.next();
         sb.append(entry.getKey().toString()).append(" : \t")
-        .append(Bytes.toString(entry.getValue())).append("\n");
+            .append(Bytes.toString(entry.getValue())).append("\n");
       }
       System.out.print(sb.toString());
     }
@@ -182,12 +183,12 @@
       Map<CharSequence, CharSequence> headers = page.getHeaders();
       StringBuffer headersb = new StringBuffer();
       if (metadata != null) {
-        Iterator<Entry<CharSequence, CharSequence>> iterator = headers.entrySet()
-            .iterator();
+        Iterator<Entry<CharSequence, CharSequence>> iterator = headers
+            .entrySet().iterator();
         while (iterator.hasNext()) {
           Entry<CharSequence, CharSequence> entry = iterator.next();
           headersb.append(entry.getKey().toString()).append(" : \t")
-          .append(entry.getValue()).append("\n");
+              .append(entry.getValue()).append("\n");
         }
         System.out.print(headersb.toString());
       }
Index: src/java/org/apache/nutch/parse/ParserFactory.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserFactory.java	(working copy)
@@ -34,8 +34,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.ObjectCache;
 
-
-/** Creates and caches {@link Parser} plugins.*/
+/** Creates and caches {@link Parser} plugins. */
 public final class ParserFactory {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParserFactory.class);
@@ -44,8 +43,7 @@
   public static final String DEFAULT_PLUGIN = "*";
 
   /** Empty extension list for caching purposes. */
-  private final List<Extension> EMPTY_EXTENSION_LIST =
-    new ArrayList<Extension>();
+  private final List<Extension> EMPTY_EXTENSION_LIST = new ArrayList<Extension>();
 
   private final Configuration conf;
   private final ExtensionPoint extensionPoint;
@@ -56,10 +54,12 @@
     ObjectCache objectCache = ObjectCache.get(conf);
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
         Parser.X_POINT_ID);
-    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
+    this.parsePluginList = (ParsePluginList) objectCache
+        .getObject(ParsePluginList.class.getName());
     if (this.parsePluginList == null) {
       this.parsePluginList = new ParsePluginsReader().parse(conf);
-      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
+      objectCache.setObject(ParsePluginList.class.getName(),
+          this.parsePluginList);
     }
 
     if (this.extensionPoint == null) {
@@ -71,33 +71,34 @@
     }
   }
 
-
   /**
    * Function returns an array of {@link Parser}s for a given content type.
-   *
+   * 
    * The function consults the internal list of parse plugins for the
-   * ParserFactory to determine the list of pluginIds, then gets the
-   * appropriate extension points to instantiate as {@link Parser}s.
-   *
-   * @param contentType The contentType to return the <code>Array</code>
-   *                    of {@link Parser}s for.
-   * @param url The url for the content that may allow us to get the type from
-   *            the file suffix.
+   * ParserFactory to determine the list of pluginIds, then gets the appropriate
+   * extension points to instantiate as {@link Parser}s.
+   * 
+   * @param contentType
+   *          The contentType to return the <code>Array</code> of
+   *          {@link Parser}s for.
+   * @param url
+   *          The url for the content that may allow us to get the type from the
+   *          file suffix.
    * @return An <code>Array</code> of {@link Parser}s for the given contentType.
    *         If there were plugins mapped to a contentType via the
-   *         <code>parse-plugins.xml</code> file, but never enabled via
-   *         the <code>plugin.includes</code> Nutch conf, then those plugins
-   *         won't be part of this array, i.e., they will be skipped.
-   *         So, if the ordered list of parsing plugins for
-   *         <code>text/plain</code> was <code>[parse-text,parse-html,
+   *         <code>parse-plugins.xml</code> file, but never enabled via the
+   *         <code>plugin.includes</code> Nutch conf, then those plugins won't
+   *         be part of this array, i.e., they will be skipped. So, if the
+   *         ordered list of parsing plugins for <code>text/plain</code> was
+   *         <code>[parse-text,parse-html,
    *         parse-rtf]</code>, and only <code>parse-html</code> and
    *         <code>parse-rtf</code> were enabled via
-   *         <code>plugin.includes</code>, then this ordered Array would
-   *         consist of two {@link Parser} interfaces,
+   *         <code>plugin.includes</code>, then this ordered Array would consist
+   *         of two {@link Parser} interfaces,
    *         <code>[parse-html, parse-rtf]</code>.
    */
   public Parser[] getParsers(String contentType, String url)
-  throws ParserNotFound {
+      throws ParserNotFound {
 
     List<Parser> parsers = null;
     List<Extension> parserExts = null;
@@ -107,7 +108,7 @@
     // TODO once the MimeTypes is available
     // parsers = getExtensions(MimeUtils.map(contentType));
     // if (parsers != null) {
-    //   return parsers;
+    // return parsers;
     // }
     // Last Chance: Guess content-type from file url...
     // parsers = getExtensions(MimeUtils.getMimeType(url));
@@ -118,49 +119,50 @@
     }
 
     parsers = new ArrayList<Parser>(parserExts.size());
-    for (Extension ext : parserExts){
+    for (Extension ext : parserExts) {
       Parser p = null;
       try {
-        //check to see if we've cached this parser instance yet
+        // check to see if we've cached this parser instance yet
         p = (Parser) objectCache.getObject(ext.getId());
         if (p == null) {
           // go ahead and instantiate it and then cache it
           p = (Parser) ext.getExtensionInstance();
-          objectCache.setObject(ext.getId(),p);
+          objectCache.setObject(ext.getId(), p);
         }
         parsers.add(p);
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
           LOG.warn("ParserFactory:PluginRuntimeException when "
-                 + "initializing parser plugin "
-                 + ext.getDescriptor().getPluginId()
-                 + " instance in getParsers "
-                 + "function: attempting to continue instantiating parsers: ", e);
+              + "initializing parser plugin "
+              + ext.getDescriptor().getPluginId() + " instance in getParsers "
+              + "function: attempting to continue instantiating parsers: ", e);
         }
       }
     }
-    return parsers.toArray(new Parser[]{});
+    return parsers.toArray(new Parser[] {});
   }
 
   /**
    * Function returns a {@link Parser} instance with the specified
-   * <code>extId</code>, representing its extension ID. If the Parser
-   * instance isn't found, then the function throws a
-   * <code>ParserNotFound</code> exception. If the function is able to find
-   * the {@link Parser} in the internal <code>PARSER_CACHE</code> then it
-   * will return the already instantiated Parser. Otherwise, if it has to
-   * instantiate the Parser itself , then this function will cache that Parser
-   * in the internal <code>PARSER_CACHE</code>.
-   *
-   * @param id The string extension ID (e.g.,
-   *        "org.apache.nutch.parse.rss.RSSParser",
-   *        "org.apache.nutch.parse.rtf.RTFParseFactory") of the {@link Parser}
-   *        implementation to return.
+   * <code>extId</code>, representing its extension ID. If the Parser instance
+   * isn't found, then the function throws a <code>ParserNotFound</code>
+   * exception. If the function is able to find the {@link Parser} in the
+   * internal <code>PARSER_CACHE</code> then it will return the already
+   * instantiated Parser. Otherwise, if it has to instantiate the Parser
+   * itself, then this function will cache that Parser in the internal
+   * <code>PARSER_CACHE</code>.
+   * 
+   * @param id
+   *          The string extension ID (e.g.,
+   *          "org.apache.nutch.parse.rss.RSSParser",
+   *          "org.apache.nutch.parse.rtf.RTFParseFactory") of the
+   *          {@link Parser} implementation to return.
    * @return A {@link Parser} implementation specified by the parameter
    *         <code>id</code>.
-   * @throws ParserNotFound If the Parser is not found (i.e., registered with
-   *         the extension point), or if the there a
-   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
+   * @throws ParserNotFound
+   *           If the Parser is not found (i.e., not registered with the
+   *           extension point), or if there is a {@link PluginRuntimeException}
+   *           instantiating the {@link Parser}.
    */
   public Parser getParserById(String id) throws ParserNotFound {
 
@@ -184,7 +186,7 @@
     if (objectCache.getObject(parserExt.getId()) != null) {
       return (Parser) objectCache.getObject(parserExt.getId());
 
-    // if not found in cache, instantiate the Parser
+      // if not found in cache, instantiate the Parser
     } else {
       try {
         Parser p = (Parser) parserExt.getExtensionInstance();
@@ -192,9 +194,9 @@
         return p;
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Canno initialize parser " +
-                   parserExt.getDescriptor().getPluginId() +
-                   " (cause: " + e.toString());
+          LOG.warn("Cannot initialize parser "
+              + parserExt.getDescriptor().getPluginId() + " (cause: "
+              + e.toString());
         }
         throw new ParserNotFound("Cannot init parser for id [" + id + "]");
       }
@@ -212,7 +214,7 @@
           columns.addAll(pluginFields);
         }
       } catch (PluginRuntimeException e) {
-        LOG.error("PluginRuntimeException",e);
+        LOG.error("PluginRuntimeException", e);
       }
     }
     return columns;
@@ -220,10 +222,11 @@
 
   /**
    * Finds the best-suited parse plugin for a given contentType.
-   *
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return a list of extensions to be used for this contentType.
-   *         If none, returns <code>null</code>.
+   * 
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return a list of extensions to be used for this contentType. If none,
+   *         returns <code>null</code>.
    */
   @SuppressWarnings("unchecked")
   protected List<Extension> getExtensions(String contentType) {
@@ -246,8 +249,8 @@
       if (extensions != null) {
         objectCache.setObject(type, extensions);
       } else {
-      	// Put the empty extension list into cache
-      	// to remember we don't know any related extension.
+        // Put the empty extension list into cache
+        // to remember we don't know any related extension.
         objectCache.setObject(type, EMPTY_EXTENSION_LIST);
       }
     }
@@ -256,12 +259,14 @@
 
   /**
    * searches a list of suitable parse plugins for the given contentType.
-   * <p>It first looks for a preferred plugin defined in the parse-plugin
-   * file.  If none is found, it returns a list of default plugins.
-   *
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * <p>
+   * It first looks for a preferred plugin defined in the parse-plugin file. If
+   * none is found, it returns a list of default plugins.
+   * 
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> findExtensions(String contentType) {
 
@@ -268,10 +273,10 @@
     Extension[] extensions = this.extensionPoint.getExtensions();
 
     // Look for a preferred plugin.
-    List<String> parsePluginList =
-      this.parsePluginList.getPluginList(contentType);
-    List<Extension> extensionList =
-      matchExtensions(parsePluginList, extensions, contentType);
+    List<String> parsePluginList = this.parsePluginList
+        .getPluginList(contentType);
+    List<Extension> extensionList = matchExtensions(parsePluginList,
+        extensions, contentType);
     if (extensionList != null) {
       return extensionList;
     }
@@ -284,20 +289,23 @@
   /**
    * Tries to find a suitable parser for the given contentType.
    * <ol>
-   * <li>It checks if a parser which accepts the contentType
-   * can be found in the <code>plugins</code> list;</li>
-   * <li>If this list is empty, it tries to find amongst the loaded
-   * extensions whether some of them might suit and warns the user.</li>
+   * <li>It checks if a parser which accepts the contentType can be found in the
+   * <code>plugins</code> list;</li>
+   * <li>If this list is empty, it tries to find amongst the loaded extensions
+   * whether some of them might suit and warns the user.</li>
    * </ol>
-   * @param plugins List of candidate plugins.
-   * @param extensions Array of loaded extensions.
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * 
+   * @param plugins
+   *          List of candidate plugins.
+   * @param extensions
+   *          Array of loaded extensions.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> matchExtensions(List<String> plugins,
-                               Extension[] extensions,
-                               String contentType) {
+      Extension[] extensions, String contentType) {
 
     List<Extension> extList = new ArrayList<Extension>();
     if (plugins != null) {
@@ -315,7 +323,7 @@
         // in either case, LOG the appropriate error message to WARN level
 
         if (ext == null) {
-          //try to get it just by its pluginId
+          // try to get it just by its pluginId
           ext = getExtension(extensions, parsePluginId);
 
           if (LOG.isWarnEnabled()) {
@@ -323,17 +331,17 @@
               // plugin was enabled via plugin.includes
               // its plugin.xml just doesn't claim to support that
               // particular mimeType
-              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but " + "its plugin.xml " +
-                       "file does not claim to support contentType: " +
-                       contentType);
+              LOG.warn("ParserFactory:Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but " + "its plugin.xml "
+                  + "file does not claim to support contentType: "
+                  + contentType);
             } else {
               // plugin wasn't enabled via plugin.includes
-              LOG.warn("ParserFactory: Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but not enabled via " +
-                       "plugin.includes in nutch-default.xml");
+              LOG.warn("ParserFactory: Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but not enabled via "
+                  + "plugin.includes in nutch-default.xml");
             }
           }
         }
@@ -353,12 +361,12 @@
       // any extensions where this is the case, throw a
       // NotMappedParserException
 
-      for (int i=0; i<extensions.length; i++) {
-      	if ("*".equals(extensions[i].getAttribute("contentType"))){
+      for (int i = 0; i < extensions.length; i++) {
+        if ("*".equals(extensions[i].getAttribute("contentType"))) {
           extList.add(0, extensions[i]);
-        }
-      	else if (extensions[i].getAttribute("contentType") != null
-            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType")))) {
+        } else if (extensions[i].getAttribute("contentType") != null
+            && contentType.matches(escapeContentType(extensions[i]
+                .getAttribute("contentType")))) {
           extList.add(extensions[i]);
         }
       }
@@ -367,21 +375,23 @@
         if (LOG.isInfoEnabled()) {
           StringBuffer extensionsIDs = new StringBuffer("[");
           boolean isFirst = true;
-          for (Extension ext : extList){
-        	  if (!isFirst) extensionsIDs.append(" - ");
-        	  else isFirst=false;
-        	  extensionsIDs.append(ext.getId());
+          for (Extension ext : extList) {
+            if (!isFirst)
+              extensionsIDs.append(" - ");
+            else
+              isFirst = false;
+            extensionsIDs.append(ext.getId());
           }
-    	  extensionsIDs.append("]");
-          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
-                   " are enabled via the plugin.includes system " +
-                   "property, and all claim to support the content type " +
-                   contentType + ", but they are not mapped to it  in the " +
-                   "parse-plugins.xml file");
+          extensionsIDs.append("]");
+          LOG.info("The parsing plugins: " + extensionsIDs.toString()
+              + " are enabled via the plugin.includes system "
+              + "property, and all claim to support the content type "
+              + contentType + ", but they are not mapped to it  in the "
+              + "parse-plugins.xml file");
         }
       } else if (LOG.isDebugEnabled()) {
-        LOG.debug("ParserFactory:No parse plugins mapped or enabled for " +
-                  "contentType " + contentType);
+        LOG.debug("ParserFactory:No parse plugins mapped or enabled for "
+            + "contentType " + contentType);
       }
     }
 
@@ -389,23 +399,22 @@
   }
 
   private String escapeContentType(String contentType) {
-  	// Escapes contentType in order to use as a regex 
-  	// (and keep backwards compatibility).
-  	// This enables to accept multiple types for a single parser. 
-  	return contentType.replace("+", "\\+").replace(".", "\\.");
-	}
+    // Escapes contentType in order to use as a regex
+    // (and keep backwards compatibility).
+    // This enables to accept multiple types for a single parser.
+    return contentType.replace("+", "\\+").replace(".", "\\.");
+  }
 
-
-	private boolean match(Extension extension, String id, String type) {
-    return (id.equals(extension.getId())) &&
-            (extension.getAttribute("contentType").equals("*") ||
-             type.matches(escapeContentType(extension.getAttribute("contentType"))) ||
-             type.equals(DEFAULT_PLUGIN));
+  private boolean match(Extension extension, String id, String type) {
+    return (id.equals(extension.getId()))
+        && (extension.getAttribute("contentType").equals("*")
+            || type.matches(escapeContentType(extension
+                .getAttribute("contentType"))) || type.equals(DEFAULT_PLUGIN));
   }
 
   /** Get an extension from its id and supported content-type. */
   private Extension getExtension(Extension[] list, String id, String type) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (match(list[i], id, type)) {
         return list[i];
       }
@@ -414,7 +423,7 @@
   }
 
   private Extension getExtension(Extension[] list, String id) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (id.equals(list[i].getId())) {
         return list[i];
       }
Index: src/java/org/apache/nutch/parse/ParserJob.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserJob.java	(working copy)
@@ -58,9 +58,9 @@
 
   private static final String RESUME_KEY = "parse.job.resume";
   private static final String FORCE_KEY = "parse.job.force";
-  
+
   public static final String SKIP_TRUNCATED = "parser.skip.truncated";
-  
+
   private static final Utf8 REPARSE = new Utf8("-reparse");
 
   private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
@@ -79,9 +79,8 @@
     FIELDS.add(WebPage.Field.HEADERS);
   }
 
-
-  public static class ParserMapper 
-      extends GoraMapper<String, WebPage, String, WebPage> {
+  public static class ParserMapper extends
+      GoraMapper<String, WebPage, String, WebPage> {
     private ParseUtil parseUtil;
 
     private boolean shouldResume;
@@ -91,7 +90,7 @@
     private Utf8 batchId;
 
     private boolean skipTruncated;
-    
+
     @Override
     public void setup(Context context) throws IOException {
       Configuration conf = context.getConfiguration();
@@ -98,8 +97,9 @@
       parseUtil = new ParseUtil(conf);
       shouldResume = conf.getBoolean(RESUME_KEY, false);
       force = conf.getBoolean(FORCE_KEY, false);
-      batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
-      skipTruncated=conf.getBoolean(SKIP_TRUNCATED, true);
+      batchId = new Utf8(
+          conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
+      skipTruncated = conf.getBoolean(SKIP_TRUNCATED, true);
     }
 
     @Override
@@ -131,7 +131,6 @@
       if (skipTruncated && isTruncated(unreverseKey, page)) {
         return;
       }
-      
 
       parseUtil.process(key, page);
       ParseStatus pstatus = page.getParseStatus();
@@ -141,9 +140,9 @@
       }
 
       context.write(key, page);
-    }    
+    }
   }
-  
+
   public ParserJob() {
 
   }
@@ -151,13 +150,14 @@
   public ParserJob(Configuration conf) {
     setConf(conf);
   }
-  
+
   /**
    * Checks if the page's content is truncated.
-   * @param url 
+   * 
+   * @param url
    * @param page
-   * @return If the page is truncated <code>true</code>. When it is not,
-   * or when it could be determined, <code>false</code>. 
+   * @return If the page is truncated <code>true</code>. When it is not, or when
+   *         it could not be determined, <code>false</code>.
    */
   public static boolean isTruncated(String url, WebPage page) {
     ByteBuffer content = page.getContent();
@@ -164,7 +164,8 @@
     if (content == null) {
       return false;
     }
-    CharSequence lengthUtf8 = page.getHeaders().get(new Utf8(HttpHeaders.CONTENT_LENGTH));
+    CharSequence lengthUtf8 = page.getHeaders().get(
+        new Utf8(HttpHeaders.CONTENT_LENGTH));
     if (lengthUtf8 == null) {
       return false;
     }
@@ -186,7 +187,8 @@
       return true;
     }
     if (LOG.isDebugEnabled()) {
-      LOG.debug(url + " actualSize=" + actualSize + " inHeaderSize=" + inHeaderSize);
+      LOG.debug(url + " actualSize=" + actualSize + " inHeaderSize="
+          + inHeaderSize);
     }
     return false;
   }
@@ -198,8 +200,8 @@
     ParseFilters parseFilters = new ParseFilters(conf);
 
     Collection<WebPage.Field> parsePluginFields = parserFactory.getFields();
-    Collection<WebPage.Field> signaturePluginFields =
-      SignatureFactory.getFields(conf);
+    Collection<WebPage.Field> signaturePluginFields = SignatureFactory
+        .getFields(conf);
     Collection<WebPage.Field> htmlParsePluginFields = parseFilters.getFields();
 
     if (parsePluginFields != null) {
@@ -226,11 +228,11 @@
   }
 
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
-    Boolean shouldResume = (Boolean)args.get(Nutch.ARG_RESUME);
-    Boolean force = (Boolean)args.get(Nutch.ARG_FORCE);
-    
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
+    Boolean shouldResume = (Boolean) args.get(Nutch.ARG_RESUME);
+    Boolean force = (Boolean) args.get(Nutch.ARG_FORCE);
+
     if (batchId != null) {
       getConf().set(GeneratorJob.BATCH_ID, batchId);
     }
@@ -241,7 +243,8 @@
       getConf().setBoolean(FORCE_KEY, force);
     }
     LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
-    LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
+    LOG.info("ParserJob: forced reparse:\t"
+        + getConf().getBoolean(FORCE_KEY, false));
     if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
       LOG.info("ParserJob: parsing all");
     } else {
@@ -248,10 +251,10 @@
       LOG.info("ParserJob: batchId:\t" + batchId);
     }
     currentJob = new NutchJob(getConf(), "parse");
-    
+
     Collection<WebPage.Field> fields = getFields(currentJob);
     MapFieldValueFilter<String, WebPage> batchIdFilter = getBatchIdFilter(batchId);
-	StorageUtils.initMapperJob(currentJob, fields, String.class, WebPage.class,
+    StorageUtils.initMapperJob(currentJob, fields, String.class, WebPage.class,
         ParserMapper.class, batchIdFilter);
     StorageUtils.initReducerJob(currentJob, IdentityPageReducer.class);
     currentJob.setNumReduceTasks(0);
@@ -275,20 +278,20 @@
     return filter;
   }
 
-  public int parse(String batchId, boolean shouldResume, boolean force) throws Exception {
-    
+  public int parse(String batchId, boolean shouldResume, boolean force)
+      throws Exception {
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("ParserJob: starting at " + sdf.format(start));
 
-    run(ToolUtil.toArgMap(
-        Nutch.ARG_BATCH, batchId,
-        Nutch.ARG_RESUME, shouldResume,
-        Nutch.ARG_FORCE, force));
+    run(ToolUtil.toArgMap(Nutch.ARG_BATCH, batchId, Nutch.ARG_RESUME,
+        shouldResume, Nutch.ARG_FORCE, force));
     LOG.info("ParserJob: success");
-    
+
     long finish = System.currentTimeMillis();
-    LOG.info("ParserJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+    LOG.info("ParserJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
     return 0;
   }
 
@@ -298,12 +301,18 @@
     String batchId = null;
 
     if (args.length < 1) {
-      System.err.println("Usage: ParserJob (<batchId> | -all) [-crawlId <id>] [-resume] [-force]");
-      System.err.println("    <batchId>     - symbolic batch ID created by Generator");
-      System.err.println("    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)");
-      System.err.println("    -all          - consider pages from all crawl jobs");
-      System.err.println("    -resume       - resume a previous incomplete job");
-      System.err.println("    -force        - force re-parsing even if a page is already parsed");
+      System.err
+          .println("Usage: ParserJob (<batchId> | -all) [-crawlId <id>] [-resume] [-force]");
+      System.err
+          .println("    <batchId>     - symbolic batch ID created by Generator");
+      System.err
+          .println("    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)");
+      System.err
+          .println("    -all          - consider pages from all crawl jobs");
+      System.err
+          .println("    -resume       - resume a previous incomplete job");
+      System.err
+          .println("    -force        - force re-parsing even if a page is already parsed");
       return -1;
     }
     for (int i = 0; i < args.length; i++) {
Index: src/java/org/apache/nutch/parse/ParserNotFound.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserNotFound.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserNotFound.java	(working copy)
@@ -18,17 +18,17 @@
 
 public class ParserNotFound extends ParseException {
 
-  private static final long serialVersionUID=23993993939L;
+  private static final long serialVersionUID = 23993993939L;
   private String url;
   private String contentType;
 
-  public ParserNotFound(String message){
-    super(message);    
+  public ParserNotFound(String message) {
+    super(message);
   }
-  
+
   public ParserNotFound(String url, String contentType) {
-    this(url, contentType,
-         "parser not found for contentType="+contentType+" url="+url);
+    this(url, contentType, "parser not found for contentType=" + contentType
+        + " url=" + url);
   }
 
   public ParserNotFound(String url, String contentType, String message) {
@@ -37,6 +37,11 @@
     this.contentType = contentType;
   }
 
-  public String getUrl() { return url; }
-  public String getContentType() { return contentType; }
+  public String getUrl() {
+    return url;
+  }
+
+  public String getContentType() {
+    return contentType;
+  }
 }
Index: src/java/org/apache/nutch/parse/package-info.java
===================================================================
--- src/java/org/apache/nutch/parse/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * The {@link org.apache.nutch.parse.Parse Parse} interface and related classes.
  */
 package org.apache.nutch.parse;
+
Index: src/java/org/apache/nutch/plugin/CircularDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/CircularDependencyException.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/CircularDependencyException.java	(working copy)
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.plugin;
 
-
 /**
  * <code>CircularDependencyException</code> will be thrown if a circular
  * dependency is detected.
Index: src/java/org/apache/nutch/plugin/Extension.java
===================================================================
--- src/java/org/apache/nutch/plugin/Extension.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/Extension.java	(working copy)
@@ -94,8 +94,10 @@
    * Adds a attribute and is only used until model creation at plugin system
    * start up.
    * 
-   * @param pKey a key
-   * @param pValue a value
+   * @param pKey
+   *          a key
+   * @param pValue
+   *          a value
    */
   public void addAttribute(String pKey, String pValue) {
     fAttributes.put(pKey, pValue);
@@ -105,7 +107,8 @@
    * Sets the Class that implement the concret extension and is only used until
    * model creation at system start up.
    * 
-   * @param extensionClazz The extensionClasname to set
+   * @param extensionClazz
+   *          The extensionClasname to set
    */
   public void setClazz(String extensionClazz) {
     fClazz = extensionClazz;
@@ -115,7 +118,8 @@
    * Sets the unique extension Id and is only used until model creation at
    * system start up.
    * 
-   * @param extensionID The extensionID to set
+   * @param extensionID
+   *          The extensionID to set
    */
   public void setId(String extensionID) {
     fId = extensionID;
@@ -147,10 +151,10 @@
     // The same is in PluginRepository.getPluginInstance().
     // Suggested by Stefan Groschupf <sg@media-style.com>
     synchronized (getId()) {
-      try {      
+      try {
         PluginRepository pluginRepository = PluginRepository.get(conf);
-        Class extensionClazz = 
-          pluginRepository.getCachedClass(fDescriptor, getClazz());
+        Class extensionClazz = pluginRepository.getCachedClass(fDescriptor,
+            getClazz());
         // lazy loading of Plugin in case there is no instance of the plugin
         // already.
         pluginRepository.getPluginInstance(getDescriptor());
Index: src/java/org/apache/nutch/plugin/ExtensionPoint.java
===================================================================
--- src/java/org/apache/nutch/plugin/ExtensionPoint.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/ExtensionPoint.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 import java.util.ArrayList;
 
 /**
@@ -76,7 +77,8 @@
   /**
    * Sets the extensionPointId.
    * 
-   * @param pId extension point id
+   * @param pId
+   *          extension point id
    */
   private void setId(String pId) {
     ftId = pId;
Index: src/java/org/apache/nutch/plugin/MissingDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/MissingDependencyException.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/MissingDependencyException.java	(working copy)
@@ -17,8 +17,8 @@
 package org.apache.nutch.plugin;
 
 /**
- * <code>MissingDependencyException</code> will be thrown if a plugin
- * dependency cannot be found.
+ * <code>MissingDependencyException</code> will be thrown if a plugin dependency
+ * cannot be found.
  * 
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/plugin/Pluggable.java
===================================================================
--- src/java/org/apache/nutch/plugin/Pluggable.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/Pluggable.java	(working copy)
@@ -17,15 +17,14 @@
 package org.apache.nutch.plugin;
 
 /**
- * Defines the capability of a class to be plugged into Nutch.
- * This is a common interface that must be implemented by all
- * Nutch Extension Points.
- *
+ * Defines the capability of a class to be plugged into Nutch. This is a common
+ * interface that must be implemented by all Nutch Extension Points.
+ * 
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  * @see <a href="http://wiki.apache.org/nutch/AboutPlugins">About Plugins</a>
- * @see <a href="package-summary.html#package_description">
- *      plugin package description</a>
+ * @see <a href="package-summary.html#package_description"> plugin package
+ *      description</a>
  */
 public interface Pluggable {
 }
Index: src/java/org/apache/nutch/plugin/Plugin.java
===================================================================
--- src/java/org/apache/nutch/plugin/Plugin.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/Plugin.java	(working copy)
@@ -33,8 +33,8 @@
  * The <code>Plugin</code> will be startuped and shutdown by the nutch plugin
  * management system.
  * 
- * A possible usecase of the <code>Plugin</code> implementation is to create
- * or close a database connection.
+ * A possible use case of the <code>Plugin</code> implementation is to create or
+ * close a database connection.
  * 
  * @author joa23
  */
@@ -81,7 +81,8 @@
   }
 
   /**
-   * @param descriptor The descriptor to set
+   * @param descriptor
+   *          The descriptor to set
    */
   private void setDescriptor(PluginDescriptor descriptor) {
     fDescriptor = descriptor;
Index: src/java/org/apache/nutch/plugin/PluginClassLoader.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginClassLoader.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginClassLoader.java	(working copy)
@@ -45,11 +45,11 @@
    */
   public PluginClassLoader(URL[] urls, ClassLoader parent) {
     super(urls, parent);
-    
+
     this.urls = urls;
     this.parent = parent;
   }
-  
+
   @Override
   public int hashCode() {
     final int PRIME = 31;
Index: src/java/org/apache/nutch/plugin/PluginDescriptor.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginDescriptor.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginDescriptor.java	(working copy)
@@ -30,12 +30,11 @@
 import org.apache.hadoop.conf.Configuration;
 
 /**
- * The <code>PluginDescriptor</code> provide access to all meta information of
- * a nutch-plugin, as well to the internationalizable resources and the plugin
- * own classloader. There are meta information about <code>Plugin</code>,
- * <code>ExtensionPoint</code> and <code>Extension</code>. To provide
- * access to the meta data of a plugin via a descriptor allow a lazy loading
- * mechanism.
+ * The <code>PluginDescriptor</code> provides access to all meta information
+ * of a nutch-plugin, as well as to the internationalizable resources and the
+ * plugin's own classloader. There is meta information about
+ * <code>Plugin</code>, <code>ExtensionPoint</code> and <code>Extension</code>.
+ * Providing access to plugin meta data via a descriptor allows lazy loading.
  */
 public class PluginDescriptor {
   private String fPluginPath;
@@ -51,7 +50,8 @@
   private ArrayList<URL> fNotExportedLibs = new ArrayList<URL>();
   private ArrayList<Extension> fExtensions = new ArrayList<Extension>();
   private PluginClassLoader fClassLoader;
-  public static final Logger LOG = LoggerFactory.getLogger(PluginDescriptor.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginDescriptor.class);
   private Configuration fConf;
 
   /**
@@ -204,7 +204,8 @@
   /**
    * Adds a dependency
    * 
-   * @param pId id of the dependent plugin
+   * @param pId
+   *          id of the dependent plugin
    */
   public void addDependency(String pId) {
     fDependencies.add(pId);
@@ -217,7 +218,8 @@
    */
   public void addExportedLibRelative(String pLibPath)
       throws MalformedURLException {
-    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL();
+    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI()
+        .toURL();
     fExportedLibs.add(url);
   }
 
@@ -246,7 +248,8 @@
    */
   public void addNotExportedLibRelative(String pLibPath)
       throws MalformedURLException {
-    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL();
+    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI()
+        .toURL();
     fNotExportedLibs.add(url);
   }
 
@@ -283,8 +286,8 @@
       LOG.debug(getPluginId() + " " + e.toString());
     }
     URL[] urls = arrayList.toArray(new URL[arrayList.size()]);
-    fClassLoader = new PluginClassLoader(urls, PluginDescriptor.class
-        .getClassLoader());
+    fClassLoader = new PluginClassLoader(urls,
+        PluginDescriptor.class.getClassLoader());
     return fClassLoader;
   }
 
@@ -306,7 +309,7 @@
     for (String id : pDescriptor.getDependencies()) {
       PluginDescriptor descriptor = PluginRepository.get(fConf)
           .getPluginDescriptor(id);
-      for (URL url: descriptor.getExportedLibUrls()) {
+      for (URL url : descriptor.getExportedLibUrls()) {
         pLibs.add(url);
       }
       collectLibs(pLibs, descriptor);
Index: src/java/org/apache/nutch/plugin/PluginManifestParser.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginManifestParser.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginManifestParser.java	(working copy)
@@ -39,8 +39,8 @@
 import org.xml.sax.SAXException;
 
 /**
- * The <code>PluginManifestParser</code> parser just parse the manifest file
- * in all plugin directories.
+ * The <code>PluginManifestParser</code> parser just parses the manifest file
+ * in all plugin directories.
  * 
  * @author joa23
  */
@@ -93,7 +93,8 @@
             PluginDescriptor p = parseManifestFile(manifestPath);
             map.put(p.getPluginId(), p);
           } catch (Exception e) {
-            LOG.warn("Error while loading plugin `" + manifestPath + "` " + e.toString());
+            LOG.warn("Error while loading plugin `" + manifestPath + "` "
+                + e.toString());
           }
         }
       }
@@ -182,7 +183,7 @@
     PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name,
         providerName, pluginClazz, pPath, this.conf);
     LOG.debug("plugin: id=" + id + " name=" + name + " version=" + version
-          + " provider=" + providerName + "class=" + pluginClazz);
+        + " provider=" + providerName + " class=" + pluginClazz);
     parseExtension(rootElement, pluginDescriptor);
     parseExtensionPoints(rootElement, pluginDescriptor);
     parseLibraries(rootElement, pluginDescriptor);
@@ -289,8 +290,8 @@
             if (parameters != null) {
               for (int k = 0; k < parameters.getLength(); k++) {
                 Element param = (Element) parameters.item(k);
-                extension.addAttribute(param.getAttribute(ATTR_NAME), param
-                    .getAttribute("value"));
+                extension.addAttribute(param.getAttribute(ATTR_NAME),
+                    param.getAttribute("value"));
               }
             }
             pPluginDescriptor.addExtension(extension);
Index: src/java/org/apache/nutch/plugin/PluginRepository.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRepository.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginRepository.java	(working copy)
@@ -50,13 +50,13 @@
   private HashMap<String, ExtensionPoint> fExtensionPoints;
 
   private HashMap<String, Plugin> fActivatedPlugins;
-  
-  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE =
-    new HashMap<String, Map<PluginClassLoader,Class>>();
 
+  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<String, Map<PluginClassLoader, Class>>();
+
   private Configuration conf;
 
-  public static final Logger LOG = LoggerFactory.getLogger(PluginRepository.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginRepository.class);
 
   /**
    * @throws PluginRuntimeException
@@ -68,7 +68,8 @@
     this.conf = new Configuration(conf);
     this.auto = conf.getBoolean("plugin.auto-activation", true);
     String[] pluginFolders = conf.getStrings("plugin.folders");
-    PluginManifestParser manifestParser = new PluginManifestParser(this.conf, this);
+    PluginManifestParser manifestParser = new PluginManifestParser(this.conf,
+        this);
     Map<String, PluginDescriptor> allPlugins = manifestParser
         .parsePluginFolder(pluginFolders);
     if (allPlugins.isEmpty()) {
@@ -85,7 +86,7 @@
     try {
       installExtensions(fRegisteredPlugins);
     } catch (PluginRuntimeException e) {
-        LOG.error(e.toString());
+      LOG.error(e.toString());
       throw new RuntimeException(e.getMessage());
     }
     displayStatus();
@@ -112,8 +113,8 @@
       return;
     }
 
-    for (PluginDescriptor plugin: plugins) {
-      for(ExtensionPoint point:plugin.getExtenstionPoints()) {
+    for (PluginDescriptor plugin : plugins) {
+      for (ExtensionPoint point : plugin.getExtenstionPoints()) {
         String xpId = point.getId();
         LOG.debug("Adding extension point " + xpId);
         fExtensionPoints.put(xpId, point);
@@ -128,7 +129,7 @@
       throws PluginRuntimeException {
 
     for (PluginDescriptor descriptor : pRegisteredPlugins) {
-      for(Extension extension:descriptor.getExtensions()) {
+      for (Extension extension : descriptor.getExtensions()) {
         String xpId = extension.getTargetPoint();
         ExtensionPoint point = getExtensionPoint(xpId);
         if (point == null) {
@@ -156,7 +157,7 @@
     branch.put(plugin.getPluginId(), plugin);
 
     // Otherwise, checks each dependency
-    for(String id:plugin.getDependencies()) {
+    for (String id : plugin.getDependencies()) {
       PluginDescriptor dependency = plugins.get(id);
       if (dependency == null) {
         throw new MissingDependencyException("Missing dependency " + id
@@ -271,7 +272,8 @@
       // The same is in Extension.getExtensionInstance().
       // Suggested by Stefan Groschupf <sg@media-style.com>
       synchronized (pDescriptor) {
-        Class<?> pluginClass = getCachedClass(pDescriptor, pDescriptor.getPluginClass());
+        Class<?> pluginClass = getCachedClass(pDescriptor,
+            pDescriptor.getPluginClass());
         Constructor<?> constructor = pluginClass.getConstructor(new Class<?>[] {
             PluginDescriptor.class, Configuration.class });
         Plugin plugin = (Plugin) constructor.newInstance(new Object[] {
@@ -312,9 +314,9 @@
       plugin.shutDown();
     }
   }
-  
+
   public Class getCachedClass(PluginDescriptor pDescriptor, String className)
-  throws ClassNotFoundException {
+      throws ClassNotFoundException {
     Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
     if (descMap == null) {
       descMap = new HashMap<PluginClassLoader, Class>();
Index: src/java/org/apache/nutch/plugin/PluginRuntimeException.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(working copy)
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * <code>PluginRuntimeException</code> will be thrown until a exception in the
  * plugin managemnt occurs.
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -41,7 +41,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-public final class Content implements Writable{
+public final class Content implements Writable {
 
   public static final String DIR_NAME = "content";
 
@@ -85,7 +85,7 @@
     this.mimeTypes = new MimeUtil(conf);
     this.contentType = getContentType(contentType, url, content);
   }
-  
+
   public Content(String url, String base, byte[] content, String contentType,
       Metadata metadata, MimeUtil mimeTypes) {
 
@@ -141,11 +141,11 @@
       metadata.readFields(in); // read meta data
       break;
     default:
-      throw new VersionMismatchException((byte)2, oldVersion);
+      throw new VersionMismatchException((byte) 2, oldVersion);
     }
 
   }
-  
+
   public final void readFields(DataInput in) throws IOException {
     metadata.clear();
     int sizeOrVersion = in.readInt();
@@ -163,14 +163,14 @@
         metadata.readFields(in);
         break;
       default:
-        throw new VersionMismatchException((byte)VERSION, (byte)version);
+        throw new VersionMismatchException((byte) VERSION, (byte) version);
       }
     } else { // size
       byte[] compressed = new byte[sizeOrVersion];
       in.readFully(compressed, 0, compressed.length);
       ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
-      DataInput inflater =
-        new DataInputStream(new InflaterInputStream(deflated));
+      DataInput inflater = new DataInputStream(
+          new InflaterInputStream(deflated));
       readFieldsCompressed(inflater);
     }
   }
@@ -204,8 +204,9 @@
     return url;
   }
 
-  /** The base url for relative links contained in the content.
-   * Maybe be different from url if the request redirected.
+  /**
+   * The base url for relative links contained in the content. May be
+   * different from url if the request redirected.
    */
   public String getBaseUrl() {
     return base;
@@ -220,7 +221,9 @@
     this.content = content;
   }
 
-  /** The media type of the retrieved content.
+  /**
+   * The media type of the retrieved content.
+   * 
    * @see <a href="http://www.iana.org/assignments/media-types/">
    *      http://www.iana.org/assignments/media-types/</a>
    */
@@ -276,9 +279,9 @@
       System.out.println("usage:" + usage);
       return;
     }
-    
-    GenericOptionsParser optParser =
-      new GenericOptionsParser(NutchConfiguration.create(), args);
+
+    GenericOptionsParser optParser = new GenericOptionsParser(
+        NutchConfiguration.create(), args);
     String[] argv = optParser.getRemainingArgs();
     Configuration conf = optParser.getConfiguration();
 
Index: src/java/org/apache/nutch/protocol/Protocol.java
===================================================================
--- src/java/org/apache/nutch/protocol/Protocol.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/Protocol.java	(working copy)
@@ -25,7 +25,7 @@
 
 import crawlercommons.robots.BaseRobotRules;
 
-/** A retriever of url content.  Implemented by protocol extensions. */
+/** A retriever of url content. Implemented by protocol extensions. */
 public interface Protocol extends FieldPluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Protocol.class.getName();
@@ -55,7 +55,9 @@
 
   /**
    * Retrieve robot rules applicable for this url.
-   * @param url url to check
+   * 
+   * @param url
+   *          url to check
    * @param page
    * @return robot rules (specific for this url or default), never null
    */
Index: src/java/org/apache/nutch/protocol/ProtocolNotFound.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(working copy)
@@ -22,7 +22,7 @@
   private String url;
 
   public ProtocolNotFound(String url) {
-    this(url, "protocol not found for url="+url);
+    this(url, "protocol not found for url=" + url);
   }
 
   public ProtocolNotFound(String url, String message) {
@@ -30,5 +30,7 @@
     this.url = url;
   }
 
-  public String getUrl() { return url; }
+  public String getUrl() {
+    return url;
+  }
 }
Index: src/java/org/apache/nutch/protocol/ProtocolOutput.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolOutput.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolOutput.java	(working copy)
@@ -17,10 +17,10 @@
 
 package org.apache.nutch.protocol;
 
-
 /**
- * Simple aggregate to pass from protocol plugins both content and
- * protocol status.
+ * Simple aggregate to pass from protocol plugins both content and protocol
+ * status.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class ProtocolOutput {
Index: src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java	(working copy)
@@ -19,38 +19,42 @@
 public interface ProtocolStatusCodes {
 
   /** Content was retrieved without errors. */
-  public static final int SUCCESS              = 1;
+  public static final int SUCCESS = 1;
   /** Content was not retrieved. Any further errors may be indicated in args. */
-  public static final int FAILED               = 2;
+  public static final int FAILED = 2;
 
-  /** This protocol was not found.  Application may attempt to retry later. */
-  public static final int PROTO_NOT_FOUND      = 10;
+  /** This protocol was not found. Application may attempt to retry later. */
+  public static final int PROTO_NOT_FOUND = 10;
   /** Resource is gone. */
-  public static final int GONE                 = 11;
+  public static final int GONE = 11;
   /** Resource has moved permanently. New url should be found in args. */
-  public static final int MOVED                = 12;
+  public static final int MOVED = 12;
   /** Resource has moved temporarily. New url should be found in args. */
-  public static final int TEMP_MOVED           = 13;
+  public static final int TEMP_MOVED = 13;
   /** Resource was not found. */
-  public static final int NOTFOUND             = 14;
+  public static final int NOTFOUND = 14;
   /** Temporary failure. Application may retry immediately. */
-  public static final int RETRY                = 15;
-  /** Unspecified exception occured. Further information may be provided in args. */
-  public static final int EXCEPTION            = 16;
+  public static final int RETRY = 15;
+  /**
+   * Unspecified exception occurred. Further information may be provided in args.
+   */
+  public static final int EXCEPTION = 16;
   /** Access denied - authorization required, but missing/incorrect. */
-  public static final int ACCESS_DENIED        = 17;
+  public static final int ACCESS_DENIED = 17;
   /** Access denied by robots.txt rules. */
-  public static final int ROBOTS_DENIED        = 18;
+  public static final int ROBOTS_DENIED = 18;
   /** Too many redirects. */
-  public static final int REDIR_EXCEEDED       = 19;
+  public static final int REDIR_EXCEEDED = 19;
   /** Not fetching. */
-  public static final int NOTFETCHING          = 20;
+  public static final int NOTFETCHING = 20;
   /** Unchanged since the last fetch. */
-  public static final int NOTMODIFIED          = 21;
-  /** Request was refused by protocol plugins, because it would block.
-   * The expected number of milliseconds to wait before retry may be provided
-   * in args. */
-  public static final int WOULDBLOCK           = 22;
+  public static final int NOTMODIFIED = 21;
+  /**
+   * Request was refused by protocol plugins, because it would block. The
+   * expected number of milliseconds to wait before retry may be provided in
+   * args.
+   */
+  public static final int WOULDBLOCK = 22;
   /** Thread was blocked http.max.delays times during fetching. */
-  public static final int BLOCKED              = 23;
+  public static final int BLOCKED = 23;
 }
Index: src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java	(working copy)
@@ -100,7 +100,7 @@
     }
     return TableUtil.toString(args.iterator().next());
   }
-  
+
   public static String toString(ProtocolStatus status) {
     if (status == null) {
       return "(null)";
@@ -113,7 +113,8 @@
       int i = 0;
       Iterator<CharSequence> it = args.iterator();
       while (it.hasNext()) {
-        if (i > 0) sb.append(',');
+        if (i > 0)
+          sb.append(',');
         sb.append(it.next());
         i++;
       }
Index: src/java/org/apache/nutch/protocol/RobotRules.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRules.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/RobotRules.java	(working copy)
@@ -35,9 +35,8 @@
   public long getCrawlDelay();
 
   /**
-   * Returns <code>false</code> if the <code>robots.txt</code> file
-   * prohibits us from accessing the given <code>url</code>, or
-   * <code>true</code> otherwise.
+   * Returns <code>false</code> if the <code>robots.txt</code> file prohibits us
+   * from accessing the given <code>url</code>, or <code>true</code> otherwise.
    */
   public boolean isAllowed(URL url);
 
Index: src/java/org/apache/nutch/protocol/RobotRulesParser.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRulesParser.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/RobotRulesParser.java	(working copy)
@@ -43,35 +43,38 @@
 import crawlercommons.robots.SimpleRobotRulesParser;
 
 /**
- * This class uses crawler-commons for handling the parsing of {@code robots.txt} files.
- * It emits SimpleRobotRules objects, which describe the download permissions
- * as described in SimpleRobotRulesParser.
+ * This class uses crawler-commons for handling the parsing of
+ * {@code robots.txt} files. It emits SimpleRobotRules objects, which describe
+ * the download permissions as described in SimpleRobotRulesParser.
  */
 public abstract class RobotRulesParser implements Configurable {
 
-  public static final Logger LOG = LoggerFactory.getLogger(RobotRulesParser.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(RobotRulesParser.class);
 
-  protected static final Hashtable<String, BaseRobotRules> CACHE = new Hashtable<String, BaseRobotRules> ();
+  protected static final Hashtable<String, BaseRobotRules> CACHE = new Hashtable<String, BaseRobotRules>();
 
   /**
-   *  A {@link BaseRobotRules} object appropriate for use
-   *  when the {@code robots.txt} file is empty or missing;
-   *  all requests are allowed.
+   * A {@link BaseRobotRules} object appropriate for use when the
+   * {@code robots.txt} file is empty or missing; all requests are allowed.
    */
-  public static final BaseRobotRules EMPTY_RULES = new SimpleRobotRules(RobotRulesMode.ALLOW_ALL);
+  public static final BaseRobotRules EMPTY_RULES = new SimpleRobotRules(
+      RobotRulesMode.ALLOW_ALL);
 
   /**
-   *  A {@link BaseRobotRules} object appropriate for use when the 
-   *  {@code robots.txt} file is not fetched due to a {@code 403/Forbidden}
-   *  response; all requests are disallowed. 
+   * A {@link BaseRobotRules} object appropriate for use when the
+   * {@code robots.txt} file is not fetched due to a {@code 403/Forbidden}
+   * response; all requests are disallowed.
    */
-  public static BaseRobotRules FORBID_ALL_RULES = new SimpleRobotRules(RobotRulesMode.ALLOW_NONE);
+  public static BaseRobotRules FORBID_ALL_RULES = new SimpleRobotRules(
+      RobotRulesMode.ALLOW_NONE);
 
   private static SimpleRobotRulesParser robotParser = new SimpleRobotRulesParser();
   private Configuration conf;
   protected String agentNames;
 
-  public RobotRulesParser() { }
+  public RobotRulesParser() {
+  }
 
   public RobotRulesParser(Configuration conf) {
     setConf(conf);
@@ -90,9 +93,10 @@
     }
     agentNames = agentName;
 
-    // If there are any other agents specified, append those to the list of agents
+    // If there are any other agents specified, append those to the list of
+    // agents
     String otherAgents = conf.get("http.robots.agents");
-    if(otherAgents != null && !otherAgents.trim().isEmpty()) {
+    if (otherAgents != null && !otherAgents.trim().isEmpty()) {
       StringTokenizer tok = new StringTokenizer(otherAgents, ",");
       StringBuilder sb = new StringBuilder(agentNames);
       while (tok.hasMoreTokens()) {
@@ -99,7 +103,8 @@
         String str = tok.nextToken().trim();
         if (str.equals("*") || str.equals(agentName)) {
           // skip wildcard "*" or agent name itself
-          // (required for backward compatibility, cf. NUTCH-1715 and NUTCH-1718)
+          // (required for backward compatibility, cf. NUTCH-1715 and
+          // NUTCH-1718)
         } else {
           sb.append(",").append(str);
         }
@@ -117,16 +122,23 @@
   }
 
   /**
-   * Parses the robots content using the {@link SimpleRobotRulesParser} from crawler commons
-   *    
-   * @param url A string containing url
-   * @param content Contents of the robots file in a byte array 
-   * @param contentType The content type of the robots file
-   * @param robotName A string containing all the robots agent names used by parser for matching
-   * @return BaseRobotRules object 
+   * Parses the robots content using the {@link SimpleRobotRulesParser} from
+   * crawler commons
+   * 
+   * @param url
+   *          A string containing url
+   * @param content
+   *          Contents of the robots file in a byte array
+   * @param contentType
+   *          The content type of the robots file
+   * @param robotName
+   *          A string containing all the robots agent names used by parser for
+   *          matching
+   * @return BaseRobotRules object
    */
-  public BaseRobotRules parseRules (String url, byte[] content, String contentType, String robotName) {
-    return robotParser.parseContent(url, content, contentType, robotName); 
+  public BaseRobotRules parseRules(String url, byte[] content,
+      String contentType, String robotName) {
+    return robotParser.parseContent(url, content, contentType, robotName);
   }
 
   public BaseRobotRules getRobotRulesSet(Protocol protocol, String url) {
@@ -145,23 +157,29 @@
   public static void main(String[] argv) {
 
     if (argv.length != 3) {
-      System.err.println("Usage: RobotRulesParser <robots-file> <url-file> <agent-names>\n");
-      System.err.println("    <robots-file> - Input robots.txt file which will be parsed.");
-      System.err.println("    <url-file>    - Contains input URLs (1 per line) which are tested against the rules.");
-      System.err.println("    <agent-names> - Input agent names. Multiple agent names can be provided using");
-      System.err.println("                    comma as a delimiter without any spaces.");
+      System.err
+          .println("Usage: RobotRulesParser <robots-file> <url-file> <agent-names>\n");
+      System.err
+          .println("    <robots-file> - Input robots.txt file which will be parsed.");
+      System.err
+          .println("    <url-file>    - Contains input URLs (1 per line) which are tested against the rules.");
+      System.err
+          .println("    <agent-names> - Input agent names. Multiple agent names can be provided using");
+      System.err
+          .println("                    comma as a delimiter without any spaces.");
       System.exit(-1);
     }
 
     try {
       byte[] robotsBytes = Files.toByteArray(new File(argv[0]));
-      BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes, "text/plain", argv[2]);
+      BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes,
+          "text/plain", argv[2]);
 
       LineNumberReader testsIn = new LineNumberReader(new FileReader(argv[1]));
       String testPath = testsIn.readLine().trim();
       while (testPath != null) {
-        System.out.println( (rules.isAllowed(testPath) ? "allowed" : "not allowed") +
-            ":\t" + testPath);
+        System.out.println((rules.isAllowed(testPath) ? "allowed"
+            : "not allowed") + ":\t" + testPath);
         testPath = testsIn.readLine();
       }
       testsIn.close();
Index: src/java/org/apache/nutch/protocol/package-info.java
===================================================================
--- src/java/org/apache/nutch/protocol/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * see also {@link org.apache.nutch.net.protocols}.
  */
 package org.apache.nutch.protocol;
+
Index: src/java/org/apache/nutch/scoring/ScoreDatum.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoreDatum.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoreDatum.java	(working copy)
@@ -35,9 +35,10 @@
   private String anchor;
   private int distance;
   private Map<String, byte[]> metaData = new HashMap<String, byte[]>();
-  
-  public ScoreDatum() { }
-  
+
+  public ScoreDatum() {
+  }
+
   public ScoreDatum(float score, String url, String anchor, int depth) {
     this.score = score;
     this.url = url;
@@ -52,13 +53,13 @@
     anchor = Text.readString(in);
     distance = WritableUtils.readVInt(in);
     metaData.clear();
-    
+
     int size = WritableUtils.readVInt(in);
     for (int i = 0; i < size; i++) {
       String key = Text.readString(in);
       byte[] value = Bytes.readByteArray(in);
       metaData.put(key, value);
-    }    
+    }
   }
 
   @Override
@@ -67,7 +68,7 @@
     Text.writeString(out, url);
     Text.writeString(out, anchor);
     WritableUtils.writeVInt(out, distance);
-    
+
     WritableUtils.writeVInt(out, metaData.size());
     for (Entry<String, byte[]> e : metaData.entrySet()) {
       Text.writeString(out, e.getKey());
@@ -74,23 +75,23 @@
       Bytes.writeByteArray(out, e.getValue());
     }
   }
-  
+
   public byte[] getMeta(String key) {
     return metaData.get(key);
   }
-  
+
   public void setMeta(String key, byte[] value) {
     metaData.put(key, value);
   }
-  
+
   public byte[] deleteMeta(String key) {
     return metaData.remove(key);
   }
-  
+
   public float getScore() {
     return score;
   }
-  
+
   public void setScore(float score) {
     this.score = score;
   }
@@ -98,7 +99,7 @@
   public String getUrl() {
     return url;
   }
-  
+
   public void setUrl(String url) {
     this.url = url;
   }
@@ -106,7 +107,7 @@
   public String getAnchor() {
     return anchor;
   }
-  
+
   public int getDistance() {
     return distance;
   }
@@ -114,8 +115,7 @@
   @Override
   public String toString() {
     return "ScoreDatum [score=" + score + ", url=" + url + ", anchor=" + anchor
-        + ", distance="+distance + ", metaData=" + metaData + "]";
+        + ", distance=" + distance + ", metaData=" + metaData + "]";
   }
-  
-  
+
 }
Index: src/java/org/apache/nutch/scoring/ScoringFilter.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoringFilter.java	(working copy)
@@ -26,11 +26,11 @@
 
 /**
  * A contract defining behavior of scoring plugins.
- *
- * A scoring filter will manipulate scoring variables in CrawlDatum and
- * in resulting search indexes. Filters can be chained in a specific order,
- * to provide multi-stage scoring adjustments.
- *
+ * 
+ * A scoring filter will manipulate scoring variables in CrawlDatum and in
+ * resulting search indexes. Filters can be chained in a specific order, to
+ * provide multi-stage scoring adjustments.
+ * 
  * @author Andrzej Bialecki
  */
 public interface ScoringFilter extends Configurable, FieldPluggable {
@@ -39,74 +39,101 @@
 
   /**
    * Set an initial score for newly injected pages. Note: newly injected pages
-   * may have no inlinks, so filter implementations may wish to set this
-   * score to a non-zero value, to give newly injected pages some initial
-   * credit.
-   * @param url url of the page
-   * @param page new page. Filters will modify it in-place.
+   * may have no inlinks, so filter implementations may wish to set this score
+   * to a non-zero value, to give newly injected pages some initial credit.
+   * 
+   * @param url
+   *          url of the page
+   * @param page
+   *          new page. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void injectedScore(String url, WebPage page) throws ScoringFilterException;
+  public void injectedScore(String url, WebPage page)
+      throws ScoringFilterException;
 
   /**
-   * Set an initial score for newly discovered pages. Note: newly discovered pages
-   * have at least one inlink with its score contribution, so filter implementations
-   * may choose to set initial score to zero (unknown value), and then the inlink
-   * score contribution will set the "real" value of the new page.
-   * @param url url of the page
+   * Set an initial score for newly discovered pages. Note: newly discovered
+   * pages have at least one inlink with its score contribution, so filter
+   * implementations may choose to set initial score to zero (unknown value),
+   * and then the inlink score contribution will set the "real" value of the new
+   * page.
+   * 
+   * @param url
+   *          url of the page
    * @param page
    * @throws ScoringFilterException
    */
-  public void initialScore(String url, WebPage page) throws ScoringFilterException;
+  public void initialScore(String url, WebPage page)
+      throws ScoringFilterException;
 
   /**
-   * This method prepares a sort value for the purpose of sorting and
-   * selecting top N scoring pages during fetchlist generation.
-   * @param url url of the page
-   * @param datum page row. Modifications will be persisted.
-   * @param initSort initial sort value, or a value from previous filters in chain
+   * This method prepares a sort value for the purpose of sorting and selecting
+   * top N scoring pages during fetchlist generation.
+   * 
+   * @param url
+   *          url of the page
+   * @param page
+   *          page row. Modifications will be persisted.
+   * @param initSort
+   *          initial sort value, or a value from previous filters in chain
    */
-  public float generatorSortValue(String url, WebPage page, float initSort) throws ScoringFilterException;
+  public float generatorSortValue(String url, WebPage page, float initSort)
+      throws ScoringFilterException;
 
   /**
    * Distribute score value from the current page to all its outlinked pages.
-   * @param fromUrl url of the source page
-   * @param row page row
-   * @param scoreData A list of {@link OutlinkedScoreDatum}s for every outlink.
-   * These {@link OutlinkedScoreDatum}s will be passed to
-   * {@link #updateScore(String, OldWebTableRow, List)}
-   * for every outlinked URL.
-   * @param allCount number of all collected outlinks from the source page
+   * 
+   * @param fromUrl
+   *          url of the source page
+   * @param page
+   *          page row
+   * @param scoreData
+   *          A list of {@link ScoreDatum}s for every outlink. These
+   *          {@link ScoreDatum}s will be passed to
+   *          {@link #updateScore(String, WebPage, List)} for every
+   *          outlinked URL.
+   * @param allCount
+   *          number of all collected outlinks from the source page
    * @throws ScoringFilterException
    */
-  public void distributeScoreToOutlinks(String fromUrl,
-      WebPage page, Collection<ScoreDatum> scoreData,
-      int allCount) throws ScoringFilterException;
+  public void distributeScoreToOutlinks(String fromUrl, WebPage page,
+      Collection<ScoreDatum> scoreData, int allCount)
+      throws ScoringFilterException;
 
   /**
-   * This method calculates a new score during table update, based on the values contributed
-   * by inlinked pages.
-   * @param url url of the page
+   * This method calculates a new score during table update, based on the values
+   * contributed by inlinked pages.
+   * 
+   * @param url
+   *          url of the page
    * @param page
-   * @param inlinked list of {@link OutlinkedScoreDatum}s for all inlinks pointing to this URL.
+   * @param inlinkedScoreData
+   *          list of {@link ScoreDatum}s for all inlinks pointing to
+   *          this URL.
    * @throws ScoringFilterException
    */
-  public void updateScore(String url, WebPage page, List<ScoreDatum> inlinkedScoreData)
-  throws ScoringFilterException;
+  public void updateScore(String url, WebPage page,
+      List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException;
 
   /**
    * This method calculates a Lucene document boost.
-   * @param url url of the page
-   * @param doc document. NOTE: this already contains all information collected
-   * by indexing filters. Implementations may modify this instance, in order to store/remove
-   * some information.
-   * @param row page row
-   * @param initScore initial boost value for the Lucene document.
-   * @return boost value for the Lucene document. This value is passed as an argument
-   * to the next scoring filter in chain. NOTE: implementations may also express
-   * other scoring strategies by modifying Lucene document directly.
+   * 
+   * @param url
+   *          url of the page
+   * @param doc
+   *          document. NOTE: this already contains all information collected by
+   *          indexing filters. Implementations may modify this instance, in
+   *          order to store/remove some information.
+   * @param row
+   *          page row
+   * @param initScore
+   *          initial boost value for the Lucene document.
+   * @return boost value for the Lucene document. This value is passed as an
+   *         argument to the next scoring filter in chain. NOTE: implementations
+   *         may also express other scoring strategies by modifying Lucene
+   *         document directly.
    * @throws ScoringFilterException
    */
-  public float indexerScore(String url, NutchDocument doc, WebPage page, float initScore)
-  throws ScoringFilterException;
+  public float indexerScore(String url, NutchDocument doc, WebPage page,
+      float initScore) throws ScoringFilterException;
 }
Index: src/java/org/apache/nutch/scoring/ScoringFilterException.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilterException.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoringFilterException.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.scoring;
 
 /**
Index: src/java/org/apache/nutch/scoring/ScoringFilters.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoringFilters.java	(working copy)
@@ -35,7 +35,7 @@
 
 /**
  * Creates and caches {@link ScoringFilter} implementing plugins.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class ScoringFilters extends Configured implements ScoringFilter {
@@ -46,7 +46,8 @@
     super(conf);
     ObjectCache objectCache = ObjectCache.get(conf);
     String order = conf.get("scoring.filter.order");
-    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -55,20 +56,23 @@
       }
 
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ScoringFilter.X_POINT_ID);
-        if (point == null) throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ScoringFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, ScoringFilter> filterMap =
-          new HashMap<String, ScoringFilter>();
+        HashMap<String, ScoringFilter> filterMap = new HashMap<String, ScoringFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ScoringFilter filter = (ScoringFilter) extension.getExtensionInstance();
+          ScoringFilter filter = (ScoringFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(filter.getClass().getName())) {
             filterMap.put(filter.getClass().getName(), filter);
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(ScoringFilter.class.getName(), filterMap.values().toArray(new ScoringFilter[0]));
+          objectCache.setObject(ScoringFilter.class.getName(), filterMap
+              .values().toArray(new ScoringFilter[0]));
         } else {
           ScoringFilter[] filter = new ScoringFilter[orderedFilters.length];
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -79,7 +83,8 @@
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+      this.filters = (ScoringFilter[]) objectCache
+          .getObject(ScoringFilter.class.getName());
     }
   }
 
@@ -86,7 +91,7 @@
   /** Calculate a sort value for Generate. */
   @Override
   public float generatorSortValue(String url, WebPage row, float initSort)
-  throws ScoringFilterException {
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       initSort = filter.generatorSortValue(url, row, initSort);
     }
@@ -95,7 +100,8 @@
 
   /** Calculate a new initial score, used when adding newly discovered pages. */
   @Override
-  public void initialScore(String url, WebPage row) throws ScoringFilterException {
+  public void initialScore(String url, WebPage row)
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       filter.initialScore(url, row);
     }
@@ -103,7 +109,8 @@
 
   /** Calculate a new initial score, used when injecting new pages. */
   @Override
-  public void injectedScore(String url, WebPage row) throws ScoringFilterException {
+  public void injectedScore(String url, WebPage row)
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       filter.injectedScore(url, row);
     }
Index: src/java/org/apache/nutch/scoring/package-info.java
===================================================================
--- src/java/org/apache/nutch/scoring/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * The {@link org.apache.nutch.scoring.ScoringFilter ScoringFilter} interface.
  */
 package org.apache.nutch.scoring;
+
Index: src/java/org/apache/nutch/storage/Host.java
===================================================================
--- src/java/org/apache/nutch/storage/Host.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/Host.java	(working copy)
@@ -1,25 +1,25 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage; 
+package org.apache.nutch.storage;
 
 import org.apache.avro.util.Utf8;
 import org.apache.nutch.util.Bytes;
@@ -26,15 +26,15 @@
 
 @SuppressWarnings("all")
 /** Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet */
-public class Host extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet\",\"fields\":[{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}}]}");
+public class Host extends org.apache.gora.persistency.impl.PersistentBase
+    implements org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet\",\"fields\":[{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    METADATA(0, "metadata"),
-    OUTLINKS(1, "outlinks"),
-    INLINKS(2, "inlinks"),
-    ;
+    METADATA(0, "metadata"), OUTLINKS(1, "outlinks"), INLINKS(2, "inlinks"), ;
     /**
      * Field's index.
      */
@@ -47,38 +47,51 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "metadata",
-  "outlinks",
-  "inlinks",
-  };
+  public static final String[] _ALL_FIELDS = { "metadata", "outlinks",
+      "inlinks", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -85,103 +98,160 @@
     return Host._ALL_FIELDS.length;
   }
 
-  /** A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc */
-  private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
-  /** Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
-  /** Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+  /**
+   * A multivalued metadata container used for storing a wide variety of host
+   * metadata such as structured web server characterists etc
+   */
+  private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
+  /**
+   * Hyperlinks which direct outside of the current host domain these can used
+   * in a histogram style manner to generate host statistics
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
+  /**
+   * Hyperlinks which link to pages within the current host domain these can
+   * used in a histogram style manner to generate host statistics
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return metadata;
-    case 1: return outlinks;
-    case 2: return inlinks;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return metadata;
+    case 1:
+      return outlinks;
+    case 2:
+      return inlinks;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 1: outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 2: inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 1:
+      outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 2:
+      inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'metadata' field.
-   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc   */
-  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+   * Gets the value of the 'metadata' field. A multivalued metadata container
+   * used for storing a wide variety of host metadata such as structured web
+   * server characterists etc
+   */
+  public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
     return metadata;
   }
 
   /**
-   * Sets the value of the 'metadata' field.
-   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc   * @param value the value to set.
+   * Sets the value of the 'metadata' field. A multivalued metadata container
+   * used for storing a wide variety of host metadata such as structured web
+   * server characterists etc * @param value the value to set.
    */
-  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setMetadata(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc   * @param value the value to set.
+   * Checks the dirty status of the 'metadata' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * multivalued metadata container used for storing a wide variety of host
+   * metadata such as structured web server characterists etc * @param value the
+   * value to set.
    */
-  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+  public boolean isMetadataDirty(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
     return isDirty(0);
   }
 
   /**
-   * Gets the value of the 'outlinks' field.
-   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+   * Gets the value of the 'outlinks' field. Hyperlinks which direct outside of
+   * the current host domain these can used in a histogram style manner to
+   * generate host statistics
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
     return outlinks;
   }
 
   /**
-   * Sets the value of the 'outlinks' field.
-   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Sets the value of the 'outlinks' field. Hyperlinks which direct outside of
+   * the current host domain these can used in a histogram style manner to
+   * generate host statistics * @param value the value to set.
    */
-  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setOutlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database.
+   * Hyperlinks which direct outside of the current host domain these can used
+   * in a histogram style manner to generate host statistics * @param value the
+   * value to set.
    */
-  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isOutlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(1);
   }
 
   /**
-   * Gets the value of the 'inlinks' field.
-   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+   * Gets the value of the 'inlinks' field. Hyperlinks which link to pages
+   * within the current host domain these can used in a histogram style manner
+   * to generate host statistics
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
     return inlinks;
   }
 
   /**
-   * Sets the value of the 'inlinks' field.
-   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Sets the value of the 'inlinks' field. Hyperlinks which link to pages
+   * within the current host domain these can used in a histogram style manner
+   * to generate host statistics * @param value the value to set.
    */
-  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setInlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database.
+   * Hyperlinks which link to pages within the current host domain these can
+   * used in a histogram style manner to generate host statistics * @param value
+   * the value to set.
    */
-  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isInlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(2);
   }
 
@@ -189,17 +259,19 @@
   public static org.apache.nutch.storage.Host.Builder newBuilder() {
     return new org.apache.nutch.storage.Host.Builder();
   }
-  
+
   /** Creates a new Host RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.Host.Builder newBuilder(org.apache.nutch.storage.Host.Builder other) {
+  public static org.apache.nutch.storage.Host.Builder newBuilder(
+      org.apache.nutch.storage.Host.Builder other) {
     return new org.apache.nutch.storage.Host.Builder(other);
   }
-  
+
   /** Creates a new Host RecordBuilder by copying an existing Host instance */
-  public static org.apache.nutch.storage.Host.Builder newBuilder(org.apache.nutch.storage.Host other) {
+  public static org.apache.nutch.storage.Host.Builder newBuilder(
+      org.apache.nutch.storage.Host other) {
     return new org.apache.nutch.storage.Host.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -222,62 +294,67 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for Host instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Host>
-    implements org.apache.avro.data.RecordBuilder<Host> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<Host> implements
+      org.apache.avro.data.RecordBuilder<Host> {
 
-    private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
+    private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
 
     /** Creates a new Builder */
     private Builder() {
       super(org.apache.nutch.storage.Host.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.Host.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing Host instance */
     private Builder(org.apache.nutch.storage.Host other) {
-            super(org.apache.nutch.storage.Host.SCHEMA$);
+      super(org.apache.nutch.storage.Host.SCHEMA$);
       if (isValidValue(fields()[0], other.metadata)) {
-        this.metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) data().deepCopy(fields()[0].schema(), other.metadata);
+        this.metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) data()
+            .deepCopy(fields()[0].schema(), other.metadata);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.outlinks)) {
-        this.outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[1].schema(), other.outlinks);
+        this.outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[1].schema(), other.outlinks);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.inlinks)) {
-        this.inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[2].schema(), other.inlinks);
+        this.inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[2].schema(), other.inlinks);
         fieldSetFlags()[2] = true;
       }
     }
 
     /** Gets the value of the 'metadata' field */
-    public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
       return metadata;
     }
-    
+
     /** Sets the value of the 'metadata' field */
-    public org.apache.nutch.storage.Host.Builder setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+    public org.apache.nutch.storage.Host.Builder setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
       validate(fields()[0], value);
       this.metadata = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'metadata' field has been set */
     public boolean hasMetadata() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'metadata' field */
     public org.apache.nutch.storage.Host.Builder clearMetadata() {
       metadata = null;
@@ -284,25 +361,26 @@
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'outlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
       return outlinks;
     }
-    
+
     /** Sets the value of the 'outlinks' field */
-    public org.apache.nutch.storage.Host.Builder setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.Host.Builder setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[1], value);
       this.outlinks = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'outlinks' field has been set */
     public boolean hasOutlinks() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'outlinks' field */
     public org.apache.nutch.storage.Host.Builder clearOutlinks() {
       outlinks = null;
@@ -309,25 +387,26 @@
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'inlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
       return inlinks;
     }
-    
+
     /** Sets the value of the 'inlinks' field */
-    public org.apache.nutch.storage.Host.Builder setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.Host.Builder setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[2], value);
       this.inlinks = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'inlinks' field has been set */
     public boolean hasInlinks() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'inlinks' field */
     public org.apache.nutch.storage.Host.Builder clearInlinks() {
       inlinks = null;
@@ -334,14 +413,20 @@
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     @Override
     public Host build() {
       try {
         Host record = new Host();
-        record.metadata = fieldSetFlags()[0] ? this.metadata : (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[0]));
-        record.outlinks = fieldSetFlags()[1] ? this.outlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[1]));
-        record.inlinks = fieldSetFlags()[2] ? this.inlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[2]));
+        record.metadata = fieldSetFlags()[0] ? this.metadata
+            : (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[0]));
+        record.outlinks = fieldSetFlags()[1] ? this.outlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[1]));
+        record.inlinks = fieldSetFlags()[2] ? this.inlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[2]));
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -348,115 +433,155 @@
       }
     }
   }
-  
-  public Host.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public Host.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public Host newInstance(){
+  public Host newInstance() {
     return newBuilder().build();
   }
 
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public boolean contains(String key) {
     return metadata.containsKey(new Utf8(key));
   }
-  
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public String getValue(String key, String defaultValue) {
-    if (!contains(key)) return defaultValue;
+    if (!contains(key))
+      return defaultValue;
     return Bytes.toString(metadata.get(new Utf8(key)));
   }
-  
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public int getInt(String key, int defaultValue) {
-    if (!contains(key)) return defaultValue;
-    return Integer.parseInt(getValue(key,null));
+    if (!contains(key))
+      return defaultValue;
+    return Integer.parseInt(getValue(key, null));
   }
 
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public long getLong(String key, long defaultValue) {
-    if (!contains(key)) return defaultValue;
-    return Long.parseLong(getValue(key,null));
+    if (!contains(key))
+      return defaultValue;
+    return Long.parseLong(getValue(key, null));
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends Host implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'metadata' field.
-	   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc	   */
-	  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'metadata' field.
-	   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc	   * @param value the value to set.
-	   */
-	  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc	   * @param value the value to set.
-	   */
-	  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'outlinks' field.
-	   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'outlinks' field.
-	   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'inlinks' field.
-	   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'inlinks' field.
-	   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends Host implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'metadata' field. A multivalued metadata container
+     * used for storing a wide variety of host metadata such as structured web
+     * server characteristics etc
+     */
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'metadata' field. A multivalued metadata container
+     * used for storing a wide variety of host metadata such as structured web
+     * server characteristics etc * @param value the value to set.
+     */
+    public void setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'metadata' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * multivalued metadata container used for storing a wide variety of host
+     * metadata such as structured web server characteristics etc * @param value
+     * the value to set.
+     */
+    public boolean isMetadataDirty(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'outlinks' field. Hyperlinks which direct outside
+     * of the current host domain these can be used in a histogram style manner to
+     * generate host statistics
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'outlinks' field. Hyperlinks which direct outside
+     * of the current host domain these can be used in a histogram style manner to
+     * generate host statistics * @param value the value to set.
+     */
+    public void setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Hyperlinks which direct outside of the current host domain these can be used
+     * in a histogram style manner to generate host statistics * @param value
+     * the value to set.
+     */
+    public boolean isOutlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'inlinks' field. Hyperlinks which link to pages
+     * within the current host domain these can be used in a histogram style manner
+     * to generate host statistics
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'inlinks' field. Hyperlinks which link to pages
+     * within the current host domain these can be used in a histogram style manner
+     * to generate host statistics * @param value the value to set.
+     */
+    public void setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Hyperlinks which link to pages within the current host domain these can be
+     * used in a histogram style manner to generate host statistics * @param
+     * value the value to set.
+     */
+    public boolean isInlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/Mark.java
===================================================================
--- src/java/org/apache/nutch/storage/Mark.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/Mark.java	(working copy)
@@ -19,8 +19,8 @@
 import org.apache.avro.util.Utf8;
 
 public enum Mark {
-  INJECT_MARK("_injmrk_"), GENERATE_MARK("_gnmrk_"), FETCH_MARK("_ftcmrk_"),
-  PARSE_MARK("__prsmrk__"), UPDATEDB_MARK("_updmrk_"), INDEX_MARK("_idxmrk_");
+  INJECT_MARK("_injmrk_"), GENERATE_MARK("_gnmrk_"), FETCH_MARK("_ftcmrk_"), PARSE_MARK(
+      "__prsmrk__"), UPDATEDB_MARK("_updmrk_"), INDEX_MARK("_idxmrk_");
 
   private Utf8 name;
 
@@ -29,7 +29,7 @@
   }
 
   public void putMark(WebPage page, Utf8 markValue) {
-      page.getMarkers().put(name, markValue);
+    page.getMarkers().put(name, markValue);
   }
 
   public void putMark(WebPage page, String markValue) {
@@ -46,7 +46,9 @@
 
   /**
    * Remove the mark only if the mark is present on the page.
-   * @param page The page to remove the mark from.
+   * 
+   * @param page
+   *          The page to remove the mark from.
    * @return If the mark was present.
    */
   public Utf8 removeMarkIfExist(WebPage page) {
@@ -55,8 +57,8 @@
     }
     return null;
   }
-  
+
   public Utf8 getName() {
-	return name;
+    return name;
   }
 }
Index: src/java/org/apache/nutch/storage/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ParseStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/ParseStatus.java	(working copy)
@@ -1,36 +1,38 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage;  
+package org.apache.nutch.storage;
+
 @SuppressWarnings("all")
 /** A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage */
-public class ParseStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}");
+public class ParseStatus extends
+    org.apache.gora.persistency.impl.PersistentBase implements
+    org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    MAJOR_CODE(0, "majorCode"),
-    MINOR_CODE(1, "minorCode"),
-    ARGS(2, "args"),
-    ;
+    MAJOR_CODE(0, "majorCode"), MINOR_CODE(1, "minorCode"), ARGS(2, "args"), ;
     /**
      * Field's index.
      */
@@ -43,38 +45,51 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "majorCode",
-  "minorCode",
-  "args",
-  };
+  public static final String[] _ALL_FIELDS = { "majorCode", "minorCode",
+      "args", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -81,53 +96,98 @@
     return ParseStatus._ALL_FIELDS.length;
   }
 
-  /** Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) */
+  /**
+   * Major parsing status' including NOTPARSED (Parsing was not performed),
+   * SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more
+   * specific error message in arguments.)
+   */
   private int majorCode;
-  /** Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. */
+  /**
+   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of
+   * anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive
+   * to redirect to another URL. The target URL can be retrieved from the
+   * arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which
+   * may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed.
+   * Content was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other
+   * related parts of the content are needed to complete parsing. The list of
+   * URLs to missing parts may be provided in arguments. The Fetcher may decide
+   * to fetch these parts at once, then put them into Content.metadata, and
+   * supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There
+   * was no content to be parsed - probably caused by errors at protocol stage.
+   */
   private int minorCode;
-  /** Optional arguments supplied to compliment and/or justify the parse status code. */
+  /**
+   * Optional arguments supplied to compliment and/or justify the parse status
+   * code.
+   */
   private java.util.List<java.lang.CharSequence> args;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return majorCode;
-    case 1: return minorCode;
-    case 2: return args;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return majorCode;
+    case 1:
+      return minorCode;
+    case 2:
+      return args;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: majorCode = (java.lang.Integer)(value); break;
-    case 1: minorCode = (java.lang.Integer)(value); break;
-    case 2: args = (java.util.List<java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      majorCode = (java.lang.Integer) (value);
+      break;
+    case 1:
+      minorCode = (java.lang.Integer) (value);
+      break;
+    case 2:
+      args = (java.util.List<java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyListWrapper(
+              (java.util.List) value));
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'majorCode' field.
-   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)   */
+   * Gets the value of the 'majorCode' field. Major parsing status' including
+   * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED
+   * (General failure. There may be a more specific error message in arguments.)
+   */
   public java.lang.Integer getMajorCode() {
     return majorCode;
   }
 
   /**
-   * Sets the value of the 'majorCode' field.
-   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)   * @param value the value to set.
+   * Sets the value of the 'majorCode' field. Major parsing status' including
+   * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED
+   * (General failure. There may be a more specific error message in arguments.)
+   * * @param value the value to set.
    */
   public void setMajorCode(java.lang.Integer value) {
     this.majorCode = value;
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)   * @param value the value to set.
+   * Checks the dirty status of the 'majorCode' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Major
+   * parsing status' including NOTPARSED (Parsing was not performed), SUCCESS
+   * (Parsing succeeded), FAILED (General failure. There may be a more specific
+   * error message in arguments.) * @param value the value to set.
    */
   public boolean isMajorCodeDirty(java.lang.Integer value) {
     return isDirty(0);
@@ -134,24 +194,65 @@
   }
 
   /**
-   * Gets the value of the 'minorCode' field.
-   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.   */
+   * Gets the value of the 'minorCode' field. Minor parsing status' including
+   * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+   * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+   * another URL. The target URL can be retrieved from the arguments.,
+   * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+   * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+   * was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
+   * Other related parts of the content are needed to complete parsing. The list
+   * of URLs to missing parts may be provided in arguments. The Fetcher may
+   * decide to fetch these parts at once, then put them into Content.metadata,
+   * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
+   * There was no content to be parsed - probably caused by errors at protocol
+   * stage.
+   */
   public java.lang.Integer getMinorCode() {
     return minorCode;
   }
 
   /**
-   * Sets the value of the 'minorCode' field.
-   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.   * @param value the value to set.
+   * Sets the value of the 'minorCode' field. Minor parsing status' including
+   * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+   * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+   * another URL. The target URL can be retrieved from the arguments.,
+   * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+   * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+   * was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
+   * Other related parts of the content are needed to complete parsing. The list
+   * of URLs to missing parts may be provided in arguments. The Fetcher may
+   * decide to fetch these parts at once, then put them into Content.metadata,
+   * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
+   * There was no content to be parsed - probably caused by errors at protocol
+   * stage. * @param value the value to set.
    */
   public void setMinorCode(java.lang.Integer value) {
     this.minorCode = value;
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.   * @param value the value to set.
+   * Checks the dirty status of the 'minorCode' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Minor
+   * parsing status' including SUCCESS_OK - Successful parse devoid of anomalies
+   * or issues, SUCCESS_REDIRECT - Parsed content contains a directive to
+   * redirect to another URL. The target URL can be retrieved from the
+   * arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which
+   * may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed.
+   * Content was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
+   * Other related parts of the content are needed to complete parsing. The list
+   * of URLs to missing parts may be provided in arguments. The Fetcher may
+   * decide to fetch these parts at once, then put them into Content.metadata,
+   * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
+   * There was no content to be parsed - probably caused by errors at protocol
+   * stage. * @param value the value to set.
    */
   public boolean isMinorCodeDirty(java.lang.Integer value) {
     return isDirty(1);
@@ -158,24 +259,29 @@
   }
 
   /**
-   * Gets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the parse status code.   */
+   * Gets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the parse status code.
+   */
   public java.util.List<java.lang.CharSequence> getArgs() {
     return args;
   }
 
   /**
-   * Sets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the parse status code.   * @param value the value to set.
+   * Sets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the parse status code. * @param value the value
+   * to set.
    */
   public void setArgs(java.util.List<java.lang.CharSequence> value) {
-    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Optional arguments supplied to compliment and/or justify the parse status code.   * @param value the value to set.
+   * Checks the dirty status of the 'args' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Optional
+   * arguments supplied to compliment and/or justify the parse status code. * @param
+   * value the value to set.
    */
   public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
     return isDirty(2);
@@ -185,17 +291,22 @@
   public static org.apache.nutch.storage.ParseStatus.Builder newBuilder() {
     return new org.apache.nutch.storage.ParseStatus.Builder();
   }
-  
+
   /** Creates a new ParseStatus RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(org.apache.nutch.storage.ParseStatus.Builder other) {
+  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(
+      org.apache.nutch.storage.ParseStatus.Builder other) {
     return new org.apache.nutch.storage.ParseStatus.Builder(other);
   }
-  
-  /** Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus instance */
-  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(org.apache.nutch.storage.ParseStatus other) {
+
+  /**
+   * Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus
+   * instance
+   */
+  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(
+      org.apache.nutch.storage.ParseStatus other) {
     return new org.apache.nutch.storage.ParseStatus.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -218,12 +329,13 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for ParseStatus instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<ParseStatus>
-    implements org.apache.avro.data.RecordBuilder<ParseStatus> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<ParseStatus> implements
+      org.apache.avro.data.RecordBuilder<ParseStatus> {
 
     private int majorCode;
     private int minorCode;
@@ -233,25 +345,28 @@
     private Builder() {
       super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.ParseStatus.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing ParseStatus instance */
     private Builder(org.apache.nutch.storage.ParseStatus other) {
-            super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
+      super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
       if (isValidValue(fields()[0], other.majorCode)) {
-        this.majorCode = (java.lang.Integer) data().deepCopy(fields()[0].schema(), other.majorCode);
+        this.majorCode = (java.lang.Integer) data().deepCopy(
+            fields()[0].schema(), other.majorCode);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.minorCode)) {
-        this.minorCode = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.minorCode);
+        this.minorCode = (java.lang.Integer) data().deepCopy(
+            fields()[1].schema(), other.minorCode);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.args)) {
-        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(fields()[2].schema(), other.args);
+        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(
+            fields()[2].schema(), other.args);
         fieldSetFlags()[2] = true;
       }
     }
@@ -260,68 +375,69 @@
     public java.lang.Integer getMajorCode() {
       return majorCode;
     }
-    
+
     /** Sets the value of the 'majorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder setMajorCode(int value) {
       validate(fields()[0], value);
       this.majorCode = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'majorCode' field has been set */
     public boolean hasMajorCode() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'majorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder clearMajorCode() {
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'minorCode' field */
     public java.lang.Integer getMinorCode() {
       return minorCode;
     }
-    
+
     /** Sets the value of the 'minorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder setMinorCode(int value) {
       validate(fields()[1], value);
       this.minorCode = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'minorCode' field has been set */
     public boolean hasMinorCode() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'minorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder clearMinorCode() {
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'args' field */
     public java.util.List<java.lang.CharSequence> getArgs() {
       return args;
     }
-    
+
     /** Sets the value of the 'args' field */
-    public org.apache.nutch.storage.ParseStatus.Builder setArgs(java.util.List<java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.ParseStatus.Builder setArgs(
+        java.util.List<java.lang.CharSequence> value) {
       validate(fields()[2], value);
       this.args = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'args' field has been set */
     public boolean hasArgs() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'args' field */
     public org.apache.nutch.storage.ParseStatus.Builder clearArgs() {
       args = null;
@@ -328,14 +444,18 @@
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     @Override
     public ParseStatus build() {
       try {
         ParseStatus record = new ParseStatus();
-        record.majorCode = fieldSetFlags()[0] ? this.majorCode : (java.lang.Integer) defaultValue(fields()[0]);
-        record.minorCode = fieldSetFlags()[1] ? this.minorCode : (java.lang.Integer) defaultValue(fields()[1]);
-        record.args = fieldSetFlags()[2] ? this.args : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[2]));
+        record.majorCode = fieldSetFlags()[0] ? this.majorCode
+            : (java.lang.Integer) defaultValue(fields()[0]);
+        record.minorCode = fieldSetFlags()[1] ? this.minorCode
+            : (java.lang.Integer) defaultValue(fields()[1]);
+        record.args = fieldSetFlags()[2] ? this.args
+            : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper(
+                (java.util.List) defaultValue(fields()[2]));
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -342,92 +462,155 @@
       }
     }
   }
-  
-  public ParseStatus.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public ParseStatus.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public ParseStatus newInstance(){
+  public ParseStatus newInstance() {
     return newBuilder().build();
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends ParseStatus implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'majorCode' field.
-	   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)	   */
-	  public java.lang.Integer getMajorCode() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'majorCode' field.
-	   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)	   * @param value the value to set.
-	   */
-	  public void setMajorCode(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)	   * @param value the value to set.
-	   */
-	  public boolean isMajorCodeDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'minorCode' field.
-	   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.	   */
-	  public java.lang.Integer getMinorCode() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'minorCode' field.
-	   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.	   * @param value the value to set.
-	   */
-	  public void setMinorCode(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.	   * @param value the value to set.
-	   */
-	  public boolean isMinorCodeDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the parse status code.	   */
-	  public java.util.List<java.lang.CharSequence> getArgs() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the parse status code.	   * @param value the value to set.
-	   */
-	  public void setArgs(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Optional arguments supplied to compliment and/or justify the parse status code.	   * @param value the value to set.
-	   */
-	  public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends ParseStatus implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'majorCode' field. Major parsing status' including
+     * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded),
+     * FAILED (General failure. There may be a more specific error message in
+     * arguments.)
+     */
+    public java.lang.Integer getMajorCode() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'majorCode' field. Major parsing status' including
+     * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded),
+     * FAILED (General failure. There may be a more specific error message in
+     * arguments.) * @param value the value to set.
+     */
+    public void setMajorCode(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'majorCode' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. Major
+     * parsing status' including NOTPARSED (Parsing was not performed), SUCCESS
+     * (Parsing succeeded), FAILED (General failure. There may be a more
+     * specific error message in arguments.) * @param value the value to set.
+     */
+    public boolean isMajorCodeDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'minorCode' field. Minor parsing status' including
+     * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+     * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+     * another URL. The target URL can be retrieved from the arguments.,
+     * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+     * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+     * was truncated, but the parser cannot handle incomplete content.,
+     * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content
+     * may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing
+     * failed. Other related parts of the content are needed to complete
+     * parsing. The list of URLs to missing parts may be provided in arguments.
+     * The Fetcher may decide to fetch these parts at once, then put them into
+     * Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT
+     * - Parsing failed. There was no content to be parsed - probably caused by
+     * errors at protocol stage.
+     */
+    public java.lang.Integer getMinorCode() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'minorCode' field. Minor parsing status' including
+     * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+     * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+     * another URL. The target URL can be retrieved from the arguments.,
+     * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+     * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+     * was truncated, but the parser cannot handle incomplete content.,
+     * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content
+     * may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing
+     * failed. Other related parts of the content are needed to complete
+     * parsing. The list of URLs to missing parts may be provided in arguments.
+     * The Fetcher may decide to fetch these parts at once, then put them into
+     * Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT
+     * - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.
+     * @param value the value to set.
+     */
+    public void setMinorCode(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'minorCode' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. Minor
+     * parsing status' including SUCCESS_OK - Successful parse devoid of
+     * anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a
+     * directive to redirect to another URL. The target URL can be retrieved
+     * from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception
+     * occured which may be retrieved from the arguments., FAILED_TRUNCATED -
+     * Parsing failed. Content was truncated, but the parser cannot handle
+     * incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid
+     * format e.g. the content may be corrupted or of wrong type.,
+     * FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content
+     * are needed to complete parsing. The list of URLs to missing parts may be
+     * provided in arguments. The Fetcher may decide to fetch these parts at
+     * once, then put them into Content.metadata, and supply them for
+     * re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no
+     * content to be parsed - probably caused by errors at protocol stage.
+     * @param value the value to set.
+     */
+    public boolean isMinorCodeDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'args' field. Optional arguments supplied to
+     * compliment and/or justify the parse status code.
+     */
+    public java.util.List<java.lang.CharSequence> getArgs() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'args' field. Optional arguments supplied to
+     * compliment and/or justify the parse status code.
+     * @param value the value to set.
+     */
+    public void setArgs(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'args' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Optional arguments supplied to compliment and/or justify the parse status code.
+     * @param value the value to set.
+     */
+    public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/ProtocolStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ProtocolStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/ProtocolStatus.java	(working copy)
@@ -1,39 +1,40 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage;  
+package org.apache.nutch.storage;
 
 import org.apache.nutch.protocol.ProtocolStatusUtils;
 
 @SuppressWarnings("all")
 /** A nested container representing data captured from web server responses. */
-public class ProtocolStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}");
+public class ProtocolStatus extends
+    org.apache.gora.persistency.impl.PersistentBase implements
+    org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    CODE(0, "code"),
-    ARGS(1, "args"),
-    LAST_MODIFIED(2, "lastModified"),
-    ;
+    CODE(0, "code"), ARGS(1, "args"), LAST_MODIFIED(2, "lastModified"), ;
     /**
      * Field's index.
      */
@@ -46,38 +47,50 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "code",
-  "args",
-  "lastModified",
-  };
+  public static final String[] _ALL_FIELDS = { "code", "args", "lastModified", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -84,53 +97,140 @@
     return ProtocolStatus._ALL_FIELDS.length;
   }
 
-  /** A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching. */
+  /**
+   * A protocol response code which can be one of SUCCESS - content was
+   * retrieved without errors, FAILED - Content was not retrieved. Any further
+   * errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not
+   * found. Application may attempt to retry later, GONE - Resource is gone,
+   * MOVED - Resource has moved permanently. New url should be found in args,
+   * TEMP_MOVED - Resource has moved temporarily. New url should be found in
+   * args., NOTFOUND - Resource was not found, RETRY - Temporary failure.
+   * Application may retry immediately., EXCEPTION - Unspecified exception
+   * occured. Further information may be provided in args., ACCESS_DENIED -
+   * Access denied - authorization required, but missing/incorrect.,
+   * ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too
+   * many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since
+   * the last fetch., WOULDBLOCK - Request was refused by protocol plugins,
+   * because it would block. The expected number of milliseconds to wait before
+   * retry may be provided in args., BLOCKED - Thread was blocked http.max.delays
+   * times during fetching.
+   */
   private int code;
-  /** Optional arguments supplied to compliment and/or justify the response code. */
+  /**
+   * Optional arguments supplied to compliment and/or justify the response code.
+   */
   private java.util.List<java.lang.CharSequence> args;
-  /** A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself. */
+  /**
+   * A server reponse indicating when this page was last modified, this can be
+   * unreliable at times hence this is used as a default fall back value for the
+   * preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage
+   * itself.
+   */
   private long lastModified;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return code;
-    case 1: return args;
-    case 2: return lastModified;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return code;
+    case 1:
+      return args;
+    case 2:
+      return lastModified;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: code = (java.lang.Integer)(value); break;
-    case 1: args = (java.util.List<java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
-    case 2: lastModified = (java.lang.Long)(value); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      code = (java.lang.Integer) (value);
+      break;
+    case 1:
+      args = (java.util.List<java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyListWrapper(
+              (java.util.List) value));
+      break;
+    case 2:
+      lastModified = (java.lang.Long) (value);
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'code' field.
-   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.   */
+   * Gets the value of the 'code' field. A protocol response code which can be
+   * one of SUCCESS - content was retrieved without errors, FAILED - Content was
+   * not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND
+   * - This protocol was not found. Application may attempt to retry later, GONE
+   * - Resource is gone, MOVED - Resource has moved permanently. New url should
+   * be found in args, TEMP_MOVED - Resource has moved temporarily. New url
+   * should be found in args., NOTFOUND - Resource was not found, RETRY -
+   * Temporary failure. Application may retry immediately., EXCEPTION -
+   * Unspecified exception occured. Further information may be provided in
+   * args., ACCESS_DENIED - Access denied - authorization required, but
+   * missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules.,
+   * REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching.,
+   * NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was
+   * refused by protocol plugins, because it would block. The expected number of
+   * milliseconds to wait before retry may be provided in args., BLOCKED -
+   * Thread was blocked http.max.delays times during fetching.
+   */
   public java.lang.Integer getCode() {
     return code;
   }
 
   /**
-   * Sets the value of the 'code' field.
-   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.   * @param value the value to set.
+   * Sets the value of the 'code' field. A protocol response code which can be
+   * one of SUCCESS - content was retrieved without errors, FAILED - Content was
+   * not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND
+   * - This protocol was not found. Application may attempt to retry later, GONE
+   * - Resource is gone, MOVED - Resource has moved permanently. New url should
+   * be found in args, TEMP_MOVED - Resource has moved temporarily. New url
+   * should be found in args., NOTFOUND - Resource was not found, RETRY -
+   * Temporary failure. Application may retry immediately., EXCEPTION -
+   * Unspecified exception occured. Further information may be provided in
+   * args., ACCESS_DENIED - Access denied - authorization required, but
+   * missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules.,
+   * REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching.,
+   * NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was
+   * refused by protocol plugins, because it would block. The expected number of
+   * milliseconds to wait before retry may be provided in args., BLOCKED -
+   * Thread was blocked http.max.delays times during fetching.
+   * @param value the value to set.
    */
   public void setCode(java.lang.Integer value) {
     this.code = value;
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.   * @param value the value to set.
+   * Checks the dirty status of the 'code' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * protocol response code which can be one of SUCCESS - content was retrieved
+   * without errors, FAILED - Content was not retrieved. Any further errors may
+   * be indicated in args, PROTO_NOT_FOUND - This protocol was not found.
+   * Application may attempt to retry later, GONE - Resource is gone, MOVED -
+   * Resource has moved permanently. New url should be found in args, TEMP_MOVED
+   * - Resource has moved temporarily. New url should be found in args.,
+   * NOTFOUND - Resource was not found, RETRY - Temporary failure. Application
+   * may retry immediately., EXCEPTION - Unspecified exception occured. Further
+   * information may be provided in args., ACCESS_DENIED - Access denied -
+   * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
+   * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
+   * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch.,
+   * WOULDBLOCK - Request was refused by protocol plugins, because it would
+   * block. The expected number of milliseconds to wait before retry may be
+   * provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+   * @param value the value to set.
    */
   public boolean isCodeDirty(java.lang.Integer value) {
     return isDirty(0);
@@ -137,24 +237,29 @@
   }
 
   /**
-   * Gets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the response code.   */
+   * Gets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the response code.
+   */
   public java.util.List<java.lang.CharSequence> getArgs() {
     return args;
   }
 
   /**
-   * Sets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the response code.   * @param value the value to set.
+   * Sets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the response code.
+   * @param value the value to set.
    */
   public void setArgs(java.util.List<java.lang.CharSequence> value) {
-    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Optional arguments supplied to compliment and/or justify the response code.   * @param value the value to set.
+   * Checks the dirty status of the 'args' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Optional
+   * arguments supplied to compliment and/or justify the response code.
+   * @param value the value to set.
    */
   public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
     return isDirty(1);
@@ -161,24 +266,34 @@
   }
 
   /**
-   * Gets the value of the 'lastModified' field.
-   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.   */
+   * Gets the value of the 'lastModified' field. A server reponse indicating
+   * when this page was last modified, this can be unreliable at times hence
+   * this is used as a default fall back value for the preferred 'modifiedTime'
+   * and 'preModifiedTime' obtained from the WebPage itself.
+   */
   public java.lang.Long getLastModified() {
     return lastModified;
   }
 
   /**
-   * Sets the value of the 'lastModified' field.
-   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.   * @param value the value to set.
+   * Sets the value of the 'lastModified' field. A server reponse indicating
+   * when this page was last modified, this can be unreliable at times hence
+   * this is used as a default fall back value for the preferred 'modifiedTime'
+   * and 'preModifiedTime' obtained from the WebPage itself.
+   * @param value the value to set.
    */
   public void setLastModified(java.lang.Long value) {
     this.lastModified = value;
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.   * @param value the value to set.
+   * Checks the dirty status of the 'lastModified' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A server
+   * reponse indicating when this page was last modified, this can be unreliable
+   * at times hence this is used as a default fall back value for the preferred
+   * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+   * @param value the value to set.
    */
   public boolean isLastModifiedDirty(java.lang.Long value) {
     return isDirty(2);
@@ -188,17 +303,22 @@
   public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder() {
     return new org.apache.nutch.storage.ProtocolStatus.Builder();
   }
-  
+
   /** Creates a new ProtocolStatus RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(org.apache.nutch.storage.ProtocolStatus.Builder other) {
+  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(
+      org.apache.nutch.storage.ProtocolStatus.Builder other) {
     return new org.apache.nutch.storage.ProtocolStatus.Builder(other);
   }
-  
-  /** Creates a new ProtocolStatus RecordBuilder by copying an existing ProtocolStatus instance */
-  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(org.apache.nutch.storage.ProtocolStatus other) {
+
+  /**
+   * Creates a new ProtocolStatus RecordBuilder by copying an existing
+   * ProtocolStatus instance
+   */
+  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(
+      org.apache.nutch.storage.ProtocolStatus other) {
     return new org.apache.nutch.storage.ProtocolStatus.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -221,12 +341,13 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for ProtocolStatus instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<ProtocolStatus>
-    implements org.apache.avro.data.RecordBuilder<ProtocolStatus> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<ProtocolStatus>
+      implements org.apache.avro.data.RecordBuilder<ProtocolStatus> {
 
     private int code;
     private java.util.List<java.lang.CharSequence> args;
@@ -236,25 +357,28 @@
     private Builder() {
       super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.ProtocolStatus.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing ProtocolStatus instance */
     private Builder(org.apache.nutch.storage.ProtocolStatus other) {
-            super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
+      super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
       if (isValidValue(fields()[0], other.code)) {
-        this.code = (java.lang.Integer) data().deepCopy(fields()[0].schema(), other.code);
+        this.code = (java.lang.Integer) data().deepCopy(fields()[0].schema(),
+            other.code);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.args)) {
-        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(fields()[1].schema(), other.args);
+        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(
+            fields()[1].schema(), other.args);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.lastModified)) {
-        this.lastModified = (java.lang.Long) data().deepCopy(fields()[2].schema(), other.lastModified);
+        this.lastModified = (java.lang.Long) data().deepCopy(
+            fields()[2].schema(), other.lastModified);
         fieldSetFlags()[2] = true;
       }
     }
@@ -263,44 +387,45 @@
     public java.lang.Integer getCode() {
       return code;
     }
-    
+
     /** Sets the value of the 'code' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder setCode(int value) {
       validate(fields()[0], value);
       this.code = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'code' field has been set */
     public boolean hasCode() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'code' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder clearCode() {
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'args' field */
     public java.util.List<java.lang.CharSequence> getArgs() {
       return args;
     }
-    
+
     /** Sets the value of the 'args' field */
-    public org.apache.nutch.storage.ProtocolStatus.Builder setArgs(java.util.List<java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.ProtocolStatus.Builder setArgs(
+        java.util.List<java.lang.CharSequence> value) {
       validate(fields()[1], value);
       this.args = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'args' field has been set */
     public boolean hasArgs() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'args' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder clearArgs() {
       args = null;
@@ -307,38 +432,43 @@
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'lastModified' field */
     public java.lang.Long getLastModified() {
       return lastModified;
     }
-    
+
     /** Sets the value of the 'lastModified' field */
-    public org.apache.nutch.storage.ProtocolStatus.Builder setLastModified(long value) {
+    public org.apache.nutch.storage.ProtocolStatus.Builder setLastModified(
+        long value) {
       validate(fields()[2], value);
       this.lastModified = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'lastModified' field has been set */
     public boolean hasLastModified() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'lastModified' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder clearLastModified() {
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     @Override
     public ProtocolStatus build() {
       try {
         ProtocolStatus record = new ProtocolStatus();
-        record.code = fieldSetFlags()[0] ? this.code : (java.lang.Integer) defaultValue(fields()[0]);
-        record.args = fieldSetFlags()[1] ? this.args : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[1]));
-        record.lastModified = fieldSetFlags()[2] ? this.lastModified : (java.lang.Long) defaultValue(fields()[2]);
+        record.code = fieldSetFlags()[0] ? this.code
+            : (java.lang.Integer) defaultValue(fields()[0]);
+        record.args = fieldSetFlags()[1] ? this.args
+            : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper(
+                (java.util.List) defaultValue(fields()[1]));
+        record.lastModified = fieldSetFlags()[2] ? this.lastModified
+            : (java.lang.Long) defaultValue(fields()[2]);
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -345,101 +475,173 @@
       }
     }
   }
-  
-  public ProtocolStatus.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public ProtocolStatus.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public ProtocolStatus newInstance(){
+  public ProtocolStatus newInstance() {
     return newBuilder().build();
   }
 
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   /**
    * A convenience method which returns a successful {@link ProtocolStatus}.
+   * 
    * @return the {@link ProtocolStatus} value for 200 (success).
    */
   public boolean isSuccess() {
-    return code == ProtocolStatusUtils.SUCCESS; 
+    return code == ProtocolStatusUtils.SUCCESS;
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends ProtocolStatus implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'code' field.
-	   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.	   */
-	  public java.lang.Integer getCode() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'code' field.
-	   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.	   * @param value the value to set.
-	   */
-	  public void setCode(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.	   * @param value the value to set.
-	   */
-	  public boolean isCodeDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the response code.	   */
-	  public java.util.List<java.lang.CharSequence> getArgs() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the response code.	   * @param value the value to set.
-	   */
-	  public void setArgs(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Optional arguments supplied to compliment and/or justify the response code.	   * @param value the value to set.
-	   */
-	  public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'lastModified' field.
-	   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.	   */
-	  public java.lang.Long getLastModified() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'lastModified' field.
-	   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.	   * @param value the value to set.
-	   */
-	  public void setLastModified(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.	   * @param value the value to set.
-	   */
-	  public boolean isLastModifiedDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends ProtocolStatus implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'code' field. A protocol response code which can be
+     * one of SUCCESS - content was retrieved without errors, FAILED - Content
+     * was not retrieved. Any further errors may be indicated in args,
+     * PROTO_NOT_FOUND - This protocol was not found. Application may attempt to
+     * retry later, GONE - Resource is gone, MOVED - Resource has moved
+     * permanently. New url should be found in args, TEMP_MOVED - Resource has
+     * moved temporarily. New url should be found in args., NOTFOUND - Resource
+     * was not found, RETRY - Temporary failure. Application may retry
+     * immediately., EXCEPTION - Unspecified exception occurred. Further
+     * information may be provided in args., ACCESS_DENIED - Access denied -
+     * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
+     * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
+     * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last
+     * fetch., WOULDBLOCK - Request was refused by protocol plugins, because it
+     * would block. The expected number of milliseconds to wait before retry may
+     * be provided in args., BLOCKED - Thread was blocked http.max.delays times
+     * during fetching.
+     */
+    public java.lang.Integer getCode() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'code' field. A protocol response code which can be
+     * one of SUCCESS - content was retrieved without errors, FAILED - Content
+     * was not retrieved. Any further errors may be indicated in args,
+     * PROTO_NOT_FOUND - This protocol was not found. Application may attempt to
+     * retry later, GONE - Resource is gone, MOVED - Resource has moved
+     * permanently. New url should be found in args, TEMP_MOVED - Resource has
+     * moved temporarily. New url should be found in args., NOTFOUND - Resource
+     * was not found, RETRY - Temporary failure. Application may retry
+     * immediately., EXCEPTION - Unspecified exception occurred. Further
+     * information may be provided in args., ACCESS_DENIED - Access denied -
+     * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
+     * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
+     * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last
+     * fetch., WOULDBLOCK - Request was refused by protocol plugins, because it
+     * would block. The expected number of milliseconds to wait before retry may
+     * be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+     * @param value the value to set.
+     */
+    public void setCode(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'code' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * protocol response code which can be one of SUCCESS - content was
+     * retrieved without errors, FAILED - Content was not retrieved. Any further
+     * errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not
+     * found. Application may attempt to retry later, GONE - Resource is gone,
+     * MOVED - Resource has moved permanently. New url should be found in args,
+     * TEMP_MOVED - Resource has moved temporarily. New url should be found in
+     * args., NOTFOUND - Resource was not found, RETRY - Temporary failure.
+     * Application may retry immediately., EXCEPTION - Unspecified exception
+     * occurred. Further information may be provided in args., ACCESS_DENIED -
+     * Access denied - authorization required, but missing/incorrect.,
+     * ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too
+     * many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged
+     * since the last fetch., WOULDBLOCK - Request was refused by protocol
+     * plugins, because it would block. The expected number of milliseconds to
+     * wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+     * @param value the value to set.
+     */
+    public boolean isCodeDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'args' field. Optional arguments supplied to
+     * complement and/or justify the response code.
+     */
+    public java.util.List<java.lang.CharSequence> getArgs() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'args' field. Optional arguments supplied to
+     * complement and/or justify the response code.
+     * @param value the value to set.
+     */
+    public void setArgs(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'args' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Optional arguments supplied to complement and/or justify the response code.
+     * @param value the value to set.
+     */
+    public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'lastModified' field. A server response indicating
+     * when this page was last modified, this can be unreliable at times hence
+     * this is used as a default fall back value for the preferred
+     * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+     */
+    public java.lang.Long getLastModified() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'lastModified' field. A server response indicating
+     * when this page was last modified, this can be unreliable at times hence
+     * this is used as a default fall back value for the preferred
+     * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+     * @param value the value to set.
+     */
+    public void setLastModified(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'lastModified' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. A
+     * server response indicating when this page was last modified, this can be
+     * unreliable at times hence this is used as a default fall back value for
+     * the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+     * @param value the value to set.
+     */
+    public boolean isLastModifiedDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/StorageUtils.java
===================================================================
--- src/java/org/apache/nutch/storage/StorageUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/StorageUtils.java	(working copy)
@@ -35,13 +35,14 @@
 import java.util.Iterator;
 
 /**
- * Entry point to Gora store/mapreduce functionality.
- * Translates the concept of "crawlid" to the corresponding Gora support.
+ * Entry point to Gora store/mapreduce functionality. Translates the concept of
+ * "crawlid" to the corresponding Gora support.
  */
 public class StorageUtils {
 
-  /** Creates a store for the given persistentClass.
-   * Currently supports Webpage and Host stores.
+  /**
+   * Creates a store for the given persistentClass. Currently supports Webpage
+   * and Host stores.
    * 
    * @param conf
    * @param keyClass
@@ -51,8 +52,9 @@
    * @throws GoraException
    */
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> DataStore<K, V> createWebStore(Configuration conf,
-      Class<K> keyClass, Class<V> persistentClass) throws ClassNotFoundException, GoraException {
+  public static <K, V extends Persistent> DataStore<K, V> createWebStore(
+      Configuration conf, Class<K> keyClass, Class<V> persistentClass)
+      throws ClassNotFoundException, GoraException {
 
     String crawlId = conf.get(Nutch.CRAWL_ID_KEY, "");
     String schemaPrefix = "";
@@ -59,7 +61,7 @@
     if (!crawlId.isEmpty()) {
       schemaPrefix = crawlId + "_";
     }
-      
+
     String schema;
     if (WebPage.class.equals(persistentClass)) {
       schema = conf.get("storage.schema.webpage", "webpage");
@@ -68,52 +70,52 @@
       schema = conf.get("storage.schema.host", "host");
       conf.set("preferred.schema.name", schemaPrefix + "host");
     } else {
-      throw new UnsupportedOperationException("Unable to create store for class " + persistentClass);
+      throw new UnsupportedOperationException(
+          "Unable to create store for class " + persistentClass);
     }
 
-    Class<? extends DataStore<K, V>> dataStoreClass =
-      (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
-    return DataStoreFactory.createDataStore(dataStoreClass,
-            keyClass, persistentClass, conf, schema);
+    Class<? extends DataStore<K, V>> dataStoreClass = (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
+    return DataStoreFactory.createDataStore(dataStoreClass, keyClass,
+        persistentClass, conf, schema);
   }
-  
+
   /**
    * Return the Persistent Gora class used to persist Nutch Web data.
    * 
-   * @param the Nutch configuration 
+   * @param conf
+   *          the Nutch configuration
    * @return the Gora DataStore persistent class
    * @throws ClassNotFoundException
    */
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> Class<? extends DataStore<K, V>>
-  getDataStoreClass(Configuration conf)  throws ClassNotFoundException {
-    return (Class<? extends DataStore<K, V>>)
-      Class.forName(conf.get("storage.data.store.class",
-          "org.apache.gora.sql.store.SqlStore"));
+  public static <K, V extends Persistent> Class<? extends DataStore<K, V>> getDataStoreClass(
+      Configuration conf) throws ClassNotFoundException {
+    return (Class<? extends DataStore<K, V>>) Class.forName(conf.get(
+        "storage.data.store.class", "org.apache.gora.sql.store.SqlStore"));
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, null, true);
+      throws ClassNotFoundException, IOException {
+    initMapperJob(job, fields, outKeyClass, outValueClass, mapperClass, null,
+        true);
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass,
       Class<? extends Partitioner<K, V>> partitionerClass)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, partitionerClass, true);
+      throws ClassNotFoundException, IOException {
+    initMapperJob(job, fields, outKeyClass, outValueClass, mapperClass,
+        partitionerClass, true);
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass,
       Class<? extends Partitioner<K, V>> partitionerClass, boolean reuseObjects)
       throws ClassNotFoundException, IOException {
@@ -154,10 +156,10 @@
 
   public static <K, V> void initReducerJob(Job job,
       Class<? extends GoraReducer<K, V, String, WebPage>> reducerClass)
-  throws ClassNotFoundException, GoraException {
+      throws ClassNotFoundException, GoraException {
     Configuration conf = job.getConfiguration();
-    DataStore<String, WebPage> store =
-      StorageUtils.createWebStore(conf, String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(conf,
+        String.class, WebPage.class);
     GoraReducer.initReducerJob(job, store, reducerClass);
     GoraOutputFormat.setOutput(job, store, true);
   }
Index: src/java/org/apache/nutch/storage/WebPage.java
===================================================================
--- src/java/org/apache/nutch/storage/WebPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/WebPage.java	(working copy)
@@ -1,57 +1,46 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage;  
+package org.apache.nutch.storage;
+
 @SuppressWarnings("all")
 /** WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time */
-public class WebPage extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The original associated with this WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The number of retried attempts at fetching the WebPage since it was last successfully fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. 
This is important to understand the changing nature of the WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. 
The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The entire raw document content e.g. raw XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. 
This is essentially the WebPage's fingerprint represnting its state for any point in time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The title of the WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The textual content of the WebPage devoid from native markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. 
There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler\",\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which direct outside of the current domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which link to pages within the current domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. 
They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.\",\"default\":null}]}");
+public class WebPage extends org.apache.gora.persistency.impl.PersistentBase
+    implements org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The original associated with this WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The number of retried attempts at fetching the WebPage since it was last successfully fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. 
This is important to understand the changing nature of the WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. 
The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The entire raw document content e.g. raw XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. 
This is essentially the WebPage's fingerprint represnting its state for any point in time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The title of the WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The textual content of the WebPage devoid from native markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. 
There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler\",\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which direct outside of the current domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which link to pages within the current domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. 
They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.\",\"default\":null}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    BASE_URL(0, "baseUrl"),
-    STATUS(1, "status"),
-    FETCH_TIME(2, "fetchTime"),
-    PREV_FETCH_TIME(3, "prevFetchTime"),
-    FETCH_INTERVAL(4, "fetchInterval"),
-    RETRIES_SINCE_FETCH(5, "retriesSinceFetch"),
-    MODIFIED_TIME(6, "modifiedTime"),
-    PREV_MODIFIED_TIME(7, "prevModifiedTime"),
-    PROTOCOL_STATUS(8, "protocolStatus"),
-    CONTENT(9, "content"),
-    CONTENT_TYPE(10, "contentType"),
-    PREV_SIGNATURE(11, "prevSignature"),
-    SIGNATURE(12, "signature"),
-    TITLE(13, "title"),
-    TEXT(14, "text"),
-    PARSE_STATUS(15, "parseStatus"),
-    SCORE(16, "score"),
-    REPR_URL(17, "reprUrl"),
-    HEADERS(18, "headers"),
-    OUTLINKS(19, "outlinks"),
-    INLINKS(20, "inlinks"),
-    MARKERS(21, "markers"),
-    METADATA(22, "metadata"),
-    BATCH_ID(23, "batchId"),
-    ;
+    BASE_URL(0, "baseUrl"), STATUS(1, "status"), FETCH_TIME(2, "fetchTime"), PREV_FETCH_TIME(
+        3, "prevFetchTime"), FETCH_INTERVAL(4, "fetchInterval"), RETRIES_SINCE_FETCH(
+        5, "retriesSinceFetch"), MODIFIED_TIME(6, "modifiedTime"), PREV_MODIFIED_TIME(
+        7, "prevModifiedTime"), PROTOCOL_STATUS(8, "protocolStatus"), CONTENT(
+        9, "content"), CONTENT_TYPE(10, "contentType"), PREV_SIGNATURE(11,
+        "prevSignature"), SIGNATURE(12, "signature"), TITLE(13, "title"), TEXT(
+        14, "text"), PARSE_STATUS(15, "parseStatus"), SCORE(16, "score"), REPR_URL(
+        17, "reprUrl"), HEADERS(18, "headers"), OUTLINKS(19, "outlinks"), INLINKS(
+        20, "inlinks"), MARKERS(21, "markers"), METADATA(22, "metadata"), BATCH_ID(
+        23, "batchId"), ;
     /**
      * Field's index.
      */
@@ -64,59 +53,55 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "baseUrl",
-  "status",
-  "fetchTime",
-  "prevFetchTime",
-  "fetchInterval",
-  "retriesSinceFetch",
-  "modifiedTime",
-  "prevModifiedTime",
-  "protocolStatus",
-  "content",
-  "contentType",
-  "prevSignature",
-  "signature",
-  "title",
-  "text",
-  "parseStatus",
-  "score",
-  "reprUrl",
-  "headers",
-  "outlinks",
-  "inlinks",
-  "markers",
-  "metadata",
-  "batchId",
-  };
+  public static final String[] _ALL_FIELDS = { "baseUrl", "status",
+      "fetchTime", "prevFetchTime", "fetchInterval", "retriesSinceFetch",
+      "modifiedTime", "prevModifiedTime", "protocolStatus", "content",
+      "contentType", "prevSignature", "signature", "title", "text",
+      "parseStatus", "score", "reprUrl", "headers", "outlinks", "inlinks",
+      "markers", "metadata", "batchId", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -125,28 +110,72 @@
 
   /** The original associated with this WebPage. */
   private java.lang.CharSequence baseUrl;
-  /** A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified */
+  /**
+   * A crawl status associated with the WebPage, can be of value
+   * STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage
+   * was successfully fetched, STATUS_GONE - WebPage no longer exists,
+   * STATUS_REDIR_TEMP - WebPage temporarily redirects to other page,
+   * STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   */
   private int status;
   /** The system time in milliseconds for when the page was fetched. */
   private long fetchTime;
-  /** The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation */
+  /**
+   * The system time in milliseconds for when the page was last fetched if it
+   * was previously fetched which can be used to calculate time delta within a
+   * fetching schedule implementation
+   */
   private long prevFetchTime;
-  /** The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented. */
+  /**
+   * The default number of seconds between re-fetches of a page. The default is
+   * considered as 30 days unless a custom fetch schedule is implemented.
+   */
   private int fetchInterval;
-  /** The number of retried attempts at fetching the WebPage since it was last successfully fetched. */
+  /**
+   * The number of retried attempts at fetching the WebPage since it was last
+   * successfully fetched.
+   */
   private int retriesSinceFetch;
-  /** The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage. */
+  /**
+   * The system time in milliseconds for when this WebPage was modified by the
+   * WebPage author, if this is not available we default to the server for this
+   * information. This is important to understand the changing nature of the
+   * WebPage.
+   */
   private long modifiedTime;
-  /** The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage. */
+  /**
+   * The system time in milliseconds for when this WebPage was previously
+   * modified by the author, if this is not available then we default to the
+   * server for this information. This is important to understand the changing
+   * nature of a WebPage.
+   */
   private long prevModifiedTime;
   private org.apache.nutch.storage.ProtocolStatus protocolStatus;
   /** The entire raw document content e.g. raw XHTML */
   private java.nio.ByteBuffer content;
-  /** The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. */
+  /**
+   * The type of the content contained within the document itself. ContentType
+   * is an alias for MimeType. Historically, this parameter was only called
+   * MimeType, but since this is actually the value included in the HTTP
+   * Content-Type header, it can also include the character set encoding, which
+   * makes it more than just a MimeType specification. If MimeType is specified
+   * e.g. not None, that value is used. Otherwise, ContentType is used. If
+   * neither is given, the DEFAULT_CONTENT_TYPE setting is used.
+   */
   private java.lang.CharSequence contentType;
-  /** An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints. */
+  /**
+   * An implementation of a WebPage's previous signature from which it can be
+   * identified and referenced at any point in time. This can be used to
+   * uniquely identify WebPage deltas based on page fingerprints.
+   */
   private java.nio.ByteBuffer prevSignature;
-  /** An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time. */
+  /**
+   * An implementation of a WebPage's signature from which it can be identified
+   * and referenced at any point in time. This is essentially the WebPage's
+   * fingerprint representing its state for any point in time.
+   */
   private java.nio.ByteBuffer signature;
   /** The title of the WebPage. */
   private java.lang.CharSequence title;
@@ -153,105 +182,223 @@
   /** The textual content of the WebPage devoid from native markup. */
   private java.lang.CharSequence text;
   private org.apache.nutch.storage.ParseStatus parseStatus;
-  /** A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics. */
+  /**
+   * A score used to determine a WebPage's relevance within the web graph it is
+   * part of. This score may change over time based on graph characteristics.
+   */
   private float score;
-  /** In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler */
+  /**
+   * In the case where we are given two urls, a source and a destination of a
+   * redirect, we should determine and persist the representative url. The logic
+   * used to determine this is based largely on Yahoo!'s Slurp Crawler
+   */
   private java.lang.CharSequence reprUrl;
-  /** Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> headers;
+  /**
+   * Header information returned from the web server used to server the content
+   * which is subsequently fetched from. This includes keys such as
+   * TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+   * CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE,
+   * LAST_MODIFIED and LOCATION.
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> headers;
   /** Embedded hyperlinks which direct outside of the current domain. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
   /** Embedded hyperlinks which link to pages within the current domain. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
-  /** Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> markers;
-  /** A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage. */
-  private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
-  /** A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId. */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
+  /**
+   * Markers flags which represent user and machine decisions which have
+   * affected influenced a WebPage's current state. Markers can be system
+   * specific and user machine driven in nature. They are assigned to a WebPage
+   * on a job-by-job basis and thier values indicative of what actions should be
+   * associated with a WebPage.
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> markers;
+  /**
+   * A multi-valued metadata container used for storing everything from
+   * structured WebPage characterists, to ad-hoc extraction and metadata
+   * augmentation for any given WebPage.
+   */
+  private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
+  /**
+   * A batchId that this WebPage is assigned to. WebPage's are fetched in
+   * batches, called fetchlists. Pages are partitioned but can always be
+   * associated and fetched alongside pages of similar value (within a crawl
+   * cycle) based on batchId.
+   */
   private java.lang.CharSequence batchId;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return baseUrl;
-    case 1: return status;
-    case 2: return fetchTime;
-    case 3: return prevFetchTime;
-    case 4: return fetchInterval;
-    case 5: return retriesSinceFetch;
-    case 6: return modifiedTime;
-    case 7: return prevModifiedTime;
-    case 8: return protocolStatus;
-    case 9: return content;
-    case 10: return contentType;
-    case 11: return prevSignature;
-    case 12: return signature;
-    case 13: return title;
-    case 14: return text;
-    case 15: return parseStatus;
-    case 16: return score;
-    case 17: return reprUrl;
-    case 18: return headers;
-    case 19: return outlinks;
-    case 20: return inlinks;
-    case 21: return markers;
-    case 22: return metadata;
-    case 23: return batchId;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return baseUrl;
+    case 1:
+      return status;
+    case 2:
+      return fetchTime;
+    case 3:
+      return prevFetchTime;
+    case 4:
+      return fetchInterval;
+    case 5:
+      return retriesSinceFetch;
+    case 6:
+      return modifiedTime;
+    case 7:
+      return prevModifiedTime;
+    case 8:
+      return protocolStatus;
+    case 9:
+      return content;
+    case 10:
+      return contentType;
+    case 11:
+      return prevSignature;
+    case 12:
+      return signature;
+    case 13:
+      return title;
+    case 14:
+      return text;
+    case 15:
+      return parseStatus;
+    case 16:
+      return score;
+    case 17:
+      return reprUrl;
+    case 18:
+      return headers;
+    case 19:
+      return outlinks;
+    case 20:
+      return inlinks;
+    case 21:
+      return markers;
+    case 22:
+      return metadata;
+    case 23:
+      return batchId;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: baseUrl = (java.lang.CharSequence)(value); break;
-    case 1: status = (java.lang.Integer)(value); break;
-    case 2: fetchTime = (java.lang.Long)(value); break;
-    case 3: prevFetchTime = (java.lang.Long)(value); break;
-    case 4: fetchInterval = (java.lang.Integer)(value); break;
-    case 5: retriesSinceFetch = (java.lang.Integer)(value); break;
-    case 6: modifiedTime = (java.lang.Long)(value); break;
-    case 7: prevModifiedTime = (java.lang.Long)(value); break;
-    case 8: protocolStatus = (org.apache.nutch.storage.ProtocolStatus)(value); break;
-    case 9: content = (java.nio.ByteBuffer)(value); break;
-    case 10: contentType = (java.lang.CharSequence)(value); break;
-    case 11: prevSignature = (java.nio.ByteBuffer)(value); break;
-    case 12: signature = (java.nio.ByteBuffer)(value); break;
-    case 13: title = (java.lang.CharSequence)(value); break;
-    case 14: text = (java.lang.CharSequence)(value); break;
-    case 15: parseStatus = (org.apache.nutch.storage.ParseStatus)(value); break;
-    case 16: score = (java.lang.Float)(value); break;
-    case 17: reprUrl = (java.lang.CharSequence)(value); break;
-    case 18: headers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 19: outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 20: inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 21: markers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 22: metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 23: batchId = (java.lang.CharSequence)(value); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      baseUrl = (java.lang.CharSequence) (value);
+      break;
+    case 1:
+      status = (java.lang.Integer) (value);
+      break;
+    case 2:
+      fetchTime = (java.lang.Long) (value);
+      break;
+    case 3:
+      prevFetchTime = (java.lang.Long) (value);
+      break;
+    case 4:
+      fetchInterval = (java.lang.Integer) (value);
+      break;
+    case 5:
+      retriesSinceFetch = (java.lang.Integer) (value);
+      break;
+    case 6:
+      modifiedTime = (java.lang.Long) (value);
+      break;
+    case 7:
+      prevModifiedTime = (java.lang.Long) (value);
+      break;
+    case 8:
+      protocolStatus = (org.apache.nutch.storage.ProtocolStatus) (value);
+      break;
+    case 9:
+      content = (java.nio.ByteBuffer) (value);
+      break;
+    case 10:
+      contentType = (java.lang.CharSequence) (value);
+      break;
+    case 11:
+      prevSignature = (java.nio.ByteBuffer) (value);
+      break;
+    case 12:
+      signature = (java.nio.ByteBuffer) (value);
+      break;
+    case 13:
+      title = (java.lang.CharSequence) (value);
+      break;
+    case 14:
+      text = (java.lang.CharSequence) (value);
+      break;
+    case 15:
+      parseStatus = (org.apache.nutch.storage.ParseStatus) (value);
+      break;
+    case 16:
+      score = (java.lang.Float) (value);
+      break;
+    case 17:
+      reprUrl = (java.lang.CharSequence) (value);
+      break;
+    case 18:
+      headers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 19:
+      outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 20:
+      inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 21:
+      markers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 22:
+      metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 23:
+      batchId = (java.lang.CharSequence) (value);
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'baseUrl' field.
-   * The original associated with this WebPage.   */
+   * Gets the value of the 'baseUrl' field. The original associated with this
+   * WebPage.
+   */
   public java.lang.CharSequence getBaseUrl() {
     return baseUrl;
   }
 
   /**
-   * Sets the value of the 'baseUrl' field.
-   * The original associated with this WebPage.   * @param value the value to set.
+   * Sets the value of the 'baseUrl' field. The original associated with this
+   * WebPage. * @param value the value to set.
    */
   public void setBaseUrl(java.lang.CharSequence value) {
     this.baseUrl = value;
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The original associated with this WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * original associated with this WebPage. * @param value the value to set.
    */
   public boolean isBaseUrlDirty(java.lang.CharSequence value) {
     return isDirty(0);
@@ -258,24 +405,43 @@
   }
 
   /**
-   * Gets the value of the 'status' field.
-   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   */
+   * Gets the value of the 'status' field. A crawl status associated with the
+   * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+   * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
+   * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
+   * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   */
   public java.lang.Integer getStatus() {
     return status;
   }
 
   /**
-   * Sets the value of the 'status' field.
-   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   * @param value the value to set.
+   * Sets the value of the 'status' field. A crawl status associated with the
+   * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+   * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
+   * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
+   * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   * * @param value the value to set.
    */
   public void setStatus(java.lang.Integer value) {
     this.status = value;
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'status' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   * @param value the value to set.
+   * Checks the dirty status of the 'status' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A crawl
+   * status associated with the WebPage, can be of value STATUS_UNFETCHED -
+   * WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully
+   * fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP -
+   * WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage
+   * permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful,
+   * needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching
+   * successful - page is not modified * @param value the value to set.
    */
   public boolean isStatusDirty(java.lang.Integer value) {
     return isDirty(1);
@@ -282,24 +448,27 @@
   }
 
   /**
-   * Gets the value of the 'fetchTime' field.
-   * The system time in milliseconds for when the page was fetched.   */
+   * Gets the value of the 'fetchTime' field. The system time in milliseconds
+   * for when the page was fetched.
+   */
   public java.lang.Long getFetchTime() {
     return fetchTime;
   }
 
   /**
-   * Sets the value of the 'fetchTime' field.
-   * The system time in milliseconds for when the page was fetched.   * @param value the value to set.
+   * Sets the value of the 'fetchTime' field. The system time in milliseconds
+   * for when the page was fetched. * @param value the value to set.
    */
   public void setFetchTime(java.lang.Long value) {
     this.fetchTime = value;
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when the page was fetched.   * @param value the value to set.
+   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when the page was fetched. * @param value
+   * the value to set.
    */
   public boolean isFetchTimeDirty(java.lang.Long value) {
     return isDirty(2);
@@ -306,24 +475,32 @@
   }
 
   /**
-   * Gets the value of the 'prevFetchTime' field.
-   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   */
+   * Gets the value of the 'prevFetchTime' field. The system time in
+   * milliseconds for when the page was last fetched if it was previously
+   * fetched which can be used to calculate time delta within a fetching
+   * schedule implementation
+   */
   public java.lang.Long getPrevFetchTime() {
     return prevFetchTime;
   }
 
   /**
-   * Sets the value of the 'prevFetchTime' field.
-   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   * @param value the value to set.
+   * Sets the value of the 'prevFetchTime' field. The system time in
+   * milliseconds for when the page was last fetched if it was previously
+   * fetched which can be used to calculate time delta within a fetching
+   * schedule implementation * @param value the value to set.
    */
   public void setPrevFetchTime(java.lang.Long value) {
     this.prevFetchTime = value;
     setDirty(3);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   * @param value the value to set.
+   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when the page was last fetched if it was
+   * previously fetched which can be used to calculate time delta within a
+   * fetching schedule implementation * @param value the value to set.
    */
   public boolean isPrevFetchTimeDirty(java.lang.Long value) {
     return isDirty(3);
@@ -330,24 +507,30 @@
   }
 
   /**
-   * Gets the value of the 'fetchInterval' field.
-   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.   */
+   * Gets the value of the 'fetchInterval' field. The default number of seconds
+   * between re-fetches of a page. The default is considered as 30 days unless a
+   * custom fetch schedle is implemented.
+   */
   public java.lang.Integer getFetchInterval() {
     return fetchInterval;
   }
 
   /**
-   * Sets the value of the 'fetchInterval' field.
-   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.   * @param value the value to set.
+   * Sets the value of the 'fetchInterval' field. The default number of seconds
+   * between re-fetches of a page. The default is considered as 30 days unless a
+   * custom fetch schedle is implemented. * @param value the value to set.
    */
   public void setFetchInterval(java.lang.Integer value) {
     this.fetchInterval = value;
     setDirty(4);
   }
-  
+
   /**
-   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.   * @param value the value to set.
+   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. The
+   * default number of seconds between re-fetches of a page. The default is
+   * considered as 30 days unless a custom fetch schedle is implemented. * @param
+   * value the value to set.
    */
   public boolean isFetchIntervalDirty(java.lang.Integer value) {
     return isDirty(4);
@@ -354,24 +537,28 @@
   }
 
   /**
-   * Gets the value of the 'retriesSinceFetch' field.
-   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   */
+   * Gets the value of the 'retriesSinceFetch' field. The number of retried
+   * attempts at fetching the WebPage since it was last successfully fetched.
+   */
   public java.lang.Integer getRetriesSinceFetch() {
     return retriesSinceFetch;
   }
 
   /**
-   * Sets the value of the 'retriesSinceFetch' field.
-   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   * @param value the value to set.
+   * Sets the value of the 'retriesSinceFetch' field. The number of retried
+   * attempts at fetching the WebPage since it was last successfully fetched. * @param
+   * value the value to set.
    */
   public void setRetriesSinceFetch(java.lang.Integer value) {
     this.retriesSinceFetch = value;
     setDirty(5);
   }
-  
+
   /**
-   * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   * @param value the value to set.
+   * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty
+   * if it represents a change that has not yet been written to the database.
+   * The number of retried attempts at fetching the WebPage since it was last
+   * successfully fetched. * @param value the value to set.
    */
   public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
     return isDirty(5);
@@ -378,24 +565,34 @@
   }
 
   /**
-   * Gets the value of the 'modifiedTime' field.
-   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   */
+   * Gets the value of the 'modifiedTime' field. The system time in milliseconds
+   * for when this WebPage was modified by the WebPage author, if this is not
+   * available we default to the server for this information. This is important
+   * to understand the changing nature of the WebPage.
+   */
   public java.lang.Long getModifiedTime() {
     return modifiedTime;
   }
 
   /**
-   * Sets the value of the 'modifiedTime' field.
-   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   * @param value the value to set.
+   * Sets the value of the 'modifiedTime' field. The system time in milliseconds
+   * for when this WebPage was modified by the WebPage author, if this is not
+   * available we default to the server for this information. This is important
+   * to understand the changing nature of the WebPage. * @param value the value
+   * to set.
    */
   public void setModifiedTime(java.lang.Long value) {
     this.modifiedTime = value;
     setDirty(6);
   }
-  
+
   /**
-   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when this WebPage was modified by the
+   * WebPage author, if this is not available we default to the server for this
+   * information. This is important to understand the changing nature of the
+   * WebPage. * @param value the value to set.
    */
   public boolean isModifiedTimeDirty(java.lang.Long value) {
     return isDirty(6);
@@ -402,24 +599,35 @@
   }
 
   /**
-   * Gets the value of the 'prevModifiedTime' field.
-   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   */
+   * Gets the value of the 'prevModifiedTime' field. The system time in
+   * milliseconds for when this WebPage was previously modified by the author,
+   * if this is not available then we default to the server for this
+   * information. This is important to understand the changing nature of a
+   * WebPage.
+   */
   public java.lang.Long getPrevModifiedTime() {
     return prevModifiedTime;
   }
 
   /**
-   * Sets the value of the 'prevModifiedTime' field.
-   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   * @param value the value to set.
+   * Sets the value of the 'prevModifiedTime' field. The system time in
+   * milliseconds for when this WebPage was previously modified by the author,
+   * if this is not available then we default to the server for this
+   * information. This is important to understand the changing nature of a
+   * WebPage. * @param value the value to set.
    */
   public void setPrevModifiedTime(java.lang.Long value) {
     this.prevModifiedTime = value;
     setDirty(7);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty
+   * if it represents a change that has not yet been written to the database.
+   * The system time in milliseconds for when this WebPage was previously
+   * modified by the author, if this is not available then we default to the
+   * server for this information. This is important to understand the changing
+   * nature of a WebPage. * @param value the value to set.
    */
   public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
     return isDirty(7);
@@ -434,40 +642,48 @@
 
   /**
    * Sets the value of the 'protocolStatus' field.
-   * @param value the value to set.
+   * 
+   * @param value
+   *          the value to set.
    */
   public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
     this.protocolStatus = value;
     setDirty(8);
   }
-  
+
   /**
-   * Checks the dirty status of the 'protocolStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Checks the dirty status of the 'protocolStatus' field. A field is dirty if
+   * it represents a change that has not yet been written to the database.
+   * 
+   * @param value
+   *          the value to set.
    */
-  public boolean isProtocolStatusDirty(org.apache.nutch.storage.ProtocolStatus value) {
+  public boolean isProtocolStatusDirty(
+      org.apache.nutch.storage.ProtocolStatus value) {
     return isDirty(8);
   }
 
   /**
-   * Gets the value of the 'content' field.
-   * The entire raw document content e.g. raw XHTML   */
+   * Gets the value of the 'content' field. The entire raw document content e.g.
+   * raw XHTML
+   */
   public java.nio.ByteBuffer getContent() {
     return content;
   }
 
   /**
-   * Sets the value of the 'content' field.
-   * The entire raw document content e.g. raw XHTML   * @param value the value to set.
+   * Sets the value of the 'content' field. The entire raw document content e.g.
+   * raw XHTML * @param value the value to set.
    */
   public void setContent(java.nio.ByteBuffer value) {
     this.content = value;
     setDirty(9);
   }
-  
+
   /**
-   * Checks the dirty status of the 'content' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The entire raw document content e.g. raw XHTML   * @param value the value to set.
+   * Checks the dirty status of the 'content' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * entire raw document content e.g. raw XHTML * @param value the value to set.
    */
   public boolean isContentDirty(java.nio.ByteBuffer value) {
     return isDirty(9);
@@ -474,24 +690,44 @@
   }
 
   /**
-   * Gets the value of the 'contentType' field.
-   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   */
+   * Gets the value of the 'contentType' field. The type of the content
+   * contained within the document itself. ContentType is an alias for MimeType.
+   * Historically, this parameter was only called MimeType, but since this is
+   * actually the value included in the HTTP Content-Type header, it can also
+   * include the character set encoding, which makes it more than just a
+   * MimeType specification. If MimeType is specified e.g. not None, that value
+   * is used. Otherwise, ContentType is used. If neither is given, the
+   * DEFAULT_CONTENT_TYPE setting is used.
+   */
   public java.lang.CharSequence getContentType() {
     return contentType;
   }
 
   /**
-   * Sets the value of the 'contentType' field.
-   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to set.
+   * Sets the value of the 'contentType' field. The type of the content
+   * contained within the document itself. ContentType is an alias for MimeType.
+   * Historically, this parameter was only called MimeType, but since this is
+   * actually the value included in the HTTP Content-Type header, it can also
+   * include the character set encoding, which makes it more than just a
+   * MimeType specification. If MimeType is specified e.g. not None, that value
+   * is used; else ContentType; else the DEFAULT_CONTENT_TYPE setting applies.
+   * @param value the value to set.
    */
   public void setContentType(java.lang.CharSequence value) {
     this.contentType = value;
     setDirty(10);
   }
-  
+
   /**
-   * Checks the dirty status of the 'contentType' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to set.
+   * Checks the dirty status of the 'contentType' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The type
+   * of the content contained within the document itself. ContentType is an
+   * alias for MimeType. Historically, this parameter was only called MimeType,
+   * but since this is actually the value included in the HTTP Content-Type
+   * header, it can also include the character set encoding, which makes it more
+   * than just a MimeType specification. If MimeType is specified e.g. not None,
+   * that value is used; else ContentType; else DEFAULT_CONTENT_TYPE.
+   * @param value the value to set.
    */
   public boolean isContentTypeDirty(java.lang.CharSequence value) {
     return isDirty(10);
@@ -498,24 +734,33 @@
   }
 
   /**
-   * Gets the value of the 'prevSignature' field.
-   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   */
+   * Gets the value of the 'prevSignature' field. An implementation of a
+   * WebPage's previous signature from which it can be identified and referenced
+   * at any point in time. This can be used to uniquely identify WebPage deltas
+   * based on page fingerprints.
+   */
   public java.nio.ByteBuffer getPrevSignature() {
     return prevSignature;
   }
 
   /**
-   * Sets the value of the 'prevSignature' field.
-   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   * @param value the value to set.
+   * Sets the value of the 'prevSignature' field. An implementation of a
+   * WebPage's previous signature from which it can be identified and referenced
+   * at any point in time; uniquely identifies WebPage deltas by fingerprint.
+   * @param value the value to set.
    */
   public void setPrevSignature(java.nio.ByteBuffer value) {
     this.prevSignature = value;
     setDirty(11);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevSignature' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   * @param value the value to set.
+   * Checks the dirty status of the 'prevSignature' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. An
+   * implementation of a WebPage's previous signature from which it can be
+   * identified and referenced at any point in time. This can be used to
+   * uniquely identify WebPage deltas based on page fingerprints.
+   * @param value the value to set.
    */
   public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
     return isDirty(11);
@@ -522,24 +767,33 @@
   }
 
   /**
-   * Gets the value of the 'signature' field.
-   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.   */
+   * Gets the value of the 'signature' field. An implementation of a WebPage's
+   * signature from which it can be identified and referenced at any point in
+   * time. This is essentially the WebPage's fingerprint representing its state
+   * for any point in time.
+   */
   public java.nio.ByteBuffer getSignature() {
     return signature;
   }
 
   /**
-   * Sets the value of the 'signature' field.
-   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.   * @param value the value to set.
+   * Sets the value of the 'signature' field. An implementation of a WebPage's
+   * signature from which it can be identified and referenced at any point in
+   * time; essentially the WebPage's fingerprint representing its state.
+   * @param value the value to set.
    */
   public void setSignature(java.nio.ByteBuffer value) {
     this.signature = value;
     setDirty(12);
   }
-  
+
   /**
-   * Checks the dirty status of the 'signature' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.   * @param value the value to set.
+   * Checks the dirty status of the 'signature' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. An
+   * implementation of a WebPage's signature from which it can be identified and
+   * referenced at any point in time. This is essentially the WebPage's
+   * fingerprint representing its state for any point in time.
+   * @param value the value to set.
    */
   public boolean isSignatureDirty(java.nio.ByteBuffer value) {
     return isDirty(12);
@@ -546,24 +800,25 @@
   }
 
   /**
-   * Gets the value of the 'title' field.
-   * The title of the WebPage.   */
+   * Gets the value of the 'title' field. The title of the WebPage.
+   */
   public java.lang.CharSequence getTitle() {
     return title;
   }
 
   /**
-   * Sets the value of the 'title' field.
-   * The title of the WebPage.   * @param value the value to set.
+   * Sets the value of the 'title' field. The title of the WebPage.
+   * @param value the value to set.
    */
   public void setTitle(java.lang.CharSequence value) {
     this.title = value;
     setDirty(13);
   }
-  
+
   /**
-   * Checks the dirty status of the 'title' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The title of the WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'title' field (the title of the WebPage).
+   * A field is dirty if it has a change not yet written to the database.
+   * @param value the value to set.
    */
   public boolean isTitleDirty(java.lang.CharSequence value) {
     return isDirty(13);
@@ -570,24 +825,27 @@
   }
 
   /**
-   * Gets the value of the 'text' field.
-   * The textual content of the WebPage devoid from native markup.   */
+   * Gets the value of the 'text' field. The textual content of the WebPage
+   * devoid from native markup.
+   */
   public java.lang.CharSequence getText() {
     return text;
   }
 
   /**
-   * Sets the value of the 'text' field.
-   * The textual content of the WebPage devoid from native markup.   * @param value the value to set.
+   * Sets the 'text' field: WebPage textual content devoid of native markup.
+   * @param value the value to set.
    */
   public void setText(java.lang.CharSequence value) {
     this.text = value;
     setDirty(14);
   }
-  
+
   /**
-   * Checks the dirty status of the 'text' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The textual content of the WebPage devoid from native markup.   * @param value the value to set.
+   * Checks the dirty status of the 'text' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * textual content of the WebPage devoid from native markup.
+   * @param value the value to set.
    */
   public boolean isTextDirty(java.lang.CharSequence value) {
     return isDirty(14);
@@ -602,16 +860,21 @@
 
   /**
    * Sets the value of the 'parseStatus' field.
-   * @param value the value to set.
+   * 
+   * @param value
+   *          the value to set.
    */
   public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
     this.parseStatus = value;
     setDirty(15);
   }
-  
+
   /**
-   * Checks the dirty status of the 'parseStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Checks the dirty status of the 'parseStatus' field. A field is dirty if it
+   * represents a change that has not yet been written to the database.
+   * 
+   * @param value
+   *          the value to set.
    */
   public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
     return isDirty(15);
@@ -618,24 +881,30 @@
   }
 
   /**
-   * Gets the value of the 'score' field.
-   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   */
+   * Gets the value of the 'score' field. A score used to determine a WebPage's
+   * relevance within the web graph it is part of. This score may change over
+   * time based on graph characteristics.
+   */
   public java.lang.Float getScore() {
     return score;
   }
 
   /**
-   * Sets the value of the 'score' field.
-   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   * @param value the value to set.
+   * Sets the value of the 'score' field. A score used to determine a WebPage's
+   * relevance within its web graph; it may change with graph characteristics.
+   * @param value the value to set.
    */
   public void setScore(java.lang.Float value) {
     this.score = value;
     setDirty(16);
   }
-  
+
   /**
-   * Checks the dirty status of the 'score' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   * @param value the value to set.
+   * Checks the dirty status of the 'score' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A score
+   * used to determine a WebPage's relevance within the web graph it is part of.
+   * This score may change over time based on graph characteristics.
+   * @param value the value to set.
    */
   public boolean isScoreDirty(java.lang.Float value) {
     return isDirty(16);
@@ -642,24 +911,33 @@
   }
 
   /**
-   * Gets the value of the 'reprUrl' field.
-   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   */
+   * Gets the value of the 'reprUrl' field. In the case where we are given two
+   * urls, a source and a destination of a redirect, we should determine and
+   * persist the representative url. The logic used to determine this is based
+   * largely on Yahoo!'s Slurp Crawler
+   */
   public java.lang.CharSequence getReprUrl() {
     return reprUrl;
   }
 
   /**
-   * Sets the value of the 'reprUrl' field.
-   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param value the value to set.
+   * Sets the value of the 'reprUrl' field. In the case where we are given two
+   * urls, a source and a destination of a redirect, we should determine and
+   * persist the representative url (logic based on Yahoo!'s Slurp Crawler).
+   * @param value the value to set.
    */
   public void setReprUrl(java.lang.CharSequence value) {
     this.reprUrl = value;
     setDirty(17);
   }
-  
+
   /**
-   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param value the value to set.
+   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. In the
+   * case where we are given two urls, a source and a destination of a redirect,
+   * we should determine and persist the representative url. The logic used to
+   * determine this is based largely on Yahoo!'s Slurp Crawler.
+   * @param value the value to set.
    */
   public boolean isReprUrlDirty(java.lang.CharSequence value) {
     return isDirty(17);
@@ -666,144 +944,208 @@
   }
 
   /**
-   * Gets the value of the 'headers' field.
-   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getHeaders() {
+   * Gets the value of the 'headers' field. Header information returned from the
+   * web server used to serve the content which is subsequently fetched from.
+   * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+   * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+   * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getHeaders() {
     return headers;
   }
 
   /**
-   * Sets the value of the 'headers' field.
-   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   * @param value the value to set.
+   * Sets the value of the 'headers' field. Header information returned from the
+   * web server used to serve the content which is subsequently fetched from.
+   * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+   * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+   * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+   * @param value the value to set.
    */
-  public void setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setHeaders(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(18);
   }
-  
+
   /**
-   * Checks the dirty status of the 'headers' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   * @param value the value to set.
+   * Checks the dirty status of the 'headers' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Header
+   * information returned from the web server used to serve the content which
+   * is subsequently fetched from. This includes keys such as TRANSFER_ENCODING,
+   * CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION,
+   * CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+   * @param value the value to set.
    */
-  public boolean isHeadersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isHeadersDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(18);
   }
 
   /**
-   * Gets the value of the 'outlinks' field.
-   * Embedded hyperlinks which direct outside of the current domain.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+   * Gets the value of the 'outlinks' field. Embedded hyperlinks which direct
+   * outside of the current domain.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
     return outlinks;
   }
 
   /**
-   * Sets the value of the 'outlinks' field.
-   * Embedded hyperlinks which direct outside of the current domain.   * @param value the value to set.
+   * Sets the 'outlinks' field: embedded hyperlinks outside the current domain.
+   * @param value the value to set.
    */
-  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setOutlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(19);
   }
-  
+
   /**
-   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Embedded hyperlinks which direct outside of the current domain.   * @param value the value to set.
+   * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Embedded
+   * hyperlinks which direct outside of the current domain.
+   * @param value the value to set.
    */
-  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isOutlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(19);
   }
 
   /**
-   * Gets the value of the 'inlinks' field.
-   * Embedded hyperlinks which link to pages within the current domain.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+   * Gets the value of the 'inlinks' field. Embedded hyperlinks which link to
+   * pages within the current domain.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
     return inlinks;
   }
 
   /**
-   * Sets the value of the 'inlinks' field.
-   * Embedded hyperlinks which link to pages within the current domain.   * @param value the value to set.
+   * Sets the 'inlinks' field: hyperlinks to pages within the current domain.
+   * @param value the value to set.
    */
-  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setInlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(20);
   }
-  
+
   /**
-   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Embedded hyperlinks which link to pages within the current domain.   * @param value the value to set.
+   * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Embedded
+   * hyperlinks which link to pages within the current domain.
+   * @param value the value to set.
    */
-  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isInlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(20);
   }
 
   /**
-   * Gets the value of the 'markers' field.
-   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getMarkers() {
+   * Gets the value of the 'markers' field. Markers flags which represent user
+   * and machine decisions which have influenced a WebPage's current
+   * state. Markers can be system specific and user machine driven in nature.
+   * They are assigned to a WebPage on a job-by-job basis and their values
+   * indicative of what actions should be associated with a WebPage.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getMarkers() {
     return markers;
   }
 
   /**
-   * Sets the value of the 'markers' field.
-   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.   * @param value the value to set.
+   * Sets the value of the 'markers' field. Markers flags which represent user
+   * and machine decisions which have influenced a WebPage's current
+   * state. Markers can be system specific and user machine driven in nature.
+   * They are assigned to a WebPage on a job-by-job basis and their values
+   * indicative of what actions should be associated with a WebPage.
+   * @param value the value to set.
    */
-  public void setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setMarkers(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(21);
   }
-  
+
   /**
-   * Checks the dirty status of the 'markers' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'markers' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Markers
+   * flags which represent user and machine decisions which have influenced a
+   * WebPage's current state. Markers can be system specific and user machine
+   * driven in nature. They are assigned to a WebPage on a job-by-job basis;
+   * their values indicate what actions should be associated with a WebPage.
+   * @param value the value to set.
    */
-  public boolean isMarkersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isMarkersDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(21);
   }
 
   /**
-   * Gets the value of the 'metadata' field.
-   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.   */
-  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+   * Gets the value of the 'metadata' field. A multi-valued metadata container
+   * used for storing everything from structured WebPage characteristics, to
+   * ad-hoc extraction and metadata augmentation for any given WebPage.
+   */
+  public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
     return metadata;
   }
 
   /**
-   * Sets the value of the 'metadata' field.
-   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.   * @param value the value to set.
+   * Sets the value of the 'metadata' field. A multi-valued metadata container
+   * used for storing everything from structured WebPage characteristics, to
+   * ad-hoc extraction and metadata augmentation for any given WebPage.
+   * @param value the value to set.
    */
-  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setMetadata(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(22);
   }
-  
+
   /**
-   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'metadata' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * multi-valued metadata container storing everything from structured WebPage
+   * characteristics, to ad-hoc extraction and augmentation for any WebPage.
+   * @param value the value to set.
    */
-  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+  public boolean isMetadataDirty(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
     return isDirty(22);
   }
 
   /**
-   * Gets the value of the 'batchId' field.
-   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   */
+   * Gets the value of the 'batchId' field. A batchId that this WebPage is
+   * assigned to. WebPage's are fetched in batches, called fetchlists. Pages are
+   * partitioned but can always be associated and fetched alongside pages of
+   * similar value (within a crawl cycle) based on batchId.
+   */
   public java.lang.CharSequence getBatchId() {
     return batchId;
   }
 
   /**
-   * Sets the value of the 'batchId' field.
-   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   * @param value the value to set.
+   * Sets the value of the 'batchId' field. A batchId that this WebPage is
+   * assigned to. WebPage's are fetched in batches, called fetchlists. Pages are
+   * partitioned but can always be associated and fetched alongside pages of
+   * similar value (within a crawl cycle) based on batchId.
+   * @param value the value to set.
    */
   public void setBatchId(java.lang.CharSequence value) {
     this.batchId = value;
     setDirty(23);
   }
-  
+
   /**
-   * Checks the dirty status of the 'batchId' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   * @param value the value to set.
+   * Checks the dirty status of the 'batchId' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * batchId that this WebPage is assigned to. WebPage's are fetched in batches,
+   * called fetchlists. Pages are partitioned but can always be associated and
+   * fetched alongside similar-value pages (within a crawl cycle) by batchId.
+   * @param value the value to set.
    */
   public boolean isBatchIdDirty(java.lang.CharSequence value) {
     return isDirty(23);
@@ -813,17 +1155,21 @@
   public static org.apache.nutch.storage.WebPage.Builder newBuilder() {
     return new org.apache.nutch.storage.WebPage.Builder();
   }
-  
+
   /** Creates a new WebPage RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage.Builder other) {
+  public static org.apache.nutch.storage.WebPage.Builder newBuilder(
+      org.apache.nutch.storage.WebPage.Builder other) {
     return new org.apache.nutch.storage.WebPage.Builder(other);
   }
-  
-  /** Creates a new WebPage RecordBuilder by copying an existing WebPage instance */
-  public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage other) {
+
+  /**
+   * Creates a new WebPage RecordBuilder by copying an existing WebPage instance
+   */
+  public static org.apache.nutch.storage.WebPage.Builder newBuilder(
+      org.apache.nutch.storage.WebPage other) {
     return new org.apache.nutch.storage.WebPage.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -846,12 +1192,13 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for WebPage instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<WebPage>
-    implements org.apache.avro.data.RecordBuilder<WebPage> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<WebPage> implements
+      org.apache.avro.data.RecordBuilder<WebPage> {
 
     private java.lang.CharSequence baseUrl;
     private int status;
@@ -871,11 +1218,11 @@
     private org.apache.nutch.storage.ParseStatus parseStatus;
     private float score;
     private java.lang.CharSequence reprUrl;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> headers;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> markers;
-    private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> headers;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> markers;
+    private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
     private java.lang.CharSequence batchId;
 
     /** Creates a new Builder */
@@ -882,109 +1229,133 @@
     private Builder() {
       super(org.apache.nutch.storage.WebPage.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.WebPage.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing WebPage instance */
     private Builder(org.apache.nutch.storage.WebPage other) {
-            super(org.apache.nutch.storage.WebPage.SCHEMA$);
+      super(org.apache.nutch.storage.WebPage.SCHEMA$);
       if (isValidValue(fields()[0], other.baseUrl)) {
-        this.baseUrl = (java.lang.CharSequence) data().deepCopy(fields()[0].schema(), other.baseUrl);
+        this.baseUrl = (java.lang.CharSequence) data().deepCopy(
+            fields()[0].schema(), other.baseUrl);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.status)) {
-        this.status = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.status);
+        this.status = (java.lang.Integer) data().deepCopy(fields()[1].schema(),
+            other.status);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.fetchTime)) {
-        this.fetchTime = (java.lang.Long) data().deepCopy(fields()[2].schema(), other.fetchTime);
+        this.fetchTime = (java.lang.Long) data().deepCopy(fields()[2].schema(),
+            other.fetchTime);
         fieldSetFlags()[2] = true;
       }
       if (isValidValue(fields()[3], other.prevFetchTime)) {
-        this.prevFetchTime = (java.lang.Long) data().deepCopy(fields()[3].schema(), other.prevFetchTime);
+        this.prevFetchTime = (java.lang.Long) data().deepCopy(
+            fields()[3].schema(), other.prevFetchTime);
         fieldSetFlags()[3] = true;
       }
       if (isValidValue(fields()[4], other.fetchInterval)) {
-        this.fetchInterval = (java.lang.Integer) data().deepCopy(fields()[4].schema(), other.fetchInterval);
+        this.fetchInterval = (java.lang.Integer) data().deepCopy(
+            fields()[4].schema(), other.fetchInterval);
         fieldSetFlags()[4] = true;
       }
       if (isValidValue(fields()[5], other.retriesSinceFetch)) {
-        this.retriesSinceFetch = (java.lang.Integer) data().deepCopy(fields()[5].schema(), other.retriesSinceFetch);
+        this.retriesSinceFetch = (java.lang.Integer) data().deepCopy(
+            fields()[5].schema(), other.retriesSinceFetch);
         fieldSetFlags()[5] = true;
       }
       if (isValidValue(fields()[6], other.modifiedTime)) {
-        this.modifiedTime = (java.lang.Long) data().deepCopy(fields()[6].schema(), other.modifiedTime);
+        this.modifiedTime = (java.lang.Long) data().deepCopy(
+            fields()[6].schema(), other.modifiedTime);
         fieldSetFlags()[6] = true;
       }
       if (isValidValue(fields()[7], other.prevModifiedTime)) {
-        this.prevModifiedTime = (java.lang.Long) data().deepCopy(fields()[7].schema(), other.prevModifiedTime);
+        this.prevModifiedTime = (java.lang.Long) data().deepCopy(
+            fields()[7].schema(), other.prevModifiedTime);
         fieldSetFlags()[7] = true;
       }
       if (isValidValue(fields()[8], other.protocolStatus)) {
-        this.protocolStatus = (org.apache.nutch.storage.ProtocolStatus) data().deepCopy(fields()[8].schema(), other.protocolStatus);
+        this.protocolStatus = (org.apache.nutch.storage.ProtocolStatus) data()
+            .deepCopy(fields()[8].schema(), other.protocolStatus);
         fieldSetFlags()[8] = true;
       }
       if (isValidValue(fields()[9], other.content)) {
-        this.content = (java.nio.ByteBuffer) data().deepCopy(fields()[9].schema(), other.content);
+        this.content = (java.nio.ByteBuffer) data().deepCopy(
+            fields()[9].schema(), other.content);
         fieldSetFlags()[9] = true;
       }
       if (isValidValue(fields()[10], other.contentType)) {
-        this.contentType = (java.lang.CharSequence) data().deepCopy(fields()[10].schema(), other.contentType);
+        this.contentType = (java.lang.CharSequence) data().deepCopy(
+            fields()[10].schema(), other.contentType);
         fieldSetFlags()[10] = true;
       }
       if (isValidValue(fields()[11], other.prevSignature)) {
-        this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(fields()[11].schema(), other.prevSignature);
+        this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(
+            fields()[11].schema(), other.prevSignature);
         fieldSetFlags()[11] = true;
       }
       if (isValidValue(fields()[12], other.signature)) {
-        this.signature = (java.nio.ByteBuffer) data().deepCopy(fields()[12].schema(), other.signature);
+        this.signature = (java.nio.ByteBuffer) data().deepCopy(
+            fields()[12].schema(), other.signature);
         fieldSetFlags()[12] = true;
       }
       if (isValidValue(fields()[13], other.title)) {
-        this.title = (java.lang.CharSequence) data().deepCopy(fields()[13].schema(), other.title);
+        this.title = (java.lang.CharSequence) data().deepCopy(
+            fields()[13].schema(), other.title);
         fieldSetFlags()[13] = true;
       }
       if (isValidValue(fields()[14], other.text)) {
-        this.text = (java.lang.CharSequence) data().deepCopy(fields()[14].schema(), other.text);
+        this.text = (java.lang.CharSequence) data().deepCopy(
+            fields()[14].schema(), other.text);
         fieldSetFlags()[14] = true;
       }
       if (isValidValue(fields()[15], other.parseStatus)) {
-        this.parseStatus = (org.apache.nutch.storage.ParseStatus) data().deepCopy(fields()[15].schema(), other.parseStatus);
+        this.parseStatus = (org.apache.nutch.storage.ParseStatus) data()
+            .deepCopy(fields()[15].schema(), other.parseStatus);
         fieldSetFlags()[15] = true;
       }
       if (isValidValue(fields()[16], other.score)) {
-        this.score = (java.lang.Float) data().deepCopy(fields()[16].schema(), other.score);
+        this.score = (java.lang.Float) data().deepCopy(fields()[16].schema(),
+            other.score);
         fieldSetFlags()[16] = true;
       }
       if (isValidValue(fields()[17], other.reprUrl)) {
-        this.reprUrl = (java.lang.CharSequence) data().deepCopy(fields()[17].schema(), other.reprUrl);
+        this.reprUrl = (java.lang.CharSequence) data().deepCopy(
+            fields()[17].schema(), other.reprUrl);
         fieldSetFlags()[17] = true;
       }
       if (isValidValue(fields()[18], other.headers)) {
-        this.headers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[18].schema(), other.headers);
+        this.headers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[18].schema(), other.headers);
         fieldSetFlags()[18] = true;
       }
       if (isValidValue(fields()[19], other.outlinks)) {
-        this.outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[19].schema(), other.outlinks);
+        this.outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[19].schema(), other.outlinks);
         fieldSetFlags()[19] = true;
       }
       if (isValidValue(fields()[20], other.inlinks)) {
-        this.inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[20].schema(), other.inlinks);
+        this.inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[20].schema(), other.inlinks);
         fieldSetFlags()[20] = true;
       }
       if (isValidValue(fields()[21], other.markers)) {
-        this.markers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[21].schema(), other.markers);
+        this.markers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[21].schema(), other.markers);
         fieldSetFlags()[21] = true;
       }
       if (isValidValue(fields()[22], other.metadata)) {
-        this.metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) data().deepCopy(fields()[22].schema(), other.metadata);
+        this.metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) data()
+            .deepCopy(fields()[22].schema(), other.metadata);
         fieldSetFlags()[22] = true;
       }
       if (isValidValue(fields()[23], other.batchId)) {
-        this.batchId = (java.lang.CharSequence) data().deepCopy(fields()[23].schema(), other.batchId);
+        this.batchId = (java.lang.CharSequence) data().deepCopy(
+            fields()[23].schema(), other.batchId);
         fieldSetFlags()[23] = true;
       }
     }
@@ -993,20 +1364,21 @@
     public java.lang.CharSequence getBaseUrl() {
       return baseUrl;
     }
-    
+
     /** Sets the value of the 'baseUrl' field */
-    public org.apache.nutch.storage.WebPage.Builder setBaseUrl(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setBaseUrl(
+        java.lang.CharSequence value) {
       validate(fields()[0], value);
       this.baseUrl = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'baseUrl' field has been set */
     public boolean hasBaseUrl() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'baseUrl' field */
     public org.apache.nutch.storage.WebPage.Builder clearBaseUrl() {
       baseUrl = null;
@@ -1013,193 +1385,196 @@
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'status' field */
     public java.lang.Integer getStatus() {
       return status;
     }
-    
+
     /** Sets the value of the 'status' field */
     public org.apache.nutch.storage.WebPage.Builder setStatus(int value) {
       validate(fields()[1], value);
       this.status = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'status' field has been set */
     public boolean hasStatus() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'status' field */
     public org.apache.nutch.storage.WebPage.Builder clearStatus() {
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'fetchTime' field */
     public java.lang.Long getFetchTime() {
       return fetchTime;
     }
-    
+
     /** Sets the value of the 'fetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder setFetchTime(long value) {
       validate(fields()[2], value);
       this.fetchTime = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'fetchTime' field has been set */
     public boolean hasFetchTime() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'fetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearFetchTime() {
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'prevFetchTime' field */
     public java.lang.Long getPrevFetchTime() {
       return prevFetchTime;
     }
-    
+
     /** Sets the value of the 'prevFetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder setPrevFetchTime(long value) {
       validate(fields()[3], value);
       this.prevFetchTime = value;
       fieldSetFlags()[3] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'prevFetchTime' field has been set */
     public boolean hasPrevFetchTime() {
       return fieldSetFlags()[3];
     }
-    
+
     /** Clears the value of the 'prevFetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearPrevFetchTime() {
       fieldSetFlags()[3] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'fetchInterval' field */
     public java.lang.Integer getFetchInterval() {
       return fetchInterval;
     }
-    
+
     /** Sets the value of the 'fetchInterval' field */
     public org.apache.nutch.storage.WebPage.Builder setFetchInterval(int value) {
       validate(fields()[4], value);
       this.fetchInterval = value;
       fieldSetFlags()[4] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'fetchInterval' field has been set */
     public boolean hasFetchInterval() {
       return fieldSetFlags()[4];
     }
-    
+
     /** Clears the value of the 'fetchInterval' field */
     public org.apache.nutch.storage.WebPage.Builder clearFetchInterval() {
       fieldSetFlags()[4] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'retriesSinceFetch' field */
     public java.lang.Integer getRetriesSinceFetch() {
       return retriesSinceFetch;
     }
-    
+
     /** Sets the value of the 'retriesSinceFetch' field */
-    public org.apache.nutch.storage.WebPage.Builder setRetriesSinceFetch(int value) {
+    public org.apache.nutch.storage.WebPage.Builder setRetriesSinceFetch(
+        int value) {
       validate(fields()[5], value);
       this.retriesSinceFetch = value;
       fieldSetFlags()[5] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'retriesSinceFetch' field has been set */
     public boolean hasRetriesSinceFetch() {
       return fieldSetFlags()[5];
     }
-    
+
     /** Clears the value of the 'retriesSinceFetch' field */
     public org.apache.nutch.storage.WebPage.Builder clearRetriesSinceFetch() {
       fieldSetFlags()[5] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'modifiedTime' field */
     public java.lang.Long getModifiedTime() {
       return modifiedTime;
     }
-    
+
     /** Sets the value of the 'modifiedTime' field */
     public org.apache.nutch.storage.WebPage.Builder setModifiedTime(long value) {
       validate(fields()[6], value);
       this.modifiedTime = value;
       fieldSetFlags()[6] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'modifiedTime' field has been set */
     public boolean hasModifiedTime() {
       return fieldSetFlags()[6];
     }
-    
+
     /** Clears the value of the 'modifiedTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearModifiedTime() {
       fieldSetFlags()[6] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'prevModifiedTime' field */
     public java.lang.Long getPrevModifiedTime() {
       return prevModifiedTime;
     }
-    
+
     /** Sets the value of the 'prevModifiedTime' field */
-    public org.apache.nutch.storage.WebPage.Builder setPrevModifiedTime(long value) {
+    public org.apache.nutch.storage.WebPage.Builder setPrevModifiedTime(
+        long value) {
       validate(fields()[7], value);
       this.prevModifiedTime = value;
       fieldSetFlags()[7] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'prevModifiedTime' field has been set */
     public boolean hasPrevModifiedTime() {
       return fieldSetFlags()[7];
     }
-    
+
     /** Clears the value of the 'prevModifiedTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearPrevModifiedTime() {
       fieldSetFlags()[7] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'protocolStatus' field */
     public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
       return protocolStatus;
     }
-    
+
     /** Sets the value of the 'protocolStatus' field */
-    public org.apache.nutch.storage.WebPage.Builder setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
+    public org.apache.nutch.storage.WebPage.Builder setProtocolStatus(
+        org.apache.nutch.storage.ProtocolStatus value) {
       validate(fields()[8], value);
       this.protocolStatus = value;
       fieldSetFlags()[8] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'protocolStatus' field has been set */
     public boolean hasProtocolStatus() {
       return fieldSetFlags()[8];
     }
-    
+
     /** Clears the value of the 'protocolStatus' field */
     public org.apache.nutch.storage.WebPage.Builder clearProtocolStatus() {
       protocolStatus = null;
@@ -1206,25 +1581,26 @@
       fieldSetFlags()[8] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'content' field */
     public java.nio.ByteBuffer getContent() {
       return content;
     }
-    
+
     /** Sets the value of the 'content' field */
-    public org.apache.nutch.storage.WebPage.Builder setContent(java.nio.ByteBuffer value) {
+    public org.apache.nutch.storage.WebPage.Builder setContent(
+        java.nio.ByteBuffer value) {
       validate(fields()[9], value);
       this.content = value;
       fieldSetFlags()[9] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'content' field has been set */
     public boolean hasContent() {
       return fieldSetFlags()[9];
     }
-    
+
     /** Clears the value of the 'content' field */
     public org.apache.nutch.storage.WebPage.Builder clearContent() {
       content = null;
@@ -1231,25 +1607,26 @@
       fieldSetFlags()[9] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'contentType' field */
     public java.lang.CharSequence getContentType() {
       return contentType;
     }
-    
+
     /** Sets the value of the 'contentType' field */
-    public org.apache.nutch.storage.WebPage.Builder setContentType(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setContentType(
+        java.lang.CharSequence value) {
       validate(fields()[10], value);
       this.contentType = value;
       fieldSetFlags()[10] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'contentType' field has been set */
     public boolean hasContentType() {
       return fieldSetFlags()[10];
     }
-    
+
     /** Clears the value of the 'contentType' field */
     public org.apache.nutch.storage.WebPage.Builder clearContentType() {
       contentType = null;
@@ -1256,25 +1633,26 @@
       fieldSetFlags()[10] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'prevSignature' field */
     public java.nio.ByteBuffer getPrevSignature() {
       return prevSignature;
     }
-    
+
     /** Sets the value of the 'prevSignature' field */
-    public org.apache.nutch.storage.WebPage.Builder setPrevSignature(java.nio.ByteBuffer value) {
+    public org.apache.nutch.storage.WebPage.Builder setPrevSignature(
+        java.nio.ByteBuffer value) {
       validate(fields()[11], value);
       this.prevSignature = value;
       fieldSetFlags()[11] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'prevSignature' field has been set */
     public boolean hasPrevSignature() {
       return fieldSetFlags()[11];
     }
-    
+
     /** Clears the value of the 'prevSignature' field */
     public org.apache.nutch.storage.WebPage.Builder clearPrevSignature() {
       prevSignature = null;
@@ -1281,25 +1659,26 @@
       fieldSetFlags()[11] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'signature' field */
     public java.nio.ByteBuffer getSignature() {
       return signature;
     }
-    
+
     /** Sets the value of the 'signature' field */
-    public org.apache.nutch.storage.WebPage.Builder setSignature(java.nio.ByteBuffer value) {
+    public org.apache.nutch.storage.WebPage.Builder setSignature(
+        java.nio.ByteBuffer value) {
       validate(fields()[12], value);
       this.signature = value;
       fieldSetFlags()[12] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'signature' field has been set */
     public boolean hasSignature() {
       return fieldSetFlags()[12];
     }
-    
+
     /** Clears the value of the 'signature' field */
     public org.apache.nutch.storage.WebPage.Builder clearSignature() {
       signature = null;
@@ -1306,25 +1685,26 @@
       fieldSetFlags()[12] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'title' field */
     public java.lang.CharSequence getTitle() {
       return title;
     }
-    
+
     /** Sets the value of the 'title' field */
-    public org.apache.nutch.storage.WebPage.Builder setTitle(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setTitle(
+        java.lang.CharSequence value) {
       validate(fields()[13], value);
       this.title = value;
       fieldSetFlags()[13] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'title' field has been set */
     public boolean hasTitle() {
       return fieldSetFlags()[13];
     }
-    
+
     /** Clears the value of the 'title' field */
     public org.apache.nutch.storage.WebPage.Builder clearTitle() {
       title = null;
@@ -1331,25 +1711,26 @@
       fieldSetFlags()[13] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'text' field */
     public java.lang.CharSequence getText() {
       return text;
     }
-    
+
     /** Sets the value of the 'text' field */
-    public org.apache.nutch.storage.WebPage.Builder setText(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setText(
+        java.lang.CharSequence value) {
       validate(fields()[14], value);
       this.text = value;
       fieldSetFlags()[14] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'text' field has been set */
     public boolean hasText() {
       return fieldSetFlags()[14];
     }
-    
+
     /** Clears the value of the 'text' field */
     public org.apache.nutch.storage.WebPage.Builder clearText() {
       text = null;
@@ -1356,25 +1737,26 @@
       fieldSetFlags()[14] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'parseStatus' field */
     public org.apache.nutch.storage.ParseStatus getParseStatus() {
       return parseStatus;
     }
-    
+
     /** Sets the value of the 'parseStatus' field */
-    public org.apache.nutch.storage.WebPage.Builder setParseStatus(org.apache.nutch.storage.ParseStatus value) {
+    public org.apache.nutch.storage.WebPage.Builder setParseStatus(
+        org.apache.nutch.storage.ParseStatus value) {
       validate(fields()[15], value);
       this.parseStatus = value;
       fieldSetFlags()[15] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'parseStatus' field has been set */
     public boolean hasParseStatus() {
       return fieldSetFlags()[15];
     }
-    
+
     /** Clears the value of the 'parseStatus' field */
     public org.apache.nutch.storage.WebPage.Builder clearParseStatus() {
       parseStatus = null;
@@ -1381,49 +1763,50 @@
       fieldSetFlags()[15] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'score' field */
     public java.lang.Float getScore() {
       return score;
     }
-    
+
     /** Sets the value of the 'score' field */
     public org.apache.nutch.storage.WebPage.Builder setScore(float value) {
       validate(fields()[16], value);
       this.score = value;
       fieldSetFlags()[16] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'score' field has been set */
     public boolean hasScore() {
       return fieldSetFlags()[16];
     }
-    
+
     /** Clears the value of the 'score' field */
     public org.apache.nutch.storage.WebPage.Builder clearScore() {
       fieldSetFlags()[16] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'reprUrl' field */
     public java.lang.CharSequence getReprUrl() {
       return reprUrl;
     }
-    
+
     /** Sets the value of the 'reprUrl' field */
-    public org.apache.nutch.storage.WebPage.Builder setReprUrl(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setReprUrl(
+        java.lang.CharSequence value) {
       validate(fields()[17], value);
       this.reprUrl = value;
       fieldSetFlags()[17] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'reprUrl' field has been set */
     public boolean hasReprUrl() {
       return fieldSetFlags()[17];
     }
-    
+
     /** Clears the value of the 'reprUrl' field */
     public org.apache.nutch.storage.WebPage.Builder clearReprUrl() {
       reprUrl = null;
@@ -1430,25 +1813,26 @@
       fieldSetFlags()[17] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'headers' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getHeaders() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getHeaders() {
       return headers;
     }
-    
+
     /** Sets the value of the 'headers' field */
-    public org.apache.nutch.storage.WebPage.Builder setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setHeaders(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[18], value);
       this.headers = value;
       fieldSetFlags()[18] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'headers' field has been set */
     public boolean hasHeaders() {
       return fieldSetFlags()[18];
     }
-    
+
     /** Clears the value of the 'headers' field */
     public org.apache.nutch.storage.WebPage.Builder clearHeaders() {
       headers = null;
@@ -1455,25 +1839,26 @@
       fieldSetFlags()[18] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'outlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
       return outlinks;
     }
-    
+
     /** Sets the value of the 'outlinks' field */
-    public org.apache.nutch.storage.WebPage.Builder setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[19], value);
       this.outlinks = value;
       fieldSetFlags()[19] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'outlinks' field has been set */
     public boolean hasOutlinks() {
       return fieldSetFlags()[19];
     }
-    
+
     /** Clears the value of the 'outlinks' field */
     public org.apache.nutch.storage.WebPage.Builder clearOutlinks() {
       outlinks = null;
@@ -1480,25 +1865,26 @@
       fieldSetFlags()[19] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'inlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
       return inlinks;
     }
-    
+
     /** Sets the value of the 'inlinks' field */
-    public org.apache.nutch.storage.WebPage.Builder setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[20], value);
       this.inlinks = value;
       fieldSetFlags()[20] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'inlinks' field has been set */
     public boolean hasInlinks() {
       return fieldSetFlags()[20];
     }
-    
+
     /** Clears the value of the 'inlinks' field */
     public org.apache.nutch.storage.WebPage.Builder clearInlinks() {
       inlinks = null;
@@ -1505,25 +1891,26 @@
       fieldSetFlags()[20] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'markers' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getMarkers() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getMarkers() {
       return markers;
     }
-    
+
     /** Sets the value of the 'markers' field */
-    public org.apache.nutch.storage.WebPage.Builder setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setMarkers(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[21], value);
       this.markers = value;
       fieldSetFlags()[21] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'markers' field has been set */
     public boolean hasMarkers() {
       return fieldSetFlags()[21];
     }
-    
+
     /** Clears the value of the 'markers' field */
     public org.apache.nutch.storage.WebPage.Builder clearMarkers() {
       markers = null;
@@ -1530,25 +1917,26 @@
       fieldSetFlags()[21] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'metadata' field */
-    public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
       return metadata;
     }
-    
+
     /** Sets the value of the 'metadata' field */
-    public org.apache.nutch.storage.WebPage.Builder setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+    public org.apache.nutch.storage.WebPage.Builder setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
       validate(fields()[22], value);
       this.metadata = value;
       fieldSetFlags()[22] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'metadata' field has been set */
     public boolean hasMetadata() {
       return fieldSetFlags()[22];
     }
-    
+
     /** Clears the value of the 'metadata' field */
     public org.apache.nutch.storage.WebPage.Builder clearMetadata() {
       metadata = null;
@@ -1555,25 +1943,26 @@
       fieldSetFlags()[22] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'batchId' field */
     public java.lang.CharSequence getBatchId() {
       return batchId;
     }
-    
+
     /** Sets the value of the 'batchId' field */
-    public org.apache.nutch.storage.WebPage.Builder setBatchId(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setBatchId(
+        java.lang.CharSequence value) {
       validate(fields()[23], value);
       this.batchId = value;
       fieldSetFlags()[23] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'batchId' field has been set */
     public boolean hasBatchId() {
       return fieldSetFlags()[23];
     }
-    
+
     /** Clears the value of the 'batchId' field */
     public org.apache.nutch.storage.WebPage.Builder clearBatchId() {
       batchId = null;
@@ -1580,35 +1969,64 @@
       fieldSetFlags()[23] = false;
       return this;
     }
-    
+
     @Override
     public WebPage build() {
       try {
         WebPage record = new WebPage();
-        record.baseUrl = fieldSetFlags()[0] ? this.baseUrl : (java.lang.CharSequence) defaultValue(fields()[0]);
-        record.status = fieldSetFlags()[1] ? this.status : (java.lang.Integer) defaultValue(fields()[1]);
-        record.fetchTime = fieldSetFlags()[2] ? this.fetchTime : (java.lang.Long) defaultValue(fields()[2]);
-        record.prevFetchTime = fieldSetFlags()[3] ? this.prevFetchTime : (java.lang.Long) defaultValue(fields()[3]);
-        record.fetchInterval = fieldSetFlags()[4] ? this.fetchInterval : (java.lang.Integer) defaultValue(fields()[4]);
-        record.retriesSinceFetch = fieldSetFlags()[5] ? this.retriesSinceFetch : (java.lang.Integer) defaultValue(fields()[5]);
-        record.modifiedTime = fieldSetFlags()[6] ? this.modifiedTime : (java.lang.Long) defaultValue(fields()[6]);
-        record.prevModifiedTime = fieldSetFlags()[7] ? this.prevModifiedTime : (java.lang.Long) defaultValue(fields()[7]);
-        record.protocolStatus = fieldSetFlags()[8] ? this.protocolStatus : (org.apache.nutch.storage.ProtocolStatus) defaultValue(fields()[8]);
-        record.content = fieldSetFlags()[9] ? this.content : (java.nio.ByteBuffer) defaultValue(fields()[9]);
-        record.contentType = fieldSetFlags()[10] ? this.contentType : (java.lang.CharSequence) defaultValue(fields()[10]);
-        record.prevSignature = fieldSetFlags()[11] ? this.prevSignature : (java.nio.ByteBuffer) defaultValue(fields()[11]);
-        record.signature = fieldSetFlags()[12] ? this.signature : (java.nio.ByteBuffer) defaultValue(fields()[12]);
-        record.title = fieldSetFlags()[13] ? this.title : (java.lang.CharSequence) defaultValue(fields()[13]);
-        record.text = fieldSetFlags()[14] ? this.text : (java.lang.CharSequence) defaultValue(fields()[14]);
-        record.parseStatus = fieldSetFlags()[15] ? this.parseStatus : (org.apache.nutch.storage.ParseStatus) defaultValue(fields()[15]);
-        record.score = fieldSetFlags()[16] ? this.score : (java.lang.Float) defaultValue(fields()[16]);
-        record.reprUrl = fieldSetFlags()[17] ? this.reprUrl : (java.lang.CharSequence) defaultValue(fields()[17]);
-        record.headers = fieldSetFlags()[18] ? this.headers : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[18]));
-        record.outlinks = fieldSetFlags()[19] ? this.outlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[19]));
-        record.inlinks = fieldSetFlags()[20] ? this.inlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[20]));
-        record.markers = fieldSetFlags()[21] ? this.markers : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[21]));
-        record.metadata = fieldSetFlags()[22] ? this.metadata : (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[22]));
-        record.batchId = fieldSetFlags()[23] ? this.batchId : (java.lang.CharSequence) defaultValue(fields()[23]);
+        record.baseUrl = fieldSetFlags()[0] ? this.baseUrl
+            : (java.lang.CharSequence) defaultValue(fields()[0]);
+        record.status = fieldSetFlags()[1] ? this.status
+            : (java.lang.Integer) defaultValue(fields()[1]);
+        record.fetchTime = fieldSetFlags()[2] ? this.fetchTime
+            : (java.lang.Long) defaultValue(fields()[2]);
+        record.prevFetchTime = fieldSetFlags()[3] ? this.prevFetchTime
+            : (java.lang.Long) defaultValue(fields()[3]);
+        record.fetchInterval = fieldSetFlags()[4] ? this.fetchInterval
+            : (java.lang.Integer) defaultValue(fields()[4]);
+        record.retriesSinceFetch = fieldSetFlags()[5] ? this.retriesSinceFetch
+            : (java.lang.Integer) defaultValue(fields()[5]);
+        record.modifiedTime = fieldSetFlags()[6] ? this.modifiedTime
+            : (java.lang.Long) defaultValue(fields()[6]);
+        record.prevModifiedTime = fieldSetFlags()[7] ? this.prevModifiedTime
+            : (java.lang.Long) defaultValue(fields()[7]);
+        record.protocolStatus = fieldSetFlags()[8] ? this.protocolStatus
+            : (org.apache.nutch.storage.ProtocolStatus) defaultValue(fields()[8]);
+        record.content = fieldSetFlags()[9] ? this.content
+            : (java.nio.ByteBuffer) defaultValue(fields()[9]);
+        record.contentType = fieldSetFlags()[10] ? this.contentType
+            : (java.lang.CharSequence) defaultValue(fields()[10]);
+        record.prevSignature = fieldSetFlags()[11] ? this.prevSignature
+            : (java.nio.ByteBuffer) defaultValue(fields()[11]);
+        record.signature = fieldSetFlags()[12] ? this.signature
+            : (java.nio.ByteBuffer) defaultValue(fields()[12]);
+        record.title = fieldSetFlags()[13] ? this.title
+            : (java.lang.CharSequence) defaultValue(fields()[13]);
+        record.text = fieldSetFlags()[14] ? this.text
+            : (java.lang.CharSequence) defaultValue(fields()[14]);
+        record.parseStatus = fieldSetFlags()[15] ? this.parseStatus
+            : (org.apache.nutch.storage.ParseStatus) defaultValue(fields()[15]);
+        record.score = fieldSetFlags()[16] ? this.score
+            : (java.lang.Float) defaultValue(fields()[16]);
+        record.reprUrl = fieldSetFlags()[17] ? this.reprUrl
+            : (java.lang.CharSequence) defaultValue(fields()[17]);
+        record.headers = fieldSetFlags()[18] ? this.headers
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[18]));
+        record.outlinks = fieldSetFlags()[19] ? this.outlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[19]));
+        record.inlinks = fieldSetFlags()[20] ? this.inlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[20]));
+        record.markers = fieldSetFlags()[21] ? this.markers
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[21]));
+        record.metadata = fieldSetFlags()[22] ? this.metadata
+            : (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[22]));
+        record.batchId = fieldSetFlags()[23] ? this.batchId
+            : (java.lang.CharSequence) defaultValue(fields()[23]);
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -1615,575 +2033,847 @@
       }
     }
   }
-  
-  public WebPage.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public WebPage.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public WebPage newInstance(){
+  public WebPage newInstance() {
     return newBuilder().build();
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends WebPage implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'baseUrl' field.
-	   * The original associated with this WebPage.	   */
-	  public java.lang.CharSequence getBaseUrl() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'baseUrl' field.
-	   * The original associated with this WebPage.	   * @param value the value to set.
-	   */
-	  public void setBaseUrl(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The original associated with this WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isBaseUrlDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'status' field.
-	   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified	   */
-	  public java.lang.Integer getStatus() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'status' field.
-	   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified	   * @param value the value to set.
-	   */
-	  public void setStatus(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'status' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified	   * @param value the value to set.
-	   */
-	  public boolean isStatusDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'fetchTime' field.
-	   * The system time in milliseconds for when the page was fetched.	   */
-	  public java.lang.Long getFetchTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'fetchTime' field.
-	   * The system time in milliseconds for when the page was fetched.	   * @param value the value to set.
-	   */
-	  public void setFetchTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when the page was fetched.	   * @param value the value to set.
-	   */
-	  public boolean isFetchTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'prevFetchTime' field.
-	   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation	   */
-	  public java.lang.Long getPrevFetchTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'prevFetchTime' field.
-	   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation	   * @param value the value to set.
-	   */
-	  public void setPrevFetchTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation	   * @param value the value to set.
-	   */
-	  public boolean isPrevFetchTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'fetchInterval' field.
-	   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.	   */
-	  public java.lang.Integer getFetchInterval() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'fetchInterval' field.
-	   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.	   * @param value the value to set.
-	   */
-	  public void setFetchInterval(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.	   * @param value the value to set.
-	   */
-	  public boolean isFetchIntervalDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'retriesSinceFetch' field.
-	   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.	   */
-	  public java.lang.Integer getRetriesSinceFetch() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'retriesSinceFetch' field.
-	   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.	   * @param value the value to set.
-	   */
-	  public void setRetriesSinceFetch(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.	   * @param value the value to set.
-	   */
-	  public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'modifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.	   */
-	  public java.lang.Long getModifiedTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'modifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.	   * @param value the value to set.
-	   */
-	  public void setModifiedTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isModifiedTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'prevModifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.	   */
-	  public java.lang.Long getPrevModifiedTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'prevModifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.	   * @param value the value to set.
-	   */
-	  public void setPrevModifiedTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'protocolStatus' field.
-		   */
-	  public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'protocolStatus' field.
-		   * @param value the value to set.
-	   */
-	  public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'protocolStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-		   * @param value the value to set.
-	   */
-	  public boolean isProtocolStatusDirty(org.apache.nutch.storage.ProtocolStatus value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'content' field.
-	   * The entire raw document content e.g. raw XHTML	   */
-	  public java.nio.ByteBuffer getContent() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'content' field.
-	   * The entire raw document content e.g. raw XHTML	   * @param value the value to set.
-	   */
-	  public void setContent(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'content' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The entire raw document content e.g. raw XHTML	   * @param value the value to set.
-	   */
-	  public boolean isContentDirty(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'contentType' field.
-	   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.	   */
-	  public java.lang.CharSequence getContentType() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'contentType' field.
-	   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.	   * @param value the value to set.
-	   */
-	  public void setContentType(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'contentType' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.	   * @param value the value to set.
-	   */
-	  public boolean isContentTypeDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'prevSignature' field.
-	   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.	   */
-	  public java.nio.ByteBuffer getPrevSignature() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'prevSignature' field.
-	   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.	   * @param value the value to set.
-	   */
-	  public void setPrevSignature(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'prevSignature' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.	   * @param value the value to set.
-	   */
-	  public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'signature' field.
-	   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.	   */
-	  public java.nio.ByteBuffer getSignature() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'signature' field.
-	   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.	   * @param value the value to set.
-	   */
-	  public void setSignature(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'signature' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.	   * @param value the value to set.
-	   */
-	  public boolean isSignatureDirty(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'title' field.
-	   * The title of the WebPage.	   */
-	  public java.lang.CharSequence getTitle() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'title' field.
-	   * The title of the WebPage.	   * @param value the value to set.
-	   */
-	  public void setTitle(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'title' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The title of the WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isTitleDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'text' field.
-	   * The textual content of the WebPage devoid from native markup.	   */
-	  public java.lang.CharSequence getText() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'text' field.
-	   * The textual content of the WebPage devoid from native markup.	   * @param value the value to set.
-	   */
-	  public void setText(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'text' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The textual content of the WebPage devoid from native markup.	   * @param value the value to set.
-	   */
-	  public boolean isTextDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'parseStatus' field.
-		   */
-	  public org.apache.nutch.storage.ParseStatus getParseStatus() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'parseStatus' field.
-		   * @param value the value to set.
-	   */
-	  public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'parseStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-		   * @param value the value to set.
-	   */
-	  public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'score' field.
-	   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.	   */
-	  public java.lang.Float getScore() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'score' field.
-	   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.	   * @param value the value to set.
-	   */
-	  public void setScore(java.lang.Float value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'score' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.	   * @param value the value to set.
-	   */
-	  public boolean isScoreDirty(java.lang.Float value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'reprUrl' field.
-	   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler	   */
-	  public java.lang.CharSequence getReprUrl() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'reprUrl' field.
-	   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler	   * @param value the value to set.
-	   */
-	  public void setReprUrl(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler	   * @param value the value to set.
-	   */
-	  public boolean isReprUrlDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'headers' field.
-	   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getHeaders() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'headers' field.
-	   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.	   * @param value the value to set.
-	   */
-	  public void setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'headers' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.	   * @param value the value to set.
-	   */
-	  public boolean isHeadersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'outlinks' field.
-	   * Embedded hyperlinks which direct outside of the current domain.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'outlinks' field.
-	   * Embedded hyperlinks which direct outside of the current domain.	   * @param value the value to set.
-	   */
-	  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Embedded hyperlinks which direct outside of the current domain.	   * @param value the value to set.
-	   */
-	  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'inlinks' field.
-	   * Embedded hyperlinks which link to pages within the current domain.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'inlinks' field.
-	   * Embedded hyperlinks which link to pages within the current domain.	   * @param value the value to set.
-	   */
-	  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Embedded hyperlinks which link to pages within the current domain.	   * @param value the value to set.
-	   */
-	  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'markers' field.
-	   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getMarkers() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'markers' field.
-	   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.	   * @param value the value to set.
-	   */
-	  public void setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'markers' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isMarkersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'metadata' field.
-	   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.	   */
-	  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'metadata' field.
-	   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.	   * @param value the value to set.
-	   */
-	  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'batchId' field.
-	   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.	   */
-	  public java.lang.CharSequence getBatchId() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'batchId' field.
-	   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.	   * @param value the value to set.
-	   */
-	  public void setBatchId(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'batchId' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.	   * @param value the value to set.
-	   */
-	  public boolean isBatchIdDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends WebPage implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'baseUrl' field. The original associated with this
+     * WebPage.
+     */
+    public java.lang.CharSequence getBaseUrl() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'baseUrl' field. The original associated with this
+     * WebPage. * @param value the value to set.
+     */
+    public void setBaseUrl(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'baseUrl' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * original associated with this WebPage. * @param value the value to set.
+     */
+    public boolean isBaseUrlDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'status' field. A crawl status associated with the
+     * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+     * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage
+     * no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to
+     * other page, STATUS_REDIR_PERM - WebPage permanently redirects to other
+     * page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g.
+     * transient errors and STATUS_NOTMODIFIED - fetching successful - page is
+     * not modified
+     */
+    public java.lang.Integer getStatus() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'status' field. A crawl status associated with the
+     * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+     * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage
+     * no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to
+     * other page, STATUS_REDIR_PERM - WebPage permanently redirects to other
+     * page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g.
+     * transient errors and STATUS_NOTMODIFIED - fetching successful - page is
+     * not modified * @param value the value to set.
+     */
+    public void setStatus(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'status' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * crawl status associated with the WebPage, can be of value
+     * STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage
+     * was successfully fetched, STATUS_GONE - WebPage no longer exists,
+     * STATUS_REDIR_TEMP - WebPage temporarily redirects to other page,
+     * STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+     * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+     * errors and STATUS_NOTMODIFIED - fetching successful - page is not
+     * modified * @param value the value to set.
+     */
+    public boolean isStatusDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'fetchTime' field. The system time in milliseconds
+     * for when the page was fetched.
+     */
+    public java.lang.Long getFetchTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'fetchTime' field. The system time in milliseconds
+     * for when the page was fetched. * @param value the value to set.
+     */
+    public void setFetchTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'fetchTime' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * system time in milliseconds for when the page was fetched. * @param value
+     * the value to set.
+     */
+    public boolean isFetchTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'prevFetchTime' field. The system time in
+     * milliseconds for when the page was last fetched if it was previously
+     * fetched which can be used to calculate time delta within a fetching
+     * schedule implementation
+     */
+    public java.lang.Long getPrevFetchTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'prevFetchTime' field. The system time in
+     * milliseconds for when the page was last fetched if it was previously
+     * fetched which can be used to calculate time delta within a fetching
+     * schedule implementation * @param value the value to set.
+     */
+    public void setPrevFetchTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * system time in milliseconds for when the page was last fetched if it was
+     * previously fetched which can be used to calculate time delta within a
+     * fetching schedule implementation * @param value the value to set.
+     */
+    public boolean isPrevFetchTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'fetchInterval' field. The default number of
+     * seconds between re-fetches of a page. The default is considered as 30
+     * days unless a custom fetch schedule is implemented.
+     */
+    public java.lang.Integer getFetchInterval() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'fetchInterval' field. The default number of
+     * seconds between re-fetches of a page. The default is considered as 30
+     * days unless a custom fetch schedule is implemented. * @param value the
+     * value to set.
+     */
+    public void setFetchInterval(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'fetchInterval' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * default number of seconds between re-fetches of a page. The default is
+     * considered as 30 days unless a custom fetch schedule is implemented. * @param
+     * value the value to set.
+     */
+    public boolean isFetchIntervalDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'retriesSinceFetch' field. The number of retried
+     * attempts at fetching the WebPage since it was last successfully fetched.
+     */
+    public java.lang.Integer getRetriesSinceFetch() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'retriesSinceFetch' field. The number of retried
+     * attempts at fetching the WebPage since it was last successfully fetched.
+     * * @param value the value to set.
+     */
+    public void setRetriesSinceFetch(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'retriesSinceFetch' field. A field is
+     * dirty if it represents a change that has not yet been written to the
+     * database. The number of retried attempts at fetching the WebPage since it
+     * was last successfully fetched. * @param value the value to set.
+     */
+    public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'modifiedTime' field. The system time in
+     * milliseconds for when this WebPage was modified by the WebPage author, if
+     * this is not available we default to the server for this information. This
+     * is important to understand the changing nature of the WebPage.
+     */
+    public java.lang.Long getModifiedTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'modifiedTime' field. The system time in
+     * milliseconds for when this WebPage was modified by the WebPage author, if
+     * this is not available we default to the server for this information. This
+     * is important to understand the changing nature of the WebPage. * @param
+     * value the value to set.
+     */
+    public void setModifiedTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'modifiedTime' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * system time in milliseconds for when this WebPage was modified by the
+     * WebPage author, if this is not available we default to the server for
+     * this information. This is important to understand the changing nature of
+     * the WebPage. * @param value the value to set.
+     */
+    public boolean isModifiedTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'prevModifiedTime' field. The system time in
+     * milliseconds for when this WebPage was previously modified by the author,
+     * if this is not available then we default to the server for this
+     * information. This is important to understand the changing nature of a
+     * WebPage.
+     */
+    public java.lang.Long getPrevModifiedTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'prevModifiedTime' field. The system time in
+     * milliseconds for when this WebPage was previously modified by the author,
+     * if this is not available then we default to the server for this
+     * information. This is important to understand the changing nature of a
+     * WebPage. * @param value the value to set.
+     */
+    public void setPrevModifiedTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty
+     * if it represents a change that has not yet been written to the database.
+     * The system time in milliseconds for when this WebPage was previously
+     * modified by the author, if this is not available then we default to the
+     * server for this information. This is important to understand the changing
+     * nature of a WebPage. * @param value the value to set.
+     */
+    public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'protocolStatus' field.
+     */
+    public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'protocolStatus' field.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'protocolStatus' field. A field is dirty
+     * if it represents a change that has not yet been written to the database.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public boolean isProtocolStatusDirty(
+        org.apache.nutch.storage.ProtocolStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'content' field. The entire raw document content
+     * e.g. raw XHTML
+     */
+    public java.nio.ByteBuffer getContent() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'content' field. The entire raw document content
+     * e.g. raw XHTML * @param value the value to set.
+     */
+    public void setContent(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'content' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * entire raw document content e.g. raw XHTML * @param value the value to
+     * set.
+     */
+    public boolean isContentDirty(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'contentType' field. The type of the content
+     * contained within the document itself. ContentType is an alias for
+     * MimeType. Historically, this parameter was only called MimeType, but
+     * since this is actually the value included in the HTTP Content-Type
+     * header, it can also include the character set encoding, which makes it
+     * more than just a MimeType specification. If MimeType is specified e.g.
+     * not None, that value is used. Otherwise, ContentType is used. If neither
+     * is given, the DEFAULT_CONTENT_TYPE setting is used.
+     */
+    public java.lang.CharSequence getContentType() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'contentType' field. The type of the content
+     * contained within the document itself. ContentType is an alias for
+     * MimeType. Historically, this parameter was only called MimeType, but
+     * since this is actually the value included in the HTTP Content-Type
+     * header, it can also include the character set encoding, which makes it
+     * more than just a MimeType specification. If MimeType is specified e.g.
+     * not None, that value is used. Otherwise, ContentType is used. If neither
+     * is given, the DEFAULT_CONTENT_TYPE setting is used. * @param value the
+     * value to set.
+     */
+    public void setContentType(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'contentType' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * type of the content contained within the document itself. ContentType is
+     * an alias for MimeType. Historically, this parameter was only called
+     * MimeType, but since this is actually the value included in the HTTP
+     * Content-Type header, it can also include the character set encoding,
+     * which makes it more than just a MimeType specification. If MimeType is
+     * specified e.g. not None, that value is used. Otherwise, ContentType is
+     * used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. * @param
+     * value the value to set.
+     */
+    public boolean isContentTypeDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'prevSignature' field. An implementation of a
+     * WebPage's previous signature from which it can be identified and
+     * referenced at any point in time. This can be used to uniquely identify
+     * WebPage deltas based on page fingerprints.
+     */
+    public java.nio.ByteBuffer getPrevSignature() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'prevSignature' field. An implementation of a
+     * WebPage's previous signature from which it can be identified and
+     * referenced at any point in time. This can be used to uniquely identify
+     * WebPage deltas based on page fingerprints. * @param value the value to
+     * set.
+     */
+    public void setPrevSignature(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'prevSignature' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. An
+     * implementation of a WebPage's previous signature from which it can be
+     * identified and referenced at any point in time. This can be used to
+     * uniquely identify WebPage deltas based on page fingerprints. * @param
+     * value the value to set.
+     */
+    public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'signature' field. An implementation of a WebPage's
+     * signature from which it can be identified and referenced at any point in
+     * time. This is essentially the WebPage's fingerprint representing its state
+     * for any point in time.
+     */
+    public java.nio.ByteBuffer getSignature() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'signature' field. An implementation of a WebPage's
+     * signature from which it can be identified and referenced at any point in
+     * time. This is essentially the WebPage's fingerprint representing its state
+     * for any point in time. * @param value the value to set.
+     */
+    public void setSignature(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'signature' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. An
+     * implementation of a WebPage's signature from which it can be identified
+     * and referenced at any point in time. This is essentially the WebPage's
+     * fingerprint representing its state for any point in time. * @param value
+     * the value to set.
+     */
+    public boolean isSignatureDirty(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'title' field. The title of the WebPage.
+     */
+    public java.lang.CharSequence getTitle() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'title' field. The title of the WebPage. * @param
+     * value the value to set.
+     */
+    public void setTitle(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'title' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * title of the WebPage. * @param value the value to set.
+     */
+    public boolean isTitleDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'text' field. The textual content of the WebPage
+     * devoid of native markup.
+     */
+    public java.lang.CharSequence getText() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'text' field. The textual content of the WebPage
+     * devoid of native markup. * @param value the value to set.
+     */
+    public void setText(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'text' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * textual content of the WebPage devoid of native markup. * @param value
+     * the value to set.
+     */
+    public boolean isTextDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'parseStatus' field.
+     */
+    public org.apache.nutch.storage.ParseStatus getParseStatus() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'parseStatus' field.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'parseStatus' field. A field is dirty if
+     * it represents a change that has not yet been written to the database.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'score' field. A score used to determine a
+     * WebPage's relevance within the web graph it is part of. This score may
+     * change over time based on graph characteristics.
+     */
+    public java.lang.Float getScore() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'score' field. A score used to determine a
+     * WebPage's relevance within the web graph it is part of. This score may
+     * change over time based on graph characteristics. * @param value the value
+     * to set.
+     */
+    public void setScore(java.lang.Float value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'score' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * score used to determine a WebPage's relevance within the web graph it is
+     * part of. This score may change over time based on graph characteristics.
+     * * @param value the value to set.
+     */
+    public boolean isScoreDirty(java.lang.Float value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'reprUrl' field. In the case where we are given two
+     * urls, a source and a destination of a redirect, we should determine and
+     * persist the representative url. The logic used to determine this is based
+     * largely on Yahoo!'s Slurp Crawler
+     */
+    public java.lang.CharSequence getReprUrl() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'reprUrl' field. In the case where we are given two
+     * urls, a source and a destination of a redirect, we should determine and
+     * persist the representative url. The logic used to determine this is based
+     * largely on Yahoo!'s Slurp Crawler * @param value the value to set.
+     */
+    public void setReprUrl(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'reprUrl' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. In the
+     * case where we are given two urls, a source and a destination of a
+     * redirect, we should determine and persist the representative url. The
+     * logic used to determine this is based largely on Yahoo!'s Slurp Crawler * @param
+     * value the value to set.
+     */
+    public boolean isReprUrlDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'headers' field. Header information returned from
+     * the web server used to serve the content which is subsequently fetched
+     * from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+     * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+     * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getHeaders() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'headers' field. Header information returned from
+     * the web server used to serve the content which is subsequently fetched
+     * from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+     * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+     * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. * @param value the
+     * value to set.
+     */
+    public void setHeaders(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'headers' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. Header
+     * information returned from the web server used to serve the content which
+     * is subsequently fetched from. This includes keys such as
+     * TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+     * CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE,
+     * LAST_MODIFIED and LOCATION. * @param value the value to set.
+     */
+    public boolean isHeadersDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'outlinks' field. Embedded hyperlinks which direct
+     * outside of the current domain.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'outlinks' field. Embedded hyperlinks which direct
+     * outside of the current domain. * @param value the value to set.
+     */
+    public void setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Embedded hyperlinks which direct outside of the current domain. * @param
+     * value the value to set.
+     */
+    public boolean isOutlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'inlinks' field. Embedded hyperlinks which link to
+     * pages within the current domain.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'inlinks' field. Embedded hyperlinks which link to
+     * pages within the current domain. * @param value the value to set.
+     */
+    public void setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Embedded hyperlinks which link to pages within the current domain. * @param
+     * value the value to set.
+     */
+    public boolean isInlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'markers' field. Markers flags which represent user
+     * and machine decisions which have affected or influenced a WebPage's current
+     * state. Markers can be system specific and user machine driven in nature.
+     * They are assigned to a WebPage on a job-by-job basis and their values
+     * indicative of what actions should be associated with a WebPage.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getMarkers() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'markers' field. Markers flags which represent user
+     * and machine decisions which have affected or influenced a WebPage's current
+     * state. Markers can be system specific and user machine driven in nature.
+     * They are assigned to a WebPage on a job-by-job basis and their values
+     * indicative of what actions should be associated with a WebPage. * @param
+     * value the value to set.
+     */
+    public void setMarkers(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'markers' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Markers flags which represent user and machine decisions which have
+     * affected or influenced a WebPage's current state. Markers can be system
+     * specific and user machine driven in nature. They are assigned to a
+     * WebPage on a job-by-job basis and their values indicative of what actions
+     * should be associated with a WebPage. * @param value the value to set.
+     */
+    public boolean isMarkersDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'metadata' field. A multi-valued metadata container
+     * used for storing everything from structured WebPage characteristics, to
+     * ad-hoc extraction and metadata augmentation for any given WebPage.
+     */
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'metadata' field. A multi-valued metadata container
+     * used for storing everything from structured WebPage characteristics, to
+     * ad-hoc extraction and metadata augmentation for any given WebPage. * @param
+     * value the value to set.
+     */
+    public void setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'metadata' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * multi-valued metadata container used for storing everything from
+     * structured WebPage characteristics, to ad-hoc extraction and metadata
+     * augmentation for any given WebPage. * @param value the value to set.
+     */
+    public boolean isMetadataDirty(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'batchId' field. A batchId that this WebPage is
+     * assigned to. WebPage's are fetched in batches, called fetchlists. Pages
+     * are partitioned but can always be associated and fetched alongside pages
+     * of similar value (within a crawl cycle) based on batchId.
+     */
+    public java.lang.CharSequence getBatchId() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'batchId' field. A batchId that this WebPage is
+     * assigned to. WebPage's are fetched in batches, called fetchlists. Pages
+     * are partitioned but can always be associated and fetched alongside pages
+     * of similar value (within a crawl cycle) based on batchId. * @param value
+     * the value to set.
+     */
+    public void setBatchId(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'batchId' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * batchId that this WebPage is assigned to. WebPage's are fetched in
+     * batches, called fetchlists. Pages are partitioned but can always be
+     * associated and fetched alongside pages of similar value (within a crawl
+     * cycle) based on batchId. * @param value the value to set.
+     */
+    public boolean isBatchIdDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/WebTableCreator.java
===================================================================
--- src/java/org/apache/nutch/storage/WebTableCreator.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/WebTableCreator.java	(working copy)
@@ -21,9 +21,8 @@
 
 public class WebTableCreator {
   public static void main(String[] args) throws Exception {
-    DataStore<String, WebPage> store =
-      StorageUtils.createWebStore(NutchConfiguration.create(), String.class,
-        WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        NutchConfiguration.create(), String.class, WebPage.class);
 
     System.out.println(store);
   }
Index: src/java/org/apache/nutch/storage/package-info.java
===================================================================
--- src/java/org/apache/nutch/storage/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * {@link org.apache.nutch.storage.Host host metadata}) of data in abstracted storage.
  */
 package org.apache.nutch.storage;
+
Index: src/java/org/apache/nutch/tools/Benchmark.java
===================================================================
--- src/java/org/apache/nutch/tools/Benchmark.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/Benchmark.java	(working copy)
@@ -50,7 +50,8 @@
     System.exit(res);
   }
 
-  private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception {
+  private void createSeeds(FileSystem fs, Path seedsDir, int count)
+      throws Exception {
     OutputStream os = fs.create(new Path(seedsDir, "seeds"));
     for (int i = 0; i < count; i++) {
       String url = "http://www.test-" + i + ".com/\r\n";
@@ -61,7 +62,7 @@
   }
 
   public static final class BenchmarkResults {
-    Map<String,Map<String,Long>> timings = new HashMap<String,Map<String,Long>>();
+    Map<String, Map<String, Long>> timings = new HashMap<String, Map<String, Long>>();
     List<String> runs = new ArrayList<String>();
     List<String> stages = new ArrayList<String>();
     int seeds, depth, threads;
@@ -76,9 +77,9 @@
       if (!stages.contains(stage)) {
         stages.add(stage);
       }
-      Map<String,Long> t = timings.get(stage);
+      Map<String, Long> t = timings.get(stage);
       if (t == null) {
-        t = new HashMap<String,Long>();
+        t = new HashMap<String, Long>();
         timings.put(stage, t);
       }
       t.put(run, timing);
@@ -94,8 +95,9 @@
       sb.append("* TopN:\t" + topN + "\n");
       sb.append("* TOTAL ELAPSED:\t" + elapsed + "\n");
       for (String stage : stages) {
-        Map<String,Long> timing = timings.get(stage);
-        if (timing == null) continue;
+        Map<String, Long> timing = timings.get(stage);
+        if (timing == null)
+          continue;
         sb.append("- stage: " + stage + "\n");
         for (String r : runs) {
           Long Time = timing.get(r);
@@ -111,6 +113,7 @@
     public List<String> getStages() {
       return stages;
     }
+
     public List<String> getRuns() {
       return runs;
     }
@@ -121,21 +124,28 @@
     int seeds = 1;
     int depth = 10;
     int threads = 10;
-    //boolean delete = true;
+    // boolean delete = true;
     long topN = Long.MAX_VALUE;
 
     if (args.length == 0) {
-      System.err.println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
-      System.err.println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
-      System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
+      System.err
+          .println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
+      System.err
+          .println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
+      System.err
+          .println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
       System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
-      System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
+      System.err
+          .println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
       // XXX what is the equivalent here? not an additional job...
       // System.err.println("\t-keep\tkeep batchId data (default: delete after updatedb)");
       System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
-      System.err.println("\tNOTE: if not specified, this is reset to: " + plugins);
-      System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
-      System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
+      System.err.println("\tNOTE: if not specified, this is reset to: "
+          + plugins);
+      System.err
+          .println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
+      System.err
+          .println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
       return -1;
     }
     int maxPerHost = Integer.MAX_VALUE;
@@ -157,13 +167,14 @@
         return -1;
       }
     }
-    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN, plugins);
+    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN,
+        plugins);
     System.out.println(res);
     return 0;
   }
 
-  public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost,
-        long topN, String plugins) throws Exception {
+  public BenchmarkResults benchmark(int seeds, int depth, int threads,
+      int maxPerHost, long topN, String plugins) throws Exception {
     Configuration conf = getConf();
     conf.set("http.proxy.host", "localhost");
     conf.setInt("http.proxy.port", 8181);
@@ -173,11 +184,12 @@
       conf.set("plugin.includes", plugins);
     }
     conf.setInt(GeneratorJob.GENERATOR_MAX_COUNT, maxPerHost);
-    conf.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    conf.set(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     Job job = new NutchJob(conf);
     FileSystem fs = FileSystem.get(job.getConfiguration());
-    Path dir = new Path(getConf().get("hadoop.tmp.dir"),
-            "bench-" + System.currentTimeMillis());
+    Path dir = new Path(getConf().get("hadoop.tmp.dir"), "bench-"
+        + System.currentTimeMillis());
     fs.mkdirs(dir);
     Path rootUrlDir = new Path(dir, "seed");
     fs.mkdirs(rootUrlDir);
@@ -204,7 +216,7 @@
     ParserJob parseBatch = new ParserJob(conf);
     DbUpdaterJob crawlDbTool = new DbUpdaterJob(conf);
     // not needed in the new API
-    //LinkDb linkDbTool = new LinkDb(getConf());
+    // LinkDb linkDbTool = new LinkDb(getConf());
 
     long start = System.currentTimeMillis();
     // initialize crawlDb
@@ -212,10 +224,10 @@
     long delta = System.currentTimeMillis() - start;
     res.addTiming("inject", "0", delta);
     int i;
-    for (i = 0; i < depth; i++) {             // generate new batch
+    for (i = 0; i < depth; i++) { // generate new batch
       start = System.currentTimeMillis();
       String batchId = generator.generate(topN, System.currentTimeMillis(),
-              false, false);
+          false, false);
       delta = System.currentTimeMillis() - start;
       res.addTiming("generate", i + "", delta);
       if (batchId == null) {
@@ -224,12 +236,12 @@
       }
       boolean isParsing = getConf().getBoolean("fetcher.parse", false);
       start = System.currentTimeMillis();
-      fetcher.fetch(batchId, threads, false, -1);  // fetch it
+      fetcher.fetch(batchId, threads, false, -1); // fetch it
       delta = System.currentTimeMillis() - start;
       res.addTiming("fetch", i + "", delta);
       if (!isParsing) {
         start = System.currentTimeMillis();
-        parseBatch.parse(batchId, false, false);    // parse it, if needed
+        parseBatch.parse(batchId, false, false); // parse it, if needed
         delta = System.currentTimeMillis() - start;
         res.addTiming("parse", i + "", delta);
       }
@@ -241,7 +253,9 @@
     if (i == 0) {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl finished: " + dir);
+    }
     res.elapsed = System.currentTimeMillis() - res.elapsed;
     WebTableReader dbreader = new WebTableReader();
     dbreader.setConf(conf);
Index: src/java/org/apache/nutch/tools/DmozParser.java
===================================================================
--- src/java/org/apache/nutch/tools/DmozParser.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/DmozParser.java	(working copy)
@@ -40,17 +40,16 @@
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.TableUtil;
 
-
 /** Utility that converts DMOZ RDF into a flat file of URLs to be injected. */
 public class DmozParser {
   public static final Logger LOG = LoggerFactory.getLogger(DmozParser.class);
-  
-    long pages = 0;
-    private static DataStore<String, WebPage> store = null;
-    
+
+  long pages = 0;
+  private static DataStore<String, WebPage> store = null;
+
   /**
-   * This filter fixes characters that might offend our parser.
-   * This lets us be tolerant of errors that might appear in the input XML.
+   * This filter fixes characters that might offend our parser. This lets us be
+   * tolerant of errors that might appear in the input XML.
    */
   private static class XMLCharFilter extends FilterReader {
     private boolean lastBad = false;
@@ -62,9 +61,9 @@
     public int read() throws IOException {
       int c = in.read();
       int value = c;
-      if (c != -1 && !(XMLChar.isValid(c)))     // fix invalid characters
+      if (c != -1 && !(XMLChar.isValid(c))) // fix invalid characters
         value = 'X';
-      else if (lastBad && c == '<') {           // fix mis-matched brackets
+      else if (lastBad && c == '<') { // fix mis-matched brackets
         in.mark(1);
         if (in.read() != '/')
           value = 'X';
@@ -75,21 +74,20 @@
       return value;
     }
 
-    public int read(char[] cbuf, int off, int len)
-      throws IOException {
+    public int read(char[] cbuf, int off, int len) throws IOException {
       int n = in.read(cbuf, off, len);
       if (n != -1) {
         for (int i = 0; i < n; i++) {
-          char c = cbuf[off+i];
+          char c = cbuf[off + i];
           char value = c;
-          if (!(XMLChar.isValid(c)))            // fix invalid characters
+          if (!(XMLChar.isValid(c))) // fix invalid characters
             value = 'X';
-          else if (lastBad && c == '<') {       // fix mis-matched brackets
-            if (i != n-1 && cbuf[off+i+1] != '/')
+          else if (lastBad && c == '<') { // fix mis-matched brackets
+            if (i != n - 1 && cbuf[off + i + 1] != '/')
               value = 'X';
           }
           lastBad = (c == 65533);
-          cbuf[off+i] = value;
+          cbuf[off + i] = value;
         }
       }
       return n;
@@ -96,16 +94,15 @@
     }
   }
 
-
   /**
-   * The RDFProcessor receives tag messages during a parse
-   * of RDF XML data.  We build whatever structures we need
-   * from these messages.
+   * The RDFProcessor receives tag messages during a parse of RDF XML data. We
+   * build whatever structures we need from these messages.
    */
   private class RDFProcessor extends DefaultHandler {
     String curURL = null, curSection = null;
-    boolean titlePending = false, descPending = false, insideAdultSection = false;
-    Pattern topicPattern = null; 
+    boolean titlePending = false, descPending = false,
+        insideAdultSection = false;
+    Pattern topicPattern = null;
     StringBuffer title = new StringBuffer(), desc = new StringBuffer();
     XMLReader reader;
     int subsetDenom;
@@ -115,16 +112,18 @@
     Locator location;
 
     /**
-     * Pass in an XMLReader, plus a flag as to whether we 
-     * should include adult material.
+     * Pass in an XMLReader, plus a flag as to whether we should include adult
+     * material.
      */
-    public RDFProcessor(XMLReader reader, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern, boolean snippet) throws IOException {
+    public RDFProcessor(XMLReader reader, int subsetDenom,
+        boolean includeAdult, int skew, Pattern topicPattern, boolean snippet)
+        throws IOException {
       this.reader = reader;
       this.subsetDenom = subsetDenom;
       this.includeAdult = includeAdult;
       this.topicPattern = topicPattern;
       this.snippet = snippet;
-      
+
       this.hashSkew = skew != 0 ? skew : new Random().nextInt();
     }
 
@@ -135,20 +134,21 @@
     /**
      * Start of an XML elt
      */
-    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
+    public void startElement(String namespaceURI, String localName,
+        String qName, Attributes atts) throws SAXException {
       if ("Topic".equals(qName)) {
         curSection = atts.getValue("r:id");
       } else if ("ExternalPage".equals(qName)) {
         // Porn filter
-        if ((! includeAdult) && curSection.startsWith("Top/Adult")) {
+        if ((!includeAdult) && curSection.startsWith("Top/Adult")) {
           return;
         }
-          
+
         if (topicPattern != null && !topicPattern.matcher(curSection).matches()) {
           return;
         }
 
-        // Subset denominator filter.  
+        // Subset denominator filter.
         // Only emit with a chance of 1/denominator.
         String url = atts.getValue("about");
         int hashValue = MD5Hash.digest(url).hashCode();
@@ -181,40 +181,42 @@
      * Termination of XML elt
      */
     public void endElement(String namespaceURI, String localName, String qName)
-      throws SAXException {
+        throws SAXException {
       if (curURL != null) {
         if ("ExternalPage".equals(qName)) {
           //
-          // Inc the number of pages, insert the page, and 
+          // Inc the number of pages, insert the page, and
           // possibly print status.
           //
-          if(snippet){
+          if (snippet) {
             try {
               String reversedUrl = TableUtil.reverseUrl(curURL);
               WebPage row = store.get(reversedUrl);
-              
-              if(row!=null){
+
+              if (row != null) {
                 if (desc.length() > 0) {
-                  row.getMetadata().put(new Utf8("_dmoz_desc_"), ByteBuffer.wrap(desc.toString().getBytes()));
+                  row.getMetadata().put(new Utf8("_dmoz_desc_"),
+                      ByteBuffer.wrap(desc.toString().getBytes()));
                   desc.delete(0, desc.length());
                 }
                 if (title.length() > 0) {
-                  row.getMetadata().put(new Utf8("_dmoz_title_"), ByteBuffer.wrap(title.toString().getBytes()));
+                  row.getMetadata().put(new Utf8("_dmoz_title_"),
+                      ByteBuffer.wrap(title.toString().getBytes()));
                   title.delete(0, title.length());
                 }
                 store.put(reversedUrl, row);
                 store.flush();
               }
-              
-             } catch (IOException e) {
+
+            } catch (IOException e) {
               // TODO Auto-generated catch block
               e.printStackTrace();
-             }
+            }
           } else {
-            System.out.println(curURL); 
-            
+            System.out.println(curURL);
+
             //
-            // Clear out the link text.  This is what
+            // Clear out the link text. This is what
             // you would use for adding to the linkdb.
             //
             if (desc.length() > 0) {
@@ -225,7 +227,7 @@
             }
           }
           pages++;
-          
+
           // Null out the URL.
           curURL = null;
         } else if ("d:Title".equals(qName)) {
@@ -252,15 +254,13 @@
     }
 
     /**
-     * From time to time the Parser will set the "current location"
-     * by calling this function.  It's useful for emitting locations
-     * for error messages.
+     * From time to time the Parser will set the "current location" by calling
+     * this function. It's useful for emitting locations for error messages.
      */
     public void setDocumentLocator(Locator locator) {
       location = locator;
     }
 
-
     //
     // Interface ErrorHandler
     //
@@ -280,11 +280,11 @@
     public void fatalError(SAXParseException spe) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Fatal err: " + spe.toString() + ": " + spe.getMessage());
-        LOG.error("Last known line is " + location.getLineNumber() +
-                  ", column " + location.getColumnNumber());
+        LOG.error("Last known line is " + location.getLineNumber()
+            + ", column " + location.getColumnNumber());
       }
     }
-        
+
     /**
      * Emit exception warning message
      */
@@ -296,16 +296,13 @@
   }
 
   /**
-   * Iterate through all the items in this structured DMOZ file.
-   * Add each URL to the web db.
+   * Iterate through all the items in this structured DMOZ file. Add each URL to
+   * the web db.
    */
   public void parseDmozFile(File dmozFile, int subsetDenom,
-                            boolean includeAdult,
-                            int skew,
-                            Pattern topicPattern,
-                            boolean snippet)
+      boolean includeAdult, int skew, Pattern topicPattern, boolean snippet)
 
-    throws IOException, SAXException, ParserConfigurationException {
+  throws IOException, SAXException, ParserConfigurationException {
 
     SAXParserFactory parserFactory = SAXParserFactory.newInstance();
     SAXParser parser = parserFactory.newSAXParser();
@@ -312,19 +309,20 @@
     XMLReader reader = parser.getXMLReader();
 
     // Create our own processor to receive SAX events
-    RDFProcessor rp =
-      new RDFProcessor(reader, subsetDenom, includeAdult,
-                       skew, topicPattern, snippet);
+    RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew,
+        topicPattern, snippet);
     reader.setContentHandler(rp);
     reader.setErrorHandler(rp);
     LOG.info("skew = " + rp.hashSkew);
 
     //
-    // Open filtered text stream.  The TextFilter makes sure that
+    // Open filtered text stream. The TextFilter makes sure that
     // only appropriate XML-approved Text characters are received.
     // Any non-conforming characters are silently skipped.
     //
-    XMLCharFilter in = new XMLCharFilter(new BufferedReader(new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8")));
+    XMLCharFilter in = new XMLCharFilter(new BufferedReader(
+        new InputStreamReader(new BufferedInputStream(new FileInputStream(
+            dmozFile)), "UTF-8")));
     try {
       InputSource is = new InputSource(in);
       reader.parse(is);
@@ -338,18 +336,17 @@
     }
   }
 
-  private static void addTopicsFromFile(String topicFile,
-                                        Vector<String> topics)
-  throws IOException {
+  private static void addTopicsFromFile(String topicFile, Vector<String> topics)
+      throws IOException {
     BufferedReader in = null;
     try {
-      in = new BufferedReader(new InputStreamReader(new FileInputStream(topicFile), "UTF-8"));
+      in = new BufferedReader(new InputStreamReader(new FileInputStream(
+          topicFile), "UTF-8"));
       String line = null;
       while ((line = in.readLine()) != null) {
         topics.addElement(new String(line));
       }
-    } 
-    catch (Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Failed with the following exception: ", e.toString());
       }
@@ -358,18 +355,19 @@
       in.close();
     }
   }
-    
+
   /**
-   * Command-line access.  User may add URLs via a flat text file
-   * or the structured DMOZ file.  By default, we ignore Adult
-   * material (as categorized by DMOZ).
+   * Command-line access. User may add URLs via a flat text file or the
+   * structured DMOZ file. By default, we ignore Adult material (as categorized
+   * by DMOZ).
    */
   public static void main(String argv[]) throws Exception {
     if (argv.length < 1) {
-      System.err.println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-snippet] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
+      System.err
+          .println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-snippet] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
       return;
     }
-    
+
     //
     // Parse the command line, figure out what kind of
     // URL file we need to load
@@ -379,11 +377,11 @@
     String dmozFile = argv[0];
     boolean includeAdult = false;
     boolean snippet = false;
-    Pattern topicPattern = null; 
+    Pattern topicPattern = null;
     Vector<String> topics = new Vector<String>();
-    
+
     Configuration conf = NutchConfiguration.create();
-    store = StorageUtils.createWebStore(conf,String.class, WebPage.class);
+    store = StorageUtils.createWebStore(conf, String.class, WebPage.class);
     FileSystem fs = FileSystem.get(conf);
     try {
       for (int i = 1; i < argv.length; i++) {
@@ -390,18 +388,18 @@
         if ("-includeAdultMaterial".equals(argv[i])) {
           includeAdult = true;
         } else if ("-subset".equals(argv[i])) {
-          subsetDenom = Integer.parseInt(argv[i+1]);
+          subsetDenom = Integer.parseInt(argv[i + 1]);
           i++;
         } else if ("-topic".equals(argv[i])) {
-          topics.addElement(argv[i+1]); 
+          topics.addElement(argv[i + 1]);
           i++;
         } else if ("-topicFile".equals(argv[i])) {
-          addTopicsFromFile(argv[i+1], topics);
+          addTopicsFromFile(argv[i + 1], topics);
           i++;
         } else if ("-skew".equals(argv[i])) {
-          skew = Integer.parseInt(argv[i+1]);
+          skew = Integer.parseInt(argv[i + 1]);
           i++;
-        }else if ("-snippet".equals(argv[i])) {
+        } else if ("-snippet".equals(argv[i])) {
           snippet = true;
         }
       }
@@ -409,21 +407,21 @@
       DmozParser parser = new DmozParser();
 
       if (!topics.isEmpty()) {
-        String regExp = new String("^("); 
+        String regExp = new String("^(");
         int j = 0;
-        for ( ; j < topics.size() - 1; ++j) {
+        for (; j < topics.size() - 1; ++j) {
           regExp = regExp.concat(topics.get(j));
           regExp = regExp.concat("|");
         }
         regExp = regExp.concat(topics.get(j));
-        regExp = regExp.concat(").*"); 
+        regExp = regExp.concat(").*");
         LOG.info("Topic selection pattern = " + regExp);
-        topicPattern = Pattern.compile(regExp); 
+        topicPattern = Pattern.compile(regExp);
       }
 
-      parser.parseDmozFile(new File(dmozFile), subsetDenom,
-                           includeAdult, skew, topicPattern, snippet);
-      
+      parser.parseDmozFile(new File(dmozFile), subsetDenom, includeAdult, skew,
+          topicPattern, snippet);
+
     } finally {
       fs.close();
     }
Index: src/java/org/apache/nutch/tools/ResolveUrls.java
===================================================================
--- src/java/org/apache/nutch/tools/ResolveUrls.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/ResolveUrls.java	(working copy)
@@ -59,8 +59,7 @@
   /**
    * A Thread which gets the ip address of a single host by name.
    */
-  private static class ResolverThread
-    extends Thread {
+  private static class ResolverThread extends Thread {
 
     private String url = null;
 
@@ -74,13 +73,12 @@
       String host = URLUtil.getHost(url);
       long start = System.currentTimeMillis();
       try {
-        // get the address by name and if no error is thrown then it 
+        // get the address by name and if no error is thrown then it
         // is resolved successfully
         InetAddress.getByName(host);
         LOG.info("Resolved: " + host);
         numResolved.incrementAndGet();
-      }
-      catch (Exception uhe) {
+      } catch (Exception uhe) {
         LOG.info("Error Resolving: " + host);
         numErrored.incrementAndGet();
       }
@@ -92,8 +90,8 @@
   }
 
   /**
-   * Creates a thread pool for resolving urls.  Reads in the url file on the
-   * local filesystem.  For each url it attempts to resolve it keeping a total
+   * Creates a thread pool for resolving urls. Reads in the url file on the
+   * local filesystem. For each url it attempts to resolve it keeping a total
    * account of the number resolved, errored, and the amount of time.
    */
   public void resolveUrls() {
@@ -102,13 +100,13 @@
 
       // create a thread pool with a fixed number of threads
       pool = Executors.newFixedThreadPool(numThreads);
-      
+
       // read in the urls file and loop through each line, one url per line
       BufferedReader buffRead = new BufferedReader(new FileReader(new File(
-        urlsFile)));
+          urlsFile)));
       String urlStr = null;
       while ((urlStr = buffRead.readLine()) != null) {
-        
+
         // spin up a resolver thread per url
         LOG.info("Starting: " + urlStr);
         pool.execute(new ResolverThread(urlStr));
@@ -118,9 +116,8 @@
       // the thread pool to give urls time to finish resolving
       buffRead.close();
       pool.awaitTermination(60, TimeUnit.SECONDS);
-    }
-    catch (Exception e) {
-      
+    } catch (Exception e) {
+
       // on error shutdown the thread pool immediately
       pool.shutdownNow();
       LOG.info(StringUtils.stringifyException(e));
@@ -128,15 +125,16 @@
 
     // shutdown the thread pool and log totals
     pool.shutdown();
-    LOG.info("Total: " + numTotal.get() + ", Resovled: "
-      + numResolved.get() + ", Errored: " + numErrored.get()
-      + ", Average Time: " + totalTime.get() / numTotal.get());
+    LOG.info("Total: " + numTotal.get() + ", Resolved: " + numResolved.get()
+        + ", Errored: " + numErrored.get() + ", Average Time: "
+        + totalTime.get() / numTotal.get());
   }
 
   /**
    * Create a new ResolveUrls with a file from the local file system.
-   *
-   * @param urlsFile The local urls file, one url per line.
+   * 
+   * @param urlsFile
+   *          The local urls file, one url per line.
    */
   public ResolveUrls(String urlsFile) {
     this(urlsFile, 100);
@@ -144,10 +142,12 @@
 
   /**
    * Create a new ResolveUrls with a urls file and a number of threads for the
-   * Thread pool.  Number of threads is 100 by default.
+   * Thread pool. Number of threads is 100 by default.
    * 
-   * @param urlsFile The local urls file, one url per line.
-   * @param numThreads The number of threads used to resolve urls in parallel.
+   * @param urlsFile
+   *          The local urls file, one url per line.
+   * @param numThreads
+   *          The number of threads used to resolve urls in parallel.
    */
   public ResolveUrls(String urlsFile, int numThreads) {
     this.urlsFile = urlsFile;
@@ -163,17 +163,17 @@
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
-    
+
     OptionBuilder.withArgName("urls");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the urls file to check");
     Option urlOpts = OptionBuilder.create("urls");
-    
+
     OptionBuilder.withArgName("numThreads");
     OptionBuilder.hasArgs();
     OptionBuilder.withDescription("the number of threads to use");
     Option numThreadOpts = OptionBuilder.create("numThreads");
-    
+
     options.addOption(helpOpts);
     options.addOption(urlOpts);
     options.addOption(numThreadOpts);
@@ -198,8 +198,7 @@
       }
       ResolveUrls resolve = new ResolveUrls(urls, numThreads);
       resolve.resolveUrls();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ResolveUrls: " + StringUtils.stringifyException(e));
     }
   }
Index: src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(working copy)
@@ -30,21 +30,22 @@
 /**
  * A input format the reads arc files.
  */
-public class ArcInputFormat
-  extends FileInputFormat<Text, BytesWritable> {
+public class ArcInputFormat extends FileInputFormat<Text, BytesWritable> {
 
   /**
    * Returns the <code>RecordReader</code> for reading the arc file.
    * 
-   * @param split The InputSplit of the arc file to process.
-   * @param job The job configuration.
-   * @param reporter The progress reporter.
+   * @param split
+   *          The InputSplit of the arc file to process.
+   * @param job
+   *          The job configuration.
+   * @param reporter
+   *          The progress reporter.
    */
   public RecordReader<Text, BytesWritable> getRecordReader(InputSplit split,
-      JobConf job, Reporter reporter)
-    throws IOException {
+      JobConf job, Reporter reporter) throws IOException {
     reporter.setStatus(split.toString());
-    return new ArcRecordReader(job, (FileSplit)split);
+    return new ArcRecordReader(job, (FileSplit) split);
   }
 
 }
Index: src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(working copy)
@@ -34,23 +34,29 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * <p>The <code>ArchRecordReader</code> class provides a record reader which 
- * reads records from arc files.</p>
+ * <p>
+ * The <code>ArchRecordReader</code> class provides a record reader which reads
+ * records from arc files.
+ * </p>
  * 
- * <p>Arc files are essentially tars of gzips.  Each record in an arc file is
- * a compressed gzip.  Multiple records are concatenated together to form a
- * complete arc.  For more information on the arc file format see
- * {@link http://www.archive.org/web/researcher/ArcFileFormat.php}.</p>
+ * <p>
+ * Arc files are essentially tars of gzips. Each record in an arc file is a
+ * compressed gzip. Multiple records are concatenated together to form a
+ * complete arc. For more information on the arc file format see
+ * <a href="http://www.archive.org/web/researcher/ArcFileFormat.php">ArcFileFormat</a>.
+ * </p>
  * 
- * <p>Arc files are used by the internet archive and grub projects.</p>
+ * <p>
+ * Arc files are used by the internet archive and grub projects.
+ * </p>
  * 
  * @see http://www.archive.org/
  * @see http://www.grub.org/
  */
-public class ArcRecordReader
-  implements RecordReader<Text, BytesWritable> {
+public class ArcRecordReader implements RecordReader<Text, BytesWritable> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ArcRecordReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ArcRecordReader.class);
 
   protected Configuration conf;
   protected long splitStart = 0;
@@ -60,23 +66,25 @@
   protected long fileLen = 0;
   protected FSDataInputStream in;
 
-  private static byte[] MAGIC = {(byte)0x1F, (byte)0x8B};
+  private static byte[] MAGIC = { (byte) 0x1F, (byte) 0x8B };
 
   /**
-   * <p>Returns true if the byte array passed matches the gzip header magic 
-   * number.</p>
+   * <p>
+   * Returns true if the byte array passed matches the gzip header magic number.
+   * </p>
    * 
-   * @param input The byte array to check.
+   * @param input
+   *          The byte array to check.
    * 
    * @return True if the byte array matches the gzip header magic number.
    */
   public static boolean isMagic(byte[] input) {
 
-	// check for null and incorrect length
+    // check for null and incorrect length
     if (input == null || input.length != MAGIC.length) {
       return false;
     }
-    
+
     // check byte by byte
     for (int i = 0; i < MAGIC.length; i++) {
       if (MAGIC[i] != input[i]) {
@@ -83,7 +91,7 @@
         return false;
       }
     }
-    
+
     // must match
     return true;
   }
@@ -91,13 +99,16 @@
   /**
    * Constructor that sets the configuration and file split.
    * 
-   * @param conf The job configuration.
-   * @param split The file split to read from.
+   * @param conf
+   *          The job configuration.
+   * @param split
+   *          The file split to read from.
    * 
-   * @throws IOException  If an IO error occurs while initializing file split.
+   * @throws IOException
+   *           If an IO error occurs while initializing file split.
    */
   public ArcRecordReader(Configuration conf, FileSplit split)
-    throws IOException {
+      throws IOException {
 
     Path path = split.getPath();
     FileSystem fs = path.getFileSystem(conf);
@@ -113,8 +124,7 @@
   /**
    * Closes the record reader resources.
    */
-  public void close()
-    throws IOException {
+  public void close() throws IOException {
     this.in.close();
   }
 
@@ -137,63 +147,64 @@
    * 
    * @return The long of the current position in the file.
    */
-  public long getPos()
-    throws IOException {
+  public long getPos() throws IOException {
     return in.getPos();
   }
 
   /**
-   * Returns the percentage of progress in processing the file.  This will be
+   * Returns the percentage of progress in processing the file. This will be
    * represented as a float from 0 to 1 with 1 being 100% completed.
    * 
    * @return The percentage of progress as a float from 0 to 1.
    */
-  public float getProgress()
-    throws IOException {
-	  
+  public float getProgress() throws IOException {
+
     // if we haven't even started
     if (splitEnd == splitStart) {
       return 0.0f;
+    } else {
+      // the progress is current pos - where we started / length of the split
+      return Math.min(1.0f, (getPos() - splitStart) / (float) splitLen);
     }
-    else {
-      // the progress is current pos - where we started  / length of the split
-      return Math.min(1.0f, (getPos() - splitStart) / (float)splitLen);
-    }
   }
 
   /**
-   * <p>Returns true if the next record in the split is read into the key and 
-   * value pair.  The key will be the arc record header and the values will be
-   * the raw content bytes of the arc record.</p>
+   * <p>
+   * Returns true if the next record in the split is read into the key and value
+   * pair. The key will be the arc record header and the values will be the raw
+   * content bytes of the arc record.
+   * </p>
    * 
-   * @param key The record key
-   * @param value The record value
+   * @param key
+   *          The record key
+   * @param value
+   *          The record value
    * 
    * @return True if the next record is read.
    * 
-   * @throws IOException If an error occurs while reading the record value.
+   * @throws IOException
+   *           If an error occurs while reading the record value.
    */
-  public boolean next(Text key, BytesWritable value)
-    throws IOException {
+  public boolean next(Text key, BytesWritable value) throws IOException {
 
     try {
-      
+
       // get the starting position on the input stream
       long startRead = in.getPos();
       byte[] magicBuffer = null;
-      
+
       // we need this loop to handle false positives in reading of gzip records
       while (true) {
-        
+
         // while we haven't passed the end of the split
         if (startRead >= splitEnd) {
           return false;
         }
-        
+
         // scanning for the gzip header
         boolean foundStart = false;
         while (!foundStart) {
-          
+
           // start at the current file position and scan for 1K at time, break
           // if there is no more to read
           startRead = in.getPos();
@@ -202,13 +213,13 @@
           if (read < 0) {
             break;
           }
-          
-          // scan the byte array for the gzip header magic number.  This happens
+
+          // scan the byte array for the gzip header magic number. This happens
           // byte by byte
           for (int i = 0; i < read - 1; i++) {
             byte[] testMagic = new byte[2];
-            System.arraycopy(magicBuffer, i, testMagic, 0, 2);            
-            if (isMagic(testMagic)) {              
+            System.arraycopy(magicBuffer, i, testMagic, 0, 2);
+            if (isMagic(testMagic)) {
               // set the next start to the current gzip header
               startRead += i;
               foundStart = true;
@@ -216,7 +227,7 @@
             }
           }
         }
-        
+
         // seek to the start of the gzip header
         in.seek(startRead);
         ByteArrayOutputStream baos = null;
@@ -223,7 +234,7 @@
         int totalRead = 0;
 
         try {
-          
+
           // read 4K of the gzip at a time putting into a byte array
           byte[] buffer = new byte[4096];
           GZIPInputStream zin = new GZIPInputStream(in);
@@ -233,9 +244,8 @@
             baos.write(buffer, 0, gzipRead);
             totalRead += gzipRead;
           }
-        }
-        catch (Exception e) {
-          
+        } catch (Exception e) {
+
           // there are times we get false positives where the gzip header exists
           // but it is not an actual gzip record, so we ignore it and start
           // over seeking
@@ -248,7 +258,7 @@
 
         // change the output stream to a byte array
         byte[] content = baos.toByteArray();
-        
+
         // the first line of the raw content in arc files is the header
         int eol = 0;
         for (int i = 0; i < content.length; i++) {
@@ -257,12 +267,12 @@
             break;
           }
         }
-        
+
         // create the header and the raw content minus the header
         String header = new String(content, 0, eol).trim();
         byte[] raw = new byte[(content.length - eol) - 1];
         System.arraycopy(content, eol + 1, raw, 0, raw.length);
-        
+
         // populate key and values with the header and raw content.
         Text keyText = key;
         keyText.set(header);
@@ -269,22 +279,21 @@
         BytesWritable valueBytes = value;
         valueBytes.set(raw, 0, raw.length);
 
-        // TODO: It would be best to start at the end of the gzip read but 
-        // the bytes read in gzip don't match raw bytes in the file so we 
-        // overshoot the next header.  With this current method you get
+        // TODO: It would be best to start at the end of the gzip read but
+        // the bytes read in gzip don't match raw bytes in the file so we
+        // overshoot the next header. With this current method you get
         // some false positives but don't miss records.
         if (startRead + 1 < fileLen) {
           in.seek(startRead + 1);
         }
-        
+
         // populated the record, now return
         return true;
       }
+    } catch (Exception e) {
+      LOG.error(StringUtils.stringifyException(e));
     }
-    catch (Exception e) {
-      LOG.equals(StringUtils.stringifyException(e));      
-    }
-    
+
     // couldn't populate the record or there is no next record to read
     return false;
   }
Index: src/java/org/apache/nutch/tools/arc/package-info.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/arc/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * <a href="http://archive.org/web/researcher/ArcFileFormat.php">Arc file format</a>.
  */
 package org.apache.nutch.tools.arc;
+
Index: src/java/org/apache/nutch/tools/package-info.java
===================================================================
--- src/java/org/apache/nutch/tools/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Miscellaneous tools.
  */
 package org.apache.nutch.tools;
+
Index: src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -46,16 +47,17 @@
 
   @Override
   public void handle(String target, HttpServletRequest req,
-          HttpServletResponse res, int dispatch) throws IOException,
-          ServletException {
-    Request base_request = (req instanceof Request) ? (Request)req : HttpConnection.getCurrentConnection().getRequest();
+      HttpServletResponse res, int dispatch) throws IOException,
+      ServletException {
+    Request base_request = (req instanceof Request) ? (Request) req
+        : HttpConnection.getCurrentConnection().getRequest();
     res.addHeader("X-TestbedHandlers", this.getClass().getSimpleName());
     handle(base_request, res, target, dispatch);
   }
-  
-  public abstract void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException;
-  
+
+  public abstract void handle(Request req, HttpServletResponse res,
+      String target, int dispatch) throws IOException, ServletException;
+
   public void addMyHeader(HttpServletResponse res, String name, String value) {
     name = "X-" + this.getClass().getSimpleName() + "-" + name;
     res.addHeader(name, value);
Index: src/java/org/apache/nutch/tools/proxy/DelayHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -41,13 +42,13 @@
 import org.mortbay.jetty.Request;
 
 public class DelayHandler extends AbstractTestbedHandler {
-  
+
   public static final long DEFAULT_DELAY = 2000;
-  
+
   private int delay;
   private boolean random;
   private Random r;
-  
+
   public DelayHandler(int delay) {
     if (delay < 0) {
       delay = -delay;
@@ -59,13 +60,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     try {
       int del = random ? r.nextInt(delay) : delay;
       Thread.sleep(del);
       addMyHeader(res, "Delay", String.valueOf(del));
     } catch (Exception e) {
-      
+
     }
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/FakeHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -44,9 +45,14 @@
 import org.mortbay.jetty.Request;
 
 public class FakeHandler extends AbstractTestbedHandler {
-  /** Create links to hosts generated from a pool of numHosts/numPages random names. */
-  public static enum Mode {UNIQUE, RANDOM};
-    
+  /**
+   * Create links to hosts generated from a pool of numHosts/numPages random
+   * names.
+   */
+  public static enum Mode {
+    UNIQUE, RANDOM
+  };
+
   int numInternalLinks;
   int numExternalLinks;
   Mode hostMode;
@@ -55,34 +61,36 @@
   AtomicLong pageSeq = new AtomicLong(0);
   int numHosts;
   int numPages;
-  
+
   Random r = new Random(1234567890L); // predictable
   Random pageR;
 
-  private static final String testA = 
-    "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n" + 
-    "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>" +
-    "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>" +
-    "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
-  private static final String testB =
-    "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>" +
-    "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>" +
-    "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
+  private static final String testA = "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n"
+      + "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>"
+      + "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>"
+      + "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
+  private static final String testB = "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>"
+      + "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>"
+      + "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
 
   /**
    * Create fake pages.
-   * @param hostMode if UNIQUE then each external outlink will use a unique host name. If
-   * RANDOM then each outlink will use a host name allocated from pool of numHosts.
-   * @param pageMode if UNIQUE then each internal outlinks will use a unique page name.
-   * if RANDOM then each outlink will use a page name allocated from pool of numPages.
+   * 
+   * @param hostMode
+   *          if UNIQUE then each external outlink will use a unique host name.
+   *          If RANDOM then each outlink will use a host name allocated from
+   *          pool of numHosts.
+   * @param pageMode
+   *          if UNIQUE then each internal outlinks will use a unique page name.
+   *          if RANDOM then each outlink will use a page name allocated from
+   *          pool of numPages.
    * @param numInternalLinks
    * @param numExternalLinks
    * @param numHosts
    * @param numPages
    */
-  public FakeHandler(Mode hostMode, Mode pageMode,
-      int numInternalLinks, int numExternalLinks,
-      int numHosts, int numPages) {
+  public FakeHandler(Mode hostMode, Mode pageMode, int numInternalLinks,
+      int numExternalLinks, int numHosts, int numPages) {
     this.numExternalLinks = numExternalLinks;
     this.numInternalLinks = numInternalLinks;
     this.numHosts = numHosts;
@@ -90,10 +98,10 @@
     this.hostMode = hostMode;
     this.pageMode = pageMode;
   }
-  
+
   @Override
-  public void handle(Request req, HttpServletResponse res, String target, 
-          int dispatch) throws IOException, ServletException {
+  public void handle(Request req, HttpServletResponse res, String target,
+      int dispatch) throws IOException, ServletException {
     HttpURI u = req.getUri();
     String uri = u.toString();
     addMyHeader(res, "URI", uri);
@@ -126,7 +134,7 @@
       for (int i = 0; i < numInternalLinks; i++) {
         String link = "<p><a href='";
         if (pageMode.equals(Mode.RANDOM)) {
-          link += pageR.nextInt (numPages) + ".html'>";
+          link += pageR.nextInt(numPages) + ".html'>";
         } else {
           if (!basePath.endsWith("/")) {
             link += "/";
@@ -157,13 +165,14 @@
       }
       // fake a link to the root URL
       link = "<p><a href='" + u.getScheme() + "://" + u.getHost();
-      if (u.getPort() != 80 && u.getPort() != -1) link += ":" + u.getPort();
+      if (u.getPort() != 80 && u.getPort() != -1)
+        link += ":" + u.getPort();
       link += "/'>site " + u.getHost() + "</a></p>\r\n";
       os.write(link.getBytes());
       os.write(testB.getBytes());
       res.flushBuffer();
     } catch (IOException ioe) {
-    }    
+    }
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -47,23 +48,27 @@
 import org.mortbay.jetty.Request;
 
 public class LogDebugHandler extends AbstractTestbedHandler implements Filter {
-  private static final Logger LOG = LoggerFactory.getLogger(LogDebugHandler.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(LogDebugHandler.class);
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
-    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n" + req.getConnection().getRequestFields());
+      int dispatch) throws IOException, ServletException {
+    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n"
+        + req.getConnection().getRequestFields());
   }
 
   @Override
   public void doFilter(ServletRequest req, ServletResponse res,
-          FilterChain chain) throws IOException, ServletException {
-    ((HttpServletResponse)res).addHeader("X-Handled-By", "AsyncProxyHandler");
-    ((HttpServletResponse)res).addHeader("X-TestbedHandlers", "AsyncProxyHandler");
+      FilterChain chain) throws IOException, ServletException {
+    ((HttpServletResponse) res).addHeader("X-Handled-By", "AsyncProxyHandler");
+    ((HttpServletResponse) res).addHeader("X-TestbedHandlers",
+        "AsyncProxyHandler");
     try {
       chain.doFilter(req, res);
     } catch (Throwable e) {
-      ((HttpServletResponse)res).sendError(HttpServletResponse.SC_BAD_REQUEST, e.toString());
+      ((HttpServletResponse) res).sendError(HttpServletResponse.SC_BAD_REQUEST,
+          e.toString());
     }
   }
 
@@ -70,6 +75,6 @@
   @Override
   public void init(FilterConfig arg0) throws ServletException {
     // TODO Auto-generated method stub
-    
+
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -43,13 +44,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     // don't pass it down the chain
     req.setHandled(true);
     res.addHeader("X-Handled-By", getClass().getSimpleName());
     addMyHeader(res, "URI", req.getUri().toString());
-    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: " +
-            req.getUri().toString());
+    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: "
+        + req.getUri().toString());
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/TestbedProxy.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -52,19 +53,32 @@
    */
   public static void main(String[] args) throws Exception {
     if (args.length == 0) {
-      System.err.println("TestbedProxy [-port <nnn>] [-forward] [-fake [...]] [-delay nnn] [-debug]");
-      System.err.println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
-      System.err.println("-forward\tif specified, requests to all unknown urls will be passed to");
-      System.err.println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
-      System.err.println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
-      System.err.println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
-      System.err.println("\nAdditional options for -fake handler (all optional):");
-      System.err.println("\t-hostMode (u | r)\tcreate unique host names, or pick random from a pool");
-      System.err.println("\t-pageMode (u | r)\tcreate unique page names, or pick random from a pool");
-      System.err.println("\t-numHosts N\ttotal number of hosts when using hostMode r");
-      System.err.println("\t-numPages N\ttotal number of pages per host when using pageMode r");
-      System.err.println("\t-intLinks N\tnumber of internal (same host) links per page");
-      System.err.println("\t-extLinks N\tnumber of external (other host) links per page");
+      System.err
+          .println("TestbedProxy [-port <nnn>] [-forward] [-fake [...]] [-delay nnn] [-debug]");
+      System.err
+          .println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
+      System.err
+          .println("-forward\tif specified, requests to all unknown urls will be passed to");
+      System.err
+          .println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
+      System.err
+          .println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
+      System.err
+          .println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
+      System.err
+          .println("\nAdditional options for -fake handler (all optional):");
+      System.err
+          .println("\t-hostMode (u | r)\tcreate unique host names, or pick random from a pool");
+      System.err
+          .println("\t-pageMode (u | r)\tcreate unique page names, or pick random from a pool");
+      System.err
+          .println("\t-numHosts N\ttotal number of hosts when using hostMode r");
+      System.err
+          .println("\t-numPages N\ttotal number of pages per host when using pageMode r");
+      System.err
+          .println("\t-intLinks N\tnumber of internal (same host) links per page");
+      System.err
+          .println("\t-extLinks N\tnumber of external (other host) links per page");
       System.err.println("\nDefaults for -fake handler:");
       System.err.println("\t-hostMode r");
       System.err.println("\t-pageMode r");
@@ -74,7 +88,7 @@
       System.err.println("\t-extLinks 5");
       System.exit(-1);
     }
-    
+
     Configuration conf = NutchConfiguration.create();
     int port = conf.getInt("batch.proxy.port", 8181);
     boolean forward = false;
@@ -88,7 +102,7 @@
     int numPages = 10000;
     int intLinks = 10;
     int extLinks = 5;
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-port")) {
         port = Integer.parseInt(args[++i]);
@@ -122,7 +136,7 @@
         System.exit(-1);
       }
     }
-    
+
     // Create the server
     Server server = new Server();
     SocketConnector connector = new SocketConnector();
@@ -129,21 +143,23 @@
     connector.setPort(port);
     connector.setResolveNames(false);
     server.addConnector(connector);
-    
+
     // create a list of handlers
     HandlerList list = new HandlerList();
     server.addHandler(list);
-    
+
     if (debug) {
       LOG.info("* Added debug handler.");
       list.addHandler(new LogDebugHandler());
     }
- 
+
     if (delay) {
-      LOG.info("* Added delay handler: " + (delayVal < 0 ? "random delay up to " + (-delayVal) : "constant delay of " + delayVal));
+      LOG.info("* Added delay handler: "
+          + (delayVal < 0 ? "random delay up to " + (-delayVal)
+              : "constant delay of " + delayVal));
       list.addHandler(new DelayHandler(delayVal));
     }
-    
+
     // XXX alternatively, we can add the DispatchHandler as the first one,
     // XXX to activate handler plugins and redirect requests to appropriate
     // XXX handlers ... Here we always load these handlers
Index: src/java/org/apache/nutch/tools/proxy/package-info.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Proxy to {@link org.apache.nutch.tools.Benchmark benchmark} the crawler.
  */
 package org.apache.nutch.tools.proxy;
+
Index: src/java/org/apache/nutch/util/Bytes.java
===================================================================
--- src/java/org/apache/nutch/util/Bytes.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/Bytes.java	(working copy)
@@ -42,1425 +42,1412 @@
  */
 public class Bytes {
 
-	private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
+  private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
 
-	/** When we encode strings, we always specify UTF8 encoding */
-	public static final String UTF8_ENCODING = "UTF-8";
+  /** When we encode strings, we always specify UTF8 encoding */
+  public static final String UTF8_ENCODING = "UTF-8";
 
-	/**
-	 * An empty instance.
-	 */
-	public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+  /**
+   * An empty instance.
+   */
+  public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
 
-	/**
-	 * Size of boolean in bytes
-	 */
-	public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
+  /**
+   * Size of boolean in bytes
+   */
+  public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of byte in bytes
-	 */
-	public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
+  /**
+   * Size of byte in bytes
+   */
+  public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
 
-	/**
-	 * Size of char in bytes
-	 */
-	public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
+  /**
+   * Size of char in bytes
+   */
+  public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of double in bytes
-	 */
-	public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
+  /**
+   * Size of double in bytes
+   */
+  public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of float in bytes
-	 */
-	public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
+  /**
+   * Size of float in bytes
+   */
+  public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of int in bytes
-	 */
-	public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
+  /**
+   * Size of int in bytes
+   */
+  public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of long in bytes
-	 */
-	public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
+  /**
+   * Size of long in bytes
+   */
+  public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of short in bytes
-	 */
-	public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
+  /**
+   * Size of short in bytes
+   */
+  public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
 
-	/**
-	 * Estimate of size cost to pay beyond payload in jvm for instance of byte
-	 * []. Estimate based on study of jhat and jprofiler numbers.
-	 */
-	// JHat says BU is 56 bytes.
-	// SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
-	public static final int ESTIMATED_HEAP_TAX = 16;
+  /**
+   * Estimate of size cost to pay beyond payload in jvm for instance of byte [].
+   * Estimate based on study of jhat and jprofiler numbers.
+   */
+  // JHat says BU is 56 bytes.
+  // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
+  public static final int ESTIMATED_HEAP_TAX = 16;
 
-	/**
-	 * Byte array comparator class.
-	 */
-	public static class ByteArrayComparator implements RawComparator<byte[]> {
-		/**
-		 * Constructor
-		 */
-		public ByteArrayComparator() {
-			super();
-		}
+  /**
+   * Byte array comparator class.
+   */
+  public static class ByteArrayComparator implements RawComparator<byte[]> {
+    /**
+     * Constructor
+     */
+    public ByteArrayComparator() {
+      super();
+    }
 
-		public int compare(byte[] left, byte[] right) {
-			return compareTo(left, right);
-		}
+    public int compare(byte[] left, byte[] right) {
+      return compareTo(left, right);
+    }
 
-		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-			return compareTo(b1, s1, l1, b2, s2, l2);
-		}
-	}
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      return compareTo(b1, s1, l1, b2, s2, l2);
+    }
+  }
 
-	/**
-	 * Pass this to TreeMaps where byte [] are keys.
-	 */
-	public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
+  /**
+   * Pass this to TreeMaps where byte [] are keys.
+   */
+  public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
 
-	/**
-	 * Use comparing byte arrays, byte-by-byte
-	 */
-	public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
+  /**
+   * Use comparing byte arrays, byte-by-byte
+   */
+  public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
 
-	/**
-	 * Read byte-array written with a WritableableUtils.vint prefix.
-	 * 
-	 * @param in
-	 *            Input to read from.
-	 * @return byte array read off <code>in</code>
-	 * @throws IOException
-	 *             e
-	 */
-	public static byte[] readByteArray(final DataInput in) throws IOException {
-		int len = WritableUtils.readVInt(in);
-		if (len < 0) {
-			throw new NegativeArraySizeException(Integer.toString(len));
-		}
-		byte[] result = new byte[len];
-		in.readFully(result, 0, len);
-		return result;
-	}
+  /**
+   * Read byte-array written with a WritableableUtils.vint prefix.
+   * 
+   * @param in
+   *          Input to read from.
+   * @return byte array read off <code>in</code>
+   * @throws IOException
+   *           e
+   */
+  public static byte[] readByteArray(final DataInput in) throws IOException {
+    int len = WritableUtils.readVInt(in);
+    if (len < 0) {
+      throw new NegativeArraySizeException(Integer.toString(len));
+    }
+    byte[] result = new byte[len];
+    in.readFully(result, 0, len);
+    return result;
+  }
 
-	/**
-	 * Read byte-array written with a WritableableUtils.vint prefix. IOException
-	 * is converted to a RuntimeException.
-	 * 
-	 * @param in
-	 *            Input to read from.
-	 * @return byte array read off <code>in</code>
-	 */
-	public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
-		try {
-			return readByteArray(in);
-		} catch (Exception e) {
-			throw new RuntimeException(e);
-		}
-	}
+  /**
+   * Read byte-array written with a WritableableUtils.vint prefix. IOException
+   * is converted to a RuntimeException.
+   * 
+   * @param in
+   *          Input to read from.
+   * @return byte array read off <code>in</code>
+   */
+  public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
+    try {
+      return readByteArray(in);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
 
-	/**
-	 * Write byte-array with a WritableableUtils.vint prefix.
-	 * 
-	 * @param out
-	 *            output stream to be written to
-	 * @param b
-	 *            array to write
-	 * @throws IOException
-	 *             e
-	 */
-	public static void writeByteArray(final DataOutput out, final byte[] b)
-			throws IOException {
-		if (b == null) {
-			WritableUtils.writeVInt(out, 0);
-		} else {
-			writeByteArray(out, b, 0, b.length);
-		}
-	}
+  /**
+   * Write byte-array with a WritableableUtils.vint prefix.
+   * 
+   * @param out
+   *          output stream to be written to
+   * @param b
+   *          array to write
+   * @throws IOException
+   *           e
+   */
+  public static void writeByteArray(final DataOutput out, final byte[] b)
+      throws IOException {
+    if (b == null) {
+      WritableUtils.writeVInt(out, 0);
+    } else {
+      writeByteArray(out, b, 0, b.length);
+    }
+  }
 
-	/**
-	 * Write byte-array to out with a vint length prefix.
-	 * 
-	 * @param out
-	 *            output stream
-	 * @param b
-	 *            array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length past offset
-	 * @throws IOException
-	 *             e
-	 */
-	public static void writeByteArray(final DataOutput out, final byte[] b,
-			final int offset, final int length) throws IOException {
-		WritableUtils.writeVInt(out, length);
-		out.write(b, offset, length);
-	}
+  /**
+   * Write byte-array to out with a vint length prefix.
+   * 
+   * @param out
+   *          output stream
+   * @param b
+   *          array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length past offset
+   * @throws IOException
+   *           e
+   */
+  public static void writeByteArray(final DataOutput out, final byte[] b,
+      final int offset, final int length) throws IOException {
+    WritableUtils.writeVInt(out, length);
+    out.write(b, offset, length);
+  }
 
-	/**
-	 * Write byte-array from src to tgt with a vint length prefix.
-	 * 
-	 * @param tgt
-	 *            target array
-	 * @param tgtOffset
-	 *            offset into target array
-	 * @param src
-	 *            source array
-	 * @param srcOffset
-	 *            source offset
-	 * @param srcLength
-	 *            source length
-	 * @return New offset in src array.
-	 */
-	public static int writeByteArray(final byte[] tgt, final int tgtOffset,
-			final byte[] src, final int srcOffset, final int srcLength) {
-		byte[] vint = vintToBytes(srcLength);
-		System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
-		int offset = tgtOffset + vint.length;
-		System.arraycopy(src, srcOffset, tgt, offset, srcLength);
-		return offset + srcLength;
-	}
+  /**
+   * Write byte-array from src to tgt with a vint length prefix.
+   * 
+   * @param tgt
+   *          target array
+   * @param tgtOffset
+   *          offset into target array
+   * @param src
+   *          source array
+   * @param srcOffset
+   *          source offset
+   * @param srcLength
+   *          source length
+   * @return New offset in src array.
+   */
+  public static int writeByteArray(final byte[] tgt, final int tgtOffset,
+      final byte[] src, final int srcOffset, final int srcLength) {
+    byte[] vint = vintToBytes(srcLength);
+    System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
+    int offset = tgtOffset + vint.length;
+    System.arraycopy(src, srcOffset, tgt, offset, srcLength);
+    return offset + srcLength;
+  }
 
-	/**
-	 * Put bytes at the specified byte array position.
-	 * 
-	 * @param tgtBytes
-	 *            the byte array
-	 * @param tgtOffset
-	 *            position in the array
-	 * @param srcBytes
-	 *            array to write out
-	 * @param srcOffset
-	 *            source offset
-	 * @param srcLength
-	 *            source length
-	 * @return incremented offset
-	 */
-	public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
-			int srcOffset, int srcLength) {
-		System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
-		return tgtOffset + srcLength;
-	}
+  /**
+   * Put bytes at the specified byte array position.
+   * 
+   * @param tgtBytes
+   *          the byte array
+   * @param tgtOffset
+   *          position in the array
+   * @param srcBytes
+   *          array to write out
+   * @param srcOffset
+   *          source offset
+   * @param srcLength
+   *          source length
+   * @return incremented offset
+   */
+  public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
+      int srcOffset, int srcLength) {
+    System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
+    return tgtOffset + srcLength;
+  }
 
-	/**
-	 * Write a single byte out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param b
-	 *            byte to write out
-	 * @return incremented offset
-	 */
-	public static int putByte(byte[] bytes, int offset, byte b) {
-		bytes[offset] = b;
-		return offset + 1;
-	}
+  /**
+   * Write a single byte out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param b
+   *          byte to write out
+   * @return incremented offset
+   */
+  public static int putByte(byte[] bytes, int offset, byte b) {
+    bytes[offset] = b;
+    return offset + 1;
+  }
 
-	/**
-	 * Returns a new byte array, copied from the passed ByteBuffer.
-	 * 
-	 * @param bb
-	 *            A ByteBuffer
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(ByteBuffer bb) {
-		int length = bb.remaining();
-		byte[] result = new byte[length];
-		System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), result, 0, length);
-		return result;
-	}
+  /**
+   * Returns a new byte array, copied from the passed ByteBuffer.
+   * 
+   * @param bb
+   *          A ByteBuffer
+   * @return the byte array
+   */
+  public static byte[] toBytes(ByteBuffer bb) {
+    int length = bb.remaining();
+    byte[] result = new byte[length];
+    System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), result, 0,
+        length);
+    return result;
+  }
 
-    /**
-     * This method will convert utf8 encoded bytes into a string. If an
-     * UnsupportedEncodingException occurs, this method will eat it and return
-     * null instead.
-     *
-     * @param bb
-     *            Presumed UTF-8 encoded ByteBuffer.
-     * @return String made from <code>b</code> or null
-     */
-    public static String toString(ByteBuffer bb) {
-        return bb == null
-               ? null
-               : toString(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
+  /**
+   * This method will convert utf8 encoded bytes into a string. If an
+   * UnsupportedEncodingException occurs, this method will eat it and return
+   * null instead.
+   * 
+   * @param bb
+   *          Presumed UTF-8 encoded ByteBuffer.
+   * @return String made from <code>b</code> or null
+   */
+  public static String toString(ByteBuffer bb) {
+    return bb == null ? null : toString(bb.array(),
+        bb.arrayOffset() + bb.position(), bb.remaining());
+  }
+
+  /**
+   * @param b
+   *          Presumed UTF-8 encoded byte array.
+   * @return String made from <code>b</code>
+   */
+  public static String toString(final byte[] b) {
+    if (b == null) {
+      return null;
     }
+    return toString(b, 0, b.length);
+  }
 
-	/**
-	 * @param b
-	 *            Presumed UTF-8 encoded byte array.
-	 * @return String made from <code>b</code>
-	 */
-	public static String toString(final byte[] b) {
-		if (b == null) {
-			return null;
-		}
-		return toString(b, 0, b.length);
-	}
+  /**
+   * Joins two byte arrays together using a separator.
+   * 
+   * @param b1
+   *          The first byte array.
+   * @param sep
+   *          The separator to use.
+   * @param b2
+   *          The second byte array.
+   */
+  public static String toString(final byte[] b1, String sep, final byte[] b2) {
+    return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
+  }
 
-	/**
-	 * Joins two byte arrays together using a separator.
-	 * 
-	 * @param b1
-	 *            The first byte array.
-	 * @param sep
-	 *            The separator to use.
-	 * @param b2
-	 *            The second byte array.
-	 */
-	public static String toString(final byte[] b1, String sep, final byte[] b2) {
-		return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
-	}
+  /**
+   * This method will convert utf8 encoded bytes into a string. If an
+   * UnsupportedEncodingException occurs, this method will eat it and return
+   * null instead.
+   * 
+   * @param b
+   *          Presumed UTF-8 encoded byte array.
+   * @param off
+   *          offset into array
+   * @param len
+   *          length of utf-8 sequence
+   * @return String made from <code>b</code> or null
+   */
+  public static String toString(final byte[] b, int off, int len) {
+    if (b == null) {
+      return null;
+    }
+    if (len == 0) {
+      return "";
+    }
+    try {
+      return new String(b, off, len, UTF8_ENCODING);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("UTF-8 not supported?", e);
+      return null;
+    }
+  }
 
-	/**
-	 * This method will convert utf8 encoded bytes into a string. If an
-	 * UnsupportedEncodingException occurs, this method will eat it and return
-	 * null instead.
-	 * 
-	 * @param b
-	 *            Presumed UTF-8 encoded byte array.
-	 * @param off
-	 *            offset into array
-	 * @param len
-	 *            length of utf-8 sequence
-	 * @return String made from <code>b</code> or null
-	 */
-	public static String toString(final byte[] b, int off, int len) {
-		if (b == null) {
-			return null;
-		}
-		if (len == 0) {
-			return "";
-		}
-		try {
-			return new String(b, off, len, UTF8_ENCODING);
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("UTF-8 not supported?", e);
-			return null;
-		}
-	}
+  /**
+   * Write a printable representation of a ByteBuffer. Non-printable characters
+   * are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
+   * 
+   * @param bb
+   *          ByteBuffer to write out
+   * @return string output
+   */
+  public static String toStringBinary(ByteBuffer bb) {
+    return bb == null ? null : toStringBinary(bb.array(),
+        bb.arrayOffset() + bb.position(), bb.remaining());
+  }
 
-    /**
-     * Write a printable representation of a ByteBuffer. Non-printable
-     * characters are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
-     *
-     * @param bb
-     *            ByteBuffer to write out
-     * @return string output
-     */
-    public static String toStringBinary(ByteBuffer bb) {
-        return bb == null
-               ? null
-               : toStringBinary(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
+  /**
+   * Write a printable representation of a byte array.
+   * 
+   * @param b
+   *          byte array
+   * @return string
+   * @see #toStringBinary(byte[], int, int)
+   */
+  public static String toStringBinary(final byte[] b) {
+    return toStringBinary(b, 0, b.length);
+  }
+
+  /**
+   * Write a printable representation of a byte array. Non-printable characters
+   * are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
+   * 
+   * @param b
+   *          array to write out
+   * @param off
+   *          offset to start at
+   * @param len
+   *          length to write
+   * @return string output
+   */
+  public static String toStringBinary(final byte[] b, int off, int len) {
+    StringBuilder result = new StringBuilder();
+    try {
+      String first = new String(b, off, len, "ISO-8859-1");
+      for (int i = 0; i < first.length(); ++i) {
+        int ch = first.charAt(i) & 0xFF;
+        if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z')
+            || (ch >= 'a' && ch <= 'z')
+            || " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
+          result.append(first.charAt(i));
+        } else {
+          result.append(String.format("\\x%02X", ch));
+        }
+      }
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("ISO-8859-1 not supported?", e);
     }
+    return result.toString();
+  }
 
-	/**
-	 * Write a printable representation of a byte array.
-	 * 
-	 * @param b
-	 *            byte array
-	 * @return string
-	 * @see #toStringBinary(byte[], int, int)
-	 */
-	public static String toStringBinary(final byte[] b) {
-		return toStringBinary(b, 0, b.length);
-	}
+  private static boolean isHexDigit(char c) {
+    return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
+  }
 
-	/**
-	 * Write a printable representation of a byte array. Non-printable
-	 * characters are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
-	 * 
-	 * @param b
-	 *            array to write out
-	 * @param off
-	 *            offset to start at
-	 * @param len
-	 *            length to write
-	 * @return string output
-	 */
-	public static String toStringBinary(final byte[] b, int off, int len) {
-		StringBuilder result = new StringBuilder();
-		try {
-			String first = new String(b, off, len, "ISO-8859-1");
-			for (int i = 0; i < first.length(); ++i) {
-				int ch = first.charAt(i) & 0xFF;
-				if ((ch >= '0' && ch <= '9')
-						|| (ch >= 'A' && ch <= 'Z')
-						|| (ch >= 'a' && ch <= 'z')
-						|| " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
-					result.append(first.charAt(i));
-				} else {
-					result.append(String.format("\\x%02X", ch));
-				}
-			}
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("ISO-8859-1 not supported?", e);
-		}
-		return result.toString();
-	}
+  /**
+   * Takes an ASCII digit in the range A-F0-9 and returns the corresponding
+   * integer/ordinal value.
+   * 
+   * @param ch
+   *          The hex digit.
+   * @return The converted hex value as a byte.
+   */
+  public static byte toBinaryFromHex(byte ch) {
+    if (ch >= 'A' && ch <= 'F')
+      return (byte) ((byte) 10 + (byte) (ch - 'A'));
+    // else
+    return (byte) (ch - '0');
+  }
 
-	private static boolean isHexDigit(char c) {
-		return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
-	}
+  public static byte[] toBytesBinary(String in) {
+    // this may be bigger than we need, but lets be safe.
+    byte[] b = new byte[in.length()];
+    int size = 0;
+    for (int i = 0; i < in.length(); ++i) {
+      char ch = in.charAt(i);
+      if (ch == '\\') {
+        // begin hex escape:
+        char next = in.charAt(i + 1);
+        if (next != 'x') {
+          // invalid escape sequence, ignore this one.
+          b[size++] = (byte) ch;
+          continue;
+        }
+        // ok, take next 2 hex digits.
+        char hd1 = in.charAt(i + 2);
+        char hd2 = in.charAt(i + 3);
 
-	/**
-	 * Takes a ASCII digit in the range A-F0-9 and returns the corresponding
-	 * integer/ordinal value.
-	 * 
-	 * @param ch
-	 *            The hex digit.
-	 * @return The converted hex value as a byte.
-	 */
-	public static byte toBinaryFromHex(byte ch) {
-		if (ch >= 'A' && ch <= 'F')
-			return (byte) ((byte) 10 + (byte) (ch - 'A'));
-		// else
-		return (byte) (ch - '0');
-	}
+        // they need to be A-F0-9:
+        if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
+          // bogus escape code, ignore:
+          continue;
+        }
+        // turn hex ASCII digit -> number
+        byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
 
-	public static byte[] toBytesBinary(String in) {
-		// this may be bigger than we need, but lets be safe.
-		byte[] b = new byte[in.length()];
-		int size = 0;
-		for (int i = 0; i < in.length(); ++i) {
-			char ch = in.charAt(i);
-			if (ch == '\\') {
-				// begin hex escape:
-				char next = in.charAt(i + 1);
-				if (next != 'x') {
-					// invalid escape sequence, ignore this one.
-					b[size++] = (byte) ch;
-					continue;
-				}
-				// ok, take next 2 hex digits.
-				char hd1 = in.charAt(i + 2);
-				char hd2 = in.charAt(i + 3);
+        b[size++] = d;
+        i += 3; // skip 3
+      } else {
+        b[size++] = (byte) ch;
+      }
+    }
+    // resize:
+    byte[] b2 = new byte[size];
+    System.arraycopy(b, 0, b2, 0, size);
+    return b2;
+  }
 
-				// they need to be A-F0-9:
-				if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
-					// bogus escape code, ignore:
-					continue;
-				}
-				// turn hex ASCII digit -> number
-				byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
+  /**
+   * Converts a string to a UTF-8 byte array.
+   * 
+   * @param s
+   *          string
+   * @return the byte array
+   */
+  public static byte[] toBytes(String s) {
+    try {
+      return s.getBytes(UTF8_ENCODING);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("UTF-8 not supported?", e);
+      return null;
+    }
+  }
 
-				b[size++] = d;
-				i += 3; // skip 3
-			} else {
-				b[size++] = (byte) ch;
-			}
-		}
-		// resize:
-		byte[] b2 = new byte[size];
-		System.arraycopy(b, 0, b2, 0, size);
-		return b2;
-	}
+  /**
+   * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
+   * 
+   * @param b
+   *          value
+   * @return <code>b</code> encoded in a byte array.
+   */
+  public static byte[] toBytes(final boolean b) {
+    return new byte[] { b ? (byte) -1 : (byte) 0 };
+  }
 
-	/**
-	 * Converts a string to a UTF-8 byte array.
-	 * 
-	 * @param s
-	 *            string
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(String s) {
-		try {
-			return s.getBytes(UTF8_ENCODING);
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("UTF-8 not supported?", e);
-			return null;
-		}
-	}
+  /**
+   * Reverses {@link #toBytes(boolean)}
+   * 
+   * @param b
+   *          array
+   * @return True or false.
+   */
+  public static boolean toBoolean(final byte[] b) {
+    if (b.length != 1) {
+      throw new IllegalArgumentException("Array has wrong size: " + b.length);
+    }
+    return b[0] != (byte) 0;
+  }
 
-	/**
-	 * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
-	 * 
-	 * @param b
-	 *            value
-	 * @return <code>b</code> encoded in a byte array.
-	 */
-	public static byte[] toBytes(final boolean b) {
-		return new byte[] { b ? (byte) -1 : (byte) 0 };
-	}
+  /**
+   * Convert a long value to a byte array using big-endian.
+   * 
+   * @param val
+   *          value to convert
+   * @return the byte array
+   */
+  public static byte[] toBytes(long val) {
+    byte[] b = new byte[8];
+    for (int i = 7; i > 0; i--) {
+      b[i] = (byte) val;
+      val >>>= 8;
+    }
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Reverses {@link #toBytes(boolean)}
-	 * 
-	 * @param b
-	 *            array
-	 * @return True or false.
-	 */
-	public static boolean toBoolean(final byte[] b) {
-		if (b.length != 1) {
-			throw new IllegalArgumentException("Array has wrong size: "
-					+ b.length);
-		}
-		return b[0] != (byte) 0;
-	}
+  /**
+   * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
+   * 
+   * @param bytes
+   *          array
+   * @return the long value
+   */
+  public static long toLong(byte[] bytes) {
+    return toLong(bytes, 0, SIZEOF_LONG);
+  }
 
-	/**
-	 * Convert a long value to a byte array using big-endian.
-	 * 
-	 * @param val
-	 *            value to convert
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(long val) {
-		byte[] b = new byte[8];
-		for (int i = 7; i > 0; i--) {
-			b[i] = (byte) val;
-			val >>>= 8;
-		}
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Converts a byte array to a long value. Assumes there will be
+   * {@link #SIZEOF_LONG} bytes available.
+   * 
+   * @param bytes
+   *          bytes
+   * @param offset
+   *          offset
+   * @return the long value
+   */
+  public static long toLong(byte[] bytes, int offset) {
+    return toLong(bytes, offset, SIZEOF_LONG);
+  }
 
-	/**
-	 * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
-	 * 
-	 * @param bytes
-	 *            array
-	 * @return the long value
-	 */
-	public static long toLong(byte[] bytes) {
-		return toLong(bytes, 0, SIZEOF_LONG);
-	}
+  /**
+   * Converts a byte array to a long value.
+   * 
+   * @param bytes
+   *          array of bytes
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length of data (must be {@link #SIZEOF_LONG})
+   * @return the long value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_LONG} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static long toLong(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_LONG || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
+    }
+    long l = 0;
+    for (int i = offset; i < offset + length; i++) {
+      l <<= 8;
+      l ^= bytes[i] & 0xFF;
+    }
+    return l;
+  }
 
-	/**
-	 * Converts a byte array to a long value. Assumes there will be
-	 * {@link #SIZEOF_LONG} bytes available.
-	 * 
-	 * @param bytes
-	 *            bytes
-	 * @param offset
-	 *            offset
-	 * @return the long value
-	 */
-	public static long toLong(byte[] bytes, int offset) {
-		return toLong(bytes, offset, SIZEOF_LONG);
-	}
+  private static IllegalArgumentException explainWrongLengthOrOffset(
+      final byte[] bytes, final int offset, final int length,
+      final int expectedLength) {
+    String reason;
+    if (length != expectedLength) {
+      reason = "Wrong length: " + length + ", expected " + expectedLength;
+    } else {
+      reason = "offset (" + offset + ") + length (" + length + ") exceed the"
+          + " capacity of the array: " + bytes.length;
+    }
+    return new IllegalArgumentException(reason);
+  }
 
-	/**
-	 * Converts a byte array to a long value.
-	 * 
-	 * @param bytes
-	 *            array of bytes
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length of data (must be {@link #SIZEOF_LONG})
-	 * @return the long value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_LONG} or if there's not
-	 *             enough room in the array at the offset indicated.
-	 */
-	public static long toLong(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_LONG || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
-		}
-		long l = 0;
-		for (int i = offset; i < offset + length; i++) {
-			l <<= 8;
-			l ^= bytes[i] & 0xFF;
-		}
-		return l;
-	}
+  /**
+   * Put a long value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          long to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putLong(byte[] bytes, int offset, long val) {
+    if (bytes.length - offset < SIZEOF_LONG) {
+      throw new IllegalArgumentException("Not enough room to put a long at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    for (int i = offset + 7; i > offset; i--) {
+      bytes[i] = (byte) val;
+      val >>>= 8;
+    }
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_LONG;
+  }
 
-	private static IllegalArgumentException explainWrongLengthOrOffset(
-			final byte[] bytes, final int offset, final int length,
-			final int expectedLength) {
-		String reason;
-		if (length != expectedLength) {
-			reason = "Wrong length: " + length + ", expected " + expectedLength;
-		} else {
-			reason = "offset (" + offset + ") + length (" + length
-					+ ") exceed the" + " capacity of the array: "
-					+ bytes.length;
-		}
-		return new IllegalArgumentException(reason);
-	}
+  /**
+   * Presumes float encoded as IEEE 754 floating-point "single format"
+   * 
+   * @param bytes
+   *          byte array
+   * @return Float made from passed byte array.
+   */
+  public static float toFloat(byte[] bytes) {
+    return toFloat(bytes, 0);
+  }
 
-	/**
-	 * Put a long value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            long to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putLong(byte[] bytes, int offset, long val) {
-		if (bytes.length - offset < SIZEOF_LONG) {
-			throw new IllegalArgumentException(
-					"Not enough room to put a long at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		for (int i = offset + 7; i > offset; i--) {
-			bytes[i] = (byte) val;
-			val >>>= 8;
-		}
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_LONG;
-	}
+  /**
+   * Presumes float encoded as IEEE 754 floating-point "single format"
+   * 
+   * @param bytes
+   *          array to convert
+   * @param offset
+   *          offset into array
+   * @return Float made from passed byte array.
+   */
+  public static float toFloat(byte[] bytes, int offset) {
+    return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
+  }
 
-	/**
-	 * Presumes float encoded as IEEE 754 floating-point "single format"
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return Float made from passed byte array.
-	 */
-	public static float toFloat(byte[] bytes) {
-		return toFloat(bytes, 0);
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset to write to
+   * @param f
+   *          float value
+   * @return New offset in <code>bytes</code>
+   */
+  public static int putFloat(byte[] bytes, int offset, float f) {
+    return putInt(bytes, offset, Float.floatToRawIntBits(f));
+  }
 
-	/**
-	 * Presumes float encoded as IEEE 754 floating-point "single format"
-	 * 
-	 * @param bytes
-	 *            array to convert
-	 * @param offset
-	 *            offset into array
-	 * @return Float made from passed byte array.
-	 */
-	public static float toFloat(byte[] bytes, int offset) {
-		return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
-	}
+  /**
+   * @param f
+   *          float value
+   * @return the float represented as byte []
+   */
+  public static byte[] toBytes(final float f) {
+    // Encode it as int
+    return Bytes.toBytes(Float.floatToRawIntBits(f));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset to write to
-	 * @param f
-	 *            float value
-	 * @return New offset in <code>bytes</code>
-	 */
-	public static int putFloat(byte[] bytes, int offset, float f) {
-		return putInt(bytes, offset, Float.floatToRawIntBits(f));
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @return Return double made from passed bytes.
+   */
+  public static double toDouble(final byte[] bytes) {
+    return toDouble(bytes, 0);
+  }
 
-	/**
-	 * @param f
-	 *            float value
-	 * @return the float represented as byte []
-	 */
-	public static byte[] toBytes(final float f) {
-		// Encode it as int
-		return Bytes.toBytes(Float.floatToRawIntBits(f));
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset where double is
+   * @return Return double made from passed bytes.
+   */
+  public static double toDouble(final byte[] bytes, final int offset) {
+    return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @return Return double made from passed bytes.
-	 */
-	public static double toDouble(final byte[] bytes) {
-		return toDouble(bytes, 0);
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset to write to
+   * @param d
+   *          value
+   * @return New offset into array <code>bytes</code>
+   */
+  public static int putDouble(byte[] bytes, int offset, double d) {
+    return putLong(bytes, offset, Double.doubleToLongBits(d));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset where double is
-	 * @return Return double made from passed bytes.
-	 */
-	public static double toDouble(final byte[] bytes, final int offset) {
-		return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
-	}
+  /**
+   * Serialize a double as the IEEE 754 double format output. The resultant
+   * array will be 8 bytes long.
+   * 
+   * @param d
+   *          value
+   * @return the double represented as byte []
+   */
+  public static byte[] toBytes(final double d) {
+    // Encode it as a long
+    return Bytes.toBytes(Double.doubleToRawLongBits(d));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset to write to
-	 * @param d
-	 *            value
-	 * @return New offset into array <code>bytes</code>
-	 */
-	public static int putDouble(byte[] bytes, int offset, double d) {
-		return putLong(bytes, offset, Double.doubleToLongBits(d));
-	}
+  /**
+   * Convert an int value to a byte array
+   * 
+   * @param val
+   *          value
+   * @return the byte array
+   */
+  public static byte[] toBytes(int val) {
+    byte[] b = new byte[4];
+    for (int i = 3; i > 0; i--) {
+      b[i] = (byte) val;
+      val >>>= 8;
+    }
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Serialize a double as the IEEE 754 double format output. The resultant
-	 * array will be 8 bytes long.
-	 * 
-	 * @param d
-	 *            value
-	 * @return the double represented as byte []
-	 */
-	public static byte[] toBytes(final double d) {
-		// Encode it as a long
-		return Bytes.toBytes(Double.doubleToRawLongBits(d));
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @return the int value
+   */
+  public static int toInt(byte[] bytes) {
+    return toInt(bytes, 0, SIZEOF_INT);
+  }
 
-	/**
-	 * Convert an int value to a byte array
-	 * 
-	 * @param val
-	 *            value
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(int val) {
-		byte[] b = new byte[4];
-		for (int i = 3; i > 0; i--) {
-			b[i] = (byte) val;
-			val >>>= 8;
-		}
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @return the int value
+   */
+  public static int toInt(byte[] bytes, int offset) {
+    return toInt(bytes, offset, SIZEOF_INT);
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return the int value
-	 */
-	public static int toInt(byte[] bytes) {
-		return toInt(bytes, 0, SIZEOF_INT);
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length of int (has to be {@link #SIZEOF_INT})
+   * @return the int value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_INT} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static int toInt(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_INT || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
+    }
+    int n = 0;
+    for (int i = offset; i < (offset + length); i++) {
+      n <<= 8;
+      n ^= bytes[i] & 0xFF;
+    }
+    return n;
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @return the int value
-	 */
-	public static int toInt(byte[] bytes, int offset) {
-		return toInt(bytes, offset, SIZEOF_INT);
-	}
+  /**
+   * Put an int value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          int to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putInt(byte[] bytes, int offset, int val) {
+    if (bytes.length - offset < SIZEOF_INT) {
+      throw new IllegalArgumentException("Not enough room to put an int at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    for (int i = offset + 3; i > offset; i--) {
+      bytes[i] = (byte) val;
+      val >>>= 8;
+    }
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_INT;
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length of int (has to be {@link #SIZEOF_INT})
-	 * @return the int value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_INT} or if there's not enough
-	 *             room in the array at the offset indicated.
-	 */
-	public static int toInt(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_INT || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
-		}
-		int n = 0;
-		for (int i = offset; i < (offset + length); i++) {
-			n <<= 8;
-			n ^= bytes[i] & 0xFF;
-		}
-		return n;
-	}
+  /**
+   * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long.
+   * 
+   * @param val
+   *          value
+   * @return the byte array
+   */
+  public static byte[] toBytes(short val) {
+    byte[] b = new byte[SIZEOF_SHORT];
+    b[1] = (byte) val;
+    val >>= 8;
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Put an int value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            int to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putInt(byte[] bytes, int offset, int val) {
-		if (bytes.length - offset < SIZEOF_INT) {
-			throw new IllegalArgumentException(
-					"Not enough room to put an int at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		for (int i = offset + 3; i > offset; i--) {
-			bytes[i] = (byte) val;
-			val >>>= 8;
-		}
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_INT;
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @return the short value
+   */
+  public static short toShort(byte[] bytes) {
+    return toShort(bytes, 0, SIZEOF_SHORT);
+  }
 
-	/**
-	 * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes
-	 * long.
-	 * 
-	 * @param val
-	 *            value
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(short val) {
-		byte[] b = new byte[SIZEOF_SHORT];
-		b[1] = (byte) val;
-		val >>= 8;
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @return the short value
+   */
+  public static short toShort(byte[] bytes, int offset) {
+    return toShort(bytes, offset, SIZEOF_SHORT);
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return the short value
-	 */
-	public static short toShort(byte[] bytes) {
-		return toShort(bytes, 0, SIZEOF_SHORT);
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length, has to be {@link #SIZEOF_SHORT}
+   * @return the short value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_SHORT} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static short toShort(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_SHORT || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT);
+    }
+    short n = 0;
+    n ^= bytes[offset] & 0xFF;
+    n <<= 8;
+    n ^= bytes[offset + 1] & 0xFF;
+    return n;
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @return the short value
-	 */
-	public static short toShort(byte[] bytes, int offset) {
-		return toShort(bytes, offset, SIZEOF_SHORT);
-	}
+  /**
+   * Put a short value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          short to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putShort(byte[] bytes, int offset, short val) {
+    if (bytes.length - offset < SIZEOF_SHORT) {
+      throw new IllegalArgumentException("Not enough room to put a short at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    bytes[offset + 1] = (byte) val;
+    val >>= 8;
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_SHORT;
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length, has to be {@link #SIZEOF_SHORT}
-	 * @return the short value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_SHORT} or if there's not
-	 *             enough room in the array at the offset indicated.
-	 */
-	public static short toShort(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_SHORT || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length,
-					SIZEOF_SHORT);
-		}
-		short n = 0;
-		n ^= bytes[offset] & 0xFF;
-		n <<= 8;
-		n ^= bytes[offset + 1] & 0xFF;
-		return n;
-	}
+  /**
+   * @param vint
+   *          Integer to make a vint of.
+   * @return Vint as bytes array.
+   */
+  public static byte[] vintToBytes(final long vint) {
+    long i = vint;
+    int size = WritableUtils.getVIntSize(i);
+    byte[] result = new byte[size];
+    int offset = 0;
+    if (i >= -112 && i <= 127) {
+      result[offset] = (byte) i;
+      return result;
+    }
 
-	/**
-	 * Put a short value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            short to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putShort(byte[] bytes, int offset, short val) {
-		if (bytes.length - offset < SIZEOF_SHORT) {
-			throw new IllegalArgumentException(
-					"Not enough room to put a short at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		bytes[offset + 1] = (byte) val;
-		val >>= 8;
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_SHORT;
-	}
+    int len = -112;
+    if (i < 0) {
+      i ^= -1L; // take one's complement'
+      len = -120;
+    }
 
-	/**
-	 * @param vint
-	 *            Integer to make a vint of.
-	 * @return Vint as bytes array.
-	 */
-	public static byte[] vintToBytes(final long vint) {
-		long i = vint;
-		int size = WritableUtils.getVIntSize(i);
-		byte[] result = new byte[size];
-		int offset = 0;
-		if (i >= -112 && i <= 127) {
-			result[offset] = (byte) i;
-			return result;
-		}
+    long tmp = i;
+    while (tmp != 0) {
+      tmp = tmp >> 8;
+      len--;
+    }
 
-		int len = -112;
-		if (i < 0) {
-			i ^= -1L; // take one's complement'
-			len = -120;
-		}
+    result[offset++] = (byte) len;
 
-		long tmp = i;
-		while (tmp != 0) {
-			tmp = tmp >> 8;
-			len--;
-		}
+    len = (len < -120) ? -(len + 120) : -(len + 112);
 
-		result[offset++] = (byte) len;
+    for (int idx = len; idx != 0; idx--) {
+      int shiftbits = (idx - 1) * 8;
+      long mask = 0xFFL << shiftbits;
+      result[offset++] = (byte) ((i & mask) >> shiftbits);
+    }
+    return result;
+  }
 
-		len = (len < -120) ? -(len + 120) : -(len + 112);
+  /**
+   * @param buffer
+   *          buffer to convert
+   * @return vint bytes as an integer.
+   */
+  public static long bytesToVint(final byte[] buffer) {
+    int offset = 0;
+    byte firstByte = buffer[offset++];
+    int len = WritableUtils.decodeVIntSize(firstByte);
+    if (len == 1) {
+      return firstByte;
+    }
+    long i = 0;
+    for (int idx = 0; idx < len - 1; idx++) {
+      byte b = buffer[offset++];
+      i = i << 8;
+      i = i | (b & 0xFF);
+    }
+    return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+  }
 
-		for (int idx = len; idx != 0; idx--) {
-			int shiftbits = (idx - 1) * 8;
-			long mask = 0xFFL << shiftbits;
-			result[offset++] = (byte) ((i & mask) >> shiftbits);
-		}
-		return result;
-	}
+  /**
+   * Reads a zero-compressed encoded long from input stream and returns it.
+   * 
+   * @param buffer
+   *          Binary array
+   * @param offset
+   *          Offset into array at which vint begins.
+   * @throws java.io.IOException
+   *           e
+   * @return deserialized long from stream.
+   */
+  public static long readVLong(final byte[] buffer, final int offset)
+      throws IOException {
+    byte firstByte = buffer[offset];
+    int len = WritableUtils.decodeVIntSize(firstByte);
+    if (len == 1) {
+      return firstByte;
+    }
+    long i = 0;
+    for (int idx = 0; idx < len - 1; idx++) {
+      byte b = buffer[offset + 1 + idx];
+      i = i << 8;
+      i = i | (b & 0xFF);
+    }
+    return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+  }
 
-	/**
-	 * @param buffer
-	 *            buffer to convert
-	 * @return vint bytes as an integer.
-	 */
-	public static long bytesToVint(final byte[] buffer) {
-		int offset = 0;
-		byte firstByte = buffer[offset++];
-		int len = WritableUtils.decodeVIntSize(firstByte);
-		if (len == 1) {
-			return firstByte;
-		}
-		long i = 0;
-		for (int idx = 0; idx < len - 1; idx++) {
-			byte b = buffer[offset++];
-			i = i << 8;
-			i = i | (b & 0xFF);
-		}
-		return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
-	}
+  /**
+   * @param left
+   *          left operand
+   * @param right
+   *          right operand
+   * @return 0 if equal, < 0 if left is less than right, etc.
+   */
+  public static int compareTo(final byte[] left, final byte[] right) {
+    return compareTo(left, 0, left.length, right, 0, right.length);
+  }
 
-	/**
-	 * Reads a zero-compressed encoded long from input stream and returns it.
-	 * 
-	 * @param buffer
-	 *            Binary array
-	 * @param offset
-	 *            Offset into array at which vint begins.
-	 * @throws java.io.IOException
-	 *             e
-	 * @return deserialized long from stream.
-	 */
-	public static long readVLong(final byte[] buffer, final int offset)
-			throws IOException {
-		byte firstByte = buffer[offset];
-		int len = WritableUtils.decodeVIntSize(firstByte);
-		if (len == 1) {
-			return firstByte;
-		}
-		long i = 0;
-		for (int idx = 0; idx < len - 1; idx++) {
-			byte b = buffer[offset + 1 + idx];
-			i = i << 8;
-			i = i | (b & 0xFF);
-		}
-		return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
-	}
+  /**
+   * Lexographically compare two arrays.
+   * 
+   * @param buffer1
+   *          left operand
+   * @param buffer2
+   *          right operand
+   * @param offset1
+   *          Where to start comparing in the left buffer
+   * @param offset2
+   *          Where to start comparing in the right buffer
+   * @param length1
+   *          How much to compare from the left buffer
+   * @param length2
+   *          How much to compare from the right buffer
+   * @return 0 if equal, < 0 if left is less than right, etc.
+   */
+  public static int compareTo(byte[] buffer1, int offset1, int length1,
+      byte[] buffer2, int offset2, int length2) {
+    // Bring WritableComparator code local
+    int end1 = offset1 + length1;
+    int end2 = offset2 + length2;
+    for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+      int a = (buffer1[i] & 0xff);
+      int b = (buffer2[j] & 0xff);
+      if (a != b) {
+        return a - b;
+      }
+    }
+    return length1 - length2;
+  }
 
-	/**
-	 * @param left
-	 *            left operand
-	 * @param right
-	 *            right operand
-	 * @return 0 if equal, < 0 if left is less than right, etc.
-	 */
-	public static int compareTo(final byte[] left, final byte[] right) {
-		return compareTo(left, 0, left.length, right, 0, right.length);
-	}
+  /**
+   * @param left
+   *          left operand
+   * @param right
+   *          right operand
+   * @return True if equal
+   */
+  public static boolean equals(final byte[] left, final byte[] right) {
+    // Could use Arrays.equals?
+    // noinspection SimplifiableConditionalExpression
+    if (left == null && right == null) {
+      return true;
+    }
+    return (left == null || right == null || (left.length != right.length) ? false
+        : compareTo(left, right) == 0);
+  }
 
-	/**
-	 * Lexographically compare two arrays.
-	 * 
-	 * @param buffer1
-	 *            left operand
-	 * @param buffer2
-	 *            right operand
-	 * @param offset1
-	 *            Where to start comparing in the left buffer
-	 * @param offset2
-	 *            Where to start comparing in the right buffer
-	 * @param length1
-	 *            How much to compare from the left buffer
-	 * @param length2
-	 *            How much to compare from the right buffer
-	 * @return 0 if equal, < 0 if left is less than right, etc.
-	 */
-	public static int compareTo(byte[] buffer1, int offset1, int length1,
-			byte[] buffer2, int offset2, int length2) {
-		// Bring WritableComparator code local
-		int end1 = offset1 + length1;
-		int end2 = offset2 + length2;
-		for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
-			int a = (buffer1[i] & 0xff);
-			int b = (buffer2[j] & 0xff);
-			if (a != b) {
-				return a - b;
-			}
-		}
-		return length1 - length2;
-	}
+  /**
+   * Return true if the byte array on the right is a prefix of the byte array on
+   * the left.
+   */
+  public static boolean startsWith(byte[] bytes, byte[] prefix) {
+    return bytes != null && prefix != null && bytes.length >= prefix.length
+        && compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
+  }
 
-	/**
-	 * @param left
-	 *            left operand
-	 * @param right
-	 *            right operand
-	 * @return True if equal
-	 */
-	public static boolean equals(final byte[] left, final byte[] right) {
-		// Could use Arrays.equals?
-		// noinspection SimplifiableConditionalExpression
-		if (left == null && right == null) {
-			return true;
-		}
-		return (left == null || right == null || (left.length != right.length) ? false
-				: compareTo(left, right) == 0);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+   *         passed in array. This method is what
+   *         {@link org.apache.hadoop.io.Text} and
+   *         {@link ImmutableBytesWritable} use calculating hash code.
+   */
+  public static int hashCode(final byte[] b) {
+    return hashCode(b, b.length);
+  }
 
-	/**
-	 * Return true if the byte array on the right is a prefix of the byte array
-	 * on the left.
-	 */
-	public static boolean startsWith(byte[] bytes, byte[] prefix) {
-		return bytes != null
-				&& prefix != null
-				&& bytes.length >= prefix.length
-				&& compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
-	}
+  /**
+   * @param b
+   *          value
+   * @param length
+   *          length of the value
+   * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+   *         passed in array. This method is what
+   *         {@link org.apache.hadoop.io.Text} and
+   *         {@link ImmutableBytesWritable} use calculating hash code.
+   */
+  public static int hashCode(final byte[] b, final int length) {
+    return WritableComparator.hashBytes(b, length);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
-	 *         passed in array. This method is what
-	 *         {@link org.apache.hadoop.io.Text} and
-	 *         {@link ImmutableBytesWritable} use calculating hash code.
-	 */
-	public static int hashCode(final byte[] b) {
-		return hashCode(b, b.length);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @return A hash of <code>b</code> as an Integer that can be used as key in
+   *         Maps.
+   */
+  public static Integer mapKey(final byte[] b) {
+    return hashCode(b);
+  }
 
-	/**
-	 * @param b
-	 *            value
-	 * @param length
-	 *            length of the value
-	 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
-	 *         passed in array. This method is what
-	 *         {@link org.apache.hadoop.io.Text} and
-	 *         {@link ImmutableBytesWritable} use calculating hash code.
-	 */
-	public static int hashCode(final byte[] b, final int length) {
-		return WritableComparator.hashBytes(b, length);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @param length
+   *          length to hash
+   * @return A hash of <code>b</code> as an Integer that can be used as key in
+   *         Maps.
+   */
+  public static Integer mapKey(final byte[] b, final int length) {
+    return hashCode(b, length);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @return A hash of <code>b</code> as an Integer that can be used as key in
-	 *         Maps.
-	 */
-	public static Integer mapKey(final byte[] b) {
-		return hashCode(b);
-	}
+  /**
+   * @param a
+   *          lower half
+   * @param b
+   *          upper half
+   * @return New array that has a in lower half and b in upper half.
+   */
+  public static byte[] add(final byte[] a, final byte[] b) {
+    return add(a, b, EMPTY_BYTE_ARRAY);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @param length
-	 *            length to hash
-	 * @return A hash of <code>b</code> as an Integer that can be used as key in
-	 *         Maps.
-	 */
-	public static Integer mapKey(final byte[] b, final int length) {
-		return hashCode(b, length);
-	}
+  /**
+   * @param a
+   *          first third
+   * @param b
+   *          second third
+   * @param c
+   *          third third
+   * @return New array made from a, b and c
+   */
+  public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
+    byte[] result = new byte[a.length + b.length + c.length];
+    System.arraycopy(a, 0, result, 0, a.length);
+    System.arraycopy(b, 0, result, a.length, b.length);
+    System.arraycopy(c, 0, result, a.length + b.length, c.length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            lower half
-	 * @param b
-	 *            upper half
-	 * @return New array that has a in lower half and b in upper half.
-	 */
-	public static byte[] add(final byte[] a, final byte[] b) {
-		return add(a, b, EMPTY_BYTE_ARRAY);
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          amount of bytes to grab
+   * @return First <code>length</code> bytes from <code>a</code>
+   */
+  public static byte[] head(final byte[] a, final int length) {
+    if (a.length < length) {
+      return null;
+    }
+    byte[] result = new byte[length];
+    System.arraycopy(a, 0, result, 0, length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            first third
-	 * @param b
-	 *            second third
-	 * @param c
-	 *            third third
-	 * @return New array made from a, b and c
-	 */
-	public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
-		byte[] result = new byte[a.length + b.length + c.length];
-		System.arraycopy(a, 0, result, 0, a.length);
-		System.arraycopy(b, 0, result, a.length, b.length);
-		System.arraycopy(c, 0, result, a.length + b.length, c.length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          amount of bytes to snarf
+   * @return Last <code>length</code> bytes from <code>a</code>
+   */
+  public static byte[] tail(final byte[] a, final int length) {
+    if (a.length < length) {
+      return null;
+    }
+    byte[] result = new byte[length];
+    System.arraycopy(a, a.length - length, result, 0, length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            amount of bytes to grab
-	 * @return First <code>length</code> bytes from <code>a</code>
-	 */
-	public static byte[] head(final byte[] a, final int length) {
-		if (a.length < length) {
-			return null;
-		}
-		byte[] result = new byte[length];
-		System.arraycopy(a, 0, result, 0, length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          new array size
+   * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
+   */
+  public static byte[] padHead(final byte[] a, final int length) {
+    byte[] padding = new byte[length];
+    for (int i = 0; i < length; i++) {
+      padding[i] = 0;
+    }
+    return add(padding, a);
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            amount of bytes to snarf
-	 * @return Last <code>length</code> bytes from <code>a</code>
-	 */
-	public static byte[] tail(final byte[] a, final int length) {
-		if (a.length < length) {
-			return null;
-		}
-		byte[] result = new byte[length];
-		System.arraycopy(a, a.length - length, result, 0, length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          new array size
+   * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
+   */
+  public static byte[] padTail(final byte[] a, final int length) {
+    byte[] padding = new byte[length];
+    for (int i = 0; i < length; i++) {
+      padding[i] = 0;
+    }
+    return add(a, padding);
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            new array size
-	 * @return Value in <code>a</code> plus <code>length</code> prepended 0
-	 *         bytes
-	 */
-	public static byte[] padHead(final byte[] a, final int length) {
-		byte[] padding = new byte[length];
-		for (int i = 0; i < length; i++) {
-			padding[i] = 0;
-		}
-		return add(padding, a);
-	}
+  /**
+   * Split passed range. Expensive operation relatively. Uses BigInteger math.
+   * Useful splitting ranges for MapReduce jobs.
+   * 
+   * @param a
+   *          Beginning of range
+   * @param b
+   *          End of range
+   * @param num
+   *          Number of times to split range. Pass 1 if you want to split the
+   *          range in two; i.e. one split.
+   * @return Array of dividing values
+   */
+  public static byte[][] split(final byte[] a, final byte[] b, final int num) {
+    byte[][] ret = new byte[num + 2][];
+    int i = 0;
+    Iterable<byte[]> iter = iterateOnSplits(a, b, num);
+    if (iter == null)
+      return null;
+    for (byte[] elem : iter) {
+      ret[i++] = elem;
+    }
+    return ret;
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            new array size
-	 * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
-	 */
-	public static byte[] padTail(final byte[] a, final int length) {
-		byte[] padding = new byte[length];
-		for (int i = 0; i < length; i++) {
-			padding[i] = 0;
-		}
-		return add(a, padding);
-	}
+  /**
+   * Iterate over keys within the passed inclusive range.
+   */
+  public static Iterable<byte[]> iterateOnSplits(final byte[] a,
+      final byte[] b, final int num) {
+    byte[] aPadded;
+    byte[] bPadded;
+    if (a.length < b.length) {
+      aPadded = padTail(a, b.length - a.length);
+      bPadded = b;
+    } else if (b.length < a.length) {
+      aPadded = a;
+      bPadded = padTail(b, a.length - b.length);
+    } else {
+      aPadded = a;
+      bPadded = b;
+    }
+    if (compareTo(aPadded, bPadded) >= 0) {
+      throw new IllegalArgumentException("b <= a");
+    }
+    if (num <= 0) {
+      throw new IllegalArgumentException("num cannot be < 0");
+    }
+    byte[] prependHeader = { 1, 0 };
+    final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
+    final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
+    final BigInteger diffBI = stopBI.subtract(startBI);
+    final BigInteger splitsBI = BigInteger.valueOf(num + 1);
+    if (diffBI.compareTo(splitsBI) < 0) {
+      return null;
+    }
+    final BigInteger intervalBI;
+    try {
+      intervalBI = diffBI.divide(splitsBI);
+    } catch (Exception e) {
+      LOG.error("Exception caught during division", e);
+      return null;
+    }
 
-	/**
-	 * Split passed range. Expensive operation relatively. Uses BigInteger math.
-	 * Useful splitting ranges for MapReduce jobs.
-	 * 
-	 * @param a
-	 *            Beginning of range
-	 * @param b
-	 *            End of range
-	 * @param num
-	 *            Number of times to split range. Pass 1 if you want to split
-	 *            the range in two; i.e. one split.
-	 * @return Array of dividing values
-	 */
-	public static byte[][] split(final byte[] a, final byte[] b, final int num) {
-		byte[][] ret = new byte[num + 2][];
-		int i = 0;
-		Iterable<byte[]> iter = iterateOnSplits(a, b, num);
-		if (iter == null)
-			return null;
-		for (byte[] elem : iter) {
-			ret[i++] = elem;
-		}
-		return ret;
-	}
+    final Iterator<byte[]> iterator = new Iterator<byte[]>() {
+      private int i = -1;
 
-	/**
-	 * Iterate over keys within the passed inclusive range.
-	 */
-	public static Iterable<byte[]> iterateOnSplits(final byte[] a,
-			final byte[] b, final int num) {
-		byte[] aPadded;
-		byte[] bPadded;
-		if (a.length < b.length) {
-			aPadded = padTail(a, b.length - a.length);
-			bPadded = b;
-		} else if (b.length < a.length) {
-			aPadded = a;
-			bPadded = padTail(b, a.length - b.length);
-		} else {
-			aPadded = a;
-			bPadded = b;
-		}
-		if (compareTo(aPadded, bPadded) >= 0) {
-			throw new IllegalArgumentException("b <= a");
-		}
-		if (num <= 0) {
-			throw new IllegalArgumentException("num cannot be < 0");
-		}
-		byte[] prependHeader = { 1, 0 };
-		final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
-		final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
-		final BigInteger diffBI = stopBI.subtract(startBI);
-		final BigInteger splitsBI = BigInteger.valueOf(num + 1);
-		if (diffBI.compareTo(splitsBI) < 0) {
-			return null;
-		}
-		final BigInteger intervalBI;
-		try {
-			intervalBI = diffBI.divide(splitsBI);
-		} catch (Exception e) {
-			LOG.error("Exception caught during division", e);
-			return null;
-		}
+      @Override
+      public boolean hasNext() {
+        return i < num + 1;
+      }
 
-		final Iterator<byte[]> iterator = new Iterator<byte[]>() {
-			private int i = -1;
+      @Override
+      public byte[] next() {
+        i++;
+        if (i == 0)
+          return a;
+        if (i == num + 1)
+          return b;
 
-			@Override
-			public boolean hasNext() {
-				return i < num + 1;
-			}
+        BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
+            .valueOf(i)));
+        byte[] padded = curBI.toByteArray();
+        if (padded[1] == 0)
+          padded = tail(padded, padded.length - 2);
+        else
+          padded = tail(padded, padded.length - 1);
+        return padded;
+      }
 
-			@Override
-			public byte[] next() {
-				i++;
-				if (i == 0)
-					return a;
-				if (i == num + 1)
-					return b;
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
 
-				BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
-						.valueOf(i)));
-				byte[] padded = curBI.toByteArray();
-				if (padded[1] == 0)
-					padded = tail(padded, padded.length - 2);
-				else
-					padded = tail(padded, padded.length - 1);
-				return padded;
-			}
+    };
 
-			@Override
-			public void remove() {
-				throw new UnsupportedOperationException();
-			}
+    return new Iterable<byte[]>() {
+      @Override
+      public Iterator<byte[]> iterator() {
+        return iterator;
+      }
+    };
+  }
 
-		};
+  /**
+   * @param t
+   *          operands
+   * @return Array of byte arrays made from passed array of Text
+   */
+  public static byte[][] toByteArrays(final String[] t) {
+    byte[][] result = new byte[t.length][];
+    for (int i = 0; i < t.length; i++) {
+      result[i] = Bytes.toBytes(t[i]);
+    }
+    return result;
+  }
 
-		return new Iterable<byte[]>() {
-			@Override
-			public Iterator<byte[]> iterator() {
-				return iterator;
-			}
-		};
-	}
+  /**
+   * @param column
+   *          operand
+   * @return A byte array of a byte array where first and only entry is
+   *         <code>column</code>
+   */
+  public static byte[][] toByteArrays(final String column) {
+    return toByteArrays(toBytes(column));
+  }
 
-	/**
-	 * @param t
-	 *            operands
-	 * @return Array of byte arrays made from passed array of Text
-	 */
-	public static byte[][] toByteArrays(final String[] t) {
-		byte[][] result = new byte[t.length][];
-		for (int i = 0; i < t.length; i++) {
-			result[i] = Bytes.toBytes(t[i]);
-		}
-		return result;
-	}
+  /**
+   * @param column
+   *          operand
+   * @return A byte array of a byte array where first and only entry is
+   *         <code>column</code>
+   */
+  public static byte[][] toByteArrays(final byte[] column) {
+    byte[][] result = new byte[1][];
+    result[0] = column;
+    return result;
+  }
 
-	/**
-	 * @param column
-	 *            operand
-	 * @return A byte array of a byte array where first and only entry is
-	 *         <code>column</code>
-	 */
-	public static byte[][] toByteArrays(final String column) {
-		return toByteArrays(toBytes(column));
-	}
+  /**
+   * Binary search for keys in indexes.
+   * 
+   * @param arr
+   *          array of byte arrays to search for
+   * @param key
+   *          the key you want to find
+   * @param offset
+   *          the offset in the key you want to find
+   * @param length
+   *          the length of the key
+   * @param comparator
+   *          a comparator to compare.
+   * @return index of key
+   */
+  public static int binarySearch(byte[][] arr, byte[] key, int offset,
+      int length, RawComparator<byte[]> comparator) {
+    int low = 0;
+    int high = arr.length - 1;
 
-	/**
-	 * @param column
-	 *            operand
-	 * @return A byte array of a byte array where first and only entry is
-	 *         <code>column</code>
-	 */
-	public static byte[][] toByteArrays(final byte[] column) {
-		byte[][] result = new byte[1][];
-		result[0] = column;
-		return result;
-	}
+    while (low <= high) {
+      int mid = (low + high) >>> 1;
+      // we have to compare in this order, because the comparator order
+      // has special logic when the 'left side' is a special key.
+      int cmp = comparator.compare(key, offset, length, arr[mid], 0,
+          arr[mid].length);
+      // key lives above the midpoint
+      if (cmp > 0)
+        low = mid + 1;
+      // key lives below the midpoint
+      else if (cmp < 0)
+        high = mid - 1;
+      // BAM. how often does this really happen?
+      else
+        return mid;
+    }
+    return -(low + 1);
+  }
 
-	/**
-	 * Binary search for keys in indexes.
-	 * 
-	 * @param arr
-	 *            array of byte arrays to search for
-	 * @param key
-	 *            the key you want to find
-	 * @param offset
-	 *            the offset in the key you want to find
-	 * @param length
-	 *            the length of the key
-	 * @param comparator
-	 *            a comparator to compare.
-	 * @return index of key
-	 */
-	public static int binarySearch(byte[][] arr, byte[] key, int offset,
-			int length, RawComparator<byte[]> comparator) {
-		int low = 0;
-		int high = arr.length - 1;
+  /**
+   * Bytewise binary increment/deincrement of long contained in byte array on
+   * given amount.
+   * 
+   * @param value
+   *          - array of bytes containing long (length <= SIZEOF_LONG)
+   * @param amount
+   *          value will be incremented on (deincremented if negative)
+   * @return array of bytes containing incremented long (length == SIZEOF_LONG)
+   * @throws IOException
+   *           - if value.length > SIZEOF_LONG
+   */
+  public static byte[] incrementBytes(byte[] value, long amount)
+      throws IOException {
+    byte[] val = value;
+    if (val.length < SIZEOF_LONG) {
+      // Hopefully this doesn't happen too often.
+      byte[] newvalue;
+      if (val[0] < 0) {
+        newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 };
+      } else {
+        newvalue = new byte[SIZEOF_LONG];
+      }
+      System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
+          val.length);
+      val = newvalue;
+    } else if (val.length > SIZEOF_LONG) {
+      throw new IllegalArgumentException("Increment Bytes - value too big: "
+          + val.length);
+    }
+    if (amount == 0)
+      return val;
+    if (val[0] < 0) {
+      return binaryIncrementNeg(val, amount);
+    }
+    return binaryIncrementPos(val, amount);
+  }
 
-		while (low <= high) {
-			int mid = (low + high) >>> 1;
-			// we have to compare in this order, because the comparator order
-			// has special logic when the 'left side' is a special key.
-			int cmp = comparator.compare(key, offset, length, arr[mid], 0,
-					arr[mid].length);
-			// key lives above the midpoint
-			if (cmp > 0)
-				low = mid + 1;
-			// key lives below the midpoint
-			else if (cmp < 0)
-				high = mid - 1;
-			// BAM. how often does this really happen?
-			else
-				return mid;
-		}
-		return -(low + 1);
-	}
+  /* increment/deincrement for positive value */
+  private static byte[] binaryIncrementPos(byte[] value, long amount) {
+    long amo = amount;
+    int sign = 1;
+    if (amount < 0) {
+      amo = -amount;
+      sign = -1;
+    }
+    for (int i = 0; i < value.length; i++) {
+      int cur = ((int) amo % 256) * sign;
+      amo = (amo >> 8);
+      int val = value[value.length - i - 1] & 0x0ff;
+      int total = val + cur;
+      if (total > 255) {
+        amo += sign;
+        total %= 256;
+      } else if (total < 0) {
+        amo -= sign;
+      }
+      value[value.length - i - 1] = (byte) total;
+      if (amo == 0)
+        return value;
+    }
+    return value;
+  }
 
-	/**
-	 * Bytewise binary increment/deincrement of long contained in byte array on
-	 * given amount.
-	 * 
-	 * @param value
-	 *            - array of bytes containing long (length <= SIZEOF_LONG)
-	 * @param amount
-	 *            value will be incremented on (deincremented if negative)
-	 * @return array of bytes containing incremented long (length ==
-	 *         SIZEOF_LONG)
-	 * @throws IOException
-	 *             - if value.length > SIZEOF_LONG
-	 */
-	public static byte[] incrementBytes(byte[] value, long amount)
-			throws IOException {
-		byte[] val = value;
-		if (val.length < SIZEOF_LONG) {
-			// Hopefully this doesn't happen too often.
-			byte[] newvalue;
-			if (val[0] < 0) {
-				newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 };
-			} else {
-				newvalue = new byte[SIZEOF_LONG];
-			}
-			System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
-					val.length);
-			val = newvalue;
-		} else if (val.length > SIZEOF_LONG) {
-			throw new IllegalArgumentException(
-					"Increment Bytes - value too big: " + val.length);
-		}
-		if (amount == 0)
-			return val;
-		if (val[0] < 0) {
-			return binaryIncrementNeg(val, amount);
-		}
-		return binaryIncrementPos(val, amount);
-	}
+  /* increment/deincrement for negative value */
+  private static byte[] binaryIncrementNeg(byte[] value, long amount) {
+    long amo = amount;
+    int sign = 1;
+    if (amount < 0) {
+      amo = -amount;
+      sign = -1;
+    }
+    for (int i = 0; i < value.length; i++) {
+      int cur = ((int) amo % 256) * sign;
+      amo = (amo >> 8);
+      int val = ((~value[value.length - i - 1]) & 0x0ff) + 1;
+      int total = cur - val;
+      if (total >= 0) {
+        amo += sign;
+      } else if (total < -256) {
+        amo -= sign;
+        total %= 256;
+      }
+      value[value.length - i - 1] = (byte) total;
+      if (amo == 0)
+        return value;
+    }
+    return value;
+  }
 
-	/* increment/deincrement for positive value */
-	private static byte[] binaryIncrementPos(byte[] value, long amount) {
-		long amo = amount;
-		int sign = 1;
-		if (amount < 0) {
-			amo = -amount;
-			sign = -1;
-		}
-		for (int i = 0; i < value.length; i++) {
-			int cur = ((int) amo % 256) * sign;
-			amo = (amo >> 8);
-			int val = value[value.length - i - 1] & 0x0ff;
-			int total = val + cur;
-			if (total > 255) {
-				amo += sign;
-				total %= 256;
-			} else if (total < 0) {
-				amo -= sign;
-			}
-			value[value.length - i - 1] = (byte) total;
-			if (amo == 0)
-				return value;
-		}
-		return value;
-	}
-
-	/* increment/deincrement for negative value */
-	private static byte[] binaryIncrementNeg(byte[] value, long amount) {
-		long amo = amount;
-		int sign = 1;
-		if (amount < 0) {
-			amo = -amount;
-			sign = -1;
-		}
-		for (int i = 0; i < value.length; i++) {
-			int cur = ((int) amo % 256) * sign;
-			amo = (amo >> 8);
-			int val = ((~value[value.length - i - 1]) & 0x0ff) + 1;
-			int total = cur - val;
-			if (total >= 0) {
-				amo += sign;
-			} else if (total < -256) {
-				amo -= sign;
-				total %= 256;
-			}
-			value[value.length - i - 1] = (byte) total;
-			if (amo == 0)
-				return value;
-		}
-		return value;
-	}
-
 }
Index: src/java/org/apache/nutch/util/CommandRunner.java
===================================================================
--- src/java/org/apache/nutch/util/CommandRunner.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/CommandRunner.java	(working copy)
@@ -82,11 +82,11 @@
   }
 
   public void evaluate() throws IOException {
-      this.exec();
+    this.exec();
   }
 
   /**
-   *
+   * 
    * @return process exit value (return code) or -1 if timed out.
    * @throws IOException
    */
@@ -94,13 +94,11 @@
     Process proc = Runtime.getRuntime().exec(_command);
     _barrier = new CyclicBarrier(3 + ((_stdin != null) ? 1 : 0));
 
-    PullerThread so =
-      new PullerThread("STDOUT", proc.getInputStream(), _stdout);
+    PullerThread so = new PullerThread("STDOUT", proc.getInputStream(), _stdout);
     so.setDaemon(true);
     so.start();
 
-    PullerThread se =
-      new PullerThread("STDERR", proc.getErrorStream(), _stderr);
+    PullerThread se = new PullerThread("STDERR", proc.getErrorStream(), _stderr);
     se.setDaemon(true);
     se.start();
 
@@ -145,11 +143,11 @@
             Thread.sleep(1000);
             _xit = proc.exitValue();
           } catch (InterruptedException ie) {
-              if (Thread.interrupted()) {
-                  break; // stop waiting on an interrupt for this thread
-              } else {
-                  continue;
-              }
+            if (Thread.interrupted()) {
+              break; // stop waiting on an interrupt for this thread
+            } else {
+              continue;
+            }
           } catch (IllegalThreadStateException iltse) {
             continue;
           }
@@ -181,11 +179,8 @@
 
     private boolean _closeInput;
 
-    protected PumperThread(
-      String name,
-      InputStream is,
-      OutputStream os,
-      boolean closeInput) {
+    protected PumperThread(String name, InputStream is, OutputStream os,
+        boolean closeInput) {
       super(name);
       _is = is;
       _os = os;
@@ -218,12 +213,12 @@
         }
       }
       try {
-         _barrier.await();
-       } catch (InterruptedException ie) {
-         /* IGNORE */
-       } catch (BrokenBarrierException bbe) {
-         /* IGNORE */
-       }
+        _barrier.await();
+      } catch (InterruptedException ie) {
+        /* IGNORE */
+      } catch (BrokenBarrierException bbe) {
+        /* IGNORE */
+      }
     }
   }
 
@@ -269,8 +264,8 @@
 
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-timeout")) {
-        timeout = Integer.parseInt(args[++i]);;
-      } else if (i != args.length-2) {
+        timeout = Integer.parseInt(args[++i]);
+      } else if (i != args.length - 2) {
         System.err.println(usage);
         System.exit(-1);
       } else {
@@ -290,6 +285,6 @@
 
     cr.evaluate();
 
-    System.err.println("output value: "+cr.getExitValue());
+    System.err.println("output value: " + cr.getExitValue());
   }
 }
Index: src/java/org/apache/nutch/util/DeflateUtils.java
===================================================================
--- src/java/org/apache/nutch/util/DeflateUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/DeflateUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on deflated data.
+ * A collection of utility methods for working on deflated data.
  */
 public class DeflateUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(DeflateUtils.class);
   private static final int EXPECTED_COMPRESSION_RATIO = 5;
   private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an inflated copy of the input array.  If the deflated 
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns an inflated copy of the input array. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in) {
     return inflateBestEffort(in, Integer.MAX_VALUE);
@@ -48,37 +47,36 @@
 
   /**
    * Returns an inflated copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the deflated input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in, int sizeLimit) {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
     // "true" because HTTP does not provide zlib headers
     Inflater inflater = new Inflater(true);
-    InflaterInputStream inStream = 
-      new InflaterInputStream(new ByteArrayInputStream(in), inflater);
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in), inflater);
 
     byte[] buf = new byte[BUF_SIZE];
     int written = 0;
     while (true) {
       try {
-	int size = inStream.read(buf);
-	if (size <= 0) 
-	  break;
-	if ((written + size) > sizeLimit) {
-	  outStream.write(buf, 0, sizeLimit - written);
-	  break;
-	}
-	outStream.write(buf, 0, size);
-	written+= size;
+        int size = inStream.read(buf);
+        if (size <= 0)
+          break;
+        if ((written + size) > sizeLimit) {
+          outStream.write(buf, 0, sizeLimit - written);
+          break;
+        }
+        outStream.write(buf, 0, size);
+        written += size;
       } catch (Exception e) {
-	LOG.info( "Caught Exception in inflateBestEffort", e );
-	break;
+        LOG.info("Caught Exception in inflateBestEffort", e);
+        break;
       }
     }
     try {
@@ -89,23 +87,24 @@
     return outStream.toByteArray();
   }
 
-
   /**
-   * Returns an inflated copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an inflated copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] inflate(byte[] in) throws IOException {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    InflaterInputStream inStream = 
-      new InflaterInputStream ( new ByteArrayInputStream(in) );
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -118,9 +117,9 @@
    * Returns a deflated copy of the input array.
    */
   public static final byte[] deflate(byte[] in) {
-    // compress using DeflaterOutputStream 
-    ByteArrayOutputStream byteOut = 
-      new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+    // compress using DeflaterOutputStream
+    ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+        / EXPECTED_COMPRESSION_RATIO);
 
     DeflaterOutputStream outStream = new DeflaterOutputStream(byteOut);
 
Index: src/java/org/apache/nutch/util/DomUtil.java
===================================================================
--- src/java/org/apache/nutch/util/DomUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/DomUtil.java	(working copy)
@@ -38,7 +38,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 public class DomUtil {
 
   private final static Logger LOG = LoggerFactory.getLogger(DomUtil.class);
@@ -61,16 +60,16 @@
       input.setEncoding("UTF-8");
       parser.parse(input);
       int i = 0;
-      while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
-       i++;
-      } 
-      element = (Element)parser.getDocument().getChildNodes().item(i);
+      while (!(parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+        i++;
+      }
+      element = (Element) parser.getDocument().getChildNodes().item(i);
     } catch (FileNotFoundException e) {
-        LOG.error("Failed to find file: ", e);
+      LOG.error("Failed to find file: ", e);
     } catch (SAXException e) {
-        LOG.error("Failed with the following SAX exception: ", e);
+      LOG.error("Failed with the following SAX exception: ", e);
     } catch (IOException e) {
-        LOG.error("Failed with the following IOException", e);
+      LOG.error("Failed with the following IOException", e);
     }
     return element;
   }
@@ -93,13 +92,14 @@
       transformer.transform(source, result);
       os.flush();
     } catch (UnsupportedEncodingException e1) {
-        LOG.error("Failed with the following UnsupportedEncodingException: ", e1);
+      LOG.error("Failed with the following UnsupportedEncodingException: ", e1);
     } catch (IOException e1) {
-        LOG.error("Failed to with the following IOException: ", e1);
+      LOG.error("Failed with the following IOException: ", e1);
     } catch (TransformerConfigurationException e2) {
-        LOG.error("Failed with the following TransformerConfigurationException: ", e2);
+      LOG.error(
+          "Failed with the following TransformerConfigurationException: ", e2);
     } catch (TransformerException ex) {
-       LOG.error("Failed with the following TransformerException: ", ex);
+      LOG.error("Failed with the following TransformerException: ", ex);
     }
   }
 }
Index: src/java/org/apache/nutch/util/EncodingDetector.java
===================================================================
--- src/java/org/apache/nutch/util/EncodingDetector.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/EncodingDetector.java	(working copy)
@@ -35,27 +35,26 @@
 
 /**
  * A simple class for detecting character encodings.
- *
+ * 
  * <p>
  * Broadly this encompasses two functions, which are distinctly separate:
- *
+ * 
  * <ol>
- *  <li>Auto detecting a set of "clues" from input text.</li>
- *  <li>Taking a set of clues and making a "best guess" as to the
- *      "real" encoding.</li>
+ * <li>Auto detecting a set of "clues" from input text.</li>
+ * <li>Taking a set of clues and making a "best guess" as to the "real"
+ * encoding.</li>
  * </ol>
  * </p>
- *
+ * 
  * <p>
- * A caller will often have some extra information about what the
- * encoding might be (e.g. from the HTTP header or HTML meta-tags, often
- * wrong but still potentially useful clues). The types of clues may differ
- * from caller to caller. Thus a typical calling sequence is:
+ * A caller will often have some extra information about what the encoding might
+ * be (e.g. from the HTTP header or HTML meta-tags, often wrong but still
+ * potentially useful clues). The types of clues may differ from caller to
+ * caller. Thus a typical calling sequence is:
  * <ul>
- *    <li>Run step (1) to generate a set of auto-detected clues;</li>
- *    <li>Combine these clues with the caller-dependent "extra clues"
- *        available;</li>
- *    <li>Run step (2) to guess what the most probable answer is.</li>
+ * <li>Run step (1) to generate a set of auto-detected clues;</li>
+ * <li>Combine these clues with the caller-dependent "extra clues" available;</li>
+ * <li>Run step (2) to guess what the most probable answer is.</li>
  * </p>
  */
 public class EncodingDetector {
@@ -90,34 +89,32 @@
 
     @Override
     public String toString() {
-      return value + " (" + source +
-           ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
+      return value + " (" + source
+          + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
     }
 
     public boolean isEmpty() {
-      return (value==null || "".equals(value));
+      return (value == null || "".equals(value));
     }
 
     public boolean meetsThreshold() {
-      return (confidence < 0 ||
-               (minConfidence >= 0 && confidence >= minConfidence));
+      return (confidence < 0 || (minConfidence >= 0 && confidence >= minConfidence));
     }
   }
 
-  public static final Logger LOG = LoggerFactory.getLogger(EncodingDetector.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(EncodingDetector.class);
 
   public static final int NO_THRESHOLD = -1;
 
-  public static final String MIN_CONFIDENCE_KEY =
-    "encodingdetector.charset.min.confidence";
+  public static final String MIN_CONFIDENCE_KEY = "encodingdetector.charset.min.confidence";
 
-  private static final HashMap<String, String> ALIASES =
-    new HashMap<String, String>();
+  private static final HashMap<String, String> ALIASES = new HashMap<String, String>();
 
   private static final HashSet<String> DETECTABLES = new HashSet<String>();
 
   // CharsetDetector will die without a minimum amount of data.
-  private static final int MIN_LENGTH=4;
+  private static final int MIN_LENGTH = 4;
 
   static {
     DETECTABLES.add("text/html");
@@ -130,23 +127,22 @@
     DETECTABLES.add("application/rss+xml");
     DETECTABLES.add("application/xhtml+xml");
     /*
-     * the following map is not an alias mapping table, but
-     * maps character encodings which are often used in mislabelled
-     * documents to their correct encodings. For instance,
-     * there are a lot of documents labelled 'ISO-8859-1' which contain
-     * characters not covered by ISO-8859-1 but covered by windows-1252.
-     * Because windows-1252 is a superset of ISO-8859-1 (sharing code points
-     * for the common part), it's better to treat ISO-8859-1 as
-     * synonymous with windows-1252 than to reject, as invalid, documents
-     * labelled as ISO-8859-1 that have characters outside ISO-8859-1.
+     * the following map is not an alias mapping table, but maps character
+     * encodings which are often used in mislabelled documents to their correct
+     * encodings. For instance, there are a lot of documents labelled
+     * 'ISO-8859-1' which contain characters not covered by ISO-8859-1 but
+     * covered by windows-1252. Because windows-1252 is a superset of ISO-8859-1
+     * (sharing code points for the common part), it's better to treat
+     * ISO-8859-1 as synonymous with windows-1252 than to reject, as invalid,
+     * documents labelled as ISO-8859-1 that have characters outside ISO-8859-1.
      */
     ALIASES.put("ISO-8859-1", "windows-1252");
     ALIASES.put("EUC-KR", "x-windows-949");
     ALIASES.put("x-EUC-CN", "GB18030");
     ALIASES.put("GBK", "GB18030");
-    //ALIASES.put("Big5", "Big5HKSCS");
-    //ALIASES.put("TIS620", "Cp874");
-    //ALIASES.put("ISO-8859-11", "Cp874");
+    // ALIASES.put("Big5", "Big5HKSCS");
+    // ALIASES.put("TIS620", "Cp874");
+    // ALIASES.put("ISO-8859-11", "Cp874");
 
   }
 
@@ -164,16 +160,16 @@
 
   public void autoDetectClues(WebPage page, boolean filter) {
     autoDetectClues(page.getContent(), page.getContentType(),
-        parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)), filter);
+        parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)),
+        filter);
   }
 
   private void autoDetectClues(ByteBuffer dataBuffer, CharSequence typeUtf8,
-                               String encoding, boolean filter) {
+      String encoding, boolean filter) {
     int length = dataBuffer.remaining();
     String type = TableUtil.toString(typeUtf8);
 
-    if (minConfidence >= 0 && DETECTABLES.contains(type)
-        && length > MIN_LENGTH) {
+    if (minConfidence >= 0 && DETECTABLES.contains(type) && length > MIN_LENGTH) {
       CharsetMatch[] matches = null;
 
       // do all these in a try/catch; setText and detect/detectAll
@@ -214,12 +210,14 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param row URL's row
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param row
+   *          URL's row
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   public String guessEncoding(WebPage page, String defaultValue) {
@@ -230,33 +228,33 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param baseUrl Base URL
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param baseUrl
+   *          Base URL
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   private String guessEncoding(String baseUrl, String defaultValue) {
     /*
-     * This algorithm could be replaced by something more sophisticated;
-     * ideally we would gather a bunch of data on where various clues
-     * (autodetect, HTTP headers, HTML meta tags, etc.) disagree, tag each with
-     * the correct answer, and use machine learning/some statistical method
-     * to generate a better heuristic.
+     * This algorithm could be replaced by something more sophisticated; ideally
+     * we would gather a bunch of data on where various clues (autodetect, HTTP
+     * headers, HTML meta tags, etc.) disagree, tag each with the correct
+     * answer, and use machine learning/some statistical method to generate a
+     * better heuristic.
      */
 
-
     if (LOG.isTraceEnabled()) {
       findDisagreements(baseUrl, clues);
     }
 
     /*
-     * Go down the list of encoding "clues". Use a clue if:
-     *  1. Has a confidence value which meets our confidence threshold, OR
-     *  2. Doesn't meet the threshold, but is the best try,
-     *     since nothing else is available.
+     * Go down the list of encoding "clues". Use a clue if: 1. Has a confidence
+     * value which meets our confidence threshold, OR 2. Doesn't meet the
+     * threshold, but is the best try, since nothing else is available.
      */
     EncodingClue defaultClue = new EncodingClue(defaultValue, "default");
     EncodingClue bestClue = defaultClue;
@@ -268,8 +266,8 @@
       String charset = clue.value;
       if (minConfidence >= 0 && clue.confidence >= minConfidence) {
         if (LOG.isTraceEnabled()) {
-          LOG.trace(baseUrl + ": Choosing encoding: " + charset +
-                    " with confidence " + clue.confidence);
+          LOG.trace(baseUrl + ": Choosing encoding: " + charset
+              + " with confidence " + clue.confidence);
         }
         return resolveEncodingAlias(charset).toLowerCase();
       } else if (clue.confidence == NO_THRESHOLD && bestClue == defaultClue) {
@@ -289,10 +287,10 @@
   }
 
   /*
-   * Strictly for analysis, look for "disagreements." The top guess from
-   * each source is examined; if these meet the threshold and disagree, then
-   * we log the information -- useful for testing or generating training data
-   * for a better heuristic.
+   * Strictly for analysis, look for "disagreements." The top guess from each
+   * source is examined; if these meet the threshold and disagree, then we log
+   * the information -- useful for testing or generating training data for a
+   * better heuristic.
    */
   private void findDisagreements(String url, List<EncodingClue> newClues) {
     HashSet<String> valsSeen = new HashSet<String>();
@@ -314,9 +312,9 @@
     if (disagreement) {
       // dump all values in case of disagreement
       StringBuffer sb = new StringBuffer();
-      sb.append("Disagreement: "+url+"; ");
+      sb.append("Disagreement: " + url + "; ");
       for (int i = 0; i < newClues.size(); i++) {
-        if (i>0) {
+        if (i > 0) {
           sb.append(", ");
         }
         sb.append(newClues.get(i));
@@ -331,7 +329,7 @@
         return null;
       String canonicalName = new String(Charset.forName(encoding).name());
       return ALIASES.containsKey(canonicalName) ? ALIASES.get(canonicalName)
-                                                : canonicalName;
+          : canonicalName;
     } catch (Exception e) {
       LOG.warn("Invalid encoding " + encoding + " detected, using default.");
       return null;
@@ -339,13 +337,12 @@
   }
 
   /**
-   * Parse the character encoding from the specified content type header.
-   * If the content type is null, or there is no explicit character encoding,
-   * <code>null</code> is returned.
-   * <br />
-   * This method was copied from org.apache.catalina.util.RequestUtil,
-   * which is licensed under the Apache License, Version 2.0 (the "License").
-   *
+   * Parse the character encoding from the specified content type header. If the
+   * content type is null, or there is no explicit character encoding,
+   * <code>null</code> is returned. <br />
+   * This method was copied from org.apache.catalina.util.RequestUtil, which is
+   * licensed under the Apache License, Version 2.0 (the "License").
+   * 
    * @param contentTypeUtf8
    */
   public static String parseCharacterEncoding(CharSequence contentTypeUtf8) {
@@ -361,51 +358,36 @@
       encoding = encoding.substring(0, end);
     encoding = encoding.trim();
     if ((encoding.length() > 2) && (encoding.startsWith("\""))
-      && (encoding.endsWith("\"")))
+        && (encoding.endsWith("\"")))
       encoding = encoding.substring(1, encoding.length() - 1);
     return (encoding.trim());
 
   }
 
-  /*public static void main(String[] args) throws IOException {
-    if (args.length != 1) {
-      System.err.println("Usage: EncodingDetector <file>");
-      System.exit(1);
-    }
+  /*
+   * public static void main(String[] args) throws IOException { if (args.length
+   * != 1) { System.err.println("Usage: EncodingDetector <file>");
+   * System.exit(1); }
+   * 
+   * Configuration conf = NutchConfiguration.create(); EncodingDetector detector
+   * = new EncodingDetector(NutchConfiguration.create());
+   * 
+   * // do everything as bytes; don't want any conversion BufferedInputStream
+   * istr = new BufferedInputStream(new FileInputStream(args[0]));
+   * ByteArrayOutputStream ostr = new ByteArrayOutputStream(); byte[] bytes =
+   * new byte[1000]; boolean more = true; while (more) { int len =
+   * istr.read(bytes); if (len < bytes.length) { more = false; if (len > 0) {
+   * ostr.write(bytes, 0, len); } } else { ostr.write(bytes); } }
+   * 
+   * byte[] data = ostr.toByteArray(); MimeUtil mimeTypes = new MimeUtil(conf);
+   * 
+   * // make a fake Content Content content = new Content("", "", data,
+   * "text/html", new Metadata(), mimeTypes);
+   * 
+   * detector.autoDetectClues(content, true); String encoding =
+   * detector.guessEncoding(content,
+   * conf.get("parser.character.encoding.default"));
+   * System.out.println("Guessed encoding: " + encoding); }
+   */
 
-    Configuration conf = NutchConfiguration.create();
-    EncodingDetector detector =
-      new EncodingDetector(NutchConfiguration.create());
-
-    // do everything as bytes; don't want any conversion
-    BufferedInputStream istr =
-      new BufferedInputStream(new FileInputStream(args[0]));
-    ByteArrayOutputStream ostr = new ByteArrayOutputStream();
-    byte[] bytes = new byte[1000];
-    boolean more = true;
-    while (more) {
-      int len = istr.read(bytes);
-      if (len < bytes.length) {
-        more = false;
-        if (len > 0) {
-          ostr.write(bytes, 0, len);
-        }
-      } else {
-        ostr.write(bytes);
-      }
-    }
-
-    byte[] data = ostr.toByteArray();
-    MimeUtil mimeTypes = new MimeUtil(conf);
-
-    // make a fake Content
-    Content content =
-      new Content("", "", data, "text/html", new Metadata(), mimeTypes);
-
-    detector.autoDetectClues(content, true);
-    String encoding = detector.guessEncoding(content,
-        conf.get("parser.character.encoding.default"));
-    System.out.println("Guessed encoding: " + encoding);
-  }*/
-
 }
Index: src/java/org/apache/nutch/util/FSUtils.java
===================================================================
--- src/java/org/apache/nutch/util/FSUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/FSUtils.java	(working copy)
@@ -33,16 +33,20 @@
    * path. If removeOld is set to false then the old path will be set to the
    * name current.old.
    * 
-   * @param fs The FileSystem.
-   * @param current The end path, the one being replaced.
-   * @param replacement The path to replace with.
-   * @param removeOld True if we are removing the current path.
+   * @param fs
+   *          The FileSystem.
+   * @param current
+   *          The end path, the one being replaced.
+   * @param replacement
+   *          The path to replace with.
+   * @param removeOld
+   *          True if we are removing the current path.
    * 
-   * @throws IOException If an error occurs during replacement.
+   * @throws IOException
+   *           If an error occurs during replacement.
    */
   public static void replace(FileSystem fs, Path current, Path replacement,
-    boolean removeOld)
-    throws IOException {
+      boolean removeOld) throws IOException {
 
     // rename any current path to old
     Path old = new Path(current + ".old");
@@ -60,12 +64,14 @@
   /**
    * Closes a group of SequenceFile readers.
    * 
-   * @param readers The SequenceFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The SequenceFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
   public static void closeReaders(SequenceFile.Reader[] readers)
-    throws IOException {
-    
+      throws IOException {
+
     // loop through the readers, closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -80,12 +86,13 @@
   /**
    * Closes a group of MapFile readers.
    * 
-   * @param readers The MapFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The MapFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
-  public static void closeReaders(MapFile.Reader[] readers)
-    throws IOException {
-    
+  public static void closeReaders(MapFile.Reader[] readers) throws IOException {
+
     // loop through the readers closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
Index: src/java/org/apache/nutch/util/GZIPUtils.java
===================================================================
--- src/java/org/apache/nutch/util/GZIPUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/GZIPUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on GZIPed data.
+ * A collection of utility methods for working on GZIPed data.
  */
 public class GZIPUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(GZIPUtils.class);
-  private static final int EXPECTED_COMPRESSION_RATIO= 5;
-  private static final int BUF_SIZE= 4096;
+  private static final int EXPECTED_COMPRESSION_RATIO = 5;
+  private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an gunzipped copy of the input array.  If the gzipped
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns a gunzipped copy of the input array. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in) {
     return unzipBestEffort(in, Integer.MAX_VALUE);
@@ -48,19 +47,18 @@
 
   /**
    * Returns an gunzipped copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the gzipped input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in, int sizeLimit) {
     try {
-      // decompress using GZIPInputStream 
-      ByteArrayOutputStream outStream = 
-        new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+      // decompress using GZIPInputStream
+      ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+          EXPECTED_COMPRESSION_RATIO * in.length);
 
-      GZIPInputStream inStream = 
-        new GZIPInputStream ( new ByteArrayInputStream(in) );
+      GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(
+          in));
 
       byte[] buf = new byte[BUF_SIZE];
       int written = 0;
@@ -67,7 +65,7 @@
       while (true) {
         try {
           int size = inStream.read(buf);
-          if (size <= 0) 
+          if (size <= 0)
             break;
           if ((written + size) > sizeLimit) {
             outStream.write(buf, 0, sizeLimit - written);
@@ -74,7 +72,7 @@
             break;
           }
           outStream.write(buf, 0, size);
-          written+= size;
+          written += size;
         } catch (Exception e) {
           break;
         }
@@ -91,23 +89,23 @@
     }
   }
 
-
   /**
-   * Returns an gunzipped copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns a gunzipped copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] unzip(byte[] in) throws IOException {
-    // decompress using GZIPInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using GZIPInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    GZIPInputStream inStream = 
-      new GZIPInputStream ( new ByteArrayInputStream(in) );
+    GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -121,11 +119,11 @@
    */
   public static final byte[] zip(byte[] in) {
     try {
-      // compress using GZIPOutputStream 
-      ByteArrayOutputStream byteOut= 
-        new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+      // compress using GZIPOutputStream
+      ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+          / EXPECTED_COMPRESSION_RATIO);
 
-      GZIPOutputStream outStream= new GZIPOutputStream(byteOut);
+      GZIPOutputStream outStream = new GZIPOutputStream(byteOut);
 
       try {
         outStream.write(in);
@@ -142,9 +140,9 @@
       return byteOut.toByteArray();
 
     } catch (IOException e) {
-        LOG.error("Failed with IOException", e);
+      LOG.error("Failed with IOException", e);
       return null;
     }
   }
-    
+
 }
Index: src/java/org/apache/nutch/util/GenericWritableConfigurable.java
===================================================================
--- src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(working copy)
@@ -24,12 +24,15 @@
 import org.apache.hadoop.io.GenericWritable;
 import org.apache.hadoop.io.Writable;
 
-/** A generic Writable wrapper that can inject Configuration to {@link Configurable}s */ 
-public abstract class GenericWritableConfigurable extends GenericWritable 
-                                                  implements Configurable {
+/**
+ * A generic Writable wrapper that can inject Configuration to
+ * {@link Configurable}s
+ */
+public abstract class GenericWritableConfigurable extends GenericWritable
+    implements Configurable {
 
   private Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
@@ -37,7 +40,7 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   @Override
   public void readFields(DataInput in) throws IOException {
     byte type = in.readByte();
@@ -50,8 +53,8 @@
     }
     Writable w = get();
     if (w instanceof Configurable)
-      ((Configurable)w).setConf(conf);
+      ((Configurable) w).setConf(conf);
     w.readFields(in);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/HadoopFSUtil.java
===================================================================
--- src/java/org/apache/nutch/util/HadoopFSUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/HadoopFSUtil.java	(working copy)
@@ -25,48 +25,48 @@
 
 public class HadoopFSUtil {
 
-    /**
-     * Returns PathFilter that passes all paths through.
-     */
-    public static PathFilter getPassAllFilter() {
-        return new PathFilter() {
-            public boolean accept(Path arg0) {
-                return true;
-            }
-        };
-    }
+  /**
+   * Returns PathFilter that passes all paths through.
+   */
+  public static PathFilter getPassAllFilter() {
+    return new PathFilter() {
+      public boolean accept(Path arg0) {
+        return true;
+      }
+    };
+  }
 
-    /**
-     * Returns PathFilter that passes directories through.
-     */
-    public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
-        return new PathFilter() {
-            public boolean accept(final Path path) {
-                try {
-                    return fs.getFileStatus(path).isDir();
-                } catch (IOException ioe) {
-                    return false;
-                }
-            }
+  /**
+   * Returns PathFilter that passes directories through.
+   */
+  public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
+    return new PathFilter() {
+      public boolean accept(final Path path) {
+        try {
+          return fs.getFileStatus(path).isDir();
+        } catch (IOException ioe) {
+          return false;
+        }
+      }
 
-        };
+    };
+  }
+
+  /**
+   * Turns an array of FileStatus into an array of Paths.
+   */
+  public static Path[] getPaths(FileStatus[] stats) {
+    if (stats == null) {
+      return null;
     }
-    
-    /**
-     * Turns an array of FileStatus into an array of Paths.
-     */
-    public static Path[] getPaths(FileStatus[] stats) {
-      if (stats == null) {
-        return null;
-      }
-      if (stats.length == 0) {
-        return new Path[0];
-      }
-      Path[] res = new Path[stats.length];
-      for (int i = 0; i < stats.length; i++) {
-        res[i] = stats[i].getPath();
-      }
-      return res;
+    if (stats.length == 0) {
+      return new Path[0];
     }
+    Path[] res = new Path[stats.length];
+    for (int i = 0; i < stats.length; i++) {
+      res[i] = stats[i].getPath();
+    }
+    return res;
+  }
 
 }
Index: src/java/org/apache/nutch/util/Histogram.java
===================================================================
--- src/java/org/apache/nutch/util/Histogram.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/Histogram.java	(working copy)
@@ -72,8 +72,8 @@
   }
 
   public List<E> sortInverseByValue() {
-    List<Map.Entry<E, HistogramEntry>> list = 
-        new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet());
+    List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, HistogramEntry>>(
+        map.entrySet());
 
     // Sort the list using an annonymous inner class implementing Comparator for
     // the compare method
@@ -93,8 +93,8 @@
   }
 
   public List<E> sortByValue() {
-    List<Map.Entry<E, HistogramEntry>> list = 
-        new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet());
+    List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, HistogramEntry>>(
+        map.entrySet());
 
     // Sort the list using an annonymous inner class implementing Comparator for
     // the compare method
Index: src/java/org/apache/nutch/util/IdentityPageReducer.java
===================================================================
--- src/java/org/apache/nutch/util/IdentityPageReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/IdentityPageReducer.java	(working copy)
@@ -21,12 +21,12 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.gora.mapreduce.GoraReducer;
 
-public class IdentityPageReducer
-extends GoraReducer<String, WebPage, String, WebPage> {
+public class IdentityPageReducer extends
+    GoraReducer<String, WebPage, String, WebPage> {
 
   @Override
-  protected void reduce(String key, Iterable<WebPage> values,
-      Context context) throws IOException, InterruptedException {
+  protected void reduce(String key, Iterable<WebPage> values, Context context)
+      throws IOException, InterruptedException {
     for (WebPage page : values) {
       context.write(key, page);
     }
Index: src/java/org/apache/nutch/util/LockUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LockUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/LockUtil.java	(working copy)
@@ -28,22 +28,29 @@
  * @author Andrzej Bialecki
  */
 public class LockUtil {
-  
+
   /**
    * Create a lock file.
-   * @param fs filesystem
-   * @param lockFile name of the lock file
-   * @param accept if true, and the target file exists, consider it valid. If false
-   * and the target file exists, throw an IOException.
-   * @throws IOException if accept is false, and the target file already exists,
-   * or if it's a directory.
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          name of the lock file
+   * @param accept
+   *          if true, and the target file exists, consider it valid. If false
+   *          and the target file exists, throw an IOException.
+   * @throws IOException
+   *           if accept is false, and the target file already exists, or if
+   *           it's a directory.
    */
-  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) throws IOException {
+  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept)
+      throws IOException {
     if (fs.exists(lockFile)) {
-      if(!accept)
+      if (!accept)
         throw new IOException("lock file " + lockFile + " already exists.");
       if (fs.getFileStatus(lockFile).isDir())
-        throw new IOException("lock file " + lockFile + " already exists and is a directory.");
+        throw new IOException("lock file " + lockFile
+            + " already exists and is a directory.");
       // do nothing - the file already exists.
     } else {
       // make sure parents exist
@@ -55,16 +62,23 @@
   /**
    * Remove lock file. NOTE: applications enforce the semantics of this file -
    * this method simply removes any file with a given name.
-   * @param fs filesystem
-   * @param lockFile lock file name
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          lock file name
    * @return false, if the lock file doesn't exist. True, if it existed and was
-   * successfully removed.
-   * @throws IOException if lock file exists but it is a directory.
+   *         successfully removed.
+   * @throws IOException
+   *           if lock file exists but it is a directory.
    */
-  public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
-    if (!fs.exists(lockFile)) return false;
+  public static boolean removeLockFile(FileSystem fs, Path lockFile)
+      throws IOException {
+    if (!fs.exists(lockFile))
+      return false;
     if (fs.getFileStatus(lockFile).isDir())
-      throw new IOException("lock file " + lockFile + " exists but is a directory!");
+      throw new IOException("lock file " + lockFile
+          + " exists but is a directory!");
     return fs.delete(lockFile, false);
   }
 }
Index: src/java/org/apache/nutch/util/MimeUtil.java
===================================================================
--- src/java/org/apache/nutch/util/MimeUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/MimeUtil.java	(working copy)
@@ -37,7 +37,7 @@
 // Slf4j logging imports
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
- 
+
 // imported for Javadoc
 import org.apache.nutch.protocol.ProtocolOutput;
 
@@ -45,12 +45,12 @@
  * @author mattmann
  * @since NUTCH-608
  * 
- * <p>
- * This is a facade class to insulate Nutch from its underlying Mime Type
- * substrate library, <a href="http://incubator.apache.org/tika/">Apache Tika</a>.
- * Any mime handling code should be placed in this utility class, and hidden
- * from the Nutch classes that rely on it.
- * </p>
+ *        <p>
+ *        This is a facade class to insulate Nutch from its underlying Mime Type
+ *        substrate library, <a href="http://incubator.apache.org/tika/">Apache
+ *        Tika</a>. Any mime handling code should be placed in this utility
+ *        class, and hidden from the Nutch classes that rely on it.
+ *        </p>
  */
 public final class MimeUtil {
 
@@ -66,7 +66,8 @@
   private boolean mimeMagic;
 
   /* our log stream */
-  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());
+  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class
+      .getName());
 
   public MimeUtil(Configuration conf) {
     tika = new Tika();
@@ -75,25 +76,26 @@
         .getName());
     if (mimeTypez == null) {
       try {
-          String customMimeTypeFile = conf.get("mime.types.file");
-          if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
-              try {
-              mimeTypez = MimeTypesFactory.create(conf
-                      .getConfResourceAsInputStream(customMimeTypeFile));
-              }
-              catch (Exception e){
-                  LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default");
-              }
+        String customMimeTypeFile = conf.get("mime.types.file");
+        if (customMimeTypeFile != null
+            && customMimeTypeFile.equals("") == false) {
+          try {
+            mimeTypez = MimeTypesFactory.create(conf
+                .getConfResourceAsInputStream(customMimeTypeFile));
+          } catch (Exception e) {
+            LOG.error("Can't load mime.types.file : " + customMimeTypeFile
+                + " using Tika's default");
           }
-          if (mimeTypez==null)
-              mimeTypez = MimeTypes.getDefaultMimeTypes();
+        }
+        if (mimeTypez == null)
+          mimeTypez = MimeTypes.getDefaultMimeTypes();
       } catch (Exception e) {
-        LOG.error("Exception in MimeUtil "+e.getMessage());
+        LOG.error("Exception in MimeUtil " + e.getMessage());
         throw new RuntimeException(e);
       }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
     }
-    
+
     this.mimeTypes = mimeTypez;
     this.mimeMagic = conf.getBoolean("mime.type.magic", true);
   }
@@ -129,14 +131,13 @@
   /**
    * A facade interface to trying all the possible mime type resolution
    * strategies available within Tika. First, the mime type provided in
-   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
-   * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType}
-   * is found, then that mime type is used, otherwise URL resolution is
-   * used to try and determine the mime type. However, if
-   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
-   * then mime type magic resolution is used to try and obtain a
-   * better-than-the-default approximation of the {@link MimeType}.
+   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. Then
+   * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes}
+   * registry, by its cleaned name. If the {@link MimeType} is found, then that
+   * mime type is used, otherwise URL resolution is used to try and determine
+   * the mime type. However, if <code>mime.type.magic</code> is enabled in
+   * {@link NutchConfiguration}, then mime type magic resolution is used to try
+   * and obtain a better-than-the-default approximation of the {@link MimeType}.
    * 
    * @param typeName
    *          The original mime type, returned from a {@link ProtocolOutput}.
@@ -177,7 +178,7 @@
         throw new RuntimeException(e);
       }
     } else {
-        retType = type.getName();
+      retType = type.getName();
     }
 
     // if magic is enabled use mime magic to guess if the mime type returned
@@ -195,14 +196,15 @@
         InputStream stream = TikaInputStream.get(data);
         try {
           magicType = tika.detect(stream, tikaMeta);
-       } finally {
-         stream.close();
+        } finally {
+          stream.close();
         }
-      } catch (IOException ignore) {}
+      } catch (IOException ignore) {
+      }
 
       if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM)
-          && !magicType.equals(MimeTypes.PLAIN_TEXT)
-          && retType != null && !retType.equals(magicType)) {
+          && !magicType.equals(MimeTypes.PLAIN_TEXT) && retType != null
+          && !retType.equals(magicType)) {
 
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType
@@ -225,12 +227,12 @@
   /**
    * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(String)}
    * method.
-   *
+   * 
    * @param url
    *          A string representation of the document {@link URL} to sense the
    *          {@link MimeType} for.
-   * @return An appropriate {@link MimeType}, identified from the given
-   *         Document url in string form.
+   * @return An appropriate {@link MimeType}, identified from the given Document
+   *         url in string form.
    */
   public String getMimeType(String url) {
     return tika.detect(url);
@@ -239,11 +241,11 @@
   /**
    * A facade interface to Tika's underlying {@link MimeTypes#forName(String)}
    * method.
-   *
+   * 
    * @param name
    *          The name of a valid {@link MimeType} in the Tika mime registry.
-   * @return The object representation of the {@link MimeType}, if it exists,
-   *         or null otherwise.
+   * @return The object representation of the {@link MimeType}, if it exists, or
+   *         null otherwise.
    */
   public String forName(String name) {
     try {
@@ -258,7 +260,7 @@
   /**
    * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(File)}
    * method.
-   *
+   * 
    * @param f
    *          The {@link File} to sense the {@link MimeType} for.
    * @return The {@link MimeType} of the given {@link File}, or null if it
Index: src/java/org/apache/nutch/util/NodeWalker.java
===================================================================
--- src/java/org/apache/nutch/util/NodeWalker.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NodeWalker.java	(working copy)
@@ -22,13 +22,17 @@
 import org.w3c.dom.NodeList;
 
 /**
- * <p>A utility class that allows the walking of any DOM tree using a stack 
- * instead of recursion.  As the node tree is walked the next node is popped
- * off of the stack and all of its children are automatically added to the 
- * stack to be called in tree order.</p>
+ * <p>
+ * A utility class that allows the walking of any DOM tree using a stack instead
+ * of recursion. As the node tree is walked the next node is popped off of the
+ * stack and all of its children are automatically added to the stack to be
+ * called in tree order.
+ * </p>
  * 
- * <p>Currently this class is not thread safe.  It is assumed that only one
- * thread will be accessing the <code>NodeWalker</code> at any given time.</p>
+ * <p>
+ * Currently this class is not thread safe. It is assumed that only one thread
+ * will be accessing the <code>NodeWalker</code> at any given time.
+ * </p>
  */
 public class NodeWalker {
 
@@ -36,7 +40,7 @@
   private Node currentNode;
   private NodeList currentChildren;
   private Stack<Node> nodes;
-  
+
   /**
    * Starts the <code>Node</code> tree from the root node.
    * 
@@ -47,53 +51,58 @@
     nodes = new Stack<Node>();
     nodes.add(rootNode);
   }
-  
+
   /**
-   * <p>Returns the next <code>Node</code> on the stack and pushes all of its
-   * children onto the stack, allowing us to walk the node tree without the
-   * use of recursion.  If there are no more nodes on the stack then null is
-   * returned.</p>
+   * <p>
+   * Returns the next <code>Node</code> on the stack and pushes all of its
+   * children onto the stack, allowing us to walk the node tree without the use
+   * of recursion. If there are no more nodes on the stack then null is
+   * returned.
+   * </p>
    * 
-   * @return Node The next <code>Node</code> on the stack or null if there
-   * isn't a next node.
+   * @return Node The next <code>Node</code> on the stack or null if there isn't
+   *         a next node.
    */
   public Node nextNode() {
-    
+
     // if no next node return null
     if (!hasNext()) {
       return null;
     }
-    
+
     // pop the next node off of the stack and push all of its children onto
     // the stack
     currentNode = nodes.pop();
     currentChildren = currentNode.getChildNodes();
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
+
     // put the children node on the stack in first to last order
     for (int i = childLen - 1; i >= 0; i--) {
       nodes.add(currentChildren.item(i));
     }
-    
+
     return currentNode;
   }
-  
+
   /**
-   * <p>Skips over and removes from the node stack the children of the last
-   * node.  When getting a next node from the walker, that node's children 
-   * are automatically added to the stack.  You can call this method to remove
-   * those children from the stack.</p>
+   * <p>
+   * Skips over and removes from the node stack the children of the last node.
+   * When getting a next node from the walker, that node's children are
+   * automatically added to the stack. You can call this method to remove those
+   * children from the stack.
+   * </p>
    * 
-   * <p>This is useful when you don't want to process deeper into the 
-   * current path of the node tree but you want to continue processing sibling
-   * nodes.</p>
-   *
+   * <p>
+   * This is useful when you don't want to process deeper into the current path
+   * of the node tree but you want to continue processing sibling nodes.
+   * </p>
+   * 
    */
   public void skipChildren() {
-    
+
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
-    for (int i = 0 ; i < childLen ; i++) {
+
+    for (int i = 0; i < childLen; i++) {
       Node child = nodes.peek();
       if (child.equals(currentChildren.item(i))) {
         nodes.pop();
@@ -100,16 +109,19 @@
       }
     }
   }
-  
+
   /**
    * Return the current node.
+   * 
    * @return Node
    */
   public Node getCurrentNode() {
     return currentNode;
   }
-  
-  /**   * Returns true if there are more nodes on the current stack.
+
+  /**
+   * * Returns true if there are more nodes on the current stack.
+   * 
    * @return
    */
   public boolean hasNext() {
Index: src/java/org/apache/nutch/util/NutchConfiguration.java
===================================================================
--- src/java/org/apache/nutch/util/NutchConfiguration.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchConfiguration.java	(working copy)
@@ -23,28 +23,32 @@
 
 import org.apache.hadoop.conf.Configuration;
 
-
-/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific
- * resources.  */
+/**
+ * Utility to create Hadoop {@link Configuration}s that include Nutch-specific
+ * resources.
+ */
 public class NutchConfiguration {
   public static final String UUID_KEY = "nutch.conf.uuid";
-  
-  private NutchConfiguration() {}                 // singleton
-  
+
+  private NutchConfiguration() {
+  } // singleton
+
   /*
-   * Configuration.hashCode() doesn't return values that
-   * correspond to a unique set of parameters. This is a workaround
-   * so that we can track instances of Configuration created by Nutch.
+   * Configuration.hashCode() doesn't return values that correspond to a unique
+   * set of parameters. This is a workaround so that we can track instances of
+   * Configuration created by Nutch.
    */
   private static void setUUID(Configuration conf) {
     UUID uuid = UUID.randomUUID();
     conf.set(UUID_KEY, uuid.toString());
   }
-  
+
   /**
-   * Retrieve a Nutch UUID of this configuration object, or null
-   * if the configuration was created elsewhere.
-   * @param conf configuration instance
+   * Retrieve a Nutch UUID of this configuration object, or null if the
+   * configuration was created elsewhere.
+   * 
+   * @param conf
+   *          configuration instance
    * @return uuid or null
    */
   public static String getUUID(Configuration conf) {
@@ -51,9 +55,10 @@
     return conf.get(UUID_KEY);
   }
 
-  /** Create a {@link Configuration} for Nutch. This will load the standard
-   * Nutch resources, <code>nutch-default.xml</code> and
-   * <code>nutch-site.xml</code> overrides.
+  /**
+   * Create a {@link Configuration} for Nutch. This will load the standard Nutch
+   * resources, <code>nutch-default.xml</code> and <code>nutch-site.xml</code>
+   * overrides.
    */
   public static Configuration create() {
     Configuration conf = new Configuration();
@@ -61,14 +66,19 @@
     addNutchResources(conf);
     return conf;
   }
-  
-  /** Create a {@link Configuration} from supplied properties.
-   * @param addNutchResources if true, then first <code>nutch-default.xml</code>,
-   * and then <code>nutch-site.xml</code> will be loaded prior to applying the
-   * properties. Otherwise these resources won't be used.
-   * @param nutchProperties a set of properties to define (or override)
+
+  /**
+   * Create a {@link Configuration} from supplied properties.
+   * 
+   * @param addNutchResources
+   *          if true, then first <code>nutch-default.xml</code>, and then
+   *          <code>nutch-site.xml</code> will be loaded prior to applying the
+   *          properties. Otherwise these resources won't be used.
+   * @param nutchProperties
+   *          a set of properties to define (or override)
    */
-  public static Configuration create(boolean addNutchResources, Properties nutchProperties) {
+  public static Configuration create(boolean addNutchResources,
+      Properties nutchProperties) {
     Configuration conf = new Configuration();
     setUUID(conf);
     if (addNutchResources) {
@@ -83,8 +93,8 @@
   /**
    * Add the standard Nutch resources to {@link Configuration}.
    * 
-   * @param conf               Configuration object to which
-   *                           configuration is to be added.
+   * @param conf
+   *          Configuration object to which configuration is to be added.
    */
   private static Configuration addNutchResources(Configuration conf) {
     conf.addResource("nutch-default.xml");
@@ -92,4 +102,3 @@
     return conf;
   }
 }
-
Index: src/java/org/apache/nutch/util/NutchJob.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchJob.java	(working copy)
@@ -35,10 +35,10 @@
 
   public NutchJob(Configuration conf, String jobName) throws IOException {
     super(conf, jobName);
-    //prefix jobName with crawlId if not empty
+    // prefix jobName with crawlId if not empty
     String crawlId = conf.get("storage.crawl.id");
     if (!StringUtils.isEmpty(crawlId)) {
-      jobName = "["+crawlId+"]"+jobName;
+      jobName = "[" + crawlId + "]" + jobName;
       setJobName(jobName);
     }
     setJarByClass(this.getClass());
Index: src/java/org/apache/nutch/util/NutchJobConf.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJobConf.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchJobConf.java	(working copy)
@@ -20,7 +20,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
 
-/** A {@link JobConf} for Nutch jobs.  */
+/** A {@link JobConf} for Nutch jobs. */
 public class NutchJobConf extends JobConf {
 
   public NutchJobConf(Configuration conf) {
@@ -28,4 +28,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/util/NutchTool.java
===================================================================
--- src/java/org/apache/nutch/util/NutchTool.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchTool.java	(working copy)
@@ -26,19 +26,20 @@
 import org.apache.nutch.metadata.Nutch;
 
 public abstract class NutchTool extends Configured {
-  
-  protected HashMap<String,Object> results = new HashMap<String,Object>();
-  protected Map<String,Object> status =
-    Collections.synchronizedMap(new HashMap<String,Object>());
+
+  protected HashMap<String, Object> results = new HashMap<String, Object>();
+  protected Map<String, Object> status = Collections
+      .synchronizedMap(new HashMap<String, Object>());
   protected Job currentJob;
   protected int numJobs;
   protected int currentJobNum;
-  
-  /** Runs the tool, using a map of arguments.
-   * May return results, or null.
+
+  /**
+   * Runs the tool, using a map of arguments. May return results, or null.
    */
-  public abstract Map<String,Object> run(Map<String,Object> args) throws Exception;
-  
+  public abstract Map<String, Object> run(Map<String, Object> args)
+      throws Exception;
+
   /** Returns relative progress of the tool, a float in range [0,1]. */
   public float getProgress() {
     float res = 0;
@@ -55,29 +56,31 @@
     }
     // take into account multiple jobs
     if (numJobs > 1) {
-      res = (currentJobNum + res) / (float)numJobs;
+      res = (currentJobNum + res) / (float) numJobs;
     }
     status.put(Nutch.STAT_PROGRESS, res);
     return res;
   }
-  
-  
+
   /** Returns current status of the running tool. */
-  public Map<String,Object> getStatus() {
+  public Map<String, Object> getStatus() {
     return status;
   }
-  
-  /** Stop the job with the possibility to resume. Subclasses should
-   * override this, since by default it calls {@link #killJob()}.
+
+  /**
+   * Stop the job with the possibility to resume. Subclasses should override
+   * this, since by default it calls {@link #killJob()}.
+   * 
    * @return true if succeeded, false otherwise
    */
   public boolean stopJob() throws Exception {
     return killJob();
   }
-  
+
   /**
-   * Kill the job immediately. Clients should assume that any results
-   * that the job produced so far are in inconsistent state or missing.
+   * Kill the job immediately. Clients should assume that any results that the
+   * job produced so far are in inconsistent state or missing.
+   * 
    * @return true if succeeded, false otherwise.
    * @throws Exception
    */
Index: src/java/org/apache/nutch/util/ObjectCache.java
===================================================================
--- src/java/org/apache/nutch/util/ObjectCache.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/ObjectCache.java	(working copy)
@@ -24,35 +24,33 @@
 import org.apache.hadoop.conf.Configuration;
 
 public class ObjectCache {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class);
-  
-  private static final WeakHashMap<Configuration, ObjectCache> CACHE = 
-    new WeakHashMap<Configuration, ObjectCache>();
 
+  private static final WeakHashMap<Configuration, ObjectCache> CACHE = new WeakHashMap<Configuration, ObjectCache>();
+
   private final HashMap<String, Object> objectMap;
-  
+
   private ObjectCache() {
     objectMap = new HashMap<String, Object>();
   }
-  
+
   public static ObjectCache get(Configuration conf) {
     ObjectCache objectCache = CACHE.get(conf);
     if (objectCache == null) {
-      LOG.debug("No object cache found for conf=" + conf 
-                  + ", instantiating a new object cache");
+      LOG.debug("No object cache found for conf=" + conf
+          + ", instantiating a new object cache");
       objectCache = new ObjectCache();
       CACHE.put(conf, objectCache);
     }
     return objectCache;
   }
-  
+
   public Object getObject(String key) {
     return objectMap.get(key);
   }
-  
+
   public void setObject(String key, Object value) {
     objectMap.put(key, value);
   }
 }
-
Index: src/java/org/apache/nutch/util/PrefixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/PrefixStringMatcher.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/PrefixStringMatcher.java	(working copy)
@@ -21,46 +21,47 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of prefixes.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * prefixes.
  */
 public class PrefixStringMatcher extends TrieStringMatcher {
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied array.
-   * Zero-length <code>Strings</code> are ignored.
+   * <code>String</code>s with any prefix in the supplied array. Zero-length
+   * <code>Strings</code> are ignored.
    */
   public PrefixStringMatcher(String[] prefixes) {
     super();
-    for (int i= 0; i < prefixes.length; i++)
+    for (int i = 0; i < prefixes.length; i++)
       addPatternForward(prefixes[i]);
   }
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied    
+   * <code>String</code>s with any prefix in the supplied
    * <code>Collection</code>.
-   *
-   * @throws ClassCastException if any <code>Object</code>s in the
-   * collection are not <code>String</code>s
+   * 
+   * @throws ClassCastException
+   *           if any <code>Object</code>s in the collection are not
+   *           <code>String</code>s
    */
   public PrefixStringMatcher(Collection<String> prefixes) {
     super();
-    Iterator<String> iter= prefixes.iterator();
+    Iterator<String> iter = prefixes.iterator();
     while (iter.hasNext())
       addPatternForward(iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * prefix in the trie
+   * Returns true if the given <code>String</code> is matched by a prefix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -73,13 +74,13 @@
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
-        return input.substring(0, i+1);
+        return input.substring(0, i + 1);
     }
     return null;
   }
@@ -89,29 +90,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(0, i+1);
+        result = input.substring(0, i + 1);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    PrefixStringMatcher matcher= 
-      new PrefixStringMatcher( 
-        new String[] 
-        {"abcd", "abc", "aac", "baz", "foo", "foobar"} );
+    PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] {
+        "abcd", "abc", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
-                     "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                     "kite", };
+    String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/StringUtil.java
===================================================================
--- src/java/org/apache/nutch/util/StringUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/StringUtil.java	(working copy)
@@ -20,42 +20,42 @@
 import java.nio.ByteBuffer;
 
 /**
- * A collection of String processing utility methods. 
+ * A collection of String processing utility methods.
  */
 public class StringUtil {
 
   /**
-   * Returns a copy of <code>s</code> padded with trailing spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with trailing spaces so that its
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String rightPad(String s, int length) {
-    StringBuffer sb= new StringBuffer(s);
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer(s);
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     return sb.toString();
   }
 
   /**
-   * Returns a copy of <code>s</code> padded with leading spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with leading spaces so that its
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String leftPad(String s, int length) {
-    StringBuffer sb= new StringBuffer();
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer();
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     sb.append(s);
     return sb.toString();
   }
 
+  private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6',
+      '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
-  private static final char[] HEX_DIGITS =
-  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
-
   /**
    * Convenience call for {@link #toHexString(ByteBuffer, String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(ByteBuffer buf) {
@@ -65,19 +65,25 @@
   /**
    * Get a text representation of a ByteBuffer as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(ByteBuffer buf, String sep, int lineLen) {
-    return toHexString(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining(), sep, lineLen);
+    return toHexString(buf.array(), buf.arrayOffset() + buf.position(),
+        buf.remaining(), sep, lineLen);
   }
 
   /**
    * Convenience call for {@link #toHexString(byte[], String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(byte[] buf) {
@@ -87,11 +93,15 @@
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(byte[] buf, String sep, int lineLen) {
     return toHexString(buf, 0, buf.length, sep, lineLen);
@@ -100,39 +110,53 @@
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param of the offset into the byte[] to start reading
-   * @param cb the number of bytes to read from the byte[]
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param of
+   *          the offset into the byte[] to start reading
+   * @param cb
+   *          the number of bytes to read from the byte[]
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
-  public static String toHexString(byte[] buf, int of, int cb, String sep, int lineLen) {
-    if (buf == null) return null;
-    if (lineLen <= 0) lineLen = Integer.MAX_VALUE;
+  public static String toHexString(byte[] buf, int of, int cb, String sep,
+      int lineLen) {
+    if (buf == null)
+      return null;
+    if (lineLen <= 0)
+      lineLen = Integer.MAX_VALUE;
     StringBuffer res = new StringBuffer(cb * 2);
     for (int c = 0; c < cb; c++) {
       int b = buf[of++];
       res.append(HEX_DIGITS[(b >> 4) & 0xf]);
       res.append(HEX_DIGITS[b & 0xf]);
-      if (c > 0 && (c % lineLen) == 0) res.append('\n');
-      else if (sep != null && c < lineLen - 1) res.append(sep);
+      if (c > 0 && (c % lineLen) == 0)
+        res.append('\n');
+      else if (sep != null && c < lineLen - 1)
+        res.append(sep);
     }
     return res.toString();
   }
-  
+
   /**
    * Convert a String containing consecutive (no inside whitespace) hexadecimal
-   * digits into a corresponding byte array. If the number of digits is not even,
-   * a '0' will be appended in the front of the String prior to conversion.
-   * Leading and trailing whitespace is ignored.
-   * @param text input text
+   * digits into a corresponding byte array. If the number of digits is not
+   * even, a '0' will be appended in the front of the String prior to
+   * conversion. Leading and trailing whitespace is ignored.
+   * 
+   * @param text
+   *          input text
    * @return converted byte array, or null if unable to convert
    */
   public static byte[] fromHexString(String text) {
     text = text.trim();
-    if (text.length() % 2 != 0) text = "0" + text;
+    if (text.length() % 2 != 0)
+      text = "0" + text;
     int resLen = text.length() / 2;
     int loNibble, hiNibble;
     byte[] res = new byte[resLen];
@@ -140,12 +164,13 @@
       int j = i << 1;
       hiNibble = charToNibble(text.charAt(j));
       loNibble = charToNibble(text.charAt(j + 1));
-      if (loNibble == -1 || hiNibble == -1) return null;
-      res[i] = (byte)(hiNibble << 4 | loNibble);
+      if (loNibble == -1 || hiNibble == -1)
+        return null;
+      res[i] = (byte) (hiNibble << 4 | loNibble);
     }
     return res;
   }
-  
+
   private static final int charToNibble(char c) {
     if (c >= '0' && c <= '9') {
       return c - '0';
@@ -164,11 +189,12 @@
   public static boolean isEmpty(String str) {
     return (str == null) || (str.equals(""));
   }
-  
 
   /**
    * Takes in a String value and cleans out any offending "�"
-   * @param value the dirty String value.
+   * 
+   * @param value
+   *          the dirty String value.
    * @return clean String
    */
   public static String cleanField(String value) {
@@ -178,8 +204,8 @@
   public static void main(String[] args) {
     if (args.length != 1)
       System.out.println("Usage: StringUtil <encoding name>");
-    else 
-      System.out.println(args[0] + " is resolved to " +
-                         EncodingDetector.resolveEncodingAlias(args[0]));
+    else
+      System.out.println(args[0] + " is resolved to "
+          + EncodingDetector.resolveEncodingAlias(args[0]));
   }
 }
Index: src/java/org/apache/nutch/util/SuffixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/SuffixStringMatcher.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/SuffixStringMatcher.java	(working copy)
@@ -21,8 +21,8 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of suffixes.  Zero-length <code>Strings</code> are ignored.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * suffixes. Zero-length <code>Strings</code> are ignored.
  */
 public class SuffixStringMatcher extends TrieStringMatcher {
 
@@ -32,7 +32,7 @@
    */
   public SuffixStringMatcher(String[] suffixes) {
     super();
-    for (int i= 0; i < suffixes.length; i++)
+    for (int i = 0; i < suffixes.length; i++)
       addPatternBackward(suffixes[i]);
   }
 
@@ -49,14 +49,14 @@
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * suffix in the trie
+   * Returns true if the given <code>String</code> is matched by a suffix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -64,16 +64,15 @@
     return false;
   }
 
-
   /**
    * Returns the shortest suffix of <code>input<code> that is matched,
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
         return input.substring(i);
@@ -86,29 +85,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(i);
+        result = input.substring(i);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    SuffixStringMatcher matcher= 
-      new SuffixStringMatcher( 
-        new String[] 
-        {"a", "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar"} );
+    SuffixStringMatcher matcher = new SuffixStringMatcher(new String[] { "a",
+        "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
-                    "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                    "kite", };
+    String[] tests = { "a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/TableUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TableUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/TableUtil.java	(working copy)
@@ -33,7 +33,7 @@
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
    * "com.foo.bar:8983:http/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -50,7 +50,7 @@
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
    * "com.foo.bar:http:8983/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -93,8 +93,11 @@
       pathBegin = reversedUrl.length();
     String sub = reversedUrl.substring(0, pathBegin);
 
-    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed host>, <port>, <protocol>}
-    
+    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed
+                                                                    // host>,
+                                                                    // <port>,
+                                                                    // <protocol>}
+
     buf.append(splits[1]); // add protocol
     buf.append("://");
     reverseAppendSplits(splits[0], buf); // splits[0] is reversed
@@ -110,7 +113,7 @@
   /**
    * Given a reversed url, returns the reversed host E.g
    * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar"
-   *
+   * 
    * @param reversedUrl
    *          Reversed url
    * @return Reversed host
@@ -120,7 +123,7 @@
   }
 
   private static void reverseAppendSplits(String string, StringBuilder buf) {
-    String[] splits = StringUtils.split(string,'.');
+    String[] splits = StringUtils.split(string, '.');
     if (splits.length > 0) {
       for (int i = splits.length - 1; i > 0; i--) {
         buf.append(splits[i]);
@@ -136,18 +139,18 @@
     StringBuilder buf = new StringBuilder();
     reverseAppendSplits(hostName, buf);
     return buf.toString();
-    
+
   }
+
   public static String unreverseHost(String reversedHostName) {
     return reverseHost(reversedHostName); // Reversible
   }
-  
-  
+
   /**
-   * Convert given Utf8 instance to String and and cleans out 
-   * any offending "�" from the String.
-   *
-   *
+   * Convert given Utf8 instance to String and cleans out any offending "�"
+   * from the String.
+   * 
+   * 
    * @param utf8
    *          Utf8 object
    * @return string-ifed Utf8 object or null if Utf8 instance is null
Index: src/java/org/apache/nutch/util/TimingUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TimingUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/TimingUtil.java	(working copy)
@@ -21,35 +21,39 @@
 
 public class TimingUtil {
 
-    private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
+  private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
 
-    /**
-     * Calculate the elapsed time between two times specified in milliseconds.
-     * @param start The start of the time period
-     * @param end The end of the time period
-     * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y minutes and Z seconds or null if start > end.
-     */
-    public static String elapsedTime(long start, long end){
-        if (start > end) {
-            return null;
-        }
+  /**
+   * Calculate the elapsed time between two times specified in milliseconds.
+   * 
+   * @param start
+   *          The start of the time period
+   * @param end
+   *          The end of the time period
+   * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y
+   *         minutes and Z seconds or null if start > end.
+   */
+  public static String elapsedTime(long start, long end) {
+    if (start > end) {
+      return null;
+    }
 
-        long[] elapsedTime = new long[TIME_FACTOR.length];
+    long[] elapsedTime = new long[TIME_FACTOR.length];
 
-        for (int i = 0; i < TIME_FACTOR.length; i++) {
-            elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
-            start += TIME_FACTOR[i] * elapsedTime[i];
-        }
+    for (int i = 0; i < TIME_FACTOR.length; i++) {
+      elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
+      start += TIME_FACTOR[i] * elapsedTime[i];
+    }
 
-        NumberFormat nf = NumberFormat.getInstance();
-        nf.setMinimumIntegerDigits(2);
-        StringBuffer buf = new StringBuffer();
-        for (int i = 0; i < elapsedTime.length; i++) {
-            if (i > 0) {
-                buf.append(":");
-            }
-            buf.append(nf.format(elapsedTime[i]));
-        }
-        return buf.toString();
+    NumberFormat nf = NumberFormat.getInstance();
+    nf.setMinimumIntegerDigits(2);
+    StringBuffer buf = new StringBuffer();
+    for (int i = 0; i < elapsedTime.length; i++) {
+      if (i > 0) {
+        buf.append(":");
+      }
+      buf.append(nf.format(elapsedTime[i]));
     }
+    return buf.toString();
+  }
 }
Index: src/java/org/apache/nutch/util/ToolUtil.java
===================================================================
--- src/java/org/apache/nutch/util/ToolUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/ToolUtil.java	(working copy)
@@ -28,7 +28,7 @@
 
 public class ToolUtil {
 
-  public static final Map<String,Object> toArgMap(Object... args) {
+  public static final Map<String, Object> toArgMap(Object... args) {
     if (args == null) {
       return null;
     }
@@ -35,7 +35,7 @@
     if (args.length % 2 != 0) {
       throw new RuntimeException("expected pairs of argName argValue");
     }
-    HashMap<String,Object> res = new HashMap<String,Object>();
+    HashMap<String, Object> res = new HashMap<String, Object>();
     for (int i = 0; i < args.length; i += 2) {
       if (args[i + 1] != null) {
         res.put(String.valueOf(args[i]), args[i + 1]);
@@ -43,20 +43,22 @@
     }
     return res;
   }
-  
+
   @SuppressWarnings("unchecked")
-  public static final void recordJobStatus(String label, Job job, Map<String,Object> results) {
-    Map<String,Object> jobs = (Map<String,Object>)results.get(Nutch.STAT_JOBS);
+  public static final void recordJobStatus(String label, Job job,
+      Map<String, Object> results) {
+    Map<String, Object> jobs = (Map<String, Object>) results
+        .get(Nutch.STAT_JOBS);
     if (jobs == null) {
-      jobs = new LinkedHashMap<String,Object>();
+      jobs = new LinkedHashMap<String, Object>();
       results.put(Nutch.STAT_JOBS, jobs);
     }
-    Map<String,Object> stats = new HashMap<String,Object>();
-    Map<String,Object> countStats = new HashMap<String,Object>();
+    Map<String, Object> stats = new HashMap<String, Object>();
+    Map<String, Object> countStats = new HashMap<String, Object>();
     try {
       Counters counters = job.getCounters();
       for (CounterGroup cg : counters) {
-        Map<String,Object> cnts = new HashMap<String,Object>();
+        Map<String, Object> cnts = new HashMap<String, Object>();
         countStats.put(cg.getDisplayName(), cnts);
         for (Counter c : cg) {
           cnts.put(c.getName(), c.getValue());
Index: src/java/org/apache/nutch/util/TrieStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/TrieStringMatcher.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/TrieStringMatcher.java	(working copy)
@@ -17,21 +17,19 @@
 
 package org.apache.nutch.util;
 
-
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.ListIterator;
 
 /**
- * TrieStringMatcher is a base class for simple tree-based string
- * matching.
- *
+ * TrieStringMatcher is a base class for simple tree-based string matching.
+ * 
  */
 public abstract class TrieStringMatcher {
   protected TrieNode root;
 
   protected TrieStringMatcher() {
-    this.root= new TrieNode('\000', false);
+    this.root = new TrieNode('\000', false);
   }
 
   /**
@@ -44,20 +42,19 @@
     protected boolean terminal;
 
     /**
-     * Creates a new TrieNode, which contains the given
-     * <code>nodeChar</code>.  If <code>isTerminal</code> is
-     * <code>true</code>, the new node is a <em>terminal</em> node in
-     * the trie.
-     */  
+     * Creates a new TrieNode, which contains the given <code>nodeChar</code>.
+     * If <code>isTerminal</code> is <code>true</code>, the new node is a
+     * <em>terminal</em> node in the trie.
+     */
     TrieNode(char nodeChar, boolean isTerminal) {
-      this.nodeChar= nodeChar;
-      this.terminal= isTerminal;
-      this.childrenList= new LinkedList<TrieNode>();
+      this.nodeChar = nodeChar;
+      this.terminal = isTerminal;
+      this.childrenList = new LinkedList<TrieNode>();
     }
 
     /**
-     * Returns <code>true</code> if this node is a <em>terminal</em>
-     * node in the trie.
+     * Returns <code>true</code> if this node is a <em>terminal</em> node in the
+     * trie.
      */
     boolean isTerminal() {
       return terminal;
@@ -65,67 +62,68 @@
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists, one will be is
-     * added.  If <em>isTerminal</em> is <code>true</code>, the node 
-     * will be a terminal node in the trie.
+     * <code>nextChar</code>. If no such node exists, one will be added. If
+     * <em>isTerminal</em> is <code>true</code>, the node will be a terminal
+     * node in the trie.
      */
     TrieNode getChildAddIfNotPresent(char nextChar, boolean isTerminal) {
       if (childrenList == null) {
-        childrenList= new LinkedList<TrieNode>();
+        childrenList = new LinkedList<TrieNode>();
         childrenList.addAll(Arrays.asList(children));
-        children= null;
+        children = null;
       }
 
       if (childrenList.size() == 0) {
-        TrieNode newNode= new TrieNode(nextChar, isTerminal);
+        TrieNode newNode = new TrieNode(nextChar, isTerminal);
         childrenList.add(newNode);
         return newNode;
       }
 
-      ListIterator<TrieNode> iter= childrenList.listIterator();
-      TrieNode node= iter.next();
-      while ( (node.nodeChar < nextChar) && iter.hasNext() ) 
-        node= iter.next();
-                        
+      ListIterator<TrieNode> iter = childrenList.listIterator();
+      TrieNode node = iter.next();
+      while ((node.nodeChar < nextChar) && iter.hasNext())
+        node = iter.next();
+
       if (node.nodeChar == nextChar) {
-        node.terminal= node.terminal | isTerminal;
+        node.terminal = node.terminal | isTerminal;
         return node;
       }
 
-      if (node.nodeChar > nextChar) 
+      if (node.nodeChar > nextChar)
         iter.previous();
 
-      TrieNode newNode= new TrieNode(nextChar, isTerminal);
+      TrieNode newNode = new TrieNode(nextChar, isTerminal);
       iter.add(newNode);
-      return newNode;                   
+      return newNode;
     }
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists,
-     * <code>null</code> is returned.
+     * <code>nextChar</code>. If no such node exists, <code>null</code> is
+     * returned.
      */
     TrieNode getChild(char nextChar) {
       if (children == null) {
-        children= childrenList.toArray(new TrieNode[childrenList.size()]);
-        childrenList= null;
+        children = childrenList.toArray(new TrieNode[childrenList.size()]);
+        childrenList = null;
         Arrays.sort(children);
       }
 
-      int min= 0;
-      int max= children.length - 1;
-      int mid= 0;
+      int min = 0;
+      int max = children.length - 1;
+      int mid = 0;
       while (min < max) {
-        mid= (min + max) / 2;
-        if (children[mid].nodeChar == nextChar) 
+        mid = (min + max) / 2;
+        if (children[mid].nodeChar == nextChar)
           return children[mid];
         if (children[mid].nodeChar < nextChar)
-          min= mid + 1;
-        else // if (children[mid].nodeChar > nextChar)
-          max= mid - 1;
+          min = mid + 1;
+        else
+          // if (children[mid].nodeChar > nextChar)
+          max = mid - 1;
       }
 
-      if (min == max) 
+      if (min == max)
         if (children[min].nodeChar == nextChar)
           return children[min];
 
@@ -133,11 +131,11 @@
     }
 
     public int compareTo(TrieNode other) {
-      if (this.nodeChar < other.nodeChar) 
+      if (this.nodeChar < other.nodeChar)
         return -1;
-      if (this.nodeChar == other.nodeChar) 
+      if (this.nodeChar == other.nodeChar)
         return 0;
-//    if (this.nodeChar > other.nodeChar) 
+      // if (this.nodeChar > other.nodeChar)
       return 1;
     }
   }
@@ -144,8 +142,8 @@
 
   /**
    * Returns the next {@link TrieNode} visited, given that you are at
-   * <code>node</code>, and the the next character in the input is 
-   * the <code>idx</code>'th character of <code>s</code>.
+   * <code>node</code>, and the next character in the input is the
+   * <code>idx</code>'th character of <code>s</code>.
    */
   protected final TrieNode matchChar(TrieNode node, String s, int idx) {
     return node.getChild(s.charAt(idx));
@@ -152,40 +150,38 @@
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded and the last character is
-   * represented by a terminal node.  Zero-length <code>Strings</code>
-   * are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded and the last character is represented by a terminal node.
+   * Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternForward(String s) {
-    TrieNode node= root;
-    int stop= s.length() - 1;
+    TrieNode node = root;
+    int stop = s.length() - 1;
     int i;
     if (s.length() > 0) {
-      for (i= 0; i < stop; i++)
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(i), true);
+      for (i = 0; i < stop; i++)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(i), true);
     }
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded <em>in reverse</em> and the
-   * first character is represented by a terminal node.  Zero-length
-   * <code>Strings</code> are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded <em>in reverse</em> and the first character is represented
+   * by a terminal node. Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternBackward(String s) {
-    TrieNode node= root;
+    TrieNode node = root;
     if (s.length() > 0) {
-      for (int i= s.length()-1; i > 0; i--) 
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(0), true);
+      for (int i = s.length() - 1; i > 0; i--)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(0), true);
     }
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * pattern in the trie
+   * Returns true if the given <code>String</code> is matched by a pattern in
+   * the trie
    */
   public abstract boolean matches(String input);
 
Index: src/java/org/apache/nutch/util/URLUtil.java
===================================================================
--- src/java/org/apache/nutch/util/URLUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/URLUtil.java	(working copy)
@@ -28,15 +28,18 @@
 public class URLUtil {
 
   /**
-   * Resolve relative URL-s and fix a java.net.URL error
-   * in handling of URLs with pure query targets.
-   * @param base base url
-   * @param target target url (may be relative)
+   * Resolve relative URL-s and fix a java.net.URL error in handling of URLs
+   * with pure query targets.
+   * 
+   * @param base
+   *          base url
+   * @param target
+   *          target url (may be relative)
    * @return resolved absolute url.
    * @throws MalformedURLException
    */
   public static URL resolveURL(URL base, String target)
-          throws MalformedURLException {
+      throws MalformedURLException {
     target = target.trim();
 
     // handle the case that there is a target that is a pure query,
@@ -58,9 +61,10 @@
   }
 
   /** Handle the case in RFC3986 section 5.4.1 example 7, and similar. */
-   static URL fixPureQueryTargets(URL base, String target)
-          throws MalformedURLException {
-    if (!target.startsWith("?")) return new URL(base, target);
+  static URL fixPureQueryTargets(URL base, String target)
+      throws MalformedURLException {
+    if (!target.startsWith("?"))
+      return new URL(base, target);
 
     String basePath = base.getPath();
     String baseRightMost = "";
@@ -69,36 +73,40 @@
       baseRightMost = basePath.substring(baseRightMostIdx + 1);
     }
 
-    if (target.startsWith("?")) target = baseRightMost + target;
+    if (target.startsWith("?"))
+      target = baseRightMost + target;
 
     return new URL(base, target);
   }
 
-  private static Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");
+  private static Pattern IP_PATTERN = Pattern
+      .compile("(\\d{1,3}\\.){3}(\\d{1,3})");
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new URL(http://lucene.apache.org/))
    *  </code><br>
-   *  will return <br><code> apache.org</code>
-   *   */
+   * will return <br>
+   * <code> apache.org</code>
+   */
   public static String getDomainName(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    //it seems that java returns hostnames ending with .
-    if(host.endsWith("."))
+    // it seems that java returns hostnames ending with .
+    if (host.endsWith("."))
       host = host.substring(0, host.length() - 1);
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return host;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1); 
-      if(tlds.isDomainSuffix(subCandidate)) {
-        return candidate; 
+      String subCandidate = candidate.substring(index + 1);
+      if (tlds.isDomainSuffix(subCandidate)) {
+        return candidate;
       }
       candidate = subCandidate;
     }
@@ -105,12 +113,15 @@
     return candidate;
   }
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new http://lucene.apache.org/)
    *  </code><br>
-   *  will return <br><code> apache.org</code>
+   * will return <br>
+   * <code> apache.org</code>
+   * 
    * @throws MalformedURLException
    */
   public static String getDomainName(String url) throws MalformedURLException {
@@ -117,12 +128,12 @@
     return getDomainName(new URL(url));
   }
 
-  /** Returns whether the given urls have the same domain name.
-   * As an example, <br>
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
    * <code> isSameDomain(new URL("http://lucene.apache.org")
    * , new URL("http://people.apache.org/"))
    * <br> will return true. </code>
-   *
+   * 
    * @return true if the domain names are equal
    */
   public static boolean isSameDomainName(URL url1, URL url2) {
@@ -129,36 +140,38 @@
     return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
   }
 
-  /**Returns whether the given urls have the same domain name.
-  * As an example, <br>
-  * <code> isSameDomain("http://lucene.apache.org"
-  * ,"http://people.apache.org/")
-  * <br> will return true. </code>
-  * @return true if the domain names are equal
-  * @throws MalformedURLException
-  */
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
+   * <code> isSameDomain("http://lucene.apache.org"
+   * ,"http://people.apache.org/")
+   * <br> will return true. </code>
+   * 
+   * @return true if the domain names are equal
+   * @throws MalformedURLException
+   */
   public static boolean isSameDomainName(String url1, String url2)
-    throws MalformedURLException {
+      throws MalformedURLException {
     return isSameDomainName(new URL(url1), new URL(url2));
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
   public static DomainSuffix getDomainSuffix(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return null;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1);
+      String subCandidate = candidate.substring(index + 1);
       DomainSuffix d = tlds.get(subCandidate);
-      if(d != null) {
-        return d; 
+      if (d != null) {
+        return d;
       }
       candidate = subCandidate;
     }
@@ -165,34 +178,43 @@
     return null;
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
-  public static DomainSuffix getDomainSuffix(String url) throws MalformedURLException {
+  public static DomainSuffix getDomainSuffix(String url)
+      throws MalformedURLException {
     return getDomainSuffix(new URL(url));
   }
 
-  /** Partitions of the hostname of the url by "."  */
+  /** Partitions of the hostname of the url by "." */
   public static String[] getHostBatches(URL url) {
     String host = url.getHost();
-    //return whole hostname, if it is an ipv4
-    //TODO : handle ipv6
-    if(IP_PATTERN.matcher(host).matches())
-      return new String[] {host};
+    // return whole hostname, if it is an ipv4
+    // TODO : handle ipv6
+    if (IP_PATTERN.matcher(host).matches())
+      return new String[] { host };
     return host.split("\\.");
   }
 
-  /** Partitions of the hostname of the url by "."
-   * @throws MalformedURLException */
-  public static String[] getHostBatches(String url) throws MalformedURLException {
-   return getHostBatches(new URL(url));
+  /**
+   * Partitions of the hostname of the url by "."
+   * 
+   * @throws MalformedURLException
+   */
+  public static String[] getHostBatches(String url)
+      throws MalformedURLException {
+    return getHostBatches(new URL(url));
   }
 
   /**
-   * <p>Given two urls, a src and a destination of a redirect, it returns the 
-   * representative url.<p>
+   * <p>
+   * Given two urls, a src and a destination of a redirect, it returns the
+   * representative url.
+   * <p>
    * 
-   * <p>This method implements an extended version of the algorithm used by the
+   * <p>
+   * This method implements an extended version of the algorithm used by the
    * Yahoo! Slurp crawler described here:<br>
    * <a href=
    * "http://help.yahoo.com/l/nz/yahooxtra/search/webcrawler/slurp-11.html"> How
@@ -200,27 +222,39 @@
    * <br>
    * <ol>
    * <li>Choose target url if either url is malformed.</li>
-   * <li>If different domains the keep the destination whether or not the 
+   * <li>If different domains the keep the destination whether or not the
    * redirect is temp or perm</li>
-   * <ul><li>a.com -> b.com*</li></ul>
+   * <ul>
+   * <li>a.com -> b.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and the source is root, keep the source.</li>
-   * <ul><li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li></ul>
-   * <li>If the redirect is permanent and the source is not root and the 
+   * <ul>
+   * <li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li>
+   * </ul>
+   * <li>If the redirect is permanent and the source is not root and the
    * destination is root, keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and neither the source nor the destination
    * is root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com/abc/page.html*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
+   * </ul>
    * <li>If the redirect is temporary and source is root and destination is not
    * root, then keep the source</li>
-   * <ul><li>*a.com -> a.com/xyz/index.html</li></ul>
+   * <ul>
+   * <li>*a.com -> a.com/xyz/index.html</li>
+   * </ul>
    * <li>If the redirect is temporary and source is not root and destination is
    * root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is temporary and neither the source or the destination
-   * is root, then keep the shortest url.  First check for the shortest host,
-   * and if both are equal then check by path.  Path is first by length then by
-   * the number of / path separators.</li>
+   * is root, then keep the shortest url. First check for the shortest host, and
+   * if both are equal then check by path. Path is first by length then by the
+   * number of / path separators.</li>
    * <ul>
    * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
    * <li>*www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html</li>
@@ -227,19 +261,24 @@
    * </ul>
    * <li>If the redirect is temporary and both the source and the destination
    * are root, then keep the shortest sub-domain</li>
-   * <ul><li>*www.a.com -> www.news.a.com</li></ul>
+   * <ul>
+   * <li>*www.a.com -> www.news.a.com</li>
+   * </ul>
    * <br>
-   * While not in this logic there is a further piece of representative url 
-   * logic that occurs during indexing and after scoring.  During creation of 
-   * the basic fields before indexing, if a url has a representative url stored
-   * we check both the url and its representative url (which should never be 
-   * the same) against their linkrank scores and the highest scoring one is 
-   * kept as the url and the lower scoring one is held as the orig url inside 
-   * of the index.
+   * While not in this logic there is a further piece of representative url
+   * logic that occurs during indexing and after scoring. During creation of the
+   * basic fields before indexing, if a url has a representative url stored we
+   * check both the url and its representative url (which should never be the
+   * same) against their linkrank scores and the highest scoring one is kept as
+   * the url and the lower scoring one is held as the orig url inside of the
+   * index.
    * 
-   * @param src The source url.
-   * @param dst The destination url.
-   * @param temp Is the redirect a temporary redirect.
+   * @param src
+   *          The source url.
+   * @param dst
+   *          The destination url.
+   * @param temp
+   *          Is the redirect a temporary redirect.
    * 
    * @return String The representative url.
    */
@@ -251,8 +290,7 @@
     try {
       srcUrl = new URL(src);
       dstUrl = new URL(dst);
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return dst;
     }
 
@@ -270,27 +308,27 @@
 
     // 1) different domain them keep dest, temp or perm
     // a.com -> b.com*
-    //    
+    //
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
-    //      
+    //
     // 3) permanent and not root and dest root, keep dest
     // a.com/xyz/index.html -> a.com*
-    //      
+    //
     // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
-    //      
+    //
     // 5) temp and root and dest not root keep src
     // *a.com -> a.com/xyz/index.html
-    //  
+    //
     // 7) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
-    //  
+    //
     // 8) temp and neither root, keep shortest, if hosts equal by path else by
     // hosts. paths are first by length then by number of / separators
     // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
-    //  
+    //
     // 9) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
 
@@ -302,25 +340,21 @@
 
     // if it is a permanent redirect
     if (!temp) {
-      
+
       // if source is root return source, otherwise destination
       if (srcRoot) {
         return src;
-      }
-      else {
+      } else {
         return dst;
       }
-    }
-    else { // temporary redirect
+    } else { // temporary redirect
 
       // source root and destination not root
       if (srcRoot && !destRoot) {
         return src;
-      }
-      else if (!srcRoot && destRoot) { // destination root and source not
+      } else if (!srcRoot && destRoot) { // destination root and source not
         return dst;
-      }
-      else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
+      } else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
 
         // source and destination hosts are the same, check paths, host length
         int numSrcPaths = srcFile.split("/").length;
@@ -327,14 +361,12 @@
         int numDstPaths = dstFile.split("/").length;
         if (numSrcPaths != numDstPaths) {
           return (numDstPaths < numSrcPaths ? dst : src);
-        }
-        else {
+        } else {
           int srcPathLength = srcFile.length();
           int dstPathLength = dstFile.length();
           return (dstPathLength < srcPathLength ? dst : src);
         }
-      }
-      else {
+      } else {
 
         // different host names and both root take the shortest
         int numSrcSubs = srcHost.split("\\.").length;
@@ -348,24 +380,25 @@
    * Returns the lowercased hostname for the url or null if the url is not well
    * formed.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The hostname for the url.
    */
   public static String getHost(String url) {
     try {
       return new URL(url).getHost().toLowerCase();
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
 
   /**
-   * Returns the page for the url.  The page consists of the protocol, host,
-   * and path, but does not include the query string.  The host is lowercased
-   * but the path is not.
+   * Returns the page for the url. The page consists of the protocol, host, and
+   * path, but does not include the query string. The host is lowercased but the
+   * path is not.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The page for the url.
    */
   public static String getPage(String url) {
@@ -374,12 +407,11 @@
       url = url.toLowerCase();
       String queryStr = new URL(url).getQuery();
       return (queryStr != null) ? url.replace("?" + queryStr, "") : url;
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
-  
+
   public static String toASCII(String url) {
     try {
       URL u = new URL(url);
@@ -389,17 +421,11 @@
         // also do not add additional slashes for file: URLs (NUTCH-1880)
         return url;
       }
-      URI p = new URI(u.getProtocol(),
-        u.getUserInfo(),
-        IDN.toASCII(host),
-        u.getPort(),
-        u.getPath(),
-        u.getQuery(),
-        u.getRef());
+      URI p = new URI(u.getProtocol(), u.getUserInfo(), IDN.toASCII(host),
+          u.getPort(), u.getPath(), u.getQuery(), u.getRef());
 
       return p.toString();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       return null;
     }
   }
@@ -432,26 +458,23 @@
       }
 
       return sb.toString();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       return null;
     }
   }
 
+  /** For testing */
+  public static void main(String[] args) {
 
-  /** For testing */
-  public static void main(String[] args){
-    
-    if(args.length!=1) {
+    if (args.length != 1) {
       System.err.println("Usage : URLUtil <url>");
-      return ;
+      return;
     }
-    
+
     String url = args[0];
     try {
       System.out.println(URLUtil.getDomainName(new URL(url)));
-    }
-    catch (MalformedURLException ex) {
+    } catch (MalformedURLException ex) {
       ex.printStackTrace();
     }
   }
Index: src/java/org/apache/nutch/util/WebPageWritable.java
===================================================================
--- src/java/org/apache/nutch/util/WebPageWritable.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/WebPageWritable.java	(working copy)
@@ -26,8 +26,7 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
-public class WebPageWritable extends Configured
-implements Writable {
+public class WebPageWritable extends Configured implements Writable {
 
   private WebPage webPage;
 
@@ -53,7 +52,7 @@
   public WebPage getWebPage() {
     return webPage;
   }
-  
+
   public void setWebPage(WebPage webPage) {
     this.webPage = webPage;
   }
Index: src/java/org/apache/nutch/util/domain/DomainStatistics.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainStatistics.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainStatistics.java	(working copy)
@@ -71,7 +71,8 @@
   public int run(String[] args) throws IOException, ClassNotFoundException,
       InterruptedException {
     if (args.length < 2) {
-      System.out.println("usage: DomainStatistics outDir host|domain|suffix [-numReducers n] [-crawlId <id>]");
+      System.out
+          .println("usage: DomainStatistics outDir host|domain|suffix [-numReducers n] [-crawlId <id>]");
       return 1;
     }
     String outputDir = args[0];
@@ -193,9 +194,8 @@
     }
 
     @Override
-    protected void map(
-        String key, WebPage value, Context context) 
-            throws IOException, InterruptedException {
+    protected void map(String key, WebPage value, Context context)
+        throws IOException, InterruptedException {
       if (value.getStatus() == CrawlStatus.STATUS_FETCHED) {
         try {
           URL url = new URL(TableUtil.unreverseUrl(key.toString()));
Index: src/java/org/apache/nutch/util/domain/DomainSuffix.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffix.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainSuffix.java	(working copy)
@@ -18,17 +18,18 @@
 package org.apache.nutch.util.domain;
 
 /**
- * This class represents the last part of the host name, 
- * which is operated by authoritives, not individuals. This information 
- * is needed to find the domain name of a host. The domain name of a host
- * is defined to be the last part before the domain suffix, w/o subdomain 
- * names.  As an example the domain name of <br><code> http://lucene.apache.org/ 
- * </code><br> is <code> apache.org</code>   
- * <br>
- * This class holds three fields,  
- * <strong>domain</strong> field represents the suffix (such as "co.uk")
- * <strong>boost</strong> is a float for boosting score of url's with this suffix
- * <strong>status</strong> field represents domain's status
+ * This class represents the last part of the host name, which is operated by
+ * authorities, not individuals. This information is needed to find the domain
+ * name of a host. The domain name of a host is defined to be the last part
+ * before the domain suffix, w/o subdomain names. As an example the domain name
+ * of <br>
+ * <code> http://lucene.apache.org/ 
+ * </code><br>
+ * is <code> apache.org</code> <br>
+ * This class holds three fields, <strong>domain</strong> field represents the
+ * suffix (such as "co.uk") <strong>boost</strong> is a float for boosting score
+ * of URLs with this suffix <strong>status</strong> field represents domain's
+ * status
  * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see TopLevelDomain
@@ -37,10 +38,10 @@
 public class DomainSuffix {
 
   /**
-   * Enumeration of the status of the tld. Please see domain-suffixes.xml. 
+   * Enumeration of the status of the tld. Please see domain-suffixes.xml.
    */
-  public enum Status { INFRASTRUCTURE, SPONSORED, UNSPONSORED
-    , STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
+  public enum Status {
+    INFRASTRUCTURE, SPONSORED, UNSPONSORED, STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
   };
 
   private String domain;
@@ -49,7 +50,7 @@
 
   public static final float DEFAULT_BOOST = 1.0f;
   public static final Status DEFAULT_STATUS = Status.IN_USE;
-  
+
   public DomainSuffix(String domain, Status status, float boost) {
     this.domain = domain;
     this.status = status;
@@ -59,7 +60,7 @@
   public DomainSuffix(String domain) {
     this(domain, DEFAULT_STATUS, DEFAULT_BOOST);
   }
-  
+
   public String getDomain() {
     return domain;
   }
@@ -71,7 +72,7 @@
   public float getBoost() {
     return boost;
   }
-  
+
   @Override
   public String toString() {
     return domain;
Index: src/java/org/apache/nutch/util/domain/DomainSuffixes.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(working copy)
@@ -25,40 +25,43 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * Storage class for <code>DomainSuffix</code> objects 
- * Note: this class is singleton
+ * Storage class for <code>DomainSuffix</code> objects. Note: this class is a
+ * singleton.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class DomainSuffixes {
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixes.class);
-  
-  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>(); 
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixes.class);
+
+  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>();
+
   private static DomainSuffixes instance;
-  
+
   /** private ctor */
   private DomainSuffixes() {
     String file = "domain-suffixes.xml";
-    InputStream input = this.getClass().getClassLoader().getResourceAsStream(file);
+    InputStream input = this.getClass().getClassLoader()
+        .getResourceAsStream(file);
     try {
       new DomainSuffixesReader().read(this, input);
-    }
-    catch (Exception ex) {
+    } catch (Exception ex) {
       LOG.warn(StringUtils.stringifyException(ex));
     }
   }
-  
+
   /**
    * Singleton instance, lazy instantination
+   * 
    * @return
    */
   public static DomainSuffixes getInstance() {
-    if(instance == null) {
+    if (instance == null) {
       instance = new DomainSuffixes();
     }
     return instance;
   }
-  
+
   void addDomainSuffix(DomainSuffix tld) {
     domains.put(tld.getDomain(), tld);
   }
@@ -65,17 +68,19 @@
 
   /** return whether the extension is a registered domain entry */
   public boolean isDomainSuffix(String extension) {
-    return domains.containsKey(extension); 
+    return domains.containsKey(extension);
   }
-    
+
   /**
-   * Return the {@link DomainSuffix} object for the extension, if 
-   * extension is a top level domain returned object will be an 
-   * instance of {@link TopLevelDomain}
-   * @param extension of the domain
+   * Return the {@link DomainSuffix} object for the extension, if extension is a
+   * top level domain returned object will be an instance of
+   * {@link TopLevelDomain}
+   * 
+   * @param extension
+   *          of the domain
    */
   public DomainSuffix get(String extension) {
     return domains.get(extension);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(working copy)
@@ -36,16 +36,17 @@
 import org.xml.sax.SAXException;
 
 /**
- * For parsing xml files containing domain suffix definitions.
- * Parsed xml files should validate against 
- * <code>domain-suffixes.xsd</code>  
+ * For parsing xml files containing domain suffix definitions. Parsed xml files
+ * should validate against <code>domain-suffixes.xsd</code>
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 class DomainSuffixesReader {
 
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixesReader.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixesReader.class);
 
-  void read(DomainSuffixes tldEntries, InputStream input) throws IOException{
+  void read(DomainSuffixes tldEntries, InputStream input) throws IOException {
     try {
 
       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
@@ -54,28 +55,29 @@
       Document document = builder.parse(new InputSource(input));
 
       Element root = document.getDocumentElement();
-      
-      if(root != null && root.getTagName().equals("domains")) {
-        
-        Element tlds = (Element)root.getElementsByTagName("tlds").item(0);
-        Element suffixes = (Element)root.getElementsByTagName("suffixes").item(0);
-        
-        //read tlds
-        readITLDs(tldEntries, (Element)tlds.getElementsByTagName("itlds").item(0));
-        readGTLDs(tldEntries, (Element)tlds.getElementsByTagName("gtlds").item(0));
-        readCCTLDs(tldEntries, (Element)tlds.getElementsByTagName("cctlds").item(0));
-        
+
+      if (root != null && root.getTagName().equals("domains")) {
+
+        Element tlds = (Element) root.getElementsByTagName("tlds").item(0);
+        Element suffixes = (Element) root.getElementsByTagName("suffixes")
+            .item(0);
+
+        // read tlds
+        readITLDs(tldEntries, (Element) tlds.getElementsByTagName("itlds")
+            .item(0));
+        readGTLDs(tldEntries, (Element) tlds.getElementsByTagName("gtlds")
+            .item(0));
+        readCCTLDs(tldEntries, (Element) tlds.getElementsByTagName("cctlds")
+            .item(0));
+
         readSuffixes(tldEntries, suffixes);
-      }
-      else {
+      } else {
         throw new IOException("xml file is not valid");
       }
-    }
-    catch (ParserConfigurationException ex) {
+    } catch (ParserConfigurationException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
-    }
-    catch (SAXException ex) {
+    } catch (SAXException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
     }
@@ -83,22 +85,24 @@
 
   void readITLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.INFRASTRUCTURE));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.INFRASTRUCTURE));
     }
   }
-    
+
   void readGTLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.GENERIC));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.GENERIC));
     }
   }
 
   void readCCTLDs(DomainSuffixes tldEntries, Element el) throws IOException {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readCCTLD((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readCCTLD((Element) children.item(i)));
     }
   }
 
@@ -113,39 +117,40 @@
     String domain = el.getAttribute("domain");
     Status status = readStatus(el);
     float boost = readBoost(el);
-    String countryName = readCountryName(el); 
-    return new TopLevelDomain(domain, status, boost, countryName);  
+    String countryName = readCountryName(el);
+    return new TopLevelDomain(domain, status, boost, countryName);
   }
-  
+
   /** read optional field status */
   Status readStatus(Element el) {
     NodeList list = el.getElementsByTagName("status");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_STATUS;
     return Status.valueOf(list.item(0).getFirstChild().getNodeValue());
   }
-  
+
   /** read optional field boost */
   float readBoost(Element el) {
     NodeList list = el.getElementsByTagName("boost");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_BOOST;
     return Float.parseFloat(list.item(0).getFirstChild().getNodeValue());
   }
-  
-  /** read field countryname 
-    */
+
+  /**
+   * read required field country name
+   */
   String readCountryName(Element el) throws IOException {
     NodeList list = el.getElementsByTagName("country");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       throw new IOException("Country name should be given");
     return list.item(0).getNodeValue();
   }
-  
+
   void readSuffixes(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("suffix");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readSuffix((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readSuffix((Element) children.item(i)));
     }
   }
 
@@ -155,5 +160,5 @@
     float boost = readBoost(el);
     return new DomainSuffix(domain, status, boost);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/TopLevelDomain.java
===================================================================
--- src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(working copy)
@@ -18,10 +18,11 @@
 package org.apache.nutch.util.domain;
 
 /**
- * (From wikipedia) A top-level domain (TLD) is the last part of an 
- * Internet domain name; that is, the letters which follow the final 
- * dot of any domain name. For example, in the domain name 
- * <code>www.website.com</code>, the top-level domain is <code>com</code>.
+ * (From wikipedia) A top-level domain (TLD) is the last part of an Internet
+ * domain name; that is, the letters which follow the final dot of any domain
+ * name. For example, in the domain name <code>www.website.com</code>, the
+ * top-level domain is <code>com</code>.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see http://www.iana.org/
  * @see http://en.wikipedia.org/wiki/Top-level_domain
@@ -28,31 +29,36 @@
  */
 public class TopLevelDomain extends DomainSuffix {
 
-  public enum Type { INFRASTRUCTURE, GENERIC, COUNTRY };
-  
+  public enum Type {
+    INFRASTRUCTURE, GENERIC, COUNTRY
+  }
+
   private Type type;
   private String countryName = null;
-  
-  public TopLevelDomain(String domain, Type type, Status status, float boost){
+
+  public TopLevelDomain(String domain, Type type, Status status, float boost) {
     super(domain, status, boost);
     this.type = type;
   }
 
-  public TopLevelDomain(String domain, Status status, float boost, String countryName){
+  public TopLevelDomain(String domain, Status status, float boost,
+      String countryName) {
     super(domain, status, boost);
     this.type = Type.COUNTRY;
     this.countryName = countryName;
   }
-  
+
   public Type getType() {
     return type;
   }
 
-  /** Returns the country name if TLD is Country Code TLD
+  /**
+   * Returns the country name if TLD is Country Code TLD
+   *
    * @return country name or null
-   */ 
-  public String getCountryName(){
+   */
+  public String getCountryName() {
     return countryName;
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/package-info.java
===================================================================
--- src/java/org/apache/nutch/util/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Miscellaneous utility classes.
  */
 package org.apache.nutch.util;
+
Index: src/java/org/apache/nutch/webui/NutchUiApplication.java
===================================================================
--- src/java/org/apache/nutch/webui/NutchUiApplication.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/NutchUiApplication.java	(working copy)
@@ -34,7 +34,8 @@
 import de.agilecoders.wicket.extensions.markup.html.bootstrap.icon.FontAwesomeCssReference;
 
 @Component
-public class NutchUiApplication extends WebApplication implements ApplicationContextAware {
+public class NutchUiApplication extends WebApplication implements
+    ApplicationContextAware {
   private static final String THEME_NAME = "bootstrap";
   private ApplicationContext context;
 
@@ -56,7 +57,8 @@
     Bootstrap.install(this, settings);
     configureTheme(settings);
 
-    getComponentInstantiationListeners().add(new SpringComponentInjector(this, context));
+    getComponentInstantiationListeners().add(
+        new SpringComponentInjector(this, context));
   }
 
   private void configureTheme(BootstrapSettings settings) {
@@ -66,7 +68,8 @@
   }
 
   @Override
-  public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
+  public void setApplicationContext(ApplicationContext applicationContext)
+      throws BeansException {
     this.context = applicationContext;
   }
 }
Index: src/java/org/apache/nutch/webui/NutchUiServer.java
===================================================================
--- src/java/org/apache/nutch/webui/NutchUiServer.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/NutchUiServer.java	(working copy)
@@ -50,7 +50,7 @@
     HelpFormatter formatter = new HelpFormatter();
     try {
       commandLine = parser.parse(options, args);
-    }  catch (Exception e) {
+    } catch (Exception e) {
       formatter.printHelp("NutchUiServer", options, true);
       StringUtils.stringifyException(e);
     }
Index: src/java/org/apache/nutch/webui/client/NutchClient.java
===================================================================
--- src/java/org/apache/nutch/webui/client/NutchClient.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/NutchClient.java	(working copy)
@@ -36,7 +36,7 @@
   public String executeJob(JobConfig jobConfig);
 
   public JobInfo getJobInfo(String jobId);
-  
+
   public Map<String, String> getNutchConfig(String config);
 
   /**
Index: src/java/org/apache/nutch/webui/client/NutchClientFactory.java
===================================================================
--- src/java/org/apache/nutch/webui/client/NutchClientFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/NutchClientFactory.java	(working copy)
@@ -42,7 +42,8 @@
     }
   }
 
-  private static class NutchClientCacheLoader extends CacheLoader<NutchInstance, NutchClient> {
+  private static class NutchClientCacheLoader extends
+      CacheLoader<NutchInstance, NutchClient> {
     @Override
     public NutchClient load(NutchInstance key) throws Exception {
       return new NutchClientImpl(key);
Index: src/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java	(working copy)
@@ -43,8 +43,8 @@
   private List<RemoteCommand> remoteCommands;
   private List<RemoteCommand> executedCommands = Lists.newArrayList();
 
-  public CrawlingCycle(CrawlingCycleListener listener, RemoteCommandExecutor executor, Crawl crawl,
-      List<RemoteCommand> commands) {
+  public CrawlingCycle(CrawlingCycleListener listener,
+      RemoteCommandExecutor executor, Crawl crawl, List<RemoteCommand> commands) {
     this.listener = listener;
     this.executor = executor;
     this.crawl = crawl;
@@ -64,7 +64,7 @@
         listener.onCrawlError(crawl, jobInfo.getMsg());
         return;
       }
-      
+
       executedCommands.add(command);
       listener.commandExecuted(crawl, command, calculateProgress());
     }
@@ -75,7 +75,8 @@
     if (CollectionUtils.isEmpty(remoteCommands)) {
       return 0;
     }
-    return (int) ((float) executedCommands.size() / (float) remoteCommands.size() * 100);
+    return (int) ((float) executedCommands.size()
+        / (float) remoteCommands.size() * 100);
   }
 
 }
Index: src/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java	(working copy)
@@ -46,7 +46,8 @@
 
   public void createClient() {
     ClientConfig clientConfig = new DefaultClientConfig();
-    clientConfig.getFeatures().put(JSONConfiguration.FEATURE_POJO_MAPPING, true);
+    clientConfig.getFeatures()
+        .put(JSONConfiguration.FEATURE_POJO_MAPPING, true);
     this.client = Client.create(clientConfig);
     this.nutchResource = client.resource(instance.getUrl());
   }
@@ -53,7 +54,8 @@
 
   @Override
   public NutchStatus getNutchStatus() {
-    return nutchResource.path("/admin").type(APPLICATION_JSON).get(NutchStatus.class);
+    return nutchResource.path("/admin").type(APPLICATION_JSON)
+        .get(NutchStatus.class);
   }
 
   @Override
@@ -66,12 +68,14 @@
 
   @Override
   public String executeJob(JobConfig jobConfig) {
-    return nutchResource.path("/job/create").type(APPLICATION_JSON).post(String.class, jobConfig);
+    return nutchResource.path("/job/create").type(APPLICATION_JSON)
+        .post(String.class, jobConfig);
   }
 
   @Override
   public JobInfo getJobInfo(String jobId) {
-    return nutchResource.path("/job/" + jobId).type(APPLICATION_JSON).get(JobInfo.class);
+    return nutchResource.path("/job/" + jobId).type(APPLICATION_JSON)
+        .get(JobInfo.class);
   }
 
   @Override
@@ -82,11 +86,13 @@
   @SuppressWarnings("unchecked")
   @Override
   public Map<String, String> getNutchConfig(String config) {
-    return nutchResource.path("/config/" + config).type(APPLICATION_JSON).get(Map.class);
+    return nutchResource.path("/config/" + config).type(APPLICATION_JSON)
+        .get(Map.class);
   }
-  
+
   @Override
   public String createSeed(SeedList seedList) {
-    return nutchResource.path("/seed/create").type(APPLICATION_JSON).post(String.class, seedList);
+    return nutchResource.path("/seed/create").type(APPLICATION_JSON)
+        .post(String.class, seedList);
   }
 }
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommand.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommand.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommand.java	(working copy)
@@ -70,6 +70,7 @@
     if (jobInfo != null) {
       statusInfo = MessageFormat.format("{0}", jobInfo.getState());
     }
-    return MessageFormat.format("{0} status: {1}", jobConfig.getType(), statusInfo);
+    return MessageFormat.format("{0} status: {1}", jobConfig.getType(),
+        statusInfo);
   }
 }
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java	(working copy)
@@ -40,10 +40,12 @@
     jobConfig.setConfId(configId);
     return this;
   }
+
   public RemoteCommandBuilder withCrawlId(String crawlId) {
     jobConfig.setCrawlId(crawlId);
     return this;
   }
+
   public RemoteCommandBuilder withArgument(String key, String value) {
     jobConfig.setArgument(key, value);
     return this;
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java	(working copy)
@@ -56,7 +56,8 @@
   public JobInfo executeRemoteJob(RemoteCommand command) {
     try {
       String jobId = client.executeJob(command.getJobConfig());
-      Future<JobInfo> chekerFuture = executor.submit(new JobStateChecker(jobId));
+      Future<JobInfo> chekerFuture = executor
+          .submit(new JobStateChecker(jobId));
       return chekerFuture.get(getTimeout(command), TimeUnit.MILLISECONDS);
     } catch (Exception e) {
       log.error("Remote command failed", e);
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java	(working copy)
@@ -62,8 +62,9 @@
   }
 
   private RemoteCommand inject() {
-    RemoteCommandBuilder builder = RemoteCommandBuilder.instance(JobType.INJECT)
-        .withCrawlId(crawl.getCrawlId()).withArgument("seedDir", crawl.getSeedDirectory());
+    RemoteCommandBuilder builder = RemoteCommandBuilder
+        .instance(JobType.INJECT).withCrawlId(crawl.getCrawlId())
+        .withArgument("seedDir", crawl.getSeedDirectory());
     return builder.build();
   }
 
@@ -72,7 +73,8 @@
   }
 
   private RemoteCommand createFetchCommand() {
-    return createBuilder(JobType.FETCH).withTimeout(Duration.standardSeconds(50)).build();
+    return createBuilder(JobType.FETCH).withTimeout(
+        Duration.standardSeconds(50)).build();
   }
 
   private RemoteCommand createParseCommand() {
@@ -88,8 +90,8 @@
   }
 
   private RemoteCommandBuilder createBuilder(JobType jobType) {
-    return RemoteCommandBuilder.instance(jobType).withCrawlId(crawl.getCrawlId())
-        .withArgument("batch", batchId);
+    return RemoteCommandBuilder.instance(jobType)
+        .withCrawlId(crawl.getCrawlId()).withArgument("batch", batchId);
   }
 
 }
Index: src/java/org/apache/nutch/webui/client/model/JobConfig.java
===================================================================
--- src/java/org/apache/nutch/webui/client/model/JobConfig.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/model/JobConfig.java	(working copy)
@@ -34,7 +34,7 @@
   public void setArgument(String key, String value) {
     args.put(key, value);
   }
-  
+
   public String getCrawlId() {
     return crawlId;
   }
Index: src/java/org/apache/nutch/webui/client/model/NutchStatus.java
===================================================================
--- src/java/org/apache/nutch/webui/client/model/NutchStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/model/NutchStatus.java	(working copy)
@@ -22,7 +22,7 @@
 import java.util.Set;
 
 public class NutchStatus implements Serializable {
-  
+
   private Date startDate;
   private Set<String> configuration;
   private Collection<JobInfo> jobs;
Index: src/java/org/apache/nutch/webui/config/CustomDaoFactory.java
===================================================================
--- src/java/org/apache/nutch/webui/config/CustomDaoFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/config/CustomDaoFactory.java	(working copy)
@@ -27,7 +27,8 @@
 
 public class CustomDaoFactory {
   private ConnectionSource connectionSource;
-  private List<Dao<?, ?>> registredDaos = Collections.synchronizedList(new ArrayList<Dao<?, ?>>());
+  private List<Dao<?, ?>> registredDaos = Collections
+      .synchronizedList(new ArrayList<Dao<?, ?>>());
 
   public CustomDaoFactory(ConnectionSource connectionSource) {
     this.connectionSource = connectionSource;
Index: src/java/org/apache/nutch/webui/config/CustomTableCreator.java
===================================================================
--- src/java/org/apache/nutch/webui/config/CustomTableCreator.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/config/CustomTableCreator.java	(working copy)
@@ -30,7 +30,8 @@
   private ConnectionSource connectionSource;
   private List<Dao<?, ?>> configuredDaos;
 
-  public CustomTableCreator(ConnectionSource connectionSource, List<Dao<?, ?>> configuredDaos) {
+  public CustomTableCreator(ConnectionSource connectionSource,
+      List<Dao<?, ?>> configuredDaos) {
     this.connectionSource = connectionSource;
     this.configuredDaos = configuredDaos;
     initialize();
@@ -38,7 +39,8 @@
 
   private void initialize() {
     if (configuredDaos == null) {
-      throw new IllegalStateException("configuredDaos was not set in " + getClass().getSimpleName());
+      throw new IllegalStateException("configuredDaos was not set in "
+          + getClass().getSimpleName());
     }
 
     for (Dao<?, ?> dao : configuredDaos) {
Index: src/java/org/apache/nutch/webui/config/SpringConfiguration.java
===================================================================
--- src/java/org/apache/nutch/webui/config/SpringConfiguration.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/config/SpringConfiguration.java	(working copy)
@@ -51,8 +51,8 @@
 
   @Bean
   public JdbcConnectionSource getConnectionSource() throws SQLException {
-    JdbcConnectionSource source = new JdbcConnectionSource("jdbc:h2:~/.nutch/config",
-        new H2DatabaseType());
+    JdbcConnectionSource source = new JdbcConnectionSource(
+        "jdbc:h2:~/.nutch/config", new H2DatabaseType());
     source.initialize();
     return source;
   }
@@ -84,7 +84,8 @@
 
   @Bean
   public CustomTableCreator createTableCreator() throws SQLException {
-    return new CustomTableCreator(getConnectionSource(), getDaoFactory().getCreatedDaos());
+    return new CustomTableCreator(getConnectionSource(), getDaoFactory()
+        .getCreatedDaos());
   }
 
 }
Index: src/java/org/apache/nutch/webui/model/NutchConfig.java
===================================================================
--- src/java/org/apache/nutch/webui/model/NutchConfig.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/model/NutchConfig.java	(working copy)
@@ -3,16 +3,19 @@
 public class NutchConfig {
   private String name = "name";
   private String value;
-  
-  public void setName (String name){
+
+  public void setName(String name) {
     this.name = name;
   }
-  public String getName(){
+
+  public String getName() {
     return this.name;
   }
+
   public String getValue() {
     return value;
   }
+
   public void setValue(String value) {
     this.value = value;
   }
Index: src/java/org/apache/nutch/webui/model/SeedUrl.java
===================================================================
--- src/java/org/apache/nutch/webui/model/SeedUrl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/model/SeedUrl.java	(working copy)
@@ -57,7 +57,7 @@
   public void setUrl(String url) {
     this.url = url;
   }
-  
+
   @JsonIgnore
   public SeedList getSeedList() {
     return seedList;
Index: src/java/org/apache/nutch/webui/pages/AbstractBasePage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/AbstractBasePage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/AbstractBasePage.java	(working copy)
@@ -74,16 +74,25 @@
     navbar.setPosition(Position.TOP);
     add(navbar);
 
-    addMenuItem(DashboardPage.class, "navbar.menu.dashboard", FontAwesomeIconType.dashboard);
-    addMenuItem(StatisticsPage.class, "navbar.menu.statistics", FontAwesomeIconType.bar_chart_o);
-    addMenuItem(InstancesPage.class, "navbar.menu.instances", FontAwesomeIconType.gears);
-    addMenuItem(SettingsPage.class, "navbar.menu.settings", FontAwesomeIconType.wrench);
-    addMenuItem(CrawlsPage.class, "navbar.menu.crawls", FontAwesomeIconType.refresh);
-    addMenuItem(SchedulingPage.class, "navbar.menu.scheduling", FontAwesomeIconType.clock_o);
-    addMenuItem(SearchPage.class, "navbar.menu.search", FontAwesomeIconType.search);
-    addMenuItem(SeedListsPage.class, "navbar.menu.seedLists", FontAwesomeIconType.file);
+    addMenuItem(DashboardPage.class, "navbar.menu.dashboard",
+        FontAwesomeIconType.dashboard);
+    addMenuItem(StatisticsPage.class, "navbar.menu.statistics",
+        FontAwesomeIconType.bar_chart_o);
+    addMenuItem(InstancesPage.class, "navbar.menu.instances",
+        FontAwesomeIconType.gears);
+    addMenuItem(SettingsPage.class, "navbar.menu.settings",
+        FontAwesomeIconType.wrench);
+    addMenuItem(CrawlsPage.class, "navbar.menu.crawls",
+        FontAwesomeIconType.refresh);
+    addMenuItem(SchedulingPage.class, "navbar.menu.scheduling",
+        FontAwesomeIconType.clock_o);
+    addMenuItem(SearchPage.class, "navbar.menu.search",
+        FontAwesomeIconType.search);
+    addMenuItem(SeedListsPage.class, "navbar.menu.seedLists",
+        FontAwesomeIconType.file);
 
-    navbar.addComponents(transform(ComponentPosition.RIGHT, addInstancesMenuMenu()));
+    navbar.addComponents(transform(ComponentPosition.RIGHT,
+        addInstancesMenuMenu()));
     navbar.addComponents(transform(ComponentPosition.RIGHT, addUserMenu()));
 
     add(new NotificationPanel("globalNotificationPanel"));
@@ -99,11 +108,13 @@
       @Override
       protected List<AbstractLink> newSubMenuButtons(final String buttonMarkupId) {
         List<AbstractLink> subMenu = Lists.newArrayList();
-        subMenu.add(new MenuBookmarkablePageLink<Void>(UserSettingsPage.class, new ResourceModel(
-            "navbar.userMenu.settings")).setIconType(FontAwesomeIconType.gear));
+        subMenu.add(new MenuBookmarkablePageLink<Void>(UserSettingsPage.class,
+            new ResourceModel("navbar.userMenu.settings"))
+            .setIconType(FontAwesomeIconType.gear));
         subMenu.add(new MenuDivider());
-        subMenu.add(new MenuBookmarkablePageLink<Void>(LogOutPage.class, new ResourceModel(
-            "navbar.userMenu.logout")).setIconType(FontAwesomeIconType.power_off));
+        subMenu.add(new MenuBookmarkablePageLink<Void>(LogOutPage.class,
+            new ResourceModel("navbar.userMenu.logout"))
+            .setIconType(FontAwesomeIconType.power_off));
         return subMenu;
       }
     }.setIconType(FontAwesomeIconType.user);
@@ -119,7 +130,8 @@
         List<NutchInstance> instances = instanceService.getInstances();
         List<AbstractLink> subMenu = Lists.newArrayList();
         for (NutchInstance instance : instances) {
-          subMenu.add(new Link<NutchInstance>(buttonMarkupId, Model.of(instance)) {
+          subMenu.add(new Link<NutchInstance>(buttonMarkupId, Model
+              .of(instance)) {
             @Override
             public void onClick() {
               currentInstance.setObject(getModelObject());
@@ -134,8 +146,10 @@
     return instancesMenu;
   }
 
-  private <P extends Page> void addMenuItem(Class<P> page, String label, IconType icon) {
-    Component button = new NavbarButton<Void>(page, Model.of(getString(label))).setIconType(icon);
+  private <P extends Page> void addMenuItem(Class<P> page, String label,
+      IconType icon) {
+    Component button = new NavbarButton<Void>(page, Model.of(getString(label)))
+        .setIconType(icon);
     navbar.addComponents(NavbarComponents.transform(LEFT, button));
   }
 
Index: src/java/org/apache/nutch/webui/pages/LogOutPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/LogOutPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/LogOutPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class LogOutPage extends AbstractBasePage{
+public class LogOutPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/SchedulingPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/SchedulingPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/SchedulingPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class SchedulingPage extends AbstractBasePage{
+public class SchedulingPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/SearchPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/SearchPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/SearchPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class SearchPage extends AbstractBasePage{
+public class SearchPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/StatisticsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/StatisticsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/StatisticsPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class StatisticsPage extends AbstractBasePage{
+public class StatisticsPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/UrlsUploadPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/UrlsUploadPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/UrlsUploadPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class UrlsUploadPage extends AbstractBasePage{
+public class UrlsUploadPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/UserSettingsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/UserSettingsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/UserSettingsPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class UserSettingsPage extends AbstractBasePage{
+public class UserSettingsPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/components/ColorEnumLabel.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/components/ColorEnumLabel.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/components/ColorEnumLabel.java	(working copy)
@@ -63,7 +63,8 @@
     }
   }
 
-  public static <E extends Enum<E>> ColorEnumLabelBuilder<E> getBuilder(String id) {
+  public static <E extends Enum<E>> ColorEnumLabelBuilder<E> getBuilder(
+      String id) {
     return new ColorEnumLabelBuilder<E>(id);
   }
 
Index: src/java/org/apache/nutch/webui/pages/components/CpmIteratorAdapter.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/components/CpmIteratorAdapter.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/components/CpmIteratorAdapter.java	(working copy)
@@ -21,9 +21,11 @@
 import org.apache.wicket.model.IModel;
 
 /**
- * This is iterator adapter, which wraps iterable items with CompoundPropertyModel.
+ * This is iterator adapter, which wraps iterable items with
+ * CompoundPropertyModel.
+ *
  * @author feodor
- *
+ *
  * @param <T>
  */
 public class CpmIteratorAdapter<T> extends ModelIteratorAdapter<T> {
Index: src/java/org/apache/nutch/webui/pages/crawls/CrawlPanel.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/crawls/CrawlPanel.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/crawls/CrawlPanel.java	(working copy)
@@ -65,8 +65,9 @@
     form.add(new TextField<String>("crawlName").setRequired(true));
 
     form.add(new DropDownChoice<Integer>("numberOfRounds", getNumbersOfRounds()));
-    form.add(new DropDownChoice<SeedList>("seedList", seedListService.findAll(),
-        new ChoiceRenderer<SeedList>("name")).setRequired(true));
+    form.add(new DropDownChoice<SeedList>("seedList",
+        seedListService.findAll(), new ChoiceRenderer<SeedList>("name"))
+        .setRequired(true));
 
     addButton(new AjaxSubmitLink("button", form) {
       @Override
Index: src/java/org/apache/nutch/webui/pages/crawls/CrawlsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/crawls/CrawlsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/crawls/CrawlsPage.java	(working copy)
@@ -103,7 +103,7 @@
       }
     }.add(new Label("crawlName")));
     item.add(new Label("seedList.name"));
-    
+
     item.add(new Label("progress"));
     item.add(createStatusLabel());
     item.add(new Link<Crawl>("start", item.getModel()) {
@@ -132,8 +132,8 @@
   }
 
   private EnumLabel<CrawlStatus> createStatusLabel() {
-    return new ColorEnumLabelBuilder<CrawlStatus>("status").withEnumColor(NEW, Default)
-        .withEnumColor(ERROR, Danger).withEnumColor(FINISHED, Success)
-        .withEnumColor(CRAWLING, Info).build();
+    return new ColorEnumLabelBuilder<CrawlStatus>("status")
+        .withEnumColor(NEW, Default).withEnumColor(ERROR, Danger)
+        .withEnumColor(FINISHED, Success).withEnumColor(CRAWLING, Info).build();
   }
 }
Index: src/java/org/apache/nutch/webui/pages/instances/InstancePanel.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/instances/InstancePanel.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/instances/InstancePanel.java	(working copy)
@@ -37,7 +37,8 @@
     form.add(new TextField<String>("host").setRequired(true));
     form.add(new TextField<Integer>("port").setRequired(true));
     form.add(new TextField<String>("username"));
-    form.add(new PasswordTextField("password").setResetPassword(false).setRequired(false));
+    form.add(new PasswordTextField("password").setResetPassword(false)
+        .setRequired(false));
 
     addButton(new AjaxSubmitLink("button", form) {
       @Override
Index: src/java/org/apache/nutch/webui/pages/instances/InstancesPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/instances/InstancesPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/instances/InstancesPage.java	(working copy)
@@ -53,11 +53,13 @@
   }
 
   private RefreshingView<NutchInstance> refreshingView() {
-    RefreshingView<NutchInstance> instances = new RefreshingView<NutchInstance>("instances") {
+    RefreshingView<NutchInstance> instances = new RefreshingView<NutchInstance>(
+        "instances") {
 
       @Override
       protected Iterator<IModel<NutchInstance>> getItemModels() {
-        return new CpmIteratorAdapter<NutchInstance>(instanceService.getInstances());
+        return new CpmIteratorAdapter<NutchInstance>(
+            instanceService.getInstances());
       }
 
       @Override
@@ -72,7 +74,8 @@
     return new AjaxLink<NutchInstance>("addInstance") {
       @Override
       public void onClick(AjaxRequestTarget target) {
-        instancePanel.setModel(new CompoundPropertyModel<NutchInstance>(new NutchInstance()));
+        instancePanel.setModel(new CompoundPropertyModel<NutchInstance>(
+            new NutchInstance()));
         target.add(instancePanel);
         instancePanel.appendShowDialogJavaScript(target);
       }
Index: src/java/org/apache/nutch/webui/pages/menu/VerticalMenu.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/menu/VerticalMenu.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/menu/VerticalMenu.java	(working copy)
@@ -18,7 +18,7 @@
 
 import de.agilecoders.wicket.core.markup.html.bootstrap.navbar.Navbar;
 
-public class VerticalMenu extends Navbar{
+public class VerticalMenu extends Navbar {
 
   public VerticalMenu(String componentId) {
     super(componentId);
Index: src/java/org/apache/nutch/webui/pages/seed/SeedListsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/seed/SeedListsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/seed/SeedListsPage.java	(working copy)
@@ -44,7 +44,8 @@
 
   public SeedListsPage() {
 
-    RefreshingView<SeedList> seedLists = new RefreshingView<SeedList>("seedLists") {
+    RefreshingView<SeedList> seedLists = new RefreshingView<SeedList>(
+        "seedLists") {
 
       @Override
       protected Iterator<IModel<SeedList>> getItemModels() {
@@ -56,7 +57,8 @@
         PageParameters params = new PageParameters();
         params.add("id", item.getModelObject().getId());
 
-        Link<Void> edit = new BookmarkablePageLink<Void>("edit", SeedPage.class, params);
+        Link<Void> edit = new BookmarkablePageLink<Void>("edit",
+            SeedPage.class, params);
         edit.add(new Label("name"));
         item.add(edit);
 
Index: src/java/org/apache/nutch/webui/pages/seed/SeedPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/seed/SeedPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/seed/SeedPage.java	(working copy)
@@ -122,7 +122,8 @@
   }
 
   private void addUrlForm() {
-    urlForm = new Form<SeedUrl>("urlForm", CompoundPropertyModel.of(Model.of(new SeedUrl())));
+    urlForm = new Form<SeedUrl>("urlForm", CompoundPropertyModel.of(Model
+        .of(new SeedUrl())));
     urlForm.setOutputMarkupId(true);
     urlForm.add(new TextField<String>("url"));
     urlForm.add(new AjaxSubmitLink("addUrl", urlForm) {
Index: src/java/org/apache/nutch/webui/pages/settings/SettingsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/settings/SettingsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/settings/SettingsPage.java	(working copy)
@@ -26,12 +26,14 @@
   public SettingsPage() {
     settingsTable = new WebMarkupContainer("settingsTable");
     settingsTable.setOutputMarkupId(true);
-    RefreshingView<NutchConfig> nutchConfig = new RefreshingView<NutchConfig>("settings") {
+    RefreshingView<NutchConfig> nutchConfig = new RefreshingView<NutchConfig>(
+        "settings") {
 
       @Override
       protected Iterator<IModel<NutchConfig>> getItemModels() {
         return new CpmIteratorAdapter<NutchConfig>(
-            convertNutchConfig(nutchService.getNutchConfig(getCurrentInstance().getId())));
+            convertNutchConfig(nutchService.getNutchConfig(getCurrentInstance()
+                .getId())));
       }
 
       @Override
Index: src/java/org/apache/nutch/webui/service/CrawlService.java
===================================================================
--- src/java/org/apache/nutch/webui/service/CrawlService.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/CrawlService.java	(working copy)
@@ -22,7 +22,7 @@
 import org.apache.nutch.webui.model.NutchInstance;
 
 public interface CrawlService {
-  
+
   public void saveCrawl(Crawl crawl);
 
   public List<Crawl> getCrawls();
Index: src/java/org/apache/nutch/webui/service/NutchService.java
===================================================================
--- src/java/org/apache/nutch/webui/service/NutchService.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/NutchService.java	(working copy)
@@ -24,8 +24,8 @@
 public interface NutchService {
   public ConnectionStatus getConnectionStatus(Long instanceId);
 
-  public  Map<String, String> getNutchConfig(Long instanceId);
-  
+  public Map<String, String> getNutchConfig(Long instanceId);
+
   public NutchStatus getNutchStatus(Long instanceId);
 
 }
Index: src/java/org/apache/nutch/webui/service/impl/CrawlServiceImpl.java
===================================================================
--- src/java/org/apache/nutch/webui/service/impl/CrawlServiceImpl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/impl/CrawlServiceImpl.java	(working copy)
@@ -67,7 +67,7 @@
 
       CrawlingCycle cycle = new CrawlingCycle(this, executor, crawl, commands);
       cycle.executeCrawlCycle();
-      
+
     } catch (Exception e) {
       crawl.setStatus(CrawlStatus.ERROR);
       saveCrawl(crawl);
Index: src/java/org/apache/nutch/webui/service/impl/NutchServiceImpl.java
===================================================================
--- src/java/org/apache/nutch/webui/service/impl/NutchServiceImpl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/impl/NutchServiceImpl.java	(working copy)
@@ -36,7 +36,8 @@
 
 @Service
 public class NutchServiceImpl implements NutchService {
-  private static final Logger logger = LoggerFactory.getLogger(NutchServiceImpl.class);
+  private static final Logger logger = LoggerFactory
+      .getLogger(NutchServiceImpl.class);
 
   @Resource
   private NutchClientFactory nutchClientFactory;
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(revision 1650444)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(working copy)
@@ -38,101 +38,101 @@
 
 /** Adds basic searchable fields to a document. */
 public class CCIndexingFilter implements IndexingFilter {
-	public static final Logger LOG = LoggerFactory.getLogger(CCIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(CCIndexingFilter.class);
 
-	/** The name of the document field we use. */
-	public static String FIELD = "cc";
+  /** The name of the document field we use. */
+  public static String FIELD = "cc";
 
-	private Configuration conf;
+  private Configuration conf;
 
-	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	static {
-		FIELDS.add(WebPage.Field.BASE_URL);
-		FIELDS.add(WebPage.Field.METADATA);
-	}
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-	/**
-	 * Add the features represented by a license URL. Urls are of the form
-	 * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
-	 * license feature.
-	 */
-	public void addUrlFeatures(NutchDocument doc, String urlString) {
-		try {
-			URL url = new URL(urlString);
+  /**
+   * Add the features represented by a license URL. Urls are of the form
+   * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
+   * license feature.
+   */
+  public void addUrlFeatures(NutchDocument doc, String urlString) {
+    try {
+      URL url = new URL(urlString);
 
-			// tokenize the path of the url, breaking at slashes and dashes
-			StringTokenizer names = new StringTokenizer(url.getPath(), "/-");
+      // tokenize the path of the url, breaking at slashes and dashes
+      StringTokenizer names = new StringTokenizer(url.getPath(), "/-");
 
-			if (names.hasMoreTokens())
-				names.nextToken(); // throw away "licenses"
+      if (names.hasMoreTokens())
+        names.nextToken(); // throw away "licenses"
 
-			// add a feature per component after "licenses"
-			while (names.hasMoreTokens()) {
-				String feature = names.nextToken();
-				addFeature(doc, feature);
-			}
-		} catch (MalformedURLException e) {
-			if (LOG.isWarnEnabled()) {
-				LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
-			}
-		}
-	}
+      // add a feature per component after "licenses"
+      while (names.hasMoreTokens()) {
+        String feature = names.nextToken();
+        addFeature(doc, feature);
+      }
+    } catch (MalformedURLException e) {
+      if (LOG.isWarnEnabled()) {
+        LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
+      }
+    }
+  }
 
-	private void addFeature(NutchDocument doc, String feature) {
-		doc.add(FIELD, feature);
-	}
+  private void addFeature(NutchDocument doc, String feature) {
+    doc.add(FIELD, feature);
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	public Configuration getConf() {
-		return this.conf;
-	}
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-	@Override
-	public Collection<Field> getFields() {
-		return FIELDS;
-	}
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
 
-	@Override
-	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
-			throws IndexingException {
+  @Override
+  public NutchDocument filter(NutchDocument doc, String url, WebPage page)
+      throws IndexingException {
 
-		ByteBuffer blicense = page.getMetadata().get(new Utf8(
-				CreativeCommons.LICENSE_URL));
-		if (blicense != null) {
-			String licenseUrl = Bytes.toString(blicense);
-			if (LOG.isInfoEnabled()) {
-				LOG.info("CC: indexing " + licenseUrl + " for: "
-						+ url.toString());
-			}
+    ByteBuffer blicense = page.getMetadata().get(
+        new Utf8(CreativeCommons.LICENSE_URL));
+    if (blicense != null) {
+      String licenseUrl = Bytes.toString(blicense);
+      if (LOG.isInfoEnabled()) {
+        LOG.info("CC: indexing " + licenseUrl + " for: " + url.toString());
+      }
 
-			// add the entire license as cc:license=xxx
-			addFeature(doc, "license=" + licenseUrl);
+      // add the entire license as cc:license=xxx
+      addFeature(doc, "license=" + licenseUrl);
 
-			// index license attributes extracted of the license url
-			addUrlFeatures(doc, licenseUrl);
-		}
+      // index license attributes extracted of the license url
+      addUrlFeatures(doc, licenseUrl);
+    }
 
-		// index the license location as cc:meta=xxx
-		ByteBuffer blicenseloc = page.getMetadata().get(new Utf8(
-				CreativeCommons.LICENSE_LOCATION));
-		if (blicenseloc != null) {
-			String licenseLocation = Bytes.toString(blicenseloc);
-			addFeature(doc, "meta=" + licenseLocation);
-		}
+    // index the license location as cc:meta=xxx
+    ByteBuffer blicenseloc = page.getMetadata().get(
+        new Utf8(CreativeCommons.LICENSE_LOCATION));
+    if (blicenseloc != null) {
+      String licenseLocation = Bytes.toString(blicenseloc);
+      addFeature(doc, "meta=" + licenseLocation);
+    }
 
-		// index the work type cc:type=xxx
-		ByteBuffer bworkType = page.getMetadata().get(new Utf8(
-				CreativeCommons.WORK_TYPE));
-		if (bworkType != null) {
-			String workType = Bytes.toString(bworkType);
-			addFeature(doc, workType);
-		}
+    // index the work type cc:type=xxx
+    ByteBuffer bworkType = page.getMetadata().get(
+        new Utf8(CreativeCommons.WORK_TYPE));
+    if (bworkType != null) {
+      String workType = Bytes.toString(bworkType);
+      addFeature(doc, workType);
+    }
 
-		return doc;
-	}
+    return doc;
+  }
 
 }
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(revision 1650444)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(working copy)
@@ -55,8 +55,8 @@
     }
 
     /** Scan the document adding attributes to metadata. */
-    public static void walk(Node doc, URL base, WebPage page,
-        Configuration conf) throws ParseException {
+    public static void walk(Node doc, URL base, WebPage page, Configuration conf)
+        throws ParseException {
 
       // walk the DOM tree, scanning for license data
       Walker walker = new Walker(base);
@@ -67,7 +67,7 @@
       String licenseLocation = null;
       if (walker.rdfLicense != null) { // 1st choice: subject in RDF
         licenseLocation = "rdf";
-	licenseUrl = walker.rdfLicense;
+        licenseUrl = walker.rdfLicense;
       } else if (walker.relLicense != null) { // 2nd: anchor w/
         // rel=license
         licenseLocation = "rel";
@@ -74,29 +74,30 @@
         licenseUrl = walker.relLicense.toString();
       } else if (walker.anchorLicense != null) { // 3rd: anchor w/ CC
         // license
-	licenseLocation = "a";
-	licenseUrl = walker.anchorLicense.toString();
+        licenseLocation = "a";
+        licenseUrl = walker.anchorLicense.toString();
       } else if (conf.getBoolean("creativecommons.exclude.unlicensed", false)) {
-          throw new ParseException("No CC license.  Excluding.");
+        throw new ParseException("No CC license.  Excluding.");
       }
 
       // add license to metadata
       if (licenseUrl != null) {
         if (LOG.isDebugEnabled()) {
-	  LOG.debug("CC: found " + licenseUrl + " in " + licenseLocation + " of " + base);
-	}
-	page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_URL),
-	ByteBuffer.wrap(licenseUrl.getBytes()));
-	page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_LOCATION),
-	    ByteBuffer.wrap(licenseLocation.getBytes()));
+          LOG.debug("CC: found " + licenseUrl + " in " + licenseLocation
+              + " of " + base);
+        }
+        page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_URL),
+            ByteBuffer.wrap(licenseUrl.getBytes()));
+        page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_LOCATION),
+            ByteBuffer.wrap(licenseLocation.getBytes()));
       }
 
       if (walker.workType != null) {
         if (LOG.isDebugEnabled()) {
-	  LOG.debug("CC: found " + walker.workType + " in " + base);
-	}
-	page.getMetadata().put(new Utf8(CreativeCommons.WORK_TYPE),
-	   ByteBuffer.wrap(walker.workType.getBytes()));
+          LOG.debug("CC: found " + walker.workType + " in " + base);
+        }
+        page.getMetadata().put(new Utf8(CreativeCommons.WORK_TYPE),
+            ByteBuffer.wrap(walker.workType.getBytes()));
       }
 
     }
@@ -121,8 +122,8 @@
     }
 
     /**
-     * Extract license url from element, if any. Thse are the href attribute
-     * of anchor elements with rel="license". These must also point to
+     * Extract license url from element, if any. These are the href attribute of
+     * anchor elements with rel="license". These must also point to
      * http://creativecommons.org/licenses/.
      */
     private void findLicenseUrl(Element element) {
@@ -137,27 +138,27 @@
       try {
         URL url = new URL(base, href); // resolve the url
         // check that it's a CC license URL
-	if ("http".equalsIgnoreCase(url.getProtocol())
-	    && "creativecommons.org".equalsIgnoreCase(url.getHost())
-	    && url.getPath() != null && url.getPath().startsWith("/licenses/")
-	    && url.getPath().length() > "/licenses/".length()) {
+        if ("http".equalsIgnoreCase(url.getProtocol())
+            && "creativecommons.org".equalsIgnoreCase(url.getHost())
+            && url.getPath() != null && url.getPath().startsWith("/licenses/")
+            && url.getPath().length() > "/licenses/".length()) {
 
-	  // check rel="license"
-	  String rel = element.getAttribute("rel");
-	  if (rel != null && "license".equals(rel)
-	      && this.relLicense == null) {
-	    this.relLicense = url; // found rel license
-	  } else if (this.anchorLicense == null) {
-	    this.anchorLicense = url; // found anchor license
-	  }
-	}
+          // check rel="license"
+          String rel = element.getAttribute("rel");
+          if (rel != null && "license".equals(rel) && this.relLicense == null) {
+            this.relLicense = url; // found rel license
+          } else if (this.anchorLicense == null) {
+            this.anchorLicense = url; // found anchor license
+          }
+        }
       } catch (MalformedURLException e) { // ignore malformed urls
       }
     }
 
     /** Configure a namespace aware XML parser. */
-    private static final DocumentBuilderFactory FACTORY = DocumentBuilderFactory.newInstance();
-      
+    private static final DocumentBuilderFactory FACTORY = DocumentBuilderFactory
+        .newInstance();
+
     static {
       FACTORY.setNamespaceAware(true);
     }
@@ -177,129 +178,132 @@
       if (rdfPosition < 0)
         return; // no RDF, abort
       int nsPosition = comment.indexOf(CC_NS);
-        if (nsPosition < 0)
-	  return; // no RDF, abort
-	// try to parse the XML
-	Document doc;
-	try {
-          DocumentBuilder parser = FACTORY.newDocumentBuilder();
-	  doc = parser.parse(new InputSource(new StringReader(comment)));
-	} catch (Exception e) {
-	  if (LOG.isWarnEnabled()) {
-	    LOG.warn("CC: Failed to parse RDF in " + base + ": " + e);
-	  }
-	  // e.printStackTrace();
-	  return;
-	}
+      if (nsPosition < 0)
+        return; // no RDF, abort
+      // try to parse the XML
+      Document doc;
+      try {
+        DocumentBuilder parser = FACTORY.newDocumentBuilder();
+        doc = parser.parse(new InputSource(new StringReader(comment)));
+      } catch (Exception e) {
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("CC: Failed to parse RDF in " + base + ": " + e);
+        }
+        // e.printStackTrace();
+        return;
+      }
 
-	// check that root is rdf:RDF
-	NodeList roots = doc.getElementsByTagNameNS(RDF_NS, "RDF");
-	if (roots.getLength() != 1) {
-	  if (LOG.isWarnEnabled()) {
-	    LOG.warn("CC: No RDF root in " + base);
-	  }
-	  return;
-	}
-	Element rdf = (Element) roots.item(0);
+      // check that root is rdf:RDF
+      NodeList roots = doc.getElementsByTagNameNS(RDF_NS, "RDF");
+      if (roots.getLength() != 1) {
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("CC: No RDF root in " + base);
+        }
+        return;
+      }
+      Element rdf = (Element) roots.item(0);
 
-	// get cc:License nodes inside rdf:RDF
-	NodeList licenses = rdf.getElementsByTagNameNS(CC_NS, "License");
-	for (int i = 0; i < licenses.getLength(); i++) {
-          Element l = (Element) licenses.item(i);
-	  // license is rdf:about= attribute from cc:License
-	  this.rdfLicense = l.getAttributeNodeNS(RDF_NS, "about").getValue();
+      // get cc:License nodes inside rdf:RDF
+      NodeList licenses = rdf.getElementsByTagNameNS(CC_NS, "License");
+      for (int i = 0; i < licenses.getLength(); i++) {
+        Element l = (Element) licenses.item(i);
+        // license is rdf:about= attribute from cc:License
+        this.rdfLicense = l.getAttributeNodeNS(RDF_NS, "about").getValue();
 
-          // walk predicates of cc:License
-	  NodeList predicates = l.getChildNodes();
-	  for (int j = 0; j < predicates.getLength(); j++) {
-	    Node predicateNode = predicates.item(j);
-	    if (!(predicateNode instanceof Element))
-	      continue;
-	      Element predicateElement = (Element) predicateNode;
-              // extract predicates of cc:xxx predicates
-	      if (!CC_NS.equals(predicateElement.getNamespaceURI())) {
-	        continue;
-	      }
-	      String predicate = predicateElement.getLocalName();
-              // object is rdf:resource from cc:xxx predicates
-	      String object = predicateElement.getAttributeNodeNS(RDF_NS, "resource").getValue();
-              // add object and predicate to metadata
-	      // metadata.put(object, predicate);
-	      //if (LOG.isInfoEnabled()) {
-	      // LOG.info("CC: found: "+predicate+"="+object);
-	      // }
-	  }
-	}
+        // walk predicates of cc:License
+        NodeList predicates = l.getChildNodes();
+        for (int j = 0; j < predicates.getLength(); j++) {
+          Node predicateNode = predicates.item(j);
+          if (!(predicateNode instanceof Element))
+            continue;
+          Element predicateElement = (Element) predicateNode;
+          // extract predicates of cc:xxx predicates
+          if (!CC_NS.equals(predicateElement.getNamespaceURI())) {
+            continue;
+          }
+          String predicate = predicateElement.getLocalName();
+          // object is rdf:resource from cc:xxx predicates
+          String object = predicateElement.getAttributeNodeNS(RDF_NS,
+              "resource").getValue();
+          // add object and predicate to metadata
+          // metadata.put(object, predicate);
+          // if (LOG.isInfoEnabled()) {
+          // LOG.info("CC: found: "+predicate+"="+object);
+          // }
+        }
+      }
 
-	// get cc:Work nodes from rdf:RDF
-	NodeList works = rdf.getElementsByTagNameNS(CC_NS, "Work");
-	for (int i = 0; i < works.getLength(); i++) {
-	  Element l = (Element) works.item(i);
+      // get cc:Work nodes from rdf:RDF
+      NodeList works = rdf.getElementsByTagNameNS(CC_NS, "Work");
+      for (int i = 0; i < works.getLength(); i++) {
+        Element l = (Element) works.item(i);
 
-	  // get dc:type nodes from cc:Work
-	  NodeList types = rdf.getElementsByTagNameNS(DC_NS, "type");
-	  for (int j = 0; j < types.getLength(); j++) {
-	    Element type = (Element) types.item(j);
-	    String workUri = type.getAttributeNodeNS(RDF_NS, "resource").getValue();
-	    this.workType = (String) WORK_TYPE_NAMES.get(workUri);
-	    break;
-	  }
-	}
+        // get dc:type nodes from cc:Work
+        NodeList types = rdf.getElementsByTagNameNS(DC_NS, "type");
+        for (int j = 0; j < types.getLength(); j++) {
+          Element type = (Element) types.item(j);
+          String workUri = type.getAttributeNodeNS(RDF_NS, "resource")
+              .getValue();
+          this.workType = (String) WORK_TYPE_NAMES.get(workUri);
+          break;
+        }
       }
     }
+  }
 
-    private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
-      
-    static {
-      FIELDS.add(WebPage.Field.BASE_URL);
-      FIELDS.add(WebPage.Field.METADATA);
-    }
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-    private static final HashMap<String,String> WORK_TYPE_NAMES = new HashMap<String,String>();
-        
-    static {
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive", "interactive");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
-    }
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-    private Configuration conf;
+  private static final HashMap<String, String> WORK_TYPE_NAMES = new HashMap<String, String>();
 
-    public void setConf(Configuration conf) {
-      this.conf = conf;
-    }
+  static {
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive",
+        "interactive");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
+  }
 
-    public Configuration getConf() {
-      return this.conf;
-    }
+  private Configuration conf;
 
-    @Override
-    public Collection<Field> getFields() {
-      return FIELDS;
-    }
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-    /**
-     * Adds metadata or otherwise modifies a parse of an HTML document, given
-     * the DOM tree of a page.
-     */
-    @Override
-    public Parse filter(String url, WebPage page, Parse parse,
-        HTMLMetaTags metaTags, DocumentFragment doc) {
-      // construct base url
-      URL base;
-      try {
-        base = new URL(page.getBaseUrl().toString());
-	// extract license metadata
-	Walker.walk(doc, base, page, getConf());
-      } catch (Exception e) {
-        LOG.error("Error parsing " + url, e);
-	return ParseStatusUtils.getEmptyParse(e, getConf());
-      }
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-      return parse;
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
+
+  /**
+   * Adds metadata or otherwise modifies a parse of an HTML document, given the
+   * DOM tree of a page.
+   */
+  @Override
+  public Parse filter(String url, WebPage page, Parse parse,
+      HTMLMetaTags metaTags, DocumentFragment doc) {
+    // construct base url
+    URL base;
+    try {
+      base = new URL(page.getBaseUrl().toString());
+      // extract license metadata
+      Walker.walk(doc, base, page, getConf());
+    } catch (Exception e) {
+      LOG.error("Error parsing " + url, e);
+      return ParseStatusUtils.getEmptyParse(e, getConf());
     }
+
+    return parse;
+  }
 }
Index: src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(revision 1650444)
+++ src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(working copy)
@@ -36,52 +36,50 @@
 
 public class TestCCParseFilter {
 
-	private static final File testDir = new File(
-			System.getProperty("test.input"));
+  private static final File testDir = new File(System.getProperty("test.input"));
 
   @Test
-	public void testPages() throws Exception {
-		pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
-		// Tika returns <a> whereas parse-html returns <rel>
-		// check later
-		pageTest(new File(testDir, "rel.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
-		// Tika returns <a> whereas parse-html returns <rdf>
-		// check later
-		pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
-	}
+  public void testPages() throws Exception {
+    pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
+    // Tika returns <a> whereas parse-html returns <rel>
+    // check later
+    pageTest(new File(testDir, "rel.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
+    // Tika returns <a> whereas parse-html returns <rdf>
+    // check later
+    pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
+  }
 
-	public void pageTest(File file, String url, String license,
-			String location, String type) throws Exception {
+  public void pageTest(File file, String url, String license, String location,
+      String type) throws Exception {
 
-		InputStream in = new FileInputStream(file);
-		ByteArrayOutputStream out = new ByteArrayOutputStream(
-				(int) file.length());
-		byte[] buffer = new byte[1024];
-		int i;
-		while ((i = in.read(buffer)) != -1) {
-			out.write(buffer, 0, i);
-		}
-		in.close();
-		byte[] bytes = out.toByteArray();
-		Configuration conf = NutchConfiguration.create();
+    InputStream in = new FileInputStream(file);
+    ByteArrayOutputStream out = new ByteArrayOutputStream((int) file.length());
+    byte[] buffer = new byte[1024];
+    int i;
+    while ((i = in.read(buffer)) != -1) {
+      out.write(buffer, 0, i);
+    }
+    in.close();
+    byte[] bytes = out.toByteArray();
+    Configuration conf = NutchConfiguration.create();
 
-		WebPage page = WebPage.newBuilder().build();
-		page.setBaseUrl(new Utf8(url));
-		page.setContent(ByteBuffer.wrap(bytes));
-		MimeUtil mimeutil = new MimeUtil(conf);
-		String mtype = mimeutil.getMimeType(file);
-		page.setContentType(new Utf8(mtype));
+    WebPage page = WebPage.newBuilder().build();
+    page.setBaseUrl(new Utf8(url));
+    page.setContent(ByteBuffer.wrap(bytes));
+    MimeUtil mimeutil = new MimeUtil(conf);
+    String mtype = mimeutil.getMimeType(file);
+    page.setContentType(new Utf8(mtype));
 
-		new ParseUtil(conf).parse(url, page);
+    new ParseUtil(conf).parse(url, page);
 
-		ByteBuffer bb = page.getMetadata().get(new Utf8("License-Url"));
-		assertEquals(license, Bytes.toString(bb));
-		bb = page.getMetadata().get(new Utf8("License-Location"));
-		assertEquals(location, Bytes.toString(bb));
-		bb = page.getMetadata().get(new Utf8("Work-Type"));
-        assertEquals(type, Bytes.toString(bb));
-	}
+    ByteBuffer bb = page.getMetadata().get(new Utf8("License-Url"));
+    assertEquals(license, Bytes.toString(bb));
+    bb = page.getMetadata().get(new Utf8("License-Location"));
+    assertEquals(location, Bytes.toString(bb));
+    bb = page.getMetadata().get(new Utf8("Work-Type"));
+    assertEquals(type, Bytes.toString(bb));
+  }
 }
Index: src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
===================================================================
--- src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(revision 1650444)
+++ src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(working copy)
@@ -32,13 +32,15 @@
 import java.util.Map.Entry;
 
 /**
- * Indexing filter that offers an option to either index all inbound anchor text for 
- * a document or deduplicate anchors. Deduplication does have it's con's, 
+ * Indexing filter that offers an option to either index all inbound anchor text
+ * for a document or deduplicate anchors. Deduplication does have its cons,
+ * 
  * @see {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
  */
 public class AnchorIndexingFilter implements IndexingFilter {
 
-  public static final Logger LOG = LoggerFactory.getLogger(AnchorIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(AnchorIndexingFilter.class);
   private Configuration conf;
   private boolean deduplicate = false;
 
@@ -47,7 +49,7 @@
   static {
     FIELDS.add(WebPage.Field.INLINKS);
   }
-  
+
   /**
    * Set the {@link Configuration} object
    */
@@ -57,7 +59,7 @@
     deduplicate = conf.getBoolean("anchorIndexingFilter.deduplicate", false);
     LOG.info("Anchor deduplication is: " + (deduplicate ? "on" : "off"));
   }
-  
+
   /**
    * Get the {@link Configuration} object
    */
@@ -64,18 +66,21 @@
   public Configuration getConf() {
     return this.conf;
   }
-  
+
   public void addIndexBackendOptions(Configuration conf) {
   }
-  
+
   /**
-   * The {@link AnchorIndexingFilter} filter object which supports boolean 
-   * configuration settings for the deduplication of anchors. 
-   * See {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
-   *  
-   * @param doc The {@link NutchDocument} object
-   * @param url URL to be filtered for anchor text
-   * @param page {@link WebPage} object relative to the URL
+   * The {@link AnchorIndexingFilter} filter object which supports boolean
+   * configuration settings for the deduplication of anchors. See
+   * {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
+   * 
+   * @param doc
+   *          The {@link NutchDocument} object
+   * @param url
+   *          URL to be filtered for anchor text
+   * @param page
+   *          {@link WebPage} object relative to the URL
    * @return filtered NutchDocument
    */
   @Override
@@ -82,15 +87,16 @@
   public NutchDocument filter(NutchDocument doc, String url, WebPage page)
       throws IndexingException {
     HashSet<String> set = null;
-    
+
     for (Entry<CharSequence, CharSequence> e : page.getInlinks().entrySet()) {
       String anchor = TableUtil.toString(e.getValue());
-      
-      if(anchor.equals(""))
+
+      if (anchor.equals(""))
         continue;
-      
+
       if (deduplicate) {
-        if (set == null) set = new HashSet<String>();
+        if (set == null)
+          set = new HashSet<String>();
         String lcAnchor = anchor.toLowerCase();
 
         // Check if already processed the current anchor
@@ -104,15 +110,14 @@
         doc.add("anchor", anchor);
       }
     }
-    
+
     return doc;
   }
-  
+
   /**
-   * Gets all the fields for a given {@link WebPage}
-   * Many datastores need to setup the mapreduce job by specifying the fields
-   * needed. All extensions that work on WebPage are able to specify what fields
-   * they need.
+   * Gets all the fields for a given {@link WebPage}. Many datastores need to
+   * set up the mapreduce job by specifying the fields needed. All extensions
+   * that work on WebPage are able to specify what fields they need.
    */
   @Override
   public Collection<WebPage.Field> getFields() {
Index: src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java
===================================================================
--- src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java	(revision 1650444)
+++ src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java	(working copy)
@@ -25,13 +25,12 @@
 import static org.junit.Assert.*;
 
 /**
- * JUnit test case which tests
- * 1. that anchor text is obtained
- * 2. that anchor deduplication functionality is working
- *
+ * JUnit test case which tests 1. that anchor text is obtained 2. that anchor
+ * deduplication functionality is working
+ * 
  */
 public class TestAnchorIndexingFilter {
-  
+
   @Test
   public void testDeduplicateAnchor() throws Exception {
     Configuration conf = NutchConfiguration.create();
@@ -40,14 +39,19 @@
     filter.setConf(conf);
     NutchDocument doc = new NutchDocument();
     WebPage page = WebPage.newBuilder().build();
-    page.getInlinks().put(new Utf8("http://example1.com/"), new Utf8("cool site"));
-    page.getInlinks().put(new Utf8("http://example2.com/"), new Utf8("cool site"));
-    page.getInlinks().put(new Utf8("http://example3.com/"), new Utf8("fun site"));
+    page.getInlinks().put(new Utf8("http://example1.com/"),
+        new Utf8("cool site"));
+    page.getInlinks().put(new Utf8("http://example2.com/"),
+        new Utf8("cool site"));
+    page.getInlinks().put(new Utf8("http://example3.com/"),
+        new Utf8("fun site"));
     filter.filter(doc, "http://myurldoesnotmatter.com/", page);
-    
-    assertTrue("test if there is an anchor at all", doc.getFieldNames().contains("anchor"));
-    
-    assertEquals("test dedup, we expect 2", 2, doc.getFieldValues("anchor").size());
+
+    assertTrue("test if there is an anchor at all", doc.getFieldNames()
+        .contains("anchor"));
+
+    assertEquals("test dedup, we expect 2", 2, doc.getFieldValues("anchor")
+        .size());
   }
 
 }
Index: src/plugin/index-basic/src/java/org/apache/nut