Index: src/java/org/apache/nutch/api/NutchServer.java
===================================================================
--- src/java/org/apache/nutch/api/NutchServer.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/NutchServer.java	(working copy)
@@ -165,7 +165,8 @@
    * Safety and convenience method to determine whether or not it is safe to
    * shut down the server. We make this assertion by consulting the
    * {@link org.apache.nutch.api.NutchApp#jobManager} for a list of jobs with
-   * {@link org.apache.nutch.api.model.response.JobInfo#state} equal to 'RUNNING'.
+   * {@link org.apache.nutch.api.model.response.JobInfo#state} equal to
+   * 'RUNNING'.
    * 
    * @param force
    *          ignore running tasks
Index: src/java/org/apache/nutch/api/impl/NutchServerPoolExecutor.java
===================================================================
--- src/java/org/apache/nutch/api/impl/NutchServerPoolExecutor.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/NutchServerPoolExecutor.java	(working copy)
@@ -103,7 +103,7 @@
 
   public JobInfo getInfo(String jobId) {
     for (JobInfo jobInfo : getAllJobs()) {
-      if(StringUtils.equals(jobId, jobInfo.getId())){
+      if (StringUtils.equals(jobId, jobInfo.getId())) {
         return jobInfo;
       }
     }
Index: src/java/org/apache/nutch/api/impl/RAMConfManager.java
===================================================================
--- src/java/org/apache/nutch/api/impl/RAMConfManager.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/RAMConfManager.java	(working copy)
@@ -89,7 +89,7 @@
     if (!canCreate(nutchConfig)) {
       throw new IllegalArgumentException("Config already exists.");
     }
-    
+
     createHadoopConfig(nutchConfig);
     return nutchConfig.getConfigId();
   }
Index: src/java/org/apache/nutch/api/impl/RAMJobManager.java
===================================================================
--- src/java/org/apache/nutch/api/impl/RAMJobManager.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/RAMJobManager.java	(working copy)
@@ -80,9 +80,10 @@
 
   private NutchTool createTool(JobConfig jobConfig, Configuration conf) {
     if (StringUtils.isNotBlank(jobConfig.getJobClassName())) {
-      return jobFactory.createToolByClassName(jobConfig.getJobClassName(), conf);
+      return jobFactory
+          .createToolByClassName(jobConfig.getJobClassName(), conf);
     }
-    
+
     return jobFactory.createToolByType(jobConfig.getType(), conf);
   }
 
Index: src/java/org/apache/nutch/api/impl/db/DbIterator.java
===================================================================
--- src/java/org/apache/nutch/api/impl/db/DbIterator.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/db/DbIterator.java	(working copy)
@@ -100,7 +100,8 @@
   }
 
   private Map<String, Object> pageAsMap(String url, WebPage page) {
-    Map<String, Object> result = DbPageConverter.convertPage(page, commonFields);
+    Map<String, Object> result = DbPageConverter
+        .convertPage(page, commonFields);
 
     if (CollectionUtils.isEmpty(commonFields) || commonFields.contains("url")) {
       result.put("url", TableUtil.unreverseUrl(url));
Index: src/java/org/apache/nutch/api/impl/db/DbPageConverter.java
===================================================================
--- src/java/org/apache/nutch/api/impl/db/DbPageConverter.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/db/DbPageConverter.java	(working copy)
@@ -103,7 +103,7 @@
     if (CollectionUtils.isEmpty(queryFields)) {
       return Sets.newHashSet(pageFields);
     }
-    
+
     Set<Field> filteredFields = Sets.newLinkedHashSet();
     for (Field field : pageFields) {
       if (queryFields.contains(field.name())) {
Index: src/java/org/apache/nutch/api/impl/package-info.java
===================================================================
--- src/java/org/apache/nutch/api/impl/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/impl/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Implementations of REST API interfaces.
  */
 package org.apache.nutch.api.impl;
+
Index: src/java/org/apache/nutch/api/model/response/NutchStatus.java
===================================================================
--- src/java/org/apache/nutch/api/model/response/NutchStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/model/response/NutchStatus.java	(working copy)
@@ -54,18 +54,16 @@
     this.jobs = jobs;
   }
 
-  public Collection<JobInfo> getRunningJobs()
-  {
+  public Collection<JobInfo> getRunningJobs() {
     return purgeFinishedFailedJobs(runningJobs);
   }
 
-
   public void setRunningJobs(Collection<JobInfo> runningJobs) {
     this.runningJobs = runningJobs;
   }
 
-  private Collection<JobInfo> purgeFinishedFailedJobs(Collection<JobInfo> runningJobColl)
-  {
+  private Collection<JobInfo> purgeFinishedFailedJobs(
+      Collection<JobInfo> runningJobColl) {
     if (CollectionUtils.isNotEmpty(runningJobColl)) {
       Iterator<JobInfo> runningJobsIterator = runningJobColl.iterator();
       while (runningJobsIterator.hasNext()) {
@@ -73,8 +71,7 @@
 
         if (jobInfo.getState().equals(State.FINISHED)) {
           runningJobsIterator.remove();
-        }
-        else if (jobInfo.getState().equals(State.FAILED)) {
+        } else if (jobInfo.getState().equals(State.FAILED)) {
           runningJobsIterator.remove();
         }
 
Index: src/java/org/apache/nutch/api/package-info.java
===================================================================
--- src/java/org/apache/nutch/api/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * REST API to run and control crawl jobs.
  */
 package org.apache.nutch.api;
+
Index: src/java/org/apache/nutch/api/resources/SeedResource.java
===================================================================
--- src/java/org/apache/nutch/api/resources/SeedResource.java	(revision 1650444)
+++ src/java/org/apache/nutch/api/resources/SeedResource.java	(working copy)
@@ -43,7 +43,8 @@
 
 @Path("/seed")
 public class SeedResource extends AbstractResource {
-  private static final Logger log = LoggerFactory.getLogger(AdminResource.class);
+  private static final Logger log = LoggerFactory
+      .getLogger(AdminResource.class);
 
   @POST
   @Path("/create")
@@ -101,8 +102,8 @@
 
   private RuntimeException handleException(Exception e) {
     log.error("Cannot create seed file!", e);
-    return new WebApplicationException(status(Status.INTERNAL_SERVER_ERROR).entity(
-        "Cannot create seed file!").build());
+    return new WebApplicationException(status(Status.INTERNAL_SERVER_ERROR)
+        .entity("Cannot create seed file!").build());
   }
 
 }
Index: src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(working copy)
@@ -29,13 +29,13 @@
 /**
  * This class provides common methods for implementations of
  * {@link FetchSchedule}.
- *
+ * 
  * @author Andrzej Bialecki
  */
-public abstract class AbstractFetchSchedule
-extends Configured
-implements FetchSchedule {
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractFetchSchedule.class);
+public abstract class AbstractFetchSchedule extends Configured implements
+    FetchSchedule {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(AbstractFetchSchedule.class);
 
   protected int defaultInterval;
   protected int maxInterval;
@@ -59,20 +59,22 @@
   @Override
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     defaultInterval = conf.getInt("db.fetch.interval.default", 0);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
     LOG.info("defaultInterval=" + defaultInterval);
     LOG.info("maxInterval=" + maxInterval);
   }
-  
+
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation sets the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
-   *
-   * @param url URL of the page.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation sets the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
+   * 
+   * @param url
+   *          URL of the page.
    * @param page
    */
   @Override
@@ -84,27 +86,31 @@
 
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page. NOTE: this implementation resets the
-   * retry counter - extending classes should call super.setFetchSchedule() to
+   * successfully fetched page. NOTE: this implementation resets the retry
+   * counter - extending classes should call super.setFetchSchedule() to
    * preserve this behavior.
    */
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     page.setRetriesSinceFetch(0);
   }
 
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%
-   * but the value may never exceed <code>maxInterval</code>.
-   * @param url URL of the page
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50% but the value may
+   * never exceed <code>maxInterval</code>.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than the
+   *         {@code datum} argument, but implementations should
+   *         make sure that the returned value contains at least
+   *         all information from {@code datum}. (The former
+   *         inline "{@param datum}" references were not valid
+   *         Javadoc syntax.)
    */
   @Override
   public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
@@ -121,19 +127,23 @@
   }
 
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases
-   * the retry counter.
-   * @param url URL of the page
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    */
   @Override
   public void setPageRetrySchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime) {
     page.setFetchTime(fetchTime + SECONDS_PER_DAY * 1000L);
     page.setRetriesSinceFetch(page.getRetriesSinceFetch() + 1);
   }
@@ -140,6 +150,7 @@
 
   /**
    * This method return the last fetch time of the CrawlDatum
+   * 
    * @return the date as a long.
    */
   @Override
@@ -148,20 +159,24 @@
   }
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
+   * This method provides information whether the page is suitable for selection
+   * in the current fetchlist. NOTE: a true return value does not guarantee that
+   * the page will be fetched, it just allows it to be included in the further
+   * selection process based on scores. The default implementation checks
+   * <code>fetchTime</code>; if it is higher than
+   * <code>curTime</code> it returns false, and true otherwise. It will
+   * also check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   @Override
   public boolean shouldFetch(String url, WebPage page, long curTime) {
@@ -181,11 +196,14 @@
   /**
    * This method resets fetchTime, fetchInterval, modifiedTime,
    * retriesSinceFetch and page signature, so that it forces refetching.
-   * @param url URL of the page
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
   @Override
   public void forceRefetch(String url, WebPage page, boolean asap) {
@@ -196,10 +214,10 @@
     page.setRetriesSinceFetch(0);
     // TODO: row.setSignature(null) ??
     page.setModifiedTime(0L);
-    if (asap) page.setFetchTime(System.currentTimeMillis());
+    if (asap)
+      page.setFetchTime(System.currentTimeMillis());
   }
 
-
   public Set<WebPage.Field> getFields() {
     return FIELDS;
   }
Index: src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(working copy)
@@ -30,11 +30,12 @@
  * If SYNC_DELTA property is true, then:
  * <ul>
  * <li>calculate a <code>delta = fetchTime - modifiedTime</code></li>
- * <li>try to synchronize with the time of change, by shifting the next fetchTime
- * by a fraction of the difference between the last modification time and the last
- * fetch time. I.e. the next fetch time will be set to
+ * <li>try to synchronize with the time of change, by shifting the next
+ * fetchTime by a fraction of the difference between the last modification time
+ * and the last fetch time. I.e. the next fetch time will be set to
  * <code>fetchTime + fetchInterval - delta * SYNC_DELTA_RATE</code></li>
- * <li>if the adjusted fetch interval is bigger than the delta, then <code>fetchInterval = delta</code>.</li>
+ * <li>if the adjusted fetch interval is bigger than the delta, then
+ * <code>fetchInterval = delta</code>.</li>
  * </ul>
  * </li>
  * <li>the minimum value of fetchInterval may not be smaller than MIN_INTERVAL
@@ -42,10 +43,13 @@
  * <li>the maximum value of fetchInterval may not be bigger than MAX_INTERVAL
  * (default is 365 days).</li>
  * </ul>
- * <p>NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize the algorithm,
- * so that the fetch interval either increases or decreases infinitely, with little
- * relevance to the page changes. Please use {@link #main(String[])} method to
- * test the values before applying them in a production system.</p>
+ * <p>
+ * NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize
+ * the algorithm, so that the fetch interval either increases or decreases
+ * infinitely, with little relevance to the page changes. Please use
+ * {@link #main(String[])} method to test the values before applying them in a
+ * production system.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
@@ -58,51 +62,57 @@
   private int MAX_INTERVAL;
 
   private int MIN_INTERVAL;
-  
+
   private boolean SYNC_DELTA;
 
   private double SYNC_DELTA_RATE;
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     INC_RATE = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f);
     DEC_RATE = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f);
     MIN_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.min_interval", 60);
-    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval", SECONDS_PER_DAY * 365 ); // 1 year
+    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval",
+        SECONDS_PER_DAY * 365); // 1 year
     SYNC_DELTA = conf.getBoolean("db.fetch.schedule.adaptive.sync_delta", true);
-    SYNC_DELTA_RATE = conf.getFloat("db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
+    SYNC_DELTA_RATE = conf.getFloat(
+        "db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
   }
 
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     long refTime = fetchTime;
-    if (modifiedTime <= 0) modifiedTime = fetchTime;
+    if (modifiedTime <= 0)
+      modifiedTime = fetchTime;
     int interval = page.getFetchInterval();
     switch (state) {
-      case FetchSchedule.STATUS_MODIFIED:
-        interval *= (1.0f - DEC_RATE);
-        break;
-      case FetchSchedule.STATUS_NOTMODIFIED:
-        interval *= (1.0f + INC_RATE);
-        break;
-      case FetchSchedule.STATUS_UNKNOWN:
-        break;
+    case FetchSchedule.STATUS_MODIFIED:
+      interval *= (1.0f - DEC_RATE);
+      break;
+    case FetchSchedule.STATUS_NOTMODIFIED:
+      interval *= (1.0f + INC_RATE);
+      break;
+    case FetchSchedule.STATUS_UNKNOWN:
+      break;
     }
     if (SYNC_DELTA) {
       // try to synchronize with the time of change
       // TODO: different from normal class (is delta in seconds)?
-      int delta = (int) ((fetchTime - modifiedTime) / 1000L) ;
-      if (delta > interval) interval = delta;
+      int delta = (int) ((fetchTime - modifiedTime) / 1000L);
+      if (delta > interval)
+        interval = delta;
       refTime = fetchTime - Math.round(delta * SYNC_DELTA_RATE);
     }
-    if (interval < MIN_INTERVAL) interval = MIN_INTERVAL;
-    if (interval > MAX_INTERVAL) interval = MAX_INTERVAL;
-   
+    if (interval < MIN_INTERVAL)
+      interval = MIN_INTERVAL;
+    if (interval > MAX_INTERVAL)
+      interval = MAX_INTERVAL;
+
     page.setFetchInterval(interval);
     page.setFetchTime(refTime + interval * 1000L);
     page.setModifiedTime(modifiedTime);
@@ -109,5 +119,4 @@
     page.setPrevModifiedTime(prevModifiedTime);
   }
 
-
 }
Index: src/java/org/apache/nutch/crawl/CrawlStatus.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/CrawlStatus.java	(working copy)
@@ -21,22 +21,22 @@
 
 public class CrawlStatus {
   /** Page was not fetched yet. */
-  public static final byte STATUS_UNFETCHED      = 0x01;
+  public static final byte STATUS_UNFETCHED = 0x01;
   /** Page was successfully fetched. */
-  public static final byte STATUS_FETCHED        = 0x02;
+  public static final byte STATUS_FETCHED = 0x02;
   /** Page no longer exists. */
-  public static final byte STATUS_GONE           = 0x03;
+  public static final byte STATUS_GONE = 0x03;
   /** Page temporarily redirects to other page. */
-  public static final byte STATUS_REDIR_TEMP     = 0x04;
+  public static final byte STATUS_REDIR_TEMP = 0x04;
   /** Page permanently redirects to other page. */
-  public static final byte STATUS_REDIR_PERM     = 0x05;
+  public static final byte STATUS_REDIR_PERM = 0x05;
   /** Fetching unsuccessful, needs to be retried (transient errors). */
-  public static final byte STATUS_RETRY          = 0x22;
+  public static final byte STATUS_RETRY = 0x22;
   /** Fetching successful - page is not modified. */
-  public static final byte STATUS_NOTMODIFIED    = 0x26;
-  
+  public static final byte STATUS_NOTMODIFIED = 0x26;
+
   private static final Map<Byte, String> NAMES = new HashMap<Byte, String>();
-  
+
   static {
     NAMES.put(STATUS_UNFETCHED, "status_unfetched");
     NAMES.put(STATUS_FETCHED, "status_fetched");
@@ -46,9 +46,9 @@
     NAMES.put(STATUS_RETRY, "status_retry");
     NAMES.put(STATUS_NOTMODIFIED, "status_notmodified");
   }
-  
+
   public static String getName(byte status) {
     return NAMES.get(status);
   }
- 
+
 }
Index: src/java/org/apache/nutch/crawl/DbUpdateMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdateMapper.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DbUpdateMapper.java	(working copy)
@@ -37,8 +37,8 @@
 import org.apache.nutch.util.WebPageWritable;
 import org.apache.gora.mapreduce.GoraMapper;
 
-public class DbUpdateMapper
-extends GoraMapper<String, WebPage, UrlWithScore, NutchWritable> {
+public class DbUpdateMapper extends
+    GoraMapper<String, WebPage, UrlWithScore, NutchWritable> {
   public static final Logger LOG = DbUpdaterJob.LOG;
 
   private ScoringFilters scoringFilters;
@@ -46,8 +46,8 @@
   private final List<ScoreDatum> scoreData = new ArrayList<ScoreDatum>();
 
   private Utf8 batchId;
-  
-  //reuse writables
+
+  // reuse writables
   private UrlWithScore urlWithScore = new UrlWithScore();
   private NutchWritable nutchWritable = new NutchWritable();
   private WebPageWritable pageWritable;
@@ -54,14 +54,15 @@
 
   @Override
   public void map(String key, WebPage page, Context context)
-  throws IOException, InterruptedException {
-   if(Mark.GENERATE_MARK.checkMark(page) == null) {
+      throws IOException, InterruptedException {
+    if (Mark.GENERATE_MARK.checkMark(page) == null) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; not generated yet");
+        LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+            + "; not generated yet");
       }
       return;
     }
-  
+
     String url = TableUtil.unreverseUrl(key);
 
     scoreData.clear();
@@ -68,20 +69,22 @@
     Map<CharSequence, CharSequence> outlinks = page.getOutlinks();
     if (outlinks != null) {
       for (Entry<CharSequence, CharSequence> e : outlinks.entrySet()) {
-                int depth=Integer.MAX_VALUE;
+        int depth = Integer.MAX_VALUE;
         CharSequence depthUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
-        if (depthUtf8 != null) depth=Integer.parseInt(depthUtf8.toString());
-        scoreData.add(new ScoreDatum(0.0f, e.getKey().toString(), 
-            e.getValue().toString(), depth));
+        if (depthUtf8 != null)
+          depth = Integer.parseInt(depthUtf8.toString());
+        scoreData.add(new ScoreDatum(0.0f, e.getKey().toString(), e.getValue()
+            .toString(), depth));
       }
     }
 
     // TODO: Outlink filtering (i.e. "only keep the first n outlinks")
     try {
-      scoringFilters.distributeScoreToOutlinks(url, page, scoreData, (outlinks == null ? 0 : outlinks.size()));
+      scoringFilters.distributeScoreToOutlinks(url, page, scoreData,
+          (outlinks == null ? 0 : outlinks.size()));
     } catch (ScoringFilterException e) {
-      LOG.warn("Distributing score failed for URL: " + key +
-          " exception:" + StringUtils.stringifyException(e));
+      LOG.warn("Distributing score failed for URL: " + key + " exception:"
+          + StringUtils.stringifyException(e));
     }
 
     urlWithScore.setUrl(key);
@@ -104,7 +107,8 @@
   public void setup(Context context) {
     scoringFilters = new ScoringFilters(context.getConfiguration());
     pageWritable = new WebPageWritable(context.getConfiguration(), null);
-    batchId = new Utf8(context.getConfiguration().get(Nutch.BATCH_NAME_KEY,Nutch.ALL_BATCH_ID_STR));
+    batchId = new Utf8(context.getConfiguration().get(Nutch.BATCH_NAME_KEY,
+        Nutch.ALL_BATCH_ID_STR));
   }
 
 }
Index: src/java/org/apache/nutch/crawl/DbUpdateReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdateReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DbUpdateReducer.java	(working copy)
@@ -37,11 +37,11 @@
 import org.apache.nutch.util.WebPageWritable;
 import org.slf4j.Logger;
 
-public class DbUpdateReducer
-extends GoraReducer<UrlWithScore, NutchWritable, String, WebPage> {
+public class DbUpdateReducer extends
+    GoraReducer<UrlWithScore, NutchWritable, String, WebPage> {
 
-  public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";	
-	
+  public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";
+
   public static final Logger LOG = DbUpdaterJob.LOG;
 
   private int retryMax;
@@ -53,11 +53,12 @@
   private int maxLinks;
 
   @Override
-  protected void setup(Context context) throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+      InterruptedException {
     Configuration conf = context.getConfiguration();
     retryMax = conf.getInt("db.fetch.retry.max", 3);
     additionsAllowed = conf.getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
     schedule = FetchScheduleFactory.getFetchSchedule(conf);
     scoringFilters = new ScoringFilters(conf);
     maxLinks = conf.getInt("db.update.max.inlinks", 10000);
@@ -70,7 +71,7 @@
 
     WebPage page = null;
     inlinkedScoreData.clear();
-    
+
     for (NutchWritable nutchWritable : values) {
       Writable val = nutchWritable.get();
       if (val instanceof WebPageWritable) {
@@ -108,10 +109,10 @@
     } else {
       byte status = page.getStatus().byteValue();
       switch (status) {
-      case CrawlStatus.STATUS_FETCHED:         // succesful fetch
-      case CrawlStatus.STATUS_REDIR_TEMP:      // successful fetch, redirected
+      case CrawlStatus.STATUS_FETCHED: // successful fetch
+      case CrawlStatus.STATUS_REDIR_TEMP: // successful fetch, redirected
       case CrawlStatus.STATUS_REDIR_PERM:
-      case CrawlStatus.STATUS_NOTMODIFIED:     // successful fetch, notmodified
+      case CrawlStatus.STATUS_NOTMODIFIED: // successful fetch, notmodified
         int modified = FetchSchedule.STATUS_UNKNOWN;
         if (status == CrawlStatus.STATUS_NOTMODIFIED) {
           modified = FetchSchedule.STATUS_NOTMODIFIED;
@@ -129,8 +130,9 @@
         long prevFetchTime = page.getPrevFetchTime();
         long modifiedTime = page.getModifiedTime();
         long prevModifiedTime = page.getPrevModifiedTime();
-        CharSequence lastModified = page.getHeaders().get(new Utf8("Last-Modified"));
-        if ( lastModified != null ){
+        CharSequence lastModified = page.getHeaders().get(
+            new Utf8("Last-Modified"));
+        if (lastModified != null) {
           try {
             modifiedTime = HttpDateFormat.toLong(lastModified.toString());
             prevModifiedTime = page.getModifiedTime();
@@ -143,15 +145,17 @@
           schedule.forceRefetch(url, page, false);
         break;
       case CrawlStatus.STATUS_RETRY:
-        schedule.setPageRetrySchedule(url, page, 0L, page.getPrevModifiedTime(), page.getFetchTime());
+        schedule.setPageRetrySchedule(url, page, 0L,
+            page.getPrevModifiedTime(), page.getFetchTime());
         if (page.getRetriesSinceFetch() < retryMax) {
-          page.setStatus((int)CrawlStatus.STATUS_UNFETCHED);
+          page.setStatus((int) CrawlStatus.STATUS_UNFETCHED);
         } else {
-          page.setStatus((int)CrawlStatus.STATUS_GONE);
+          page.setStatus((int) CrawlStatus.STATUS_GONE);
         }
         break;
       case CrawlStatus.STATUS_GONE:
-        schedule.setPageGoneSchedule(url, page, 0L, page.getPrevModifiedTime(), page.getFetchTime());
+        schedule.setPageGoneSchedule(url, page, 0L, page.getPrevModifiedTime(),
+            page.getFetchTime());
         break;
       }
     }
@@ -159,27 +163,31 @@
     if (page.getInlinks() != null) {
       page.getInlinks().clear();
     }
-    
+
     // Distance calculation.
     // Retrieve smallest distance from all inlinks distances
     // Calculate new distance for current page: smallest inlink distance plus 1.
-    // If the new distance is smaller than old one (or if old did not exist yet),
+    // If the new distance is smaller than old one (or if old did not exist
+    // yet),
     // write it to the page.
-    int smallestDist=Integer.MAX_VALUE;
+    int smallestDist = Integer.MAX_VALUE;
     for (ScoreDatum inlink : inlinkedScoreData) {
       int inlinkDist = inlink.getDistance();
       if (inlinkDist < smallestDist) {
-        smallestDist=inlinkDist;
+        smallestDist = inlinkDist;
       }
-      page.getInlinks().put(new Utf8(inlink.getUrl()), new Utf8(inlink.getAnchor()));
+      page.getInlinks().put(new Utf8(inlink.getUrl()),
+          new Utf8(inlink.getAnchor()));
     }
     if (smallestDist != Integer.MAX_VALUE) {
-      int oldDistance=Integer.MAX_VALUE;
+      int oldDistance = Integer.MAX_VALUE;
       CharSequence oldDistUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
-      if (oldDistUtf8 != null)oldDistance=Integer.parseInt(oldDistUtf8.toString());
-      int newDistance = smallestDist+1;
+      if (oldDistUtf8 != null)
+        oldDistance = Integer.parseInt(oldDistUtf8.toString());
+      int newDistance = smallestDist + 1;
       if (newDistance < oldDistance) {
-        page.getMarkers().put(DbUpdaterJob.DISTANCE, new Utf8(Integer.toString(newDistance)));
+        page.getMarkers().put(DbUpdaterJob.DISTANCE,
+            new Utf8(Integer.toString(newDistance)));
       }
     }
 
@@ -186,8 +194,8 @@
     try {
       scoringFilters.updateScore(url, page, inlinkedScoreData);
     } catch (ScoringFilterException e) {
-      LOG.warn("Scoring filters failed with exception " +
-                StringUtils.stringifyException(e));
+      LOG.warn("Scoring filters failed with exception "
+          + StringUtils.stringifyException(e));
     }
 
     // clear markers
Index: src/java/org/apache/nutch/crawl/DbUpdaterJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdaterJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DbUpdaterJob.java	(working copy)
@@ -48,10 +48,8 @@
 
   public static final Logger LOG = LoggerFactory.getLogger(DbUpdaterJob.class);
 
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-  private static final Collection<WebPage.Field> FIELDS =
-    new HashSet<WebPage.Field>();
-
   static {
     FIELDS.add(WebPage.Field.OUTLINKS);
     FIELDS.add(WebPage.Field.INLINKS);
@@ -78,35 +76,35 @@
   public DbUpdaterJob(Configuration conf) {
     setConf(conf);
   }
-    
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String crawlId = (String)args.get(Nutch.ARG_CRAWL);
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
+
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String crawlId = (String) args.get(Nutch.ARG_CRAWL);
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
     numJobs = 1;
     currentJobNum = 0;
-    
+
     if (batchId == null) {
       batchId = Nutch.ALL_BATCH_ID_STR;
     }
     getConf().set(Nutch.BATCH_NAME_KEY, batchId);
-    //job.setBoolean(ALL, updateAll);
+    // job.setBoolean(ALL, updateAll);
     ScoringFilters scoringFilters = new ScoringFilters(getConf());
     HashSet<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
     fields.addAll(scoringFilters.getFields());
-    
+
     currentJob = new NutchJob(getConf(), "update-table");
     if (crawlId != null) {
       currentJob.getConfiguration().set(Nutch.CRAWL_ID_KEY, crawlId);
     }
-    
+
     // Partition by {url}, sort by {url,score} and group by {url}.
     // This ensures that the inlinks are sorted by score when they enter
     // the reducer.
-    
+
     currentJob.setPartitionerClass(UrlOnlyPartitioner.class);
     currentJob.setSortComparatorClass(UrlScoreComparator.class);
     currentJob.setGroupingComparatorClass(UrlOnlyComparator.class);
-    
+
     MapFieldValueFilter<String, WebPage> batchIdFilter = getBatchIdFilter(batchId);
     StorageUtils.initMapperJob(currentJob, fields, UrlWithScore.class,
         NutchWritable.class, DbUpdateMapper.class, batchIdFilter);
@@ -129,22 +127,22 @@
     return filter;
   }
 
-  private int updateTable(String crawlId,String batchId) throws Exception {
-    
+  private int updateTable(String crawlId, String batchId) throws Exception {
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("DbUpdaterJob: starting at " + sdf.format(start));
-    
+
     if (batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
       LOG.info("DbUpdaterJob: updatinging all");
     } else {
       LOG.info("DbUpdaterJob: batchId: " + batchId);
     }
-    run(ToolUtil.toArgMap(Nutch.ARG_CRAWL, crawlId,
-            Nutch.ARG_BATCH, batchId));
-    
+    run(ToolUtil.toArgMap(Nutch.ARG_CRAWL, crawlId, Nutch.ARG_BATCH, batchId));
+
     long finish = System.currentTimeMillis();
-    LOG.info("DbUpdaterJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+    LOG.info("DbUpdaterJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
     return 0;
   }
 
@@ -152,9 +150,9 @@
     String crawlId = null;
     String batchId;
 
-    String usage = "Usage: DbUpdaterJob (<batchId> | -all) [-crawlId <id>] " +
-            "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n" +
-            "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n";
+    String usage = "Usage: DbUpdaterJob (<batchId> | -all) [-crawlId <id>] "
+        + "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n"
+        + "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n";
 
     if (args.length == 0) {
       System.err.println(usage);
@@ -171,14 +169,15 @@
       if ("-crawlId".equals(args[i])) {
         getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
       } else {
-        throw new IllegalArgumentException("arg " +args[i]+ " not recognized");
+        throw new IllegalArgumentException("arg " + args[i] + " not recognized");
       }
     }
-    return updateTable(crawlId,batchId);
+    return updateTable(crawlId, batchId);
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new DbUpdaterJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new DbUpdaterJob(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(working copy)
@@ -20,19 +20,18 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * This class implements the default re-fetch schedule. That is, no matter
- * if the page was changed or not, the <code>fetchInterval</code> remains
+ * This class implements the default re-fetch schedule. That is, no matter if
+ * the page was changed or not, the <code>fetchInterval</code> remains
  * unchanged, and the updated page fetchTime will always be set to
  * <code>fetchTime + fetchInterval * 1000</code>.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class DefaultFetchSchedule extends AbstractFetchSchedule {
 
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     page.setFetchTime(fetchTime + page.getFetchInterval() * 1000L);
Index: src/java/org/apache/nutch/crawl/FetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchSchedule.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/FetchSchedule.java	(working copy)
@@ -24,29 +24,30 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * This interface defines the contract for implementations that manipulate
- * fetch times and re-fetch intervals.
- *
+ * This interface defines the contract for implementations that manipulate fetch
+ * times and re-fetch intervals.
+ * 
  * @author Andrzej Bialecki
  */
 public interface FetchSchedule extends Configurable {
 
   /** It is unknown whether page was changed since our last visit. */
-  public static final int STATUS_UNKNOWN       = 0;
+  public static final int STATUS_UNKNOWN = 0;
   /** Page is known to have been modified since our last visit. */
-  public static final int STATUS_MODIFIED      = 1;
+  public static final int STATUS_MODIFIED = 1;
   /** Page is known to remain unmodified since our last visit. */
-  public static final int STATUS_NOTMODIFIED    = 2;
+  public static final int STATUS_NOTMODIFIED = 2;
 
   public static final int SECONDS_PER_DAY = 3600 * 24;
 
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation set the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
-   *
-   * @param url URL of the page.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation set the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
+   * 
+   * @param url
+   *          URL of the page.
    * @param page
    */
   public void initializeSchedule(String url, WebPage page);
@@ -53,50 +54,67 @@
 
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page.
-   * Implementations may use supplied arguments to support different re-fetching
-   * schedules.
-   *
-   * @param url url of the page
+   * successfully fetched page. Implementations may use supplied arguments to
+   * support different re-fetching schedules.
+   * 
+   * @param url
+   *          url of the page
    * @param page
-   * @param prevFetchTime previous value of fetch time, or -1 if not available
-   * @param prevModifiedTime previous value of modifiedTime, or -1 if not available
-   * @param fetchTime the latest time, when the page was recently re-fetched. Most FetchSchedule
-   * implementations should update the value in {@param datum} to something greater than this value.
-   * @param modifiedTime last time the content was modified. This information comes from
-   * the protocol implementations, or is set to < 0 if not available. Most FetchSchedule
-   * implementations should update the value in {@param datum} to this value.
-   * @param state if {@link #STATUS_MODIFIED}, then the content is considered to be "changed" before the
-   * <code>fetchTime</code>, if {@link #STATUS_NOTMODIFIED} then the content is known to be unchanged.
-   * This information may be obtained by comparing page signatures before and after fetching. If this
-   * is set to {@link #STATUS_UNKNOWN}, then it is unknown whether the page was changed; implementations
-   * are free to follow a sensible default behavior.
+   * @param prevFetchTime
+   *          previous value of fetch time, or -1 if not available
+   * @param prevModifiedTime
+   *          previous value of modifiedTime, or -1 if not available
+   * @param fetchTime
+   *          the latest time, when the page was recently
+   *          re-fetched. Most FetchSchedule implementations
+   *          should update the value in {@code page} to
+   *          something greater than this value.
+   * @param modifiedTime
+   *          last time the content was modified. This information
+   *          comes from the protocol implementations, or is set
+   *          to &lt; 0 if not available. Most FetchSchedule
+   *          implementations should update the value in
+   *          {@code page} to this value.
+   * @param state
+   *          if {@link #STATUS_MODIFIED}, then the content is considered to be
+   *          "changed" before the <code>fetchTime</code>, if
+   *          {@link #STATUS_NOTMODIFIED} then the content is known to be
+   *          unchanged. This information may be obtained by comparing page
+   *          signatures before and after fetching. If this is set to
+   *          {@link #STATUS_UNKNOWN}, then it is unknown whether the page was
+   *          changed; implementations are free to follow a sensible default
+   *          behavior.
    */
-  public void setFetchSchedule(String url, WebPage page,
-      long prevFetchTime, long prevModifiedTime,
-      long fetchTime, long modifiedTime, int state);
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state);
 
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
+   * 
+   * @param url
+   *          URL of the page
    * @param page
    */
-  public void setPageGoneSchedule(String url, WebPage page,
-      long prevFetchTime, long prevModifiedTime, long fetchTime);
+  public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime);
 
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases the
-   * retry counter.Set
-   * @param url URL of the page
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    */
   public void setPageRetrySchedule(String url, WebPage page,
       long prevFetchTime, long prevModifiedTime, long fetchTime);
@@ -103,36 +121,45 @@
 
   /**
    * Calculates last fetch time of the given CrawlDatum.
+   * 
    * @return the date as a long.
    */
   public long calculateLastFetchTime(WebPage page);
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param row url's row
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * This method provides information whether the page is suitable for
+   * selection in the current fetchlist. NOTE: a true return value does not
+   * guarantee that the page will be fetched, it just allows it to be
+   * included in the further selection process based on scores. The default
+   * implementation checks <code>fetchTime</code>: if it is higher than
+   * <code>curTime</code> it returns false, and true otherwise. It will
+   * also check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
+   * @param row
+   *          url's row
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   public boolean shouldFetch(String url, WebPage page, long curTime);
 
   /**
-   * This method resets fetchTime, fetchInterval, modifiedTime and
-   * page signature, so that it forces refetching.
-   * @param url URL of the page
+   * This method resets fetchTime, fetchInterval, modifiedTime and page
+   * signature, so that it forces refetching.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
   public void forceRefetch(String url, WebPage row, boolean asap);
 
Index: src/java/org/apache/nutch/crawl/FetchScheduleFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(working copy)
@@ -25,20 +25,23 @@
 /** Creates and caches a {@link FetchSchedule} implementation. */
 public class FetchScheduleFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(FetchScheduleFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(FetchScheduleFactory.class);
 
-  private FetchScheduleFactory() {}                   // no public ctor
+  private FetchScheduleFactory() {
+  } // no public ctor
 
   /** Return the FetchSchedule implementation. */
   public static FetchSchedule getFetchSchedule(Configuration conf) {
-    String clazz = conf.get("db.fetch.schedule.class", DefaultFetchSchedule.class.getName());
+    String clazz = conf.get("db.fetch.schedule.class",
+        DefaultFetchSchedule.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    FetchSchedule impl = (FetchSchedule)objectCache.getObject(clazz);
+    FetchSchedule impl = (FetchSchedule) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using FetchSchedule impl: " + clazz);
         Class<?> implClass = Class.forName(clazz);
-        impl = (FetchSchedule)implClass.newInstance();
+        impl = (FetchSchedule) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/crawl/GeneratorJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/GeneratorJob.java	(working copy)
@@ -74,13 +74,14 @@
 
   public static final Logger LOG = LoggerFactory.getLogger(GeneratorJob.class);
 
-  public static class SelectorEntry
-  implements WritableComparable<SelectorEntry> {
+  public static class SelectorEntry implements
+      WritableComparable<SelectorEntry> {
 
     String url;
     float score;
 
-    public SelectorEntry() {  }
+    public SelectorEntry() {
+    }
 
     public SelectorEntry(String url, float score) {
       this.url = url;
@@ -109,7 +110,7 @@
     public int hashCode() {
       final int prime = 31;
       int result = 1;
-      result = prime * result +  url.hashCode();
+      result = prime * result + url.hashCode();
       result = prime * result + Float.floatToIntBits(score);
       return result;
     }
@@ -126,13 +127,13 @@
 
     /**
      * Sets url with score on this writable. Allows for writable reusing.
-     *
+     * 
      * @param url
      * @param score
      */
     public void set(String url, float score) {
-      this.url=url;
-      this.score=score;
+      this.url = url;
+      this.score = score;
     }
   }
 
@@ -144,7 +145,7 @@
 
   static {
     WritableComparator.define(SelectorEntry.class,
-                              new SelectorEntryComparator());
+        new SelectorEntryComparator());
   }
 
   public GeneratorJob() {
@@ -157,24 +158,25 @@
 
   public Collection<WebPage.Field> getFields(Job job) {
     Collection<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
-    fields.addAll(FetchScheduleFactory.getFetchSchedule(job.getConfiguration()).getFields());
+    fields.addAll(FetchScheduleFactory.getFetchSchedule(job.getConfiguration())
+        .getFields());
     return fields;
   }
 
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
     if (batchId != null) {
       getConf().set(GeneratorJob.BATCH_ID, batchId);
     }
-    
+
     // map to inverted subset due for fetch, sort by score
-    Long topN = (Long)args.get(Nutch.ARG_TOPN);
-    Long curTime = (Long)args.get(Nutch.ARG_CURTIME);
+    Long topN = (Long) args.get(Nutch.ARG_TOPN);
+    Long curTime = (Long) args.get(Nutch.ARG_CURTIME);
     if (curTime == null) {
       curTime = System.currentTimeMillis();
     }
-    Boolean filter = (Boolean)args.get(Nutch.ARG_FILTER);
-    Boolean norm = (Boolean)args.get(Nutch.ARG_NORMALIZE);
+    Boolean filter = (Boolean) args.get(Nutch.ARG_FILTER);
+    Boolean norm = (Boolean) args.get(Nutch.ARG_NORMALIZE);
     // map to inverted subset due for fetch, sort by score
     getConf().setLong(GENERATOR_CUR_TIME, curTime);
     if (topN != null)
@@ -185,15 +187,20 @@
     getConf().setLong(Nutch.GENERATE_TIME_KEY, System.currentTimeMillis());
     if (norm != null)
       getConf().setBoolean(GENERATOR_NORMALISE, norm);
-    String mode = getConf().get(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST);
+    String mode = getConf().get(GENERATOR_COUNT_MODE,
+        GENERATOR_COUNT_VALUE_HOST);
     if (GENERATOR_COUNT_VALUE_HOST.equalsIgnoreCase(mode)) {
-      getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_HOST);
     } else if (GENERATOR_COUNT_VALUE_DOMAIN.equalsIgnoreCase(mode)) {
-        getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_DOMAIN);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_DOMAIN);
     } else {
-      LOG.warn("Unknown generator.max.count mode '" + mode + "', using mode=" + GENERATOR_COUNT_VALUE_HOST);
+      LOG.warn("Unknown generator.max.count mode '" + mode + "', using mode="
+          + GENERATOR_COUNT_VALUE_HOST);
       getConf().set(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST);
-      getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_HOST);
     }
     numJobs = 1;
     currentJobNum = 0;
@@ -200,7 +207,8 @@
     currentJob = new NutchJob(getConf(), "generate: " + getConf().get(BATCH_ID));
     Collection<WebPage.Field> fields = getFields(currentJob);
     StorageUtils.initMapperJob(currentJob, fields, SelectorEntry.class,
-        WebPage.class, GeneratorMapper.class, SelectorEntryPartitioner.class, true);
+        WebPage.class, GeneratorMapper.class, SelectorEntryPartitioner.class,
+        true);
     StorageUtils.initReducerJob(currentJob, GeneratorReducer.class);
     currentJob.waitForCompletion(true);
     ToolUtil.recordJobStatus(null, currentJob, results);
@@ -213,6 +221,7 @@
 
   /**
    * Mark URLs ready for fetching.
+   * 
    * @throws ClassNotFoundException
    * @throws InterruptedException
    * */
@@ -229,16 +238,16 @@
     if (topN != Long.MAX_VALUE) {
       LOG.info("GeneratorJob: topN: " + topN);
     }
-    Map<String,Object> results = run(ToolUtil.toArgMap(
-        Nutch.ARG_TOPN, topN,
-        Nutch.ARG_CURTIME, curTime,
-        Nutch.ARG_FILTER, filter,
+    Map<String, Object> results = run(ToolUtil.toArgMap(Nutch.ARG_TOPN, topN,
+        Nutch.ARG_CURTIME, curTime, Nutch.ARG_FILTER, filter,
         Nutch.ARG_NORMALIZE, norm));
-    String batchId =  getConf().get(BATCH_ID);
+    String batchId = getConf().get(BATCH_ID);
     long finish = System.currentTimeMillis();
     long generateCount = (Long) results.get(GENERATE_COUNT);
-    LOG.info("GeneratorJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
-    LOG.info("GeneratorJob: generated batch id: " + batchId + " containing " + generateCount + " URLs");
+    LOG.info("GeneratorJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+    LOG.info("GeneratorJob: generated batch id: " + batchId + " containing "
+        + generateCount + " URLs");
     if (generateCount == 0) {
       return null;
     }
@@ -247,13 +256,20 @@
 
   public int run(String[] args) throws Exception {
     if (args.length <= 0) {
-      System.out.println("Usage: GeneratorJob [-topN N] [-crawlId id] [-noFilter] [-noNorm] [-adddays numDays]");
-      System.out.println("    -topN <N>      - number of top URLs to be selected, default is Long.MAX_VALUE ");
-      System.out.println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\");");
-      System.out.println("    -noFilter      - do not activate the filter plugin to filter the url, default is true ");
-      System.out.println("    -noNorm        - do not activate the normalizer plugin to normalize the url, default is true ");
-      System.out.println("    -adddays       - Adds numDays to the current time to facilitate crawling urls already");
-      System.out.println("                     fetched sooner then db.fetch.interval.default. Default value is 0.");
+      System.out
+          .println("Usage: GeneratorJob [-topN N] [-crawlId id] [-noFilter] [-noNorm] [-adddays numDays]");
+      System.out
+          .println("    -topN <N>      - number of top URLs to be selected, default is Long.MAX_VALUE ");
+      System.out
+          .println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\");");
+      System.out
+          .println("    -noFilter      - do not activate the filter plugin to filter the url, default is true ");
+      System.out
+          .println("    -noNorm        - do not activate the normalizer plugin to normalize the url, default is true ");
+      System.out
+          .println("    -adddays       - Adds numDays to the current time to facilitate crawling urls already");
+      System.out
+          .println("                     fetched sooner then db.fetch.interval.default. Default value is 0.");
       System.out.println("    -batchId       - the batch id ");
       System.out.println("----------------------");
       System.out.println("Please set the params.");
@@ -280,8 +296,8 @@
       } else if ("-adddays".equals(args[i])) {
         long numDays = Integer.parseInt(args[++i]);
         curTime += numDays * 1000L * 60 * 60 * 24;
-      }else if ("-batchId".equals(args[i]))
-        getConf().set(BATCH_ID,args[++i]);
+      } else if ("-batchId".equals(args[i]))
+        getConf().set(BATCH_ID, args[++i]);
       else {
         System.err.println("Unrecognized arg " + args[i]);
         return -1;
@@ -297,7 +313,8 @@
   }
 
   public static void main(String args[]) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new GeneratorJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new GeneratorJob(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/crawl/GeneratorMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorMapper.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/GeneratorMapper.java	(working copy)
@@ -34,8 +34,8 @@
 import java.nio.ByteBuffer;
 import java.util.HashMap;
 
-public class GeneratorMapper
-extends GoraMapper<String, WebPage, SelectorEntry, WebPage> {
+public class GeneratorMapper extends
+    GoraMapper<String, WebPage, SelectorEntry, WebPage> {
 
   private URLFilters filters;
   private URLNormalizers normalizers;
@@ -48,8 +48,8 @@
   private int maxDistance;
 
   @Override
-  public void map(String reversedUrl, WebPage page,
-      Context context) throws IOException, InterruptedException {
+  public void map(String reversedUrl, WebPage page, Context context)
+      throws IOException, InterruptedException {
     String url = TableUtil.unreverseUrl(reversedUrl);
 
     if (Mark.GENERATE_MARK.checkMark(page) != null) {
@@ -57,11 +57,11 @@
       return;
     }
 
-    //filter on distance
+    // filter on distance
     if (maxDistance > -1) {
       CharSequence distanceUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
       if (distanceUtf8 != null) {
-        int distance=Integer.parseInt(distanceUtf8.toString());
+        int distance = Integer.parseInt(distanceUtf8.toString());
         if (distance > maxDistance) {
           return;
         }
@@ -71,15 +71,18 @@
     // If filtering is on don't generate URLs that don't pass URLFilters
     try {
       if (normalise) {
-        url = normalizers.normalize(url, URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+        url = normalizers.normalize(url,
+            URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
       }
       if (filter && filters.filter(url) == null)
         return;
     } catch (URLFilterException e) {
-      GeneratorJob.LOG.warn("Couldn't filter url: {} ({})", url, e.getMessage());
+      GeneratorJob.LOG
+          .warn("Couldn't filter url: {} ({})", url, e.getMessage());
       return;
     } catch (MalformedURLException e) {
-      GeneratorJob.LOG.warn("Couldn't filter url: {} ({})", url, e.getMessage());
+      GeneratorJob.LOG
+          .warn("Couldn't filter url: {} ({})", url, e.getMessage());
       return;
     }
 
@@ -86,8 +89,8 @@
     // check fetch schedule
     if (!schedule.shouldFetch(url, page, curTime)) {
       if (GeneratorJob.LOG.isDebugEnabled()) {
-        GeneratorJob.LOG.debug("-shouldFetch rejected '" + url + "', fetchTime=" +
-            page.getFetchTime() + ", curTime=" + curTime);
+        GeneratorJob.LOG.debug("-shouldFetch rejected '" + url
+            + "', fetchTime=" + page.getFetchTime() + ", curTime=" + curTime);
       }
       return;
     }
@@ -95,7 +98,7 @@
     try {
       score = scoringFilters.generatorSortValue(url, page, score);
     } catch (ScoringFilterException e) {
-      //ignore
+      // ignore
     }
     entry.set(url, score);
     context.write(entry, page);
@@ -110,10 +113,12 @@
       filters = new URLFilters(conf);
     }
     if (normalise) {
-      normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+      normalizers = new URLNormalizers(conf,
+          URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
     }
-    maxDistance=conf.getInt("generate.max.distance", -1);
-    curTime = conf.getLong(GeneratorJob.GENERATOR_CUR_TIME, System.currentTimeMillis());
+    maxDistance = conf.getInt("generate.max.distance", -1);
+    curTime = conf.getLong(GeneratorJob.GENERATOR_CUR_TIME,
+        System.currentTimeMillis());
     schedule = FetchScheduleFactory.getFetchSchedule(conf);
     scoringFilters = new ScoringFilters(conf);
   }
Index: src/java/org/apache/nutch/crawl/GeneratorReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/GeneratorReducer.java	(working copy)
@@ -34,14 +34,15 @@
 import org.apache.nutch.util.TableUtil;
 import org.apache.nutch.util.URLUtil;
 
-/** Reduce class for generate
- *
- * The #reduce() method write a random integer to all generated URLs. This random
- * number is then used by {@link FetcherMapper}.
- *
+/**
+ * Reduce class for generate
+ * 
+ * The #reduce() method write a random integer to all generated URLs. This
+ * random number is then used by {@link FetcherMapper}.
+ * 
  */
-public class GeneratorReducer
-extends GoraReducer<SelectorEntry, WebPage, String, WebPage> {
+public class GeneratorReducer extends
+    GoraReducer<SelectorEntry, WebPage, String, WebPage> {
 
   private long limit;
   private long maxCount;
@@ -81,7 +82,7 @@
       try {
         context.write(TableUtil.reverseUrl(key.url), page);
       } catch (MalformedURLException e) {
-    	context.getCounter("Generator", "MALFORMED_URL").increment(1);
+        context.getCounter("Generator", "MALFORMED_URL").increment(1);
         continue;
       }
       context.getCounter("Generator", "GENERATE_MARK").increment(1);
@@ -90,10 +91,11 @@
   }
 
   @Override
-  protected void setup(Context context)
-      throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+      InterruptedException {
     Configuration conf = context.getConfiguration();
-    long totalLimit = conf.getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
+    long totalLimit = conf
+        .getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
     if (totalLimit == Long.MAX_VALUE) {
       limit = Long.MAX_VALUE;
     } else {
@@ -101,8 +103,8 @@
     }
     maxCount = conf.getLong(GeneratorJob.GENERATOR_MAX_COUNT, -2);
     batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID));
-    String countMode =
-      conf.get(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    String countMode = conf.get(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     if (countMode.equals(GeneratorJob.GENERATOR_COUNT_VALUE_DOMAIN)) {
       byDomain = true;
     }
Index: src/java/org/apache/nutch/crawl/InjectorJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/InjectorJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/InjectorJob.java	(working copy)
@@ -47,14 +47,17 @@
 import java.text.SimpleDateFormat;
 import java.util.*;
 
-/** This class takes a flat file of URLs and adds them to the of pages to be
- * crawled.  Useful for bootstrapping the system.
- * The URL files contain one URL per line, optionally followed by custom metadata
- * separated by tabs with the metadata key separated from the corresponding value by '='. <br>
+/**
+ * This class takes a flat file of URLs and adds them to the list of pages to
+ * be crawled. Useful for bootstrapping the system. The URL files contain one
+ * URL per line, optionally followed by custom metadata separated by tabs with
+ * the metadata key separated from the corresponding value by '='. <br>
  * Note that some metadata keys are reserved : <br>
  * - <i>nutch.score</i> : allows to set a custom score for a specific URL <br>
- * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a specific URL <br>
- * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000 \t userType=open_source
+ * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a
+ * specific URL <br>
+ * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000
+ * \t userType=open_source
  **/
 public class InjectorJob extends NutchTool implements Tool {
 
@@ -63,7 +66,7 @@
   private static final Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
   private static final Utf8 YES_STRING = new Utf8("y");
-  
+
   static {
     FIELDS.add(WebPage.Field.MARKERS);
     FIELDS.add(WebPage.Field.STATUS);
@@ -75,7 +78,7 @@
    * metadata key reserved for setting a custom fetchInterval for a specific URL
    */
   public static String nutchFetchIntervalMDName = "nutch.fetchInterval";
-  
+
   public static class UrlMapper extends
       Mapper<LongWritable, Text, String, WebPage> {
     private URLNormalizers urlNormalizers;
@@ -86,24 +89,25 @@
     private long curTime;
 
     @Override
-    protected void setup(Context context) throws IOException, InterruptedException {
+    protected void setup(Context context) throws IOException,
+        InterruptedException {
       urlNormalizers = new URLNormalizers(context.getConfiguration(),
-        URLNormalizers.SCOPE_INJECT);
+          URLNormalizers.SCOPE_INJECT);
       interval = context.getConfiguration().getInt("db.fetch.interval.default",
-        2592000);
+          2592000);
       filters = new URLFilters(context.getConfiguration());
       scfilters = new ScoringFilters(context.getConfiguration());
       scoreInjected = context.getConfiguration().getFloat("db.score.injected",
-        1.0f);
+          1.0f);
       curTime = context.getConfiguration().getLong("injector.current.time",
-        System.currentTimeMillis());
+          System.currentTimeMillis());
     }
 
     protected void map(LongWritable key, Text value, Context context)
         throws IOException, InterruptedException {
       String url = value.toString().trim(); // value is line of text
-      
-      if (url != null && ( url.length() == 0 || url.startsWith("#") ) ) {
+
+      if (url != null && (url.length() == 0 || url.startsWith("#"))) {
         /* Ignore line that start with # */
         return;
       }
@@ -149,41 +153,43 @@
       if (url == null) {
         context.getCounter("injector", "urls_filtered").increment(1);
         return;
-      } else {                                         // if it passes
-      String reversedUrl = TableUtil.reverseUrl(url);  // collect it
-      WebPage row = WebPage.newBuilder().build();
-      row.setFetchTime(curTime);
-      row.setFetchInterval(customInterval);
+      } else { // if it passes
+        String reversedUrl = TableUtil.reverseUrl(url); // collect it
+        WebPage row = WebPage.newBuilder().build();
+        row.setFetchTime(curTime);
+        row.setFetchInterval(customInterval);
 
-      // now add the metadata
-      Iterator<String> keysIter = metadata.keySet().iterator();
-      while (keysIter.hasNext()) {
-        String keymd = keysIter.next();
-        String valuemd = metadata.get(keymd);
-        row.getMetadata().put(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
-      }
+        // now add the metadata
+        Iterator<String> keysIter = metadata.keySet().iterator();
+        while (keysIter.hasNext()) {
+          String keymd = keysIter.next();
+          String valuemd = metadata.get(keymd);
+          row.getMetadata().put(new Utf8(keymd),
+              ByteBuffer.wrap(valuemd.getBytes()));
+        }
 
-      if (customScore != -1)
-        row.setScore(customScore);
-      else
-        row.setScore(scoreInjected);
+        if (customScore != -1)
+          row.setScore(customScore);
+        else
+          row.setScore(scoreInjected);
 
-      try {
-        scfilters.injectedScore(url, row);
-      } catch (ScoringFilterException e) {
-        if (LOG.isWarnEnabled()) {
-          LOG.warn("Cannot filter injected score for url " + url
-          + ", using default (" + e.getMessage() + ")");
+        try {
+          scfilters.injectedScore(url, row);
+        } catch (ScoringFilterException e) {
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Cannot filter injected score for url " + url
+                + ", using default (" + e.getMessage() + ")");
+          }
         }
+        context.getCounter("injector", "urls_injected").increment(1);
+        row.getMarkers()
+            .put(DbUpdaterJob.DISTANCE, new Utf8(String.valueOf(0)));
+        Mark.INJECT_MARK.putMark(row, YES_STRING);
+        context.write(reversedUrl, row);
       }
-      context.getCounter("injector", "urls_injected").increment(1);
-      row.getMarkers().put(DbUpdaterJob.DISTANCE, new Utf8(String.valueOf(0)));
-      Mark.INJECT_MARK.putMark(row, YES_STRING);
-      context.write(reversedUrl, row);
     }
-    }
   }
-  
+
   public InjectorJob() {
   }
 
@@ -191,12 +197,12 @@
     setConf(conf);
   }
 
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     getConf().setLong("injector.current.time", System.currentTimeMillis());
     Path input;
     Object path = args.get(Nutch.ARG_SEEDDIR);
     if (path instanceof Path) {
-      input = (Path)path;
+      input = (Path) path;
     } else {
       input = new Path(path.toString());
     }
@@ -208,26 +214,30 @@
     currentJob.setMapOutputKeyClass(String.class);
     currentJob.setMapOutputValueClass(WebPage.class);
     currentJob.setOutputFormatClass(GoraOutputFormat.class);
-    
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(),
-      String.class, WebPage.class);
+
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        currentJob.getConfiguration(), String.class, WebPage.class);
     GoraOutputFormat.setOutput(currentJob, store, true);
-    
+
     // NUTCH-1471 Make explicit which datastore class we use
-    Class<? extends DataStore<Object, Persistent>> dataStoreClass = 
-      StorageUtils.getDataStoreClass(currentJob.getConfiguration());
-    LOG.info("InjectorJob: Using " + dataStoreClass + " as the Gora storage class.");
-    
+    Class<? extends DataStore<Object, Persistent>> dataStoreClass = StorageUtils
+        .getDataStoreClass(currentJob.getConfiguration());
+    LOG.info("InjectorJob: Using " + dataStoreClass
+        + " as the Gora storage class.");
+
     currentJob.setReducerClass(Reducer.class);
     currentJob.setNumReduceTasks(0);
-    
+
     currentJob.waitForCompletion(true);
     ToolUtil.recordJobStatus(null, currentJob, results);
 
     // NUTCH-1370 Make explicit #URLs injected @runtime
-    long urlsInjected = currentJob.getCounters().findCounter("injector", "urls_injected").getValue();
-    long urlsFiltered = currentJob.getCounters().findCounter("injector", "urls_filtered").getValue();
-    LOG.info("InjectorJob: total number of urls rejected by filters: " + urlsFiltered);
+    long urlsInjected = currentJob.getCounters()
+        .findCounter("injector", "urls_injected").getValue();
+    long urlsFiltered = currentJob.getCounters()
+        .findCounter("injector", "urls_filtered").getValue();
+    LOG.info("InjectorJob: total number of urls rejected by filters: "
+        + urlsFiltered);
     LOG.info("InjectorJob: total number of urls injected after normalization and filtering: "
         + urlsInjected);
 
@@ -238,10 +248,11 @@
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("InjectorJob: starting at " + sdf.format(start));
-    LOG.info("InjectorJob: Injecting urlDir: " + urlDir); 
+    LOG.info("InjectorJob: Injecting urlDir: " + urlDir);
     run(ToolUtil.toArgMap(Nutch.ARG_SEEDDIR, urlDir));
     long end = System.currentTimeMillis();
-    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   @Override
@@ -252,7 +263,7 @@
     }
     for (int i = 1; i < args.length; i++) {
       if ("-crawlId".equals(args[i])) {
-        getConf().set(Nutch.CRAWL_ID_KEY, args[i+1]);
+        getConf().set(Nutch.CRAWL_ID_KEY, args[i + 1]);
         i++;
       } else {
         System.err.println("Unrecognized arg " + args[i]);
@@ -270,7 +281,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new InjectorJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new InjectorJob(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/crawl/MD5Signature.java
===================================================================
--- src/java/org/apache/nutch/crawl/MD5Signature.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/MD5Signature.java	(working copy)
@@ -26,10 +26,10 @@
 import java.util.HashSet;
 
 /**
- * Default implementation of a page signature. It calculates an MD5 hash
- * of the raw binary content of a page. In case there is no content, it
- * calculates a hash from the page's URL.
- *
+ * Default implementation of a page signature. It calculates an MD5 hash of the
+ * raw binary content of a page. In case there is no content, it calculates a
+ * hash from the page's URL.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class MD5Signature extends Signature {
@@ -52,8 +52,7 @@
         data = null;
         of = 0;
         cb = 0;
-      }
-      else {
+      } else {
         data = baseUrl.getBytes();
         of = 0;
         cb = baseUrl.length();
Index: src/java/org/apache/nutch/crawl/NutchWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/NutchWritable.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/NutchWritable.java	(working copy)
@@ -26,12 +26,12 @@
 
   static {
     CLASSES = (Class<? extends Writable>[]) new Class<?>[] {
-      org.apache.nutch.scoring.ScoreDatum.class,
-      org.apache.nutch.util.WebPageWritable.class
-    };
+        org.apache.nutch.scoring.ScoreDatum.class,
+        org.apache.nutch.util.WebPageWritable.class };
   }
 
-  public NutchWritable() { }
+  public NutchWritable() {
+  }
 
   public NutchWritable(Writable instance) {
     set(instance);
Index: src/java/org/apache/nutch/crawl/SignatureComparator.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureComparator.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/SignatureComparator.java	(working copy)
@@ -21,27 +21,38 @@
 
 public class SignatureComparator {
   public static int compare(byte[] data1, byte[] data2) {
-    if (data1 == null && data2 == null) return 0;
-    if (data1 == null) return -1;
-    if (data2 == null) return 1;
+    if (data1 == null && data2 == null)
+      return 0;
+    if (data1 == null)
+      return -1;
+    if (data2 == null)
+      return 1;
     return _compare(data1, 0, data1.length, data2, 0, data2.length);
   }
 
   public static int compare(ByteBuffer buf1, ByteBuffer buf2) {
-    if (buf1 == null && buf2 == null) return 0;
-    if (buf1 == null) return -1;
-    if (buf2 == null) return 1;
-    return _compare(buf1.array(), buf1.arrayOffset() + buf1.position(), buf1.remaining(),
-                    buf2.array(), buf2.arrayOffset() + buf2.position(), buf2.remaining());
+    if (buf1 == null && buf2 == null)
+      return 0;
+    if (buf1 == null)
+      return -1;
+    if (buf2 == null)
+      return 1;
+    return _compare(buf1.array(), buf1.arrayOffset() + buf1.position(),
+        buf1.remaining(), buf2.array(), buf2.arrayOffset() + buf2.position(),
+        buf2.remaining());
   }
-  
-  public static int _compare(byte[] data1, int s1, int l1, byte[] data2, int s2, int l2) {
-    if (l2 > l1) return -1;
-    if (l2 < l1) return 1;
+
+  public static int _compare(byte[] data1, int s1, int l1, byte[] data2,
+      int s2, int l2) {
+    if (l2 > l1)
+      return -1;
+    if (l2 < l1)
+      return 1;
     int res = 0;
     for (int i = 0; i < l1; i++) {
       res = (data1[s1 + i] - data2[s2 + i]);
-      if (res != 0) return res;
+      if (res != 0)
+        return res;
     }
     return 0;
   }
Index: src/java/org/apache/nutch/crawl/SignatureFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/SignatureFactory.java	(working copy)
@@ -28,26 +28,28 @@
 
 /**
  * Factory class, which instantiates a Signature implementation according to the
- * current Configuration configuration. This newly created instance is cached in the
- * Configuration instance, so that it could be later retrieved.
- *
+ * current Configuration configuration. This newly created instance is cached in
+ * the Configuration instance, so that it could be later retrieved.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class SignatureFactory {
-  private static final Logger LOG = LoggerFactory.getLogger(SignatureFactory.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SignatureFactory.class);
 
-  private SignatureFactory() {}                   // no public ctor
+  private SignatureFactory() {
+  } // no public ctor
 
   /** Return the default Signature implementation. */
   public static Signature getSignature(Configuration conf) {
     String clazz = conf.get("db.signature.class", MD5Signature.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    Signature impl = (Signature)objectCache.getObject(clazz);
+    Signature impl = (Signature) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using Signature impl: " + clazz);
         Class<?> implClass = Class.forName(clazz);
-        impl = (Signature)implClass.newInstance();
+        impl = (Signature) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/crawl/TextProfileSignature.java
===================================================================
--- src/java/org/apache/nutch/crawl/TextProfileSignature.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/TextProfileSignature.java	(working copy)
@@ -29,28 +29,33 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * <p>An implementation of a page signature. It calculates an MD5 hash
- * of a plain text "profile" of a page. In case there is no text, it
- * calculates a hash using the {@link MD5Signature}.</p>
- * <p>The algorithm to calculate a page "profile" takes the plain text version of
- * a page and performs the following steps:
+ * <p>
+ * An implementation of a page signature. It calculates an MD5 hash of a plain
+ * text "profile" of a page. In case there is no text, it calculates a hash
+ * using the {@link MD5Signature}.
+ * </p>
+ * <p>
+ * The algorithm to calculate a page "profile" takes the plain text version of a
+ * page and performs the following steps:
  * <ul>
  * <li>remove all characters except letters and digits, and bring all characters
  * to lower case,</li>
  * <li>split the text into tokens (all consecutive non-whitespace characters),</li>
- * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2 characters),</li>
+ * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2
+ * characters),</li>
  * <li>sort the list of tokens by decreasing frequency,</li>
- * <li>round down the counts of tokens to the nearest multiple of QUANT
- * (<code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is 0.01f
- * by default, and <code>maxFreq</code> is the maximum token frequency). If
- * <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2 (which
- * means that tokens with frequency 1 are always discarded).</li>
- * <li>tokens, which frequency after quantization falls below QUANT, are discarded.</li>
- * <li>create a list of tokens and their quantized frequency, separated by spaces,
- * in the order of decreasing frequency.</li>
+ * <li>round down the counts of tokens to the nearest multiple of QUANT (
+ * <code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is
+ * 0.01f by default, and <code>maxFreq</code> is the maximum token frequency).
+ * If <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2
+ * (which means that tokens with frequency 1 are always discarded).</li>
+ * <li>tokens, whose frequency after quantization falls below QUANT, are
+ * discarded.</li>
+ * <li>create a list of tokens and their quantized frequency, separated by
+ * spaces, in the order of decreasing frequency.</li>
  * </ul>
  * This list is then submitted to an MD5 hash calculation.
- *
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class TextProfileSignature extends Signature {
@@ -65,12 +70,16 @@
 
   @Override
   public byte[] calculate(WebPage page) {
-    int MIN_TOKEN_LEN = getConf().getInt("db.signature.text_profile.min_token_len", 2);
-    float QUANT_RATE = getConf().getFloat("db.signature.text_profile.quant_rate", 0.01f);
+    int MIN_TOKEN_LEN = getConf().getInt(
+        "db.signature.text_profile.min_token_len", 2);
+    float QUANT_RATE = getConf().getFloat(
+        "db.signature.text_profile.quant_rate", 0.01f);
     HashMap<String, Token> tokens = new HashMap<String, Token>();
     String text = null;
-    if (page.getText() != null) text = page.getText().toString();
-    if (text == null || text.length() == 0) return fallback.calculate(page);
+    if (page.getText() != null)
+      text = page.getText().toString();
+    if (text == null || text.length() == 0)
+      return fallback.calculate(page);
     StringBuffer curToken = new StringBuffer();
     int maxFreq = 0;
     for (int i = 0; i < text.length(); i++) {
@@ -88,7 +97,8 @@
               tokens.put(s, tok);
             }
             tok.cnt++;
-            if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+            if (tok.cnt > maxFreq)
+              maxFreq = tok.cnt;
           }
           curToken.setLength(0);
         }
@@ -104,7 +114,8 @@
         tokens.put(s, tok);
       }
       tok.cnt++;
-      if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+      if (tok.cnt > maxFreq)
+        maxFreq = tok.cnt;
     }
     Iterator<Token> it = tokens.values().iterator();
     ArrayList<Token> profile = new ArrayList<Token>();
@@ -111,10 +122,12 @@
     // calculate the QUANT value
     int QUANT = Math.round(maxFreq * QUANT_RATE);
     if (QUANT < 2) {
-      if (maxFreq > 1) QUANT = 2;
-      else QUANT = 1;
+      if (maxFreq > 1)
+        QUANT = 2;
+      else
+        QUANT = 1;
     }
-    while(it.hasNext()) {
+    while (it.hasNext()) {
       Token t = it.next();
       // round down to the nearest QUANT
       t.cnt = (t.cnt / QUANT) * QUANT;
@@ -129,7 +142,8 @@
     it = profile.iterator();
     while (it.hasNext()) {
       Token t = it.next();
-      if (newText.length() > 0) newText.append("\n");
+      if (newText.length() > 0)
+        newText.append("\n");
       newText.append(t.toString());
     }
     return MD5Hash.digest(newText.toString()).getDigest();
Index: src/java/org/apache/nutch/crawl/URLPartitioner.java
===================================================================
--- src/java/org/apache/nutch/crawl/URLPartitioner.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/URLPartitioner.java	(working copy)
@@ -40,7 +40,8 @@
  * parameter 'partition.url.mode' which can be 'byHost', 'byDomain' or 'byIP'
  */
 public class URLPartitioner implements Configurable {
-  private static final Logger LOG = LoggerFactory.getLogger(URLPartitioner.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(URLPartitioner.class);
 
   public static final String PARTITION_MODE_KEY = "partition.url.mode";
 
@@ -47,7 +48,7 @@
   public static final String PARTITION_MODE_HOST = "byHost";
   public static final String PARTITION_MODE_DOMAIN = "byDomain";
   public static final String PARTITION_MODE_IP = "byIP";
-  
+
   public static final String PARTITION_URL_SEED = "partition.url.seed";
 
   private Configuration conf;
@@ -77,14 +78,15 @@
 
   public int getPartition(String urlString, int numReduceTasks) {
     if (numReduceTasks == 1) {
-      //this check can be removed when we use Hadoop with MAPREDUCE-1287
+      // this check can be removed when we use Hadoop with MAPREDUCE-1287
       return 0;
     }
-    
+
     int hashCode;
     URL url = null;
     try {
-      urlString = normalizers.normalize(urlString, URLNormalizers.SCOPE_PARTITION);
+      urlString = normalizers.normalize(urlString,
+          URLNormalizers.SCOPE_PARTITION);
       hashCode = urlString.hashCode();
       url = new URL(urlString);
     } catch (MalformedURLException e) {
@@ -91,7 +93,7 @@
       LOG.warn("Malformed URL: '" + urlString + "'");
       hashCode = urlString.hashCode();
     }
-    
+
     if (url != null) {
       if (mode.equals(PARTITION_MODE_HOST)) {
         hashCode = url.getHost().hashCode();
@@ -106,20 +108,20 @@
         }
       }
     }
-    
+
     // make hosts wind up in different partitions on different runs
     hashCode ^= seed;
     return (hashCode & Integer.MAX_VALUE) % numReduceTasks;
   }
-  
-  
-  public static class SelectorEntryPartitioner 
-      extends Partitioner<SelectorEntry, WebPage> implements Configurable {
+
+  public static class SelectorEntryPartitioner extends
+      Partitioner<SelectorEntry, WebPage> implements Configurable {
     private URLPartitioner partitioner = new URLPartitioner();
     private Configuration conf;
-    
+
     @Override
-    public int getPartition(SelectorEntry selectorEntry, WebPage page, int numReduces) {
+    public int getPartition(SelectorEntry selectorEntry, WebPage page,
+        int numReduces) {
       return partitioner.getPartition(selectorEntry.url, numReduces);
     }
 
@@ -130,23 +132,24 @@
 
     @Override
     public void setConf(Configuration conf) {
-      this.conf=conf;
+      this.conf = conf;
       partitioner.setConf(conf);
     }
   }
-  
-  public static class FetchEntryPartitioner
-      extends Partitioner<IntWritable, FetchEntry> implements Configurable {
+
+  public static class FetchEntryPartitioner extends
+      Partitioner<IntWritable, FetchEntry> implements Configurable {
     private URLPartitioner partitioner = new URLPartitioner();
     private Configuration conf;
-    
+
     @Override
-    public int getPartition(IntWritable intWritable, FetchEntry fetchEntry, int numReduces) {
+    public int getPartition(IntWritable intWritable, FetchEntry fetchEntry,
+        int numReduces) {
       String key = fetchEntry.getKey();
       String url = TableUtil.unreverseUrl(key);
       return partitioner.getPartition(url, numReduces);
     }
-    
+
     @Override
     public Configuration getConf() {
       return conf;
@@ -154,9 +157,9 @@
 
     @Override
     public void setConf(Configuration conf) {
-      this.conf=conf;
+      this.conf = conf;
       partitioner.setConf(conf);
     }
   }
-  
+
 }
Index: src/java/org/apache/nutch/crawl/UrlWithScore.java
===================================================================
--- src/java/org/apache/nutch/crawl/UrlWithScore.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/UrlWithScore.java	(working copy)
@@ -90,7 +90,7 @@
   public void setUrl(Text url) {
     this.url = url;
   }
-  
+
   public void setUrl(String url) {
     this.url.set(url);
   }
@@ -102,7 +102,7 @@
   public void setScore(FloatWritable score) {
     this.score = score;
   }
-  
+
   public void setScore(float score) {
     this.score.set(score);
   }
@@ -111,13 +111,12 @@
   public int compareTo(UrlWithScore other) {
     return comp.compare(this, other);
   }
-  
+
   @Override
   public String toString() {
     return "UrlWithScore [url=" + url + ", score=" + score + "]";
   }
 
-
   /**
    * A partitioner by {url}.
    */
@@ -144,7 +143,7 @@
       if (cmp != 0) {
         return cmp;
       }
-      //reverse order
+      // reverse order
       return -o1.getScore().compareTo(o2.getScore());
     }
 
@@ -159,9 +158,9 @@
         if (cmp != 0) {
           return cmp;
         }
-        //reverse order
-        return -floatComp.compare(b1, s1 + deptLen1, l1 - deptLen1, 
-                                  b2, s2 + deptLen2, l2 - deptLen2);
+        // reverse order
+        return -floatComp.compare(b1, s1 + deptLen1, l1 - deptLen1, b2, s2
+            + deptLen2, l2 - deptLen2);
       } catch (IOException e) {
         throw new IllegalArgumentException(e);
       }
Index: src/java/org/apache/nutch/crawl/WebTableReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/WebTableReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/crawl/WebTableReader.java	(working copy)
@@ -59,7 +59,8 @@
 
 public class WebTableReader extends NutchTool implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(WebTableReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(WebTableReader.class);
 
   public static class WebTableStatMapper extends
       GoraMapper<String, WebPage, Text, LongWritable> {
@@ -209,12 +210,12 @@
     if (LOG.isInfoEnabled()) {
       LOG.info("WebTable statistics start");
     }
-    
+
     run(ToolUtil.toArgMap(Nutch.ARG_SORT, sort));
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("Statistics for WebTable: ");
-      for (Entry<String,Object> e : results.entrySet()) {
+      for (Entry<String, Object> e : results.entrySet()) {
         LOG.info(e.getKey() + ":\t" + e.getValue());
       }
       LOG.info("WebTable statistics: done");
@@ -223,9 +224,10 @@
 
   /** Prints out the entry to the standard out **/
   private void read(String key, boolean dumpContent, boolean dumpHeaders,
-      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException, IOException, Exception {
-    DataStore<String, WebPage> datastore = StorageUtils.createWebStore(getConf(),
-        String.class, WebPage.class);
+      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException,
+      IOException, Exception {
+    DataStore<String, WebPage> datastore = StorageUtils.createWebStore(
+        getConf(), String.class, WebPage.class);
 
     Query<String, WebPage> query = datastore.newQuery();
     String reversedUrl = TableUtil.reverseUrl(key);
@@ -245,7 +247,7 @@
         String url = TableUtil.unreverseUrl(skey);
         System.out.println(getPageRepresentation(url, page, dumpContent,
             dumpHeaders, dumpLinks, dumpText));
-      }catch (Exception e) {
+      } catch (Exception e) {
         e.printStackTrace();
       }
     }
@@ -280,9 +282,10 @@
       // checks whether the Key passes the regex
       String url = TableUtil.unreverseUrl(key.toString());
       if (regex.matcher(url).matches()) {
-        context.write(new Text(url),
-            new Text(getPageRepresentation(key, value, dumpContent, dumpHeaders,
-                dumpLinks, dumpText)));
+        context.write(
+            new Text(url),
+            new Text(getPageRepresentation(key, value, dumpContent,
+                dumpHeaders, dumpLinks, dumpText)));
       }
     }
 
@@ -292,8 +295,10 @@
         throws IOException, InterruptedException {
       regex = Pattern.compile(context.getConfiguration().get(regexParamName,
           ".+"));
-      dumpContent = context.getConfiguration().getBoolean(contentParamName, false);
-      dumpHeaders = context.getConfiguration().getBoolean(headersParamName, false);
+      dumpContent = context.getConfiguration().getBoolean(contentParamName,
+          false);
+      dumpHeaders = context.getConfiguration().getBoolean(headersParamName,
+          false);
       dumpLinks = context.getConfiguration().getBoolean(linksParamName, false);
       dumpText = context.getConfiguration().getBoolean(textParamName, false);
     }
@@ -317,10 +322,10 @@
     cfg.setBoolean(WebTableRegexMapper.linksParamName, links);
     cfg.setBoolean(WebTableRegexMapper.textParamName, text);
 
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(job
-        .getConfiguration(), String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        job.getConfiguration(), String.class, WebPage.class);
     Query<String, WebPage> query = store.newQuery();
-    //remove the __g__dirty field since it is not stored
+    // remove the __g__dirty field since it is not stored
     String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1,
         WebPage._ALL_FIELDS.length);
     query.setFields(fields);
@@ -342,30 +347,37 @@
   }
 
   private static String getPageRepresentation(String key, WebPage page,
-      boolean dumpContent, boolean dumpHeaders, boolean dumpLinks, boolean dumpText) {
+      boolean dumpContent, boolean dumpHeaders, boolean dumpLinks,
+      boolean dumpText) {
     StringBuffer sb = new StringBuffer();
     sb.append("key:\t" + key).append("\n");
     sb.append("baseUrl:\t" + page.getBaseUrl()).append("\n");
-    sb.append("status:\t").append(page.getStatus()).append(" (").append(
-        CrawlStatus.getName(page.getStatus().byteValue())).append(")\n");
+    sb.append("status:\t").append(page.getStatus()).append(" (")
+        .append(CrawlStatus.getName(page.getStatus().byteValue()))
+        .append(")\n");
     sb.append("fetchTime:\t" + page.getFetchTime()).append("\n");
     sb.append("prevFetchTime:\t" + page.getPrevFetchTime()).append("\n");
-    sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n"); 
-    sb.append("retriesSinceFetch:\t" + page.getRetriesSinceFetch()).append("\n");
+    sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n");
+    sb.append("retriesSinceFetch:\t" + page.getRetriesSinceFetch())
+        .append("\n");
     sb.append("modifiedTime:\t" + page.getModifiedTime()).append("\n");
     sb.append("prevModifiedTime:\t" + page.getPrevModifiedTime()).append("\n");
-    sb.append("protocolStatus:\t" +
-        ProtocolStatusUtils.toString(page.getProtocolStatus())).append("\n");
+    sb.append(
+        "protocolStatus:\t"
+            + ProtocolStatusUtils.toString(page.getProtocolStatus())).append(
+        "\n");
     ByteBuffer prevSig = page.getPrevSignature();
-        if (prevSig != null) {
-      sb.append("prevSignature:\t" + StringUtil.toHexString(prevSig)).append("\n");
+    if (prevSig != null) {
+      sb.append("prevSignature:\t" + StringUtil.toHexString(prevSig)).append(
+          "\n");
     }
     ByteBuffer sig = page.getSignature();
     if (sig != null) {
       sb.append("signature:\t" + StringUtil.toHexString(sig)).append("\n");
     }
-    sb.append("parseStatus:\t" +
-        ParseStatusUtils.toString(page.getParseStatus())).append("\n");
+    sb.append(
+        "parseStatus:\t" + ParseStatusUtils.toString(page.getParseStatus()))
+        .append("\n");
     sb.append("title:\t" + page.getTitle()).append("\n");
     sb.append("score:\t" + page.getScore()).append("\n");
 
@@ -439,22 +451,29 @@
     System.exit(res);
   }
 
-  private static enum Op {READ, STAT, DUMP};
+  private static enum Op {
+    READ, STAT, DUMP
+  };
 
   public int run(String[] args) throws Exception {
     if (args.length < 1) {
       System.err
           .println("Usage: WebTableReader (-stats | -url [url] | -dump <out_dir> [-regex regex]) \n \t \t      [-crawlId <id>] [-content] [-headers] [-links] [-text]");
-      System.err.println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t     (default: storage.crawl.id)");
-      System.err.println("    -stats [-sort] - print overall statistics to System.out");
+      System.err
+          .println("    -crawlId <id>  - the id to prefix the schemas to operate on, \n \t \t     (default: storage.crawl.id)");
+      System.err
+          .println("    -stats [-sort] - print overall statistics to System.out");
       System.err.println("    [-sort]        - list status sorted by host");
-      System.err.println("    -url <url>     - print information on <url> to System.out");
-      System.err.println("    -dump <out_dir> [-regex regex] - dump the webtable to a text file in \n \t \t     <out_dir>");
+      System.err
+          .println("    -url <url>     - print information on <url> to System.out");
+      System.err
+          .println("    -dump <out_dir> [-regex regex] - dump the webtable to a text file in \n \t \t     <out_dir>");
       System.err.println("    -content       - dump also raw content");
       System.err.println("    -headers       - dump protocol headers");
       System.err.println("    -links         - dump links");
       System.err.println("    -text          - dump extracted text");
-      System.err.println("    [-regex]       - filter on the URL of the webtable entry");
+      System.err
+          .println("    [-regex]       - filter on the URL of the webtable entry");
       return -1;
     }
     String param = null;
@@ -470,8 +489,8 @@
         if (args[i].equals("-url")) {
           param = args[++i];
           op = Op.READ;
-          //read(param);
-          //return 0;
+          // read(param);
+          // return 0;
         } else if (args[i].equals("-stats")) {
           op = Op.STAT;
         } else if (args[i].equals("-sort")) {
@@ -516,7 +535,7 @@
 
   // for now handles only -stat
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".")
         + "stat_tmp" + System.currentTimeMillis());
 
@@ -523,23 +542,25 @@
     numJobs = 1;
     currentJob = new NutchJob(getConf(), "db_stats");
 
-    currentJob.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
-    
-    Boolean sort = (Boolean)args.get(Nutch.ARG_SORT);
-    if (sort == null) sort = Boolean.FALSE;
+    currentJob.getConfiguration().setBoolean(
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+
+    Boolean sort = (Boolean) args.get(Nutch.ARG_SORT);
+    if (sort == null)
+      sort = Boolean.FALSE;
     currentJob.getConfiguration().setBoolean("db.reader.stats.sort", sort);
 
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob
-        .getConfiguration(), String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        currentJob.getConfiguration(), String.class, WebPage.class);
     Query<String, WebPage> query = store.newQuery();
 
-    //remove the __g__dirty field since it is not stored
+    // remove the __g__dirty field since it is not stored
     String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1,
-            WebPage._ALL_FIELDS.length);
+        WebPage._ALL_FIELDS.length);
     query.setFields(fields);
 
-    GoraMapper.initMapperJob(currentJob, query, store, Text.class, LongWritable.class,
-        WebTableStatMapper.class, null, true);
+    GoraMapper.initMapperJob(currentJob, query, store, Text.class,
+        LongWritable.class, WebTableStatMapper.class, null, true);
 
     currentJob.setCombinerClass(WebTableStatCombiner.class);
     currentJob.setReducerClass(WebTableStatReducer.class);
@@ -596,7 +617,8 @@
     }
 
     LongWritable totalCnt = stats.get("T");
-    if (totalCnt==null)totalCnt=new LongWritable(0);
+    if (totalCnt == null)
+      totalCnt = new LongWritable(0);
     stats.remove("T");
     results.put("TOTAL urls", totalCnt.get());
     for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
@@ -615,14 +637,15 @@
         if (st.length > 2)
           results.put(st[2], val.get());
         else
-          results.put(st[0] + " " + code + " ("
-              + CrawlStatus.getName((byte) code) + ")", val.get());
+          results.put(
+              st[0] + " " + code + " (" + CrawlStatus.getName((byte) code)
+                  + ")", val.get());
       } else
         results.put(k, val.get());
     }
     // removing the tmp folder
     fileSystem.delete(tmpFolder, true);
-    
+
     return results;
   }
 }
Index: src/java/org/apache/nutch/fetcher/FetchEntry.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetchEntry.java	(revision 1650444)
+++ src/java/org/apache/nutch/fetcher/FetchEntry.java	(working copy)
@@ -66,6 +66,5 @@
   public String toString() {
     return "FetchEntry [key=" + key + ", page=" + page + "]";
   }
-  
-  
+
 }
Index: src/java/org/apache/nutch/fetcher/FetcherJob.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/fetcher/FetcherJob.java	(working copy)
@@ -53,7 +53,7 @@
 
 /**
  * Multi-threaded fetcher.
- *
+ * 
  */
 public class FetcherJob extends NutchTool implements Tool {
 
@@ -80,8 +80,8 @@
    * Mapper class for Fetcher.
    * </p>
    * <p>
-   * This class reads the random integer written by {@link GeneratorJob} as its key
-   * while outputting the actual key and value arguments through a
+   * This class reads the random integer written by {@link GeneratorJob} as its
+   * key while outputting the actual key and value arguments through a
    * {@link FetchEntry} instance.
    * </p>
    * <p>
@@ -92,8 +92,8 @@
    * from other hosts as well.
    * </p>
    */
-  public static class FetcherMapper
-  extends GoraMapper<String, WebPage, IntWritable, FetchEntry> {
+  public static class FetcherMapper extends
+      GoraMapper<String, WebPage, IntWritable, FetchEntry> {
 
     private boolean shouldContinue;
 
@@ -105,7 +105,8 @@
     protected void setup(Context context) {
       Configuration conf = context.getConfiguration();
       shouldContinue = conf.getBoolean(RESUME_KEY, false);
-      batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
+      batchId = new Utf8(
+          conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
     }
 
     @Override
@@ -120,12 +121,13 @@
       }
       if (shouldContinue && Mark.FETCH_MARK.checkMark(page) != null) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; already fetched");
+          LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+              + "; already fetched");
         }
         return;
       }
-      context.write(new IntWritable(random.nextInt(65536)), new FetchEntry(context
-          .getConfiguration(), key, page));
+      context.write(new IntWritable(random.nextInt(65536)), new FetchEntry(
+          context.getConfiguration(), key, page));
     }
   }
 
@@ -145,7 +147,8 @@
       ParserJob parserJob = new ParserJob();
       fields.addAll(parserJob.getFields(job));
     }
-    ProtocolFactory protocolFactory = new ProtocolFactory(job.getConfiguration());
+    ProtocolFactory protocolFactory = new ProtocolFactory(
+        job.getConfiguration());
     fields.addAll(protocolFactory.getFields());
 
     return fields;
@@ -152,13 +155,13 @@
   }
 
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     checkConfiguration();
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
-    Integer threads = (Integer)args.get(Nutch.ARG_THREADS);
-    Boolean shouldResume = (Boolean)args.get(Nutch.ARG_RESUME);
-    Integer numTasks = (Integer)args.get(Nutch.ARG_NUMTASKS);
- 
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
+    Integer threads = (Integer) args.get(Nutch.ARG_THREADS);
+    Boolean shouldResume = (Boolean) args.get(Nutch.ARG_RESUME);
+    Integer numTasks = (Integer) args.get(Nutch.ARG_NUMTASKS);
+
     if (threads != null && threads > 0) {
       getConf().setInt(THREADS_KEY, threads);
     }
@@ -169,7 +172,7 @@
     if (shouldResume != null) {
       getConf().setBoolean(RESUME_KEY, shouldResume);
     }
-    
+
     LOG.info("FetcherJob: threads: " + getConf().getInt(THREADS_KEY, 10));
     LOG.info("FetcherJob: parsing: " + getConf().getBoolean(PARSE_KEY, false));
     LOG.info("FetcherJob: resuming: " + getConf().getBoolean(RESUME_KEY, false));
@@ -182,13 +185,14 @@
       timelimit = System.currentTimeMillis() + (timelimit * 60 * 1000);
       getConf().setLong("fetcher.timelimit", timelimit);
     }
-    LOG.info("FetcherJob : timelimit set for : " + getConf().getLong("fetcher.timelimit", -1));
+    LOG.info("FetcherJob : timelimit set for : "
+        + getConf().getLong("fetcher.timelimit", -1));
     numJobs = 1;
     currentJob = new NutchJob(getConf(), "fetch");
-    
+
     // for politeness, don't permit parallel execution of a single task
     currentJob.setReduceSpeculativeExecution(false);
-    
+
     Collection<WebPage.Field> fields = getFields(currentJob);
     MapFieldValueFilter<String, WebPage> batchIdFilter = getBatchIdFilter(batchId);
     StorageUtils.initMapperJob(currentJob, fields, IntWritable.class,
@@ -196,8 +200,8 @@
         batchIdFilter, false);
     StorageUtils.initReducerJob(currentJob, FetcherReducer.class);
     if (numTasks == null || numTasks < 1) {
-      currentJob.setNumReduceTasks(currentJob.getConfiguration().getInt("mapred.map.tasks",
-          currentJob.getNumReduceTasks()));
+      currentJob.setNumReduceTasks(currentJob.getConfiguration().getInt(
+          "mapred.map.tasks", currentJob.getNumReduceTasks()));
     } else {
       currentJob.setNumReduceTasks(numTasks);
     }
@@ -219,19 +223,24 @@
     return filter;
   }
 
-    /**
+  /**
    * Run fetcher.
-   * @param batchId batchId (obtained from Generator) or null to fetch all generated fetchlists
-   * @param threads number of threads per map task
+   * 
+   * @param batchId
+   *          batchId (obtained from Generator) or null to fetch all generated
+   *          fetchlists
+   * @param threads
+   *          number of threads per map task
    * @param shouldResume
-   * @param numTasks number of fetching tasks (reducers). If set to < 1 then use the default,
-   * which is mapred.map.tasks.
+   * @param numTasks
+   *          number of fetching tasks (reducers). If set to < 1 then use the
+   *          default, which is mapred.map.tasks.
    * @return 0 on success
    * @throws Exception
    */
-  public int fetch(String batchId, int threads, boolean shouldResume, int numTasks)
-      throws Exception {
-    
+  public int fetch(String batchId, int threads, boolean shouldResume,
+      int numTasks) throws Exception {
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("FetcherJob: starting at " + sdf.format(start));
@@ -242,15 +251,13 @@
       LOG.info("FetcherJob: batchId: " + batchId);
     }
 
-    run(ToolUtil.toArgMap(
-        Nutch.ARG_BATCH, batchId,
-        Nutch.ARG_THREADS, threads,
-        Nutch.ARG_RESUME, shouldResume,
-        Nutch.ARG_NUMTASKS, numTasks));
-    
+    run(ToolUtil.toArgMap(Nutch.ARG_BATCH, batchId, Nutch.ARG_THREADS, threads,
+        Nutch.ARG_RESUME, shouldResume, Nutch.ARG_NUMTASKS, numTasks));
+
     long finish = System.currentTimeMillis();
-    LOG.info("FetcherJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
-    
+    LOG.info("FetcherJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+
     return 0;
   }
 
@@ -273,13 +280,13 @@
     boolean shouldResume = false;
     String batchId;
 
-    String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] " +
-      "[-threads N] \n \t \t  [-resume] [-numTasks N]\n" +
-      "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n" +
-      "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n" +
-      "    -threads N    - number of fetching threads per task\n" +
-      "    -resume       - resume interrupted job\n" +
-      "    -numTasks N   - if N > 0 then use this many reduce tasks for fetching \n \t \t    (default: mapred.map.tasks)";
+    String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] "
+        + "[-threads N] \n \t \t  [-resume] [-numTasks N]\n"
+        + "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n"
+        + "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n"
+        + "    -threads N    - number of fetching threads per task\n"
+        + "    -resume       - resume interrupted job\n"
+        + "    -numTasks N   - if N > 0 then use this many reduce tasks for fetching \n \t \t    (default: mapred.map.tasks)";
 
     if (args.length == 0) {
       System.err.println(usage);
@@ -303,17 +310,19 @@
       } else if ("-crawlId".equals(args[i])) {
         getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
       } else {
-        throw new IllegalArgumentException("arg " +args[i]+ " not recognized");
+        throw new IllegalArgumentException("arg " + args[i] + " not recognized");
       }
     }
 
-    int fetchcode = fetch(batchId, threads, shouldResume, numTasks); // run the Fetcher
+    int fetchcode = fetch(batchId, threads, shouldResume, numTasks); // run the
+                                                                     // Fetcher
 
     return fetchcode;
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new FetcherJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new FetcherJob(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/fetcher/FetcherReducer.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/fetcher/FetcherReducer.java	(working copy)
@@ -46,8 +46,8 @@
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
-public class FetcherReducer
-extends GoraReducer<IntWritable, FetchEntry, String, WebPage> {
+public class FetcherReducer extends
+    GoraReducer<IntWritable, FetchEntry, String, WebPage> {
 
   public static final Logger LOG = FetcherJob.LOG;
 
@@ -54,12 +54,15 @@
   private final AtomicInteger activeThreads = new AtomicInteger(0);
   private final AtomicInteger spinWaiting = new AtomicInteger(0);
 
-  private final long start = System.currentTimeMillis(); // start time of fetcher run
+  private final long start = System.currentTimeMillis(); // start time of
+                                                         // fetcher run
   private final AtomicLong lastRequestStart = new AtomicLong(start);
 
-  private final AtomicLong bytes = new AtomicLong(0);        // total bytes fetched
-  private final AtomicInteger pages = new AtomicInteger(0);  // total pages fetched
-  private final AtomicInteger errors = new AtomicInteger(0); // total pages errored
+  private final AtomicLong bytes = new AtomicLong(0); // total bytes fetched
+  private final AtomicInteger pages = new AtomicInteger(0); // total pages
+                                                            // fetched
+  private final AtomicInteger errors = new AtomicInteger(0); // total pages
+                                                             // errored
 
   private QueueFeeder feeder;
 
@@ -89,9 +92,10 @@
       this.queueID = queueID;
     }
 
-    /** Create an item. Queue id will be created based on <code>queueMode</code>
-     * argument, either as a protocol + hostname pair, protocol + IP
-     * address pair or protocol+domain pair.
+    /**
+     * Create an item. Queue id will be created based on <code>queueMode</code>
+     * argument, either as a protocol + hostname pair, protocol + IP address
+     * pair or protocol+domain pair.
      */
     public static FetchItem create(String url, WebPage page, String queueMode) {
       String queueID;
@@ -113,19 +117,18 @@
           LOG.warn("Unable to resolve: " + u.getHost() + ", skipping.");
           return null;
         }
-      }
-      else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)){
+      } else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)) {
         host = URLUtil.getDomainName(u);
         if (host == null) {
-          LOG.warn("Unknown domain for url: " + url + ", using URL string as key");
-          host=u.toExternalForm();
+          LOG.warn("Unknown domain for url: " + url
+              + ", using URL string as key");
+          host = u.toExternalForm();
         }
-      }
-      else {
+      } else {
         host = u.getHost();
         if (host == null) {
           LOG.warn("Unknown host for url: " + url + ", using URL string as key");
-          host=u.toExternalForm();
+          host = u.toExternalForm();
         }
       }
       queueID = proto + "://" + host.toLowerCase();
@@ -140,19 +143,22 @@
   }
 
   /**
-   * This class handles FetchItems which come from the same host ID (be it
-   * a proto/hostname or proto/IP pair). It also keeps track of requests in
+   * This class handles FetchItems which come from the same host ID (be it a
+   * proto/hostname or proto/IP pair). It also keeps track of requests in
    * progress and elapsed time between requests.
    */
   private static class FetchItemQueue {
-    List<FetchItem> queue = Collections.synchronizedList(new LinkedList<FetchItem>());
-    Set<FetchItem>  inProgress = Collections.synchronizedSet(new HashSet<FetchItem>());
+    List<FetchItem> queue = Collections
+        .synchronizedList(new LinkedList<FetchItem>());
+    Set<FetchItem> inProgress = Collections
+        .synchronizedSet(new HashSet<FetchItem>());
     AtomicLong nextFetchTime = new AtomicLong();
     long crawlDelay;
     long minCrawlDelay;
     int maxThreads;
 
-    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay, long minCrawlDelay) {
+    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay,
+        long minCrawlDelay) {
       this.maxThreads = maxThreads;
       this.crawlDelay = crawlDelay;
       this.minCrawlDelay = minCrawlDelay;
@@ -176,27 +182,34 @@
     }
 
     public void addFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       queue.add(it);
     }
 
     @SuppressWarnings("unused")
     public void addInProgressFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       inProgress.add(it);
     }
 
     public FetchItem getFetchItem() {
-      if (inProgress.size() >= maxThreads) return null;
+      if (inProgress.size() >= maxThreads)
+        return null;
       final long now = System.currentTimeMillis();
-      if (nextFetchTime.get() > now) return null;
+      if (nextFetchTime.get() > now)
+        return null;
       FetchItem it = null;
-      if (queue.size() == 0) return null;
+      if (queue.size() == 0)
+        return null;
       try {
         it = queue.remove(0);
         inProgress.add(it);
       } catch (final Exception e) {
-        LOG.error("Cannot remove FetchItem from queue or cannot add it to inProgress queue", e);
+        LOG.error(
+            "Cannot remove FetchItem from queue or cannot add it to inProgress queue",
+            e);
       }
       return it;
     }
@@ -220,11 +233,12 @@
 
     private void setEndTime(long endTime, boolean asap) {
       if (!asap)
-        nextFetchTime.set(endTime + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
+        nextFetchTime.set(endTime
+            + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
       else
         nextFetchTime.set(endTime);
     }
-    
+
     public synchronized int emptyQueue() {
       int presize = queue.size();
       queue.clear();
@@ -247,7 +261,7 @@
     long minCrawlDelay;
     Configuration conf;
     long timelimit = -1;
-    
+
     boolean useHostSettings = false;
     HostDb hostDb = null;
 
@@ -260,16 +274,19 @@
       this.maxThreads = conf.getInt("fetcher.threads.per.queue", 1);
       queueMode = conf.get("fetcher.queue.mode", QUEUE_MODE_HOST);
       // check that the mode is known
-      if (!queueMode.equals(QUEUE_MODE_IP) && !queueMode.equals(QUEUE_MODE_DOMAIN)
+      if (!queueMode.equals(QUEUE_MODE_IP)
+          && !queueMode.equals(QUEUE_MODE_DOMAIN)
           && !queueMode.equals(QUEUE_MODE_HOST)) {
-        LOG.error("Unknown partition mode : " + queueMode + " - forcing to byHost");
+        LOG.error("Unknown partition mode : " + queueMode
+            + " - forcing to byHost");
         queueMode = QUEUE_MODE_HOST;
       }
-      LOG.info("Using queue mode : "+queueMode);
-      
-      // Optionally enable host specific queue behavior 
+      LOG.info("Using queue mode : " + queueMode);
+
+      // Optionally enable host specific queue behavior
       if (queueMode.equals(QUEUE_MODE_HOST)) {
-        useHostSettings = conf.getBoolean("fetcher.queue.use.host.settings", false);
+        useHostSettings = conf.getBoolean("fetcher.queue.use.host.settings",
+            false);
         if (useHostSettings) {
           LOG.info("Host specific queue settings enabled.");
           // Initialize the HostDb if we need it.
@@ -276,9 +293,10 @@
           hostDb = new HostDb(conf);
         }
       }
-      
+
       this.crawlDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
-      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay", 0.0f) * 1000);
+      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay",
+          0.0f) * 1000);
       this.timelimit = conf.getLong("fetcher.timelimit", -1);
     }
 
@@ -292,7 +310,8 @@
 
     public void addFetchItem(String url, WebPage page) {
       final FetchItem it = FetchItem.create(url, page, queueMode);
-      if (it != null) addFetchItem(it);
+      if (it != null)
+        addFetchItem(it);
     }
 
     public synchronized void addFetchItem(FetchItem it) {
@@ -321,19 +340,18 @@
         if (useHostSettings) {
           // Use host specific queue settings (if defined in the host table)
           try {
-            String hostname = id.substring(id.indexOf("://")+3);
+            String hostname = id.substring(id.indexOf("://") + 3);
             Host host = hostDb.getByHostName(hostname);
             if (host != null) {
-              fiq = new FetchItemQueue(conf,
-                                       host.getInt("q_mt", maxThreads),
-                                       host.getLong("q_cd", crawlDelay),
-                                       host.getLong("q_mcd", minCrawlDelay));
+              fiq = new FetchItemQueue(conf, host.getInt("q_mt", maxThreads),
+                  host.getLong("q_cd", crawlDelay), host.getLong("q_mcd",
+                      minCrawlDelay));
             }
-            
+
           } catch (IOException e) {
             LOG.error("Error while trying to access host settings", e);
           }
-        } 
+        }
         if (fiq == null) {
           // Use queue defaults
           fiq = new FetchItemQueue(conf, maxThreads, crawlDelay, minCrawlDelay);
@@ -344,8 +362,8 @@
     }
 
     public synchronized FetchItem getFetchItem() {
-      final Iterator<Map.Entry<String, FetchItemQueue>> it =
-        queues.entrySet().iterator();
+      final Iterator<Map.Entry<String, FetchItemQueue>> it = queues.entrySet()
+          .iterator();
       while (it.hasNext()) {
         final FetchItemQueue fiq = it.next().getValue();
         // reap empty queues
@@ -362,7 +380,7 @@
       }
       return null;
     }
-    
+
     public synchronized int checkTimelimit() {
       if (System.currentTimeMillis() >= timelimit && timelimit != -1) {
         return emptyQueues();
@@ -369,12 +387,12 @@
       }
       return 0;
     }
-    
 
     public synchronized void dump() {
       for (final String id : queues.keySet()) {
         final FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id);
         fiq.dump();
       }
@@ -383,11 +401,12 @@
     // empties the queues (used by timebomb and throughput threshold)
     public synchronized int emptyQueues() {
       int count = 0;
-      
+
       // emptying the queues
       for (String id : queues.keySet()) {
         FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id + " >> dropping! ");
         int deleted = fiq.emptyQueue();
         for (int i = 0; i < deleted; i++) {
@@ -398,7 +417,8 @@
       // there might also be a case where totalsize !=0 but number of queues
       // == 0
       // in which case we simply force it to 0 to avoid blocking
-      if (totalSize.get() != 0 && queues.size() == 0) totalSize.set(0);
+      if (totalSize.get() != 0 && queues.size() == 0)
+        totalSize.set(0);
 
       return count;
     }
@@ -420,8 +440,8 @@
     private final boolean ignoreExternalLinks;
 
     public FetcherThread(Context context, int num) {
-      this.setDaemon(true);                       // don't hang JVM on exit
-      this.setName("FetcherThread" + num);        // use an informative name
+      this.setDaemon(true); // don't hang JVM on exit
+      this.setName("FetcherThread" + num); // use an informative name
       this.context = context;
       Configuration conf = context.getConfiguration();
       this.urlFilters = new URLFilters(conf);
@@ -430,7 +450,8 @@
       this.maxCrawlDelay = conf.getInt("fetcher.max.crawl.delay", 30) * 1000;
       // backward-compatible default setting
       this.byIP = conf.getBoolean("fetcher.threads.per.host.by.ip", true);
-      this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
+      this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links",
+          false);
     }
 
     @Override
@@ -446,13 +467,15 @@
           if (fit == null) {
             if (feeder.isAlive() || fetchQueues.getTotalSize() > 0) {
               if (LOG.isDebugEnabled()) {
-                LOG.debug(getName() + " fetchQueues.getFetchItem() was null, spin-waiting ...");
+                LOG.debug(getName()
+                    + " fetchQueues.getFetchItem() was null, spin-waiting ...");
               }
               // spin-wait.
               spinWaiting.incrementAndGet();
               try {
                 Thread.sleep(500);
-              } catch (final Exception e) {}
+              } catch (final Exception e) {
+              }
               spinWaiting.decrementAndGet();
               continue;
             } else {
@@ -467,12 +490,13 @@
             reprUrl = TableUtil.toString(fit.page.getReprUrl());
           }
           try {
-            LOG.info("fetching " + fit.url + " (queue crawl delay=" + 
-                      fetchQueues.getFetchItemQueue(fit.queueID).crawlDelay + "ms)"); 
+            LOG.info("fetching " + fit.url + " (queue crawl delay="
+                + fetchQueues.getFetchItemQueue(fit.queueID).crawlDelay + "ms)");
 
             // fetch the page
             final Protocol protocol = this.protocolFactory.getProtocol(fit.url);
-            final BaseRobotRules rules = protocol.getRobotRules(fit.url, fit.page);
+            final BaseRobotRules rules = protocol.getRobotRules(fit.url,
+                fit.page);
             if (!rules.isAllowed(fit.u.toString())) {
               // unblock
               fetchQueues.finishFetchItem(fit, true);
@@ -487,30 +511,38 @@
               if (rules.getCrawlDelay() > maxCrawlDelay && maxCrawlDelay >= 0) {
                 // unblock
                 fetchQueues.finishFetchItem(fit, true);
-                LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
-                output(fit, null, ProtocolStatusUtils.STATUS_ROBOTS_DENIED, CrawlStatus.STATUS_GONE);
+                LOG.debug("Crawl-Delay for " + fit.url + " too long ("
+                    + rules.getCrawlDelay() + "), skipping");
+                output(fit, null, ProtocolStatusUtils.STATUS_ROBOTS_DENIED,
+                    CrawlStatus.STATUS_GONE);
                 continue;
               } else {
-                final FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
+                final FetchItemQueue fiq = fetchQueues
+                    .getFetchItemQueue(fit.queueID);
                 fiq.crawlDelay = rules.getCrawlDelay();
                 if (LOG.isDebugEnabled()) {
-                  LOG.info("Crawl delay for queue: " + fit.queueID + " is set to " + fiq.crawlDelay + " as per robots.txt. url: " + fit.url);
+                  LOG.info("Crawl delay for queue: " + fit.queueID
+                      + " is set to " + fiq.crawlDelay
+                      + " as per robots.txt. url: " + fit.url);
                 }
               }
             }
-            final ProtocolOutput output = protocol.getProtocolOutput(fit.url, fit.page);
+            final ProtocolOutput output = protocol.getProtocolOutput(fit.url,
+                fit.page);
             final ProtocolStatus status = output.getStatus();
             final Content content = output.getContent();
             // unblock queue
             fetchQueues.finishFetchItem(fit);
 
-            context.getCounter("FetcherStatus", ProtocolStatusUtils.getName(status.getCode())).increment(1);
+            context.getCounter("FetcherStatus",
+                ProtocolStatusUtils.getName(status.getCode())).increment(1);
 
             int length = 0;
-            if (content!=null && content.getContent()!=null) length= content.getContent().length;
+            if (content != null && content.getContent() != null)
+              length = content.getContent().length;
             updateStatus(length);
 
-            switch(status.getCode()) {
+            switch (status.getCode()) {
 
             case ProtocolStatusCodes.WOULDBLOCK:
               // retry ?
@@ -517,11 +549,11 @@
               fetchQueues.addFetchItem(fit);
               break;
 
-            case ProtocolStatusCodes.SUCCESS:        // got a page
+            case ProtocolStatusCodes.SUCCESS: // got a page
               output(fit, content, status, CrawlStatus.STATUS_FETCHED);
               break;
 
-            case ProtocolStatusCodes.MOVED:         // redirect
+            case ProtocolStatusCodes.MOVED: // redirect
             case ProtocolStatusCodes.TEMP_MOVED:
               byte code;
               boolean temp;
@@ -533,18 +565,19 @@
                 temp = true;
               }
               final String newUrl = ProtocolStatusUtils.getMessage(status);
-              handleRedirect(fit.url, newUrl, temp,  FetcherJob.PROTOCOL_REDIR, fit.page);
+              handleRedirect(fit.url, newUrl, temp, FetcherJob.PROTOCOL_REDIR,
+                  fit.page);
               output(fit, content, status, code);
               break;
             case ProtocolStatusCodes.EXCEPTION:
               logFetchFailure(fit.url, ProtocolStatusUtils.getMessage(status));
               /* FALLTHROUGH */
-            case ProtocolStatusCodes.RETRY:          // retry
+            case ProtocolStatusCodes.RETRY: // retry
             case ProtocolStatusCodes.BLOCKED:
               output(fit, null, status, CrawlStatus.STATUS_RETRY);
               break;
 
-            case ProtocolStatusCodes.GONE:           // gone
+            case ProtocolStatusCodes.GONE: // gone
             case ProtocolStatusCodes.NOTFOUND:
             case ProtocolStatusCodes.ACCESS_DENIED:
             case ProtocolStatusCodes.ROBOTS_DENIED:
@@ -562,7 +595,7 @@
               output(fit, null, status, CrawlStatus.STATUS_RETRY);
             }
 
-          } catch (final Throwable t) {                 // unexpected exception
+          } catch (final Throwable t) { // unexpected exception
             // unblock
             fetchQueues.finishFetchItem(fit);
             LOG.error("Unexpected error for " + fit.url, t);
@@ -574,15 +607,17 @@
       } catch (final Throwable e) {
         LOG.error("fetcher throwable caught", e);
       } finally {
-        if (fit != null) fetchQueues.finishFetchItem(fit);
+        if (fit != null)
+          fetchQueues.finishFetchItem(fit);
         activeThreads.decrementAndGet(); // count threads
-        LOG.info("-finishing thread " + getName() + ", activeThreads=" + activeThreads);
+        LOG.info("-finishing thread " + getName() + ", activeThreads="
+            + activeThreads);
       }
     }
 
-    private void handleRedirect(String url, String newUrl,
-        boolean temp, String redirType, WebPage page)
-    throws URLFilterException, IOException, InterruptedException {
+    private void handleRedirect(String url, String newUrl, boolean temp,
+        String redirType, WebPage page) throws URLFilterException, IOException,
+        InterruptedException {
       newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
       newUrl = urlFilters.filter(newUrl);
       if (newUrl == null || newUrl.equals(url)) {
@@ -590,7 +625,7 @@
       }
 
       if (ignoreExternalLinks) {
-        String toHost   = new URL(newUrl).getHost().toLowerCase();
+        String toHost = new URL(newUrl).getHost().toLowerCase();
         String fromHost = new URL(url).getHost().toLowerCase();
         if (toHost == null || !toHost.equals(fromHost)) {
           // external links
@@ -606,12 +641,11 @@
       } else {
         page.setReprUrl(new Utf8(reprUrl));
         if (LOG.isDebugEnabled()) {
-          LOG.debug(" - " + redirType + " redirect to " +
-              reprUrl + " (fetching later)");
+          LOG.debug(" - " + redirType + " redirect to " + reprUrl
+              + " (fetching later)");
         }
       }
     }
-    
 
     private void updateStatus(int bytesInPage) throws IOException {
       pages.incrementAndGet();
@@ -618,10 +652,9 @@
       bytes.addAndGet(bytesInPage);
     }
 
-    private void output(FetchItem fit, Content content,
-        ProtocolStatus pstatus, byte status)
-    throws IOException, InterruptedException {
-      fit.page.setStatus((int)status);
+    private void output(FetchItem fit, Content content, ProtocolStatus pstatus,
+        byte status) throws IOException, InterruptedException {
+      fit.page.setStatus((int) status);
       final long prevFetchTime = fit.page.getFetchTime();
       fit.page.setPrevFetchTime(prevFetchTime);
       fit.page.setFetchTime(System.currentTimeMillis());
@@ -638,13 +671,15 @@
       String key = TableUtil.reverseUrl(fit.url);
 
       if (parse) {
-        if (!skipTruncated || (skipTruncated && !ParserJob.isTruncated(fit.url, fit.page))) {
+        if (!skipTruncated
+            || (skipTruncated && !ParserJob.isTruncated(fit.url, fit.page))) {
           parseUtil.process(key, fit.page);
         }
       }
-      //remove content if storingContent is false. Content is added to fit.page above 
-      //for ParseUtil be able to parse it. 
-      if(content != null && !storingContent){
+      // remove content if storingContent is false. Content is added to fit.page
+      // above
+      // for ParseUtil to be able to parse it.
+      if (content != null && !storingContent) {
         fit.page.setContent(ByteBuffer.wrap(new byte[0]));
       }
       context.write(key, fit.page);
@@ -656,10 +691,9 @@
     }
   }
 
-
   /**
-   * This class feeds the queues with input items, and re-fills them as
-   * items are consumed by FetcherThread-s.
+   * This class feeds the queues with input items, and re-fills them as items
+   * are consumed by FetcherThread-s.
    */
   private static class QueueFeeder extends Thread {
     private final Context context;
@@ -669,9 +703,8 @@
     boolean hasMore;
     private long timelimit = -1;
 
-    public QueueFeeder(Context context,
-        FetchItemQueues queues, int size)
-    throws IOException, InterruptedException {
+    public QueueFeeder(Context context, FetchItemQueues queues, int size)
+        throws IOException, InterruptedException {
       this.context = context;
       this.queues = queues;
       this.size = size;
@@ -681,8 +714,9 @@
       if (hasMore) {
         currentIter = context.getValues().iterator();
       }
-      // the value of the time limit is either -1 or the time where it should finish
-      timelimit = context.getConfiguration().getLong("fetcher.timelimit", -1); 
+      // the value of the time limit is either -1 or the time at which it
+      // should finish
+      timelimit = context.getConfiguration().getLong("fetcher.timelimit", -1);
     }
 
     @Override
@@ -709,7 +743,9 @@
             // queues are full - spin-wait until they have some free space
             try {
               Thread.sleep(1000);
-            } catch (final Exception e) {};
+            } catch (final Exception e) {
+            }
+            ;
             continue;
           }
           if (LOG.isDebugEnabled()) {
@@ -717,8 +753,7 @@
           }
           while (feed > 0 && currentIter.hasNext()) {
             FetchEntry entry = currentIter.next();
-            final String url =
-              TableUtil.unreverseUrl(entry.getKey());
+            final String url = TableUtil.unreverseUrl(entry.getKey());
             queues.addFetchItem(url, entry.getWebPage());
             feed--;
             cnt++;
@@ -735,22 +770,27 @@
         LOG.error("QueueFeeder error reading input, record " + cnt, e);
         return;
       }
-      LOG.info("QueueFeeder finished: total " + cnt + " records. Hit by time limit :"
-          + timelimitcount);
-      context.getCounter("FetcherStatus","HitByTimeLimit-QueueFeeder").increment(timelimitcount);
+      LOG.info("QueueFeeder finished: total " + cnt
+          + " records. Hit by time limit :" + timelimitcount);
+      context.getCounter("FetcherStatus", "HitByTimeLimit-QueueFeeder")
+          .increment(timelimitcount);
     }
   }
 
-  private void reportAndLogStatus(Context context, float actualPages, 
+  private void reportAndLogStatus(Context context, float actualPages,
       int actualBytes, int totalSize) throws IOException {
     StringBuilder status = new StringBuilder();
-    long elapsed = (System.currentTimeMillis() - start)/1000;
-    status.append(spinWaiting).append("/").append(activeThreads).append(" spinwaiting/active, ");
+    long elapsed = (System.currentTimeMillis() - start) / 1000;
+    status.append(spinWaiting).append("/").append(activeThreads)
+        .append(" spinwaiting/active, ");
     status.append(pages).append(" pages, ").append(errors).append(" errors, ");
-    status.append(Math.round((((float)pages.get())*10)/elapsed)/10.0).append(" ");
-    status.append(Math.round((actualPages*10)/10.0)).append(" pages/s, ");
-    status.append(Math.round((((float)bytes.get())*8)/1024)/elapsed).append(" ");
-    status.append(Math.round(((float)actualBytes)*8)/1024).append(" kb/s, ");
+    status.append(Math.round((((float) pages.get()) * 10) / elapsed) / 10.0)
+        .append(" ");
+    status.append(Math.round((actualPages * 10) / 10.0)).append(" pages/s, ");
+    status.append(Math.round((((float) bytes.get()) * 8) / 1024) / elapsed)
+        .append(" ");
+    status.append(Math.round(((float) actualBytes) * 8) / 1024).append(
+        " kb/s, ");
     status.append(totalSize).append(" URLs in ");
     status.append(this.fetchQueues.getQueueCount()).append(" queues");
     String toString = status.toString();
@@ -759,30 +799,30 @@
   }
 
   @Override
-  public void run(Context context)
-  throws IOException, InterruptedException {
+  public void run(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
     this.fetchQueues = new FetchItemQueues(conf);
     int threadCount = conf.getInt("fetcher.threads.fetch", 10);
     parse = conf.getBoolean(FetcherJob.PARSE_KEY, false);
-    storingContent=conf.getBoolean("fetcher.store.content", true);
+    storingContent = conf.getBoolean("fetcher.store.content", true);
     if (parse) {
-      skipTruncated=conf.getBoolean(ParserJob.SKIP_TRUNCATED, true);
+      skipTruncated = conf.getBoolean(ParserJob.SKIP_TRUNCATED, true);
       parseUtil = new ParseUtil(conf);
     }
     LOG.info("Fetcher: threads: " + threadCount);
 
     int maxFeedPerThread = conf.getInt("fetcher.queue.depth.multiplier", 50);
-    feeder = new QueueFeeder(context, fetchQueues, threadCount * maxFeedPerThread);
+    feeder = new QueueFeeder(context, fetchQueues, threadCount
+        * maxFeedPerThread);
     feeder.start();
 
-    for (int i = 0; i < threadCount; i++) {       // spawn threads
+    for (int i = 0; i < threadCount; i++) { // spawn threads
       FetcherThread ft = new FetcherThread(context, i);
       fetcherThreads.add(ft);
       ft.start();
     }
     // select a timeout that avoids a task timeout
-    final long timeout = conf.getInt("mapred.task.timeout", 10*60*1000)/2;
+    final long timeout = conf.getInt("mapred.task.timeout", 10 * 60 * 1000) / 2;
 
     // Used for threshold check, holds pages and bytes processed in the last sec
     float pagesLastSec;
@@ -790,48 +830,59 @@
 
     int throughputThresholdCurrentSequence = 0;
 
-    int throughputThresholdPages = conf.getInt("fetcher.throughput.threshold.pages", -1);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages); }
-    int throughputThresholdSequence = conf.getInt("fetcher.throughput.threshold.sequence", 5);
-    if (LOG.isInfoEnabled()) { 
-      LOG.info("Fetcher: throughput threshold sequence: " + throughputThresholdSequence); 
+    int throughputThresholdPages = conf.getInt(
+        "fetcher.throughput.threshold.pages", -1);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages);
     }
-    long throughputThresholdTimeLimit = conf.getLong("fetcher.throughput.threshold.check.after", -1);
-    
-    do {                                          // wait for threads to exit
+    int throughputThresholdSequence = conf.getInt(
+        "fetcher.throughput.threshold.sequence", 5);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold sequence: "
+          + throughputThresholdSequence);
+    }
+    long throughputThresholdTimeLimit = conf.getLong(
+        "fetcher.throughput.threshold.check.after", -1);
+
+    do { // wait for threads to exit
       pagesLastSec = pages.get();
-      bytesLastSec = (int)bytes.get();
+      bytesLastSec = (int) bytes.get();
       final int secondsToSleep = 5;
       try {
         Thread.sleep(secondsToSleep * 1000);
-      } catch (InterruptedException e) {}
+      } catch (InterruptedException e) {
+      }
 
-      pagesLastSec = (pages.get() - pagesLastSec)/secondsToSleep;
-      bytesLastSec = ((int)bytes.get() - bytesLastSec)/secondsToSleep;
+      pagesLastSec = (pages.get() - pagesLastSec) / secondsToSleep;
+      bytesLastSec = ((int) bytes.get() - bytesLastSec) / secondsToSleep;
 
       int fetchQueuesTotalSize = fetchQueues.getTotalSize();
-      reportAndLogStatus(context, pagesLastSec, bytesLastSec, fetchQueuesTotalSize);
-      
+      reportAndLogStatus(context, pagesLastSec, bytesLastSec,
+          fetchQueuesTotalSize);
+
       boolean feederAlive = feeder.isAlive();
       if (!feederAlive && fetchQueuesTotalSize < 5) {
         fetchQueues.dump();
       }
-      
+
       // check timelimit
       if (!feederAlive) {
         int hitByTimeLimit = fetchQueues.checkTimelimit();
         if (hitByTimeLimit != 0) {
-          context.getCounter("FetcherStatus","HitByTimeLimit-Queues").increment(hitByTimeLimit);
+          context.getCounter("FetcherStatus", "HitByTimeLimit-Queues")
+              .increment(hitByTimeLimit);
         }
       }
-      
+
       // if throughput threshold is enabled
-      if (throughputThresholdTimeLimit < System.currentTimeMillis() && throughputThresholdPages != -1) {
+      if (throughputThresholdTimeLimit < System.currentTimeMillis()
+          && throughputThresholdPages != -1) {
         // Check if we're dropping below the threshold
         if (pagesLastSec < throughputThresholdPages) {
           throughputThresholdCurrentSequence++;
-          LOG.warn(Integer.toString(throughputThresholdCurrentSequence) 
-              + ": dropping below configured threshold of " + Integer.toString(throughputThresholdPages) 
+          LOG.warn(Integer.toString(throughputThresholdCurrentSequence)
+              + ": dropping below configured threshold of "
+              + Integer.toString(throughputThresholdPages)
               + " pages per second");
 
           // Quit if we dropped below threshold too many times
@@ -841,17 +892,19 @@
             // Disable the threshold checker
             throughputThresholdPages = -1;
 
-            // Empty the queues cleanly and get number of items that were dropped
+            // Empty the queues cleanly and get the number of items that
+            // were dropped
             int hitByThrougputThreshold = fetchQueues.emptyQueues();
 
-            if (hitByThrougputThreshold != 0) context.getCounter("FetcherStatus", 
-                "hitByThrougputThreshold").increment(hitByThrougputThreshold);
+            if (hitByThrougputThreshold != 0)
+              context.getCounter("FetcherStatus", "hitByThrougputThreshold")
+                  .increment(hitByThrougputThreshold);
           }
         } else {
           throughputThresholdCurrentSequence = 0;
         }
       }
-      
+
       // some requests seem to hang, despite all intentions
       if ((System.currentTimeMillis() - lastRequestStart.get()) > timeout) {
         if (LOG.isWarnEnabled() && activeThreads.get() > 0) {
@@ -859,7 +912,8 @@
           for (int i = 0; i < fetcherThreads.size(); i++) {
             FetcherThread thread = fetcherThreads.get(i);
             if (thread.isAlive()) {
-              LOG.warn("Thread #" + i + " hung while processing " + thread.reprUrl);
+              LOG.warn("Thread #" + i + " hung while processing "
+                  + thread.reprUrl);
               if (LOG.isDebugEnabled()) {
                 StackTraceElement[] stack = thread.getStackTrace();
                 StringBuilder sb = new StringBuilder();
@@ -879,4 +933,3 @@
     LOG.info("-activeThreads=" + activeThreads);
   }
 }
-
Index: src/java/org/apache/nutch/host/HostDb.java
===================================================================
--- src/java/org/apache/nutch/host/HostDb.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostDb.java	(working copy)
@@ -37,22 +37,23 @@
 import com.google.common.cache.RemovalNotification;
 
 /**
- * A caching wrapper for the host datastore. 
+ * A caching wrapper for the host datastore.
  */
 public class HostDb implements Closeable {
   public static final Log LOG = LogFactory.getLog(HostDb.class);
-  
+
   private static final class CacheHost {
     private final Host host;
     private final long timestamp;
+
     public CacheHost(Host host, long timestamp) {
       this.host = host;
       this.timestamp = timestamp;
-    }   
+    }
   }
-  private final static CacheHost NULL_HOST = new CacheHost(null,0);
-  
 
+  private final static CacheHost NULL_HOST = new CacheHost(null, 0);
+
   private DataStore<String, Host> hostStore;
 
   public static final String HOSTDB_LRU_SIZE = "hostdb.lru.size";
@@ -61,7 +62,7 @@
   public static final int DEFAULT_HOSTDB_CONCURRENCY_LEVEL = 8;
 
   private Cache<String, CacheHost> cache;
-  
+
   private AtomicLong lastFlush;
 
   public HostDb(Configuration conf) throws GoraException {
@@ -73,47 +74,43 @@
 
     // Create a cache.
     // We add a removal listener to see if we need to flush the store,
-    // in order to adhere to the put-flush-get semantic 
+    // in order to adhere to the put-flush-get semantic
     // ("read your own write") of DataStore.
-    
+
     long lruSize = conf.getLong(HOSTDB_LRU_SIZE, DEFAULT_LRU_SIZE);
-    int concurrencyLevel = conf.getInt(HOSTDB_CONCURRENCY_LEVEL, 
+    int concurrencyLevel = conf.getInt(HOSTDB_CONCURRENCY_LEVEL,
         DEFAULT_HOSTDB_CONCURRENCY_LEVEL);
-    RemovalListener<String, CacheHost> listener = 
-        new RemovalListener<String, CacheHost>() {
-          @Override
-          public void onRemoval(
-              RemovalNotification<String, CacheHost> notification) {
-            CacheHost removeFromCacheHost = notification.getValue();
-            if (removeFromCacheHost != NULL_HOST) {
-              if (removeFromCacheHost.timestamp < lastFlush.get()) {
-                try {
-                  hostStore.flush();
-                } catch (Exception e) {
-                  throw new RuntimeException(e);
-                }
-                lastFlush.set(System.currentTimeMillis());
-              }
+    RemovalListener<String, CacheHost> listener = new RemovalListener<String, CacheHost>() {
+      @Override
+      public void onRemoval(RemovalNotification<String, CacheHost> notification) {
+        CacheHost removeFromCacheHost = notification.getValue();
+        if (removeFromCacheHost != NULL_HOST) {
+          if (removeFromCacheHost.timestamp < lastFlush.get()) {
+            try {
+              hostStore.flush();
+            } catch (Exception e) {
+              throw new RuntimeException(e);
             }
+            lastFlush.set(System.currentTimeMillis());
           }
+        }
+      }
     };
-    
-    cache=CacheBuilder.newBuilder().maximumSize(lruSize)
-        .removalListener(listener).concurrencyLevel(concurrencyLevel)
-        .build();
+
+    cache = CacheBuilder.newBuilder().maximumSize(lruSize)
+        .removalListener(listener).concurrencyLevel(concurrencyLevel).build();
     lastFlush = new AtomicLong(System.currentTimeMillis());
   }
 
-  
-  
   public Host get(final String key) throws IOException {
     Callable<CacheHost> valueLoader = new Callable<CacheHost>() {
       @Override
       public CacheHost call() throws Exception {
         Host host = hostStore.get(key);
-        if (host == null) return NULL_HOST;
+        if (host == null)
+          return NULL_HOST;
         return new CacheHost(host, System.currentTimeMillis());
-      }  
+      }
     };
     CacheHost cachedHost;
     try {
@@ -127,14 +124,11 @@
       return null;
     }
   }
- 
 
-
   public Host getByHostName(String hostName) throws IOException {
-   return get(TableUtil.reverseHost(hostName));
+    return get(TableUtil.reverseHost(hostName));
   }
-  
-  
+
   public void put(String key, Host host) throws IOException {
     cache.put(key, new CacheHost(host, System.currentTimeMillis()));
     hostStore.put(key, host);
Index: src/java/org/apache/nutch/host/HostDbReader.java
===================================================================
--- src/java/org/apache/nutch/host/HostDbReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostDbReader.java	(working copy)
@@ -39,7 +39,8 @@
 public class HostDbReader extends Configured implements Tool {
   public static final Log LOG = LogFactory.getLog(HostDbReader.class);
 
-  private void read(String key) throws ClassNotFoundException, IOException, Exception {
+  private void read(String key) throws ClassNotFoundException, IOException,
+      Exception {
 
     DataStore<String, Host> datastore = StorageUtils.createWebStore(getConf(),
         String.class, Host.class);
Index: src/java/org/apache/nutch/host/HostDbUpdateJob.java
===================================================================
--- src/java/org/apache/nutch/host/HostDbUpdateJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostDbUpdateJob.java	(working copy)
@@ -116,15 +116,14 @@
 
   @Override
   public int run(String[] args) throws Exception {
-    boolean linkDb=false;
+    boolean linkDb = false;
     for (int i = 0; i < args.length; i++) {
       if ("-linkDb".equals(args[i])) {
         linkDb = true;
       } else if ("-crawlId".equals(args[i])) {
         getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
-      }
-      else {
-        throw new IllegalArgumentException("unrecognized arg " + args[i] 
+      } else {
+        throw new IllegalArgumentException("unrecognized arg " + args[i]
             + " usage: (-linkDb) (-crawlId <crawlId>)");
       }
     }
Index: src/java/org/apache/nutch/host/HostDbUpdateReducer.java
===================================================================
--- src/java/org/apache/nutch/host/HostDbUpdateReducer.java	(revision 1650446)
+++ src/java/org/apache/nutch/host/HostDbUpdateReducer.java	(working copy)
@@ -30,36 +30,37 @@
 import java.util.Set;
 
 /**
- * Combines all WebPages with the same host key to create a Host object, 
- * with some statistics.
+ * Combines all WebPages with the same host key to create a Host object, with
+ * some statistics.
  */
-public class HostDbUpdateReducer extends GoraReducer<Text, WebPage, String, Host> {
-  
+public class HostDbUpdateReducer extends
+    GoraReducer<Text, WebPage, String, Host> {
+
   @Override
   protected void reduce(Text key, Iterable<WebPage> values, Context context)
-    throws IOException, InterruptedException {
-    
+      throws IOException, InterruptedException {
+
     int numPages = 0;
     int numFetched = 0;
     boolean buildLinkDb = true;
-    
+
     Histogram<String> inlinkCount = new Histogram<String>();
     Histogram<String> outlinkCount = new Histogram<String>();
-    
-    for (WebPage page: values) {
+
+    for (WebPage page : values) {
       // count number of pages
-      numPages++;     
+      numPages++;
       // count number of fetched pages
       if (page.getStatus() == CrawlStatus.STATUS_FETCHED) {
         numFetched++;
       }
-      
+
       // build host link db
       // TODO: limit number of links
       if (buildLinkDb) {
         if (page.getInlinks() != null) {
           Set<CharSequence> inlinks = page.getInlinks().keySet();
-          for (CharSequence inlink: inlinks) {
+          for (CharSequence inlink : inlinks) {
             String host = URLUtil.getHost(inlink.toString());
             inlinkCount.add(host);
           }
@@ -66,7 +67,7 @@
         }
         if (page.getOutlinks() != null) {
           Set<CharSequence> outlinks = page.getOutlinks().keySet();
-          for (CharSequence outlink: outlinks) {
+          for (CharSequence outlink : outlinks) {
             String host = URLUtil.getHost(outlink.toString());
             outlinkCount.add(host);
           }
@@ -73,20 +74,24 @@
         }
       }
     }
-    
+
     // output host data
     Host host = new Host();
-    host.getMetadata().put(new Utf8("p"),ByteBuffer.wrap(Integer.toString(numPages).getBytes()));
+    host.getMetadata().put(new Utf8("p"),
+        ByteBuffer.wrap(Integer.toString(numPages).getBytes()));
     if (numFetched > 0) {
-      host.getMetadata().put(new Utf8("f"),ByteBuffer.wrap(Integer.toString(numFetched).getBytes()));
+      host.getMetadata().put(new Utf8("f"),
+          ByteBuffer.wrap(Integer.toString(numFetched).getBytes()));
     }
-    for (String inlink: inlinkCount.getKeys()) {
-      host.getInlinks().put(new Utf8(inlink), new Utf8(Integer.toString(inlinkCount.getCount(inlink))));
+    for (String inlink : inlinkCount.getKeys()) {
+      host.getInlinks().put(new Utf8(inlink),
+          new Utf8(Integer.toString(inlinkCount.getCount(inlink))));
     }
-    for (String outlink: outlinkCount.getKeys()) {
-      host.getOutlinks().put(new Utf8(outlink), new Utf8(Integer.toString(outlinkCount.getCount(outlink))));
+    for (String outlink : outlinkCount.getKeys()) {
+      host.getOutlinks().put(new Utf8(outlink),
+          new Utf8(Integer.toString(outlinkCount.getCount(outlink))));
     }
-    
+
     context.write(key.toString(), host);
   }
 }
Index: src/java/org/apache/nutch/host/HostInjectorJob.java
===================================================================
--- src/java/org/apache/nutch/host/HostInjectorJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/HostInjectorJob.java	(working copy)
@@ -123,13 +123,14 @@
       while (keysIter.hasNext()) {
         String keymd = keysIter.next();
         String valuemd = metadata.get(keymd);
-        host.getMetadata().put(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
+        host.getMetadata().put(new Utf8(keymd),
+            ByteBuffer.wrap(valuemd.getBytes()));
       }
       String hostname;
-      if (url.indexOf("://")> -1) {
-        hostname=new URL(url).getHost();
+      if (url.indexOf("://") > -1) {
+        hostname = new URL(url).getHost();
       } else {
-        hostname=new URL("http://"+url).getHost();
+        hostname = new URL("http://" + url).getHost();
       }
       String hostkey = TableUtil.reverseHost(hostname);
       context.write(hostkey, host);
@@ -145,8 +146,8 @@
     job.setMapOutputKeyClass(String.class);
     job.setMapOutputValueClass(Host.class);
     job.setOutputFormatClass(GoraOutputFormat.class);
-    GoraOutputFormat.setOutput(job,
-        StorageUtils.createWebStore(job.getConfiguration(), String.class, Host.class), true);
+    GoraOutputFormat.setOutput(job, StorageUtils.createWebStore(
+        job.getConfiguration(), String.class, Host.class), true);
     job.setReducerClass(Reducer.class);
     job.setNumReduceTasks(0);
     return job.waitForCompletion(true);
Index: src/java/org/apache/nutch/host/package-info.java
===================================================================
--- src/java/org/apache/nutch/host/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/host/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Host database to store metadata per host.
  */
 package org.apache.nutch.host;
+
Index: src/java/org/apache/nutch/indexer/CleaningJob.java
===================================================================
--- src/java/org/apache/nutch/indexer/CleaningJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/CleaningJob.java	(working copy)
@@ -44,26 +44,25 @@
 public class CleaningJob extends NutchTool implements Tool {
 
   public static final String ARG_COMMIT = "commit";
-  public static final Logger LOG = LoggerFactory
-      .getLogger(CleaningJob.class);
+  public static final Logger LOG = LoggerFactory.getLogger(CleaningJob.class);
   private Configuration conf;
 
   private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
-  
+
   static {
     FIELDS.add(WebPage.Field.STATUS);
   }
-  
+
   @Override
   public Configuration getConf() {
     return conf;
   }
-  
+
   @Override
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   public Collection<WebPage.Field> getFields(Job job) {
     Configuration conf = job.getConfiguration();
     Collection<WebPage.Field> columns = new HashSet<WebPage.Field>(FIELDS);
@@ -96,7 +95,7 @@
       }
     }
   }
-  
+
   public static class CleanReducer extends
       Reducer<String, WebPage, NullWritable, NullWritable> {
     private int numDeletes = 0;
@@ -128,12 +127,11 @@
       writers.close();
       if (numDeletes > 0 && commit) {
         writers.commit();
-      }   
+      }
       LOG.info("CleaningJob: deleted a total of " + numDeletes + " documents");
     }
   }
 
-
   @Override
   public Map<String, Object> run(Map<String, Object> args) throws Exception {
     getConf().setBoolean(ARG_COMMIT, (Boolean) args.get(ARG_COMMIT));
Index: src/java/org/apache/nutch/indexer/IndexCleaningFilter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexCleaningFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexCleaningFilter.java	(working copy)
@@ -22,9 +22,9 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-
-/** Extension point for indexing.  Permits one to add metadata to the indexed
- * fields.  All plugins found which implement this extension point are run
+/**
+ * Extension point for indexing. Permits one to add metadata to the indexed
+ * fields. All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
 public interface IndexCleaningFilter extends FieldPluggable, Configurable {
@@ -31,12 +31,12 @@
   /** The name of the extension point. */
   final static String X_POINT_ID = IndexCleaningFilter.class.getName();
 
-  /**   
-   * @param url page url
+  /**
+   * @param url
+   *          page url
    * @param page
    * @return true == remove false == keep
    * @throws IndexingException
    */
-  boolean remove(String url, WebPage page)
-  throws IndexingException;
+  boolean remove(String url, WebPage page) throws IndexingException;
 }
Index: src/java/org/apache/nutch/indexer/IndexCleaningFilters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexCleaningFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexCleaningFilters.java	(working copy)
@@ -32,12 +32,13 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.ObjectCache;
 
-/** Creates and caches {@link IndexCleaningFilter} implementing plugins.*/
+/** Creates and caches {@link IndexCleaningFilter} implementing plugins. */
 public class IndexCleaningFilters {
 
   public static final String IndexCleaningFilter_ORDER = "IndexCleaningFilterhbase.order";
 
-  public final static Logger LOG = LoggerFactory.getLogger(IndexCleaningFilters.class);
+  public final static Logger LOG = LoggerFactory
+      .getLogger(IndexCleaningFilters.class);
 
   private IndexCleaningFilter[] indexcleaningFilters;
 
@@ -60,10 +61,10 @@
         ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
             IndexCleaningFilter.X_POINT_ID);
         if (point == null)
-          throw new RuntimeException(IndexCleaningFilter.X_POINT_ID + " not found.");
+          throw new RuntimeException(IndexCleaningFilter.X_POINT_ID
+              + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, IndexCleaningFilter> filterMap =
-          new HashMap<String, IndexCleaningFilter>();
+        HashMap<String, IndexCleaningFilter> filterMap = new HashMap<String, IndexCleaningFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
           IndexCleaningFilter filter = (IndexCleaningFilter) extension
@@ -78,20 +79,19 @@
          * indeterminate order
          */
         if (orderedFilters == null) {
-          objectCache.setObject(IndexCleaningFilter.class.getName(),
-              filterMap.values().toArray(
-                  new IndexCleaningFilter[0]));
+          objectCache.setObject(IndexCleaningFilter.class.getName(), filterMap
+              .values().toArray(new IndexCleaningFilter[0]));
           /* Otherwise run the filters in the required order */
         } else {
           ArrayList<IndexCleaningFilter> filters = new ArrayList<IndexCleaningFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
-        	  IndexCleaningFilter filter = filterMap.get(orderedFilters[i]);
+            IndexCleaningFilter filter = filterMap.get(orderedFilters[i]);
             if (filter != null) {
               filters.add(filter);
             }
           }
-          objectCache.setObject(IndexCleaningFilter.class.getName(), filters
-              .toArray(new IndexCleaningFilter[filters.size()]));
+          objectCache.setObject(IndexCleaningFilter.class.getName(),
+              filters.toArray(new IndexCleaningFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
@@ -100,13 +100,13 @@
           .getObject(IndexCleaningFilter.class.getName());
     }
   }
+
   /** Run all defined filters. */
-  public boolean remove(String url, WebPage page)
-  throws IndexingException {
+  public boolean remove(String url, WebPage page) throws IndexingException {
     for (IndexCleaningFilter indexcleaningFilter : indexcleaningFilters) {
-    	if(indexcleaningFilter.remove(url,page)){
-    		return true;
-    	}
+      if (indexcleaningFilter.remove(url, page)) {
+        return true;
+      }
     }
     return false;
   }
Index: src/java/org/apache/nutch/indexer/IndexUtil.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexUtil.java	(working copy)
@@ -26,37 +26,41 @@
 import org.apache.nutch.util.TableUtil;
 
 /**
- * Utility to create an indexed document from a webpage.  
- *
+ * Utility to create an indexed document from a webpage.
+ * 
  */
 public class IndexUtil {
   private static final Log LOG = LogFactory.getLog(new Object() {
   }.getClass().getEnclosingClass());
-  
-  
+
   private IndexingFilters filters;
   private ScoringFilters scoringFilters;
-  
+
   public IndexUtil(Configuration conf) {
     filters = new IndexingFilters(conf);
     scoringFilters = new ScoringFilters(conf);
   }
-  
+
   /**
    * Index a {@link Webpage}, here we add the following fields:
    * <ol>
    * <li><tt>id</tt>: default uniqueKey for the {@link NutchDocument}.</li>
-   * <li><tt>digest</tt>: Digest is used to identify pages (like unique ID) and is used to remove
-   * duplicates during the dedup procedure. It is calculated using {@link org.apache.nutch.crawl.MD5Signature} or
+   * <li><tt>digest</tt>: Digest is used to identify pages (like unique ID) and
+   * is used to remove duplicates during the dedup procedure. It is calculated
+   * using {@link org.apache.nutch.crawl.MD5Signature} or
    * {@link org.apache.nutch.crawl.TextProfileSignature}.</li>
-   * <li><tt>batchId</tt>: The page belongs to a unique batchId, this is its identifier.</li>
-   * <li><tt>boost</tt>: Boost is used to calculate document (field) score which can be used within
-   * queries submitted to the underlying indexing library to find the best results. It's part of the scoring algorithms. 
-   * See scoring.link, scoring.opic, scoring.tld, etc.</li>
+   * <li><tt>batchId</tt>: The page belongs to a unique batchId, this is its
+   * identifier.</li>
+   * <li><tt>boost</tt>: Boost is used to calculate document (field) score which
+   * can be used within queries submitted to the underlying indexing library to
+   * find the best results. It's part of the scoring algorithms. See
+   * scoring.link, scoring.opic, scoring.tld, etc.</li>
    * </ol>
    * 
-   * @param key The key of the page (reversed url).
-   * @param page The {@link Webpage}.
+   * @param key
+   *          The key of the page (reversed url).
+   * @param page
+   *          The {@link Webpage}.
    * @return The indexed document, or null if skipped by index filters.
    */
   public NutchDocument index(String key, WebPage page) {
@@ -66,7 +70,7 @@
     if (page.getBatchId() != null) {
       doc.add("batchId", page.getBatchId().toString());
     }
-    
+
     String url = TableUtil.unreverseUrl(key);
 
     if (LOG.isDebugEnabled()) {
@@ -76,12 +80,13 @@
     try {
       doc = filters.filter(doc, url, page);
     } catch (IndexingException e) {
-      LOG.warn("Error indexing "+key+": "+e);
+      LOG.warn("Error indexing " + key + ": " + e);
       return null;
     }
 
     // skip documents discarded by indexing filters
-    if (doc == null) return null;
+    if (doc == null)
+      return null;
 
     float boost = 1.0f;
     // run scoring filters
@@ -98,5 +103,5 @@
 
     return doc;
   }
-  
+
 }
Index: src/java/org/apache/nutch/indexer/IndexWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexWriter.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexWriter.java	(working copy)
@@ -26,19 +26,22 @@
 public interface IndexWriter extends Configurable, Pluggable {
   /** The name of the extension point. */
   final static String X_POINT_ID = IndexWriter.class.getName();
-  
+
   public void open(Configuration job) throws IOException;
 
   public void write(NutchDocument doc) throws IOException;
-  
+
   public void delete(String key) throws IOException;
-  
+
   public void update(NutchDocument doc) throws IOException;
-  
+
   public void commit() throws IOException;
 
   public void close() throws IOException;
-  
-  /** Returns a String describing the IndexWriter instance and the specific parameters it can take */
+
+  /**
+   * Returns a String describing the IndexWriter instance and the specific
+   * parameters it can take
+   */
   public String describe();
 }
Index: src/java/org/apache/nutch/indexer/IndexWriters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexWriters.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexWriters.java	(working copy)
@@ -32,8 +32,7 @@
 /** Creates and caches {@link IndexWriter} implementing plugins. */
 public class IndexWriters {
 
-  public final static Logger LOG = LoggerFactory
-      .getLogger(IndexWriters.class);
+  public final static Logger LOG = LoggerFactory.getLogger(IndexWriters.class);
 
   private IndexWriter[] indexWriters;
 
@@ -44,17 +43,15 @@
           .getObject(IndexWriter.class.getName());
       if (this.indexWriters == null) {
         try {
-          ExtensionPoint point = PluginRepository.get(conf)
-              .getExtensionPoint(IndexWriter.X_POINT_ID);
+          ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+              IndexWriter.X_POINT_ID);
           if (point == null)
-            throw new RuntimeException(IndexWriter.X_POINT_ID
-                + " not found.");
+            throw new RuntimeException(IndexWriter.X_POINT_ID + " not found.");
           Extension[] extensions = point.getExtensions();
           HashMap<String, IndexWriter> indexerMap = new HashMap<String, IndexWriter>();
           for (int i = 0; i < extensions.length; i++) {
             Extension extension = extensions[i];
-            IndexWriter writer = (IndexWriter) extension
-                .getExtensionInstance();
+            IndexWriter writer = (IndexWriter) extension.getExtensionInstance();
             LOG.info("Adding " + writer.getClass().getName());
             if (!indexerMap.containsKey(writer.getClass().getName())) {
               indexerMap.put(writer.getClass().getName(), writer);
Index: src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(working copy)
@@ -30,29 +30,29 @@
   public RecordWriter<String, NutchDocument> getRecordWriter(
       TaskAttemptContext job) throws IOException, InterruptedException {
 
-    //final IndexWriter[] writers =
-    //  NutchIndexWriterFactory.getNutchIndexWriters(job.getConfiguration());
+    // final IndexWriter[] writers =
+    // NutchIndexWriterFactory.getNutchIndexWriters(job.getConfiguration());
 
     final IndexWriters writers = new IndexWriters(job.getConfiguration());
-    
-//    for (final IndexWriter writer : writers) {
-//      writer.open(job);
-//    }
+
+    // for (final IndexWriter writer : writers) {
+    // writer.open(job);
+    // }
     writers.open(job.getConfiguration());
-    
+
     return new RecordWriter<String, NutchDocument>() {
 
       @Override
       public void write(String key, NutchDocument doc) throws IOException {
-        // TODO: Check Write Status for delete or write.  
+        // TODO: Check Write Status for delete or write.
         writers.write(doc);
       }
 
       @Override
       public void close(TaskAttemptContext context) throws IOException,
-      InterruptedException {
-          writers.close();
-        }
+          InterruptedException {
+        writers.close();
+      }
     };
   }
 
@@ -64,21 +64,26 @@
   @Override
   public OutputCommitter getOutputCommitter(TaskAttemptContext arg0)
       throws IOException, InterruptedException {
-    //return an empty outputcommitter
+    // return an empty outputcommitter
     return new OutputCommitter() {
       @Override
       public void setupTask(TaskAttemptContext arg0) throws IOException {
       }
+
       @Override
       public void setupJob(JobContext arg0) throws IOException {
       }
+
       @Override
-      public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+      public boolean needsTaskCommit(TaskAttemptContext arg0)
+          throws IOException {
         return false;
       }
+
       @Override
       public void commitTask(TaskAttemptContext arg0) throws IOException {
       }
+
       @Override
       public void abortTask(TaskAttemptContext arg0) throws IOException {
       }
Index: src/java/org/apache/nutch/indexer/IndexingFilter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingFilter.java	(working copy)
@@ -22,9 +22,9 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-
-/** Extension point for indexing.  Permits one to add metadata to the indexed
- * fields.  All plugins found which implement this extension point are run
+/**
+ * Extension point for indexing. Permits one to add metadata to the indexed
+ * fields. All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
 public interface IndexingFilter extends FieldPluggable, Configurable {
@@ -33,15 +33,18 @@
 
   /**
    * Adds fields or otherwise modifies the document that will be indexed for a
-   * parse. Unwanted documents can be removed from indexing by returning a null value.
-   *
-   * @param doc document instance for collecting fields
-   * @param url page url
+   * parse. Unwanted documents can be removed from indexing by returning a null
+   * value.
+   * 
+   * @param doc
+   *          document instance for collecting fields
+   * @param url
+   *          page url
    * @param page
-   * @return modified (or a new) document instance, or null (meaning the document
-   * should be discarded)
+   * @return modified (or a new) document instance, or null (meaning the
+   *         document should be discarded)
    * @throws IndexingException
    */
   NutchDocument filter(NutchDocument doc, String url, WebPage page)
-  throws IndexingException;
+      throws IndexingException;
 }
Index: src/java/org/apache/nutch/indexer/IndexingFilters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingFilters.java	(working copy)
@@ -32,12 +32,13 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.ObjectCache;
 
-/** Creates and caches {@link IndexingFilter} implementing plugins.*/
+/** Creates and caches {@link IndexingFilter} implementing plugins. */
 public class IndexingFilters {
 
   public static final String INDEXINGFILTER_ORDER = "indexingfilter.order";
 
-  public final static Logger LOG = LoggerFactory.getLogger(IndexingFilters.class);
+  public final static Logger LOG = LoggerFactory
+      .getLogger(IndexingFilters.class);
 
   private IndexingFilter[] indexingFilters;
 
@@ -62,8 +63,7 @@
         if (point == null)
           throw new RuntimeException(IndexingFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, IndexingFilter> filterMap =
-          new HashMap<String, IndexingFilter>();
+        HashMap<String, IndexingFilter> filterMap = new HashMap<String, IndexingFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
           IndexingFilter filter = (IndexingFilter) extension
@@ -78,9 +78,8 @@
          * indeterminate order
          */
         if (orderedFilters == null) {
-          objectCache.setObject(IndexingFilter.class.getName(),
-              filterMap.values().toArray(
-                  new IndexingFilter[0]));
+          objectCache.setObject(IndexingFilter.class.getName(), filterMap
+              .values().toArray(new IndexingFilter[0]));
           /* Otherwise run the filters in the required order */
         } else {
           ArrayList<IndexingFilter> filters = new ArrayList<IndexingFilter>();
@@ -90,8 +89,8 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(IndexingFilter.class.getName(), filters
-              .toArray(new IndexingFilter[filters.size()]));
+          objectCache.setObject(IndexingFilter.class.getName(),
+              filters.toArray(new IndexingFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
@@ -100,13 +99,15 @@
           .getObject(IndexingFilter.class.getName());
     }
   }
+
   /** Run all defined filters. */
   public NutchDocument filter(NutchDocument doc, String url, WebPage page)
-  throws IndexingException {
+      throws IndexingException {
     for (IndexingFilter indexingFilter : indexingFilters) {
       doc = indexingFilter.filter(doc, url, page);
       // break the loop if an indexing filter discards the doc
-      if (doc == null) return null;
+      if (doc == null)
+        return null;
     }
 
     return doc;
@@ -113,10 +114,9 @@
   }
 
   /**
-   * Gets all the fields for a given {@link WebPage}
-   * Many datastores need to setup the mapreduce job by specifying the fields
-   * needed. All extensions that work on WebPage are able to specify what fields
-   * they need.
+   * Gets all the fields for a given {@link WebPage} Many datastores need to
+   * setup the mapreduce job by specifying the fields needed. All extensions
+   * that work on WebPage are able to specify what fields they need.
    */
   public Collection<WebPage.Field> getFields() {
     Collection<WebPage.Field> columns = new HashSet<WebPage.Field>();
Index: src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(working copy)
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.nutch.indexer;
 
 import java.nio.ByteBuffer;
@@ -43,16 +43,19 @@
 import org.slf4j.LoggerFactory;
 
 /**
- * Reads and parses a URL and run the indexers on it. Displays the fields obtained and the first
- * 100 characters of their value
- *
- * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker http://www.lemonde.fr
+ * Reads and parses a URL and run the indexers on it. Displays the fields
+ * obtained and the first 100 characters of their value
+ * 
+ * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker
+ * http://www.lemonde.fr
+ * 
  * @author Julien Nioche
  **/
 
 public class IndexingFiltersChecker extends Configured implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(IndexingFiltersChecker.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(IndexingFiltersChecker.class);
 
   public IndexingFiltersChecker() {
 
@@ -85,7 +88,7 @@
     ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, page);
     page.setProtocolStatus(protocolOutput.getStatus());
     if (protocolOutput.getStatus().getCode() == ProtocolStatusCodes.SUCCESS) {
-      page.setStatus((int)CrawlStatus.STATUS_FETCHED);
+      page.setStatus((int) CrawlStatus.STATUS_FETCHED);
       page.setFetchTime(System.currentTimeMillis());
     } else {
       LOG.error("Fetch failed with protocol status: "
@@ -93,7 +96,7 @@
           + ": " + ProtocolStatusUtils.getMessage(protocolOutput.getStatus()));
       return -1;
     }
-    
+
     Content content = protocolOutput.getContent();
     if (content == null) {
       LOG.warn("No content for " + url);
@@ -106,7 +109,7 @@
       return -1;
     }
     page.setContentType(new Utf8(contentType));
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("parsing: " + url);
       LOG.info("contentType: " + contentType);
@@ -136,7 +139,7 @@
       LOG.info("Document discarded by indexing filter");
       return 0;
     }
-    
+
     for (String fname : doc.getFieldNames()) {
       List<String> values = doc.getFieldValues(fname);
       if (values != null) {
Index: src/java/org/apache/nutch/indexer/IndexingJob.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/IndexingJob.java	(working copy)
@@ -180,7 +180,7 @@
 
     IndexWriters writers = new IndexWriters(getConf());
     LOG.info(writers.describe());
-    
+
     writers.open(getConf());
     if (getConf().getBoolean(SolrConstants.COMMIT_INDEX, true)) {
       writers.commit();
Index: src/java/org/apache/nutch/indexer/NutchDocument.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchDocument.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/NutchDocument.java	(working copy)
@@ -33,9 +33,9 @@
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.nutch.metadata.Metadata;
 
-/** A {@link NutchDocument} is the unit of indexing.*/
-public class NutchDocument
-implements Writable, Iterable<Entry<String, List<String>>> {
+/** A {@link NutchDocument} is the unit of indexing. */
+public class NutchDocument implements Writable,
+    Iterable<Entry<String, List<String>>> {
 
   public static final byte VERSION = 1;
 
@@ -139,11 +139,11 @@
   }
 
   /**
-   * A utility-like method which can easily be used to write
-   * any {@link org.apache.nutch.indexer.NutchDocument} object
-   * to string for simple debugging.
+   * A utility-like method which can easily be used to write any
+   * {@link org.apache.nutch.indexer.NutchDocument} object to string for simple
+   * debugging.
    */
-  public String toString() { 
+  public String toString() {
     StringBuilder sb = new StringBuilder();
     sb.append("doc {\n");
     for (Entry<String, List<String>> entry : fields.entrySet()) {
Index: src/java/org/apache/nutch/indexer/solr/SolrConstants.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(working copy)
@@ -22,7 +22,7 @@
   public static final String SERVER_URL = SOLR_PREFIX + "server.url";
 
   public static final String COMMIT_SIZE = SOLR_PREFIX + "commit.size";
-  
+
   public static final String COMMIT_INDEX = SOLR_PREFIX + "commit.index";
 
   public static final String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
@@ -32,15 +32,15 @@
   public static final String USERNAME = SOLR_PREFIX + "auth.username";
 
   public static final String PASSWORD = SOLR_PREFIX + "auth.password";
-  
+
   public static final String ID_FIELD = "id";
-  
+
   public static final String URL_FIELD = "url";
-  
+
   public static final String BOOST_FIELD = "boost";
-  
+
   public static final String TIMESTAMP_FIELD = "tstamp";
-  
+
   public static final String DIGEST_FIELD = "digest";
 
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(working copy)
@@ -51,42 +51,44 @@
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 
-/** 
+/**
  * Utility class for deleting duplicate documents from a solr index.
- *
+ * 
  * The algorithm goes like follows:
  * 
  * Preparation:
  * <ol>
  * <li>Query the solr server for the number of documents (say, N)</li>
- * <li>Partition N among M map tasks. For example, if we have two map tasks
- * the first map task will deal with solr documents from 0 - (N / 2 - 1) and
- * the second will deal with documents from (N / 2) to (N - 1).</li>
+ * <li>Partition N among M map tasks. For example, if we have two map tasks the
+ * first map task will deal with solr documents from 0 - (N / 2 - 1) and the
+ * second will deal with documents from (N / 2) to (N - 1).</li>
  * </ol>
  * 
  * MapReduce:
  * <ul>
- * <li>Map: Identity map where keys are digests and values are {@link SolrRecord}
- * instances(which contain id, boost and timestamp)</li>
+ * <li>Map: Identity map where keys are digests and values are
+ * {@link SolrRecord} instances(which contain id, boost and timestamp)</li>
  * <li>Reduce: After map, {@link SolrRecord}s with the same digest will be
- * grouped together. Now, of these documents with the same digests, delete
- * all of them except the one with the highest score (boost field). If two
- * (or more) documents have the same score, then the document with the latest
- * timestamp is kept. Again, every other is deleted from solr index.
- * </li>
+ * grouped together. Now, of these documents with the same digests, delete all
+ * of them except the one with the highest score (boost field). If two (or more)
+ * documents have the same score, then the document with the latest timestamp is
+ * kept. Again, every other is deleted from solr index.</li>
  * </ul>
  * 
- * Note that we assume that two documents in
- * a solr index will never have the same URL. So this class only deals with
- * documents with <b>different</b> URLs but the same digest. 
+ * Note that we assume that two documents in a solr index will never have the
+ * same URL. So this class only deals with documents with <b>different</b> URLs
+ * but the same digest.
  */
 public class SolrDeleteDuplicates
-extends Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>
-implements Tool {
+    extends
+    Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>
+    implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(SolrDeleteDuplicates.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(SolrDeleteDuplicates.class);
 
-  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD + ":[* TO *]";
+  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD
+      + ":[* TO *]";
 
   private static final int NUM_MAX_DELETE_REQUEST = 1000;
 
@@ -96,7 +98,8 @@
     private long tstamp;
     private String id;
 
-    public SolrRecord() { }
+    public SolrRecord() {
+    }
 
     public SolrRecord(String id, float boost, long tstamp) {
       this.id = id;
@@ -117,10 +120,10 @@
     }
 
     public void readSolrDocument(SolrDocument doc) {
-      id = (String)doc.getFieldValue(SolrConstants.ID_FIELD);
-      boost = (Float)doc.getFieldValue(SolrConstants.BOOST_FIELD);
+      id = (String) doc.getFieldValue(SolrConstants.ID_FIELD);
+      boost = (Float) doc.getFieldValue(SolrConstants.BOOST_FIELD);
 
-      Date buffer = (Date)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
+      Date buffer = (Date) doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
       tstamp = buffer.getTime();
     }
 
@@ -136,7 +139,7 @@
       Text.writeString(out, id);
       out.writeFloat(boost);
       out.writeLong(tstamp);
-    } 
+    }
   }
 
   public static class SolrInputSplit extends InputSplit implements Writable {
@@ -144,7 +147,8 @@
     private int docBegin;
     private int numDocs;
 
-    public SolrInputSplit() { }
+    public SolrInputSplit() {
+    }
 
     public SolrInputSplit(int docBegin, int numDocs) {
       this.docBegin = docBegin;
@@ -162,7 +166,7 @@
 
     @Override
     public String[] getLocations() throws IOException {
-      return new String[] {} ;
+      return new String[] {};
     }
 
     @Override
@@ -175,9 +179,9 @@
     public void write(DataOutput out) throws IOException {
       out.writeInt(docBegin);
       out.writeInt(numDocs);
-    } 
+    }
   }
-  
+
   public static class SolrRecordReader extends RecordReader<Text, SolrRecord> {
 
     private int currentDoc = 0;
@@ -185,21 +189,22 @@
     private Text text;
     private SolrRecord record;
     private SolrDocumentList solrDocs;
-    
+
     public SolrRecordReader(SolrDocumentList solrDocs, int numDocs) {
       this.solrDocs = solrDocs;
       this.numDocs = numDocs;
     }
-    
+
     @Override
     public void initialize(InputSplit split, TaskAttemptContext context)
         throws IOException, InterruptedException {
       text = new Text();
-      record = new SolrRecord();   
+      record = new SolrRecord();
     }
 
     @Override
-    public void close() throws IOException { }
+    public void close() throws IOException {
+    }
 
     @Override
     public float getProgress() throws IOException {
@@ -231,14 +236,14 @@
       currentDoc++;
       return true;
     }
-   
+
   };
 
   public static class SolrInputFormat extends InputFormat<Text, SolrRecord> {
-    
+
     @Override
-    public List<InputSplit> getSplits(JobContext context)
-    throws IOException, InterruptedException {
+    public List<InputSplit> getSplits(JobContext context) throws IOException,
+        InterruptedException {
       Configuration conf = context.getConfiguration();
       int numSplits = context.getNumReduceTasks();
       SolrServer solr = SolrUtils.getHttpSolrServer(conf);
@@ -254,8 +259,8 @@
         throw new IOException(e);
       }
 
-      int numResults = (int)response.getResults().getNumFound();
-      int numDocsPerSplit = (numResults / numSplits); 
+      int numResults = (int) response.getResults().getNumFound();
+      int numDocsPerSplit = (numResults / numSplits);
       int currentDoc = 0;
       List<InputSplit> splits = new ArrayList<InputSplit>();
       for (int i = 0; i < numSplits - 1; i++) {
@@ -274,11 +279,10 @@
       SolrServer solr = SolrUtils.getHttpSolrServer(conf);
       SolrInputSplit solrSplit = (SolrInputSplit) split;
       final int numDocs = (int) solrSplit.getLength();
-      
+
       SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
       solrQuery.setFields(SolrConstants.ID_FIELD, SolrConstants.BOOST_FIELD,
-                          SolrConstants.TIMESTAMP_FIELD,
-                          SolrConstants.DIGEST_FIELD);
+          SolrConstants.TIMESTAMP_FIELD, SolrConstants.DIGEST_FIELD);
       solrQuery.setStart(solrSplit.getDocBegin());
       solrQuery.setRows(numDocs);
 
@@ -318,7 +322,6 @@
     solr = SolrUtils.getHttpSolrServer(conf);
   }
 
-
   @Override
   public void cleanup(Context context) throws IOException {
     try {
@@ -334,14 +337,14 @@
 
   @Override
   public void reduce(Text key, Iterable<SolrRecord> values, Context context)
-  throws IOException {
+      throws IOException {
     Iterator<SolrRecord> iterator = values.iterator();
     SolrRecord recordToKeep = iterator.next();
     while (iterator.hasNext()) {
       SolrRecord solrRecord = iterator.next();
-      if (solrRecord.getBoost() > recordToKeep.getBoost() ||
-          (solrRecord.getBoost() == recordToKeep.getBoost() && 
-              solrRecord.getTstamp() > recordToKeep.getTstamp())) {
+      if (solrRecord.getBoost() > recordToKeep.getBoost()
+          || (solrRecord.getBoost() == recordToKeep.getBoost() && solrRecord
+              .getTstamp() > recordToKeep.getTstamp())) {
         updateRequest.deleteById(recordToKeep.id);
         recordToKeep = solrRecord;
       } else {
@@ -360,13 +363,13 @@
     }
   }
 
-  public boolean dedup(String solrUrl)
-  throws IOException, InterruptedException, ClassNotFoundException {
+  public boolean dedup(String solrUrl) throws IOException,
+      InterruptedException, ClassNotFoundException {
     LOG.info("SolrDeleteDuplicates: starting...");
     LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
-    
+
     getConf().set(SolrConstants.SERVER_URL, solrUrl);
-    
+
     Job job = new Job(getConf(), "solrdedup");
 
     job.setInputFormatClass(SolrInputFormat.class);
@@ -376,11 +379,11 @@
     job.setMapperClass(Mapper.class);
     job.setReducerClass(SolrDeleteDuplicates.class);
 
-    return job.waitForCompletion(true);    
+    return job.waitForCompletion(true);
   }
 
-  public int run(String[] args)
-  throws IOException, InterruptedException, ClassNotFoundException {
+  public int run(String[] args) throws IOException, InterruptedException,
+      ClassNotFoundException {
     if (args.length != 1) {
       System.err.println("Usage: SolrDeleteDuplicates <solr url>");
       return 1;
Index: src/java/org/apache/nutch/indexer/solr/SolrUtils.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(working copy)
@@ -1,6 +1,5 @@
 package org.apache.nutch.indexer.solr;
 
-
 import org.apache.http.impl.client.DefaultHttpClient;
 import org.apache.http.auth.AuthScope;
 import org.apache.http.auth.UsernamePasswordCredentials;
@@ -18,7 +17,8 @@
 
   public static Logger LOG = LoggerFactory.getLogger(SolrUtils.class);
 
-  public static HttpSolrServer getHttpSolrServer(Configuration job) throws MalformedURLException {
+  public static HttpSolrServer getHttpSolrServer(Configuration job)
+      throws MalformedURLException {
     DefaultHttpClient client = new DefaultHttpClient();
 
     // Check for username/password
@@ -27,10 +27,13 @@
 
       LOG.info("Authenticating as: " + username);
 
-      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT,
+          AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
 
-      client.getCredentialsProvider().setCredentials(scope, 
-          new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+      client.getCredentialsProvider().setCredentials(
+          scope,
+          new UsernamePasswordCredentials(username, job
+              .get(SolrConstants.PASSWORD)));
 
       HttpParams params = client.getParams();
       HttpClientParams.setAuthenticating(params, true);
@@ -48,12 +51,14 @@
     for (int i = 0; i < input.length(); i++) {
       ch = input.charAt(i);
 
-      // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
-      // and non-printable control characters except tabulator, new line and carriage return
+      // Strip all non-characters
+      // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
+      // and non-printable control characters except tabulator, new line and
+      // carriage return
       if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000
-              ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
-              (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
-              (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
+          ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
+          (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
+          (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
 
         retval.append(ch);
       }
Index: src/java/org/apache/nutch/metadata/CreativeCommons.java
===================================================================
--- src/java/org/apache/nutch/metadata/CreativeCommons.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/CreativeCommons.java	(working copy)
@@ -16,21 +16,20 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Creative Commons properties names.
- *
+ * 
  * @see <a href="http://www.creativecommons.org/">creativecommons.org</a>
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface CreativeCommons {
-  
+
   public final static String LICENSE_URL = "License-Url";
-  
+
   public final static String LICENSE_LOCATION = "License-Location";
-  
+
   public final static String WORK_TYPE = "Work-Type";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/DublinCore.java
===================================================================
--- src/java/org/apache/nutch/metadata/DublinCore.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/DublinCore.java	(working copy)
@@ -16,62 +16,60 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Dublin Core metadata names.
- *
- * @see <a href="http://dublincore.org">dublincore.org</a> 
- *
+ * 
+ * @see <a href="http://dublincore.org">dublincore.org</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface DublinCore {
-  
-    
+
   /**
-   * Typically, Format may include the media-type or dimensions of the
-   * resource. Format may be used to determine the software, hardware or other
-   * equipment needed to display or operate the resource. Examples of
-   * dimensions include size and duration. Recommended best practice is to
-   * select a value from a controlled vocabulary (for example, the list of
-   * Internet Media Types [MIME] defining computer media formats).
+   * Typically, Format may include the media-type or dimensions of the resource.
+   * Format may be used to determine the software, hardware or other equipment
+   * needed to display or operate the resource. Examples of dimensions include
+   * size and duration. Recommended best practice is to select a value from a
+   * controlled vocabulary (for example, the list of Internet Media Types [MIME]
+   * defining computer media formats).
    */
   public static final String FORMAT = "format";
-  
+
   /**
-   * Recommended best practice is to identify the resource by means of a
-   * string or number conforming to a formal identification system. Example
-   * formal identification systems include the Uniform Resource Identifier
-   * (URI) (including the Uniform Resource Locator (URL)), the Digital Object
+   * Recommended best practice is to identify the resource by means of a string
+   * or number conforming to a formal identification system. Example formal
+   * identification systems include the Uniform Resource Identifier (URI)
+   * (including the Uniform Resource Locator (URL)), the Digital Object
    * Identifier (DOI) and the International Standard Book Number (ISBN).
    */
   public static final String IDENTIFIER = "identifier";
-  
+
   /**
    * Date on which the resource was changed.
    */
   public static final String MODIFIED = "modified";
-  
+
   /**
    * An entity responsible for making contributions to the content of the
-   * resource. Examples of a Contributor include a person, an organisation, or
-   * a service. Typically, the name of a Contributor should be used to
-   * indicate the entity.
+   * resource. Examples of a Contributor include a person, an organisation, or a
+   * service. Typically, the name of a Contributor should be used to indicate
+   * the entity.
    */
   public static final String CONTRIBUTOR = "contributor";
-  
+
   /**
-   * The extent or scope of the content of the resource. Coverage will
-   * typically include spatial location (a place name or geographic
-   * coordinates), temporal period (a period label, date, or date range) or
-   * jurisdiction (such as a named administrative entity). Recommended best
-   * practice is to select a value from a controlled vocabulary (for example,
-   * the Thesaurus of Geographic Names [TGN]) and that, where appropriate,
-   * named places or time periods be used in preference to numeric identifiers
-   * such as sets of coordinates or date ranges.
+   * The extent or scope of the content of the resource. Coverage will typically
+   * include spatial location (a place name or geographic coordinates), temporal
+   * period (a period label, date, or date range) or jurisdiction (such as a
+   * named administrative entity). Recommended best practice is to select a
+   * value from a controlled vocabulary (for example, the Thesaurus of
+   * Geographic Names [TGN]) and that, where appropriate, named places or time
+   * periods be used in preference to numeric identifiers such as sets of
+   * coordinates or date ranges.
    */
   public static final String COVERAGE = "coverage";
-  
+
   /**
    * An entity primarily responsible for making the content of the resource.
    * Examples of a Creator include a person, an organisation, or a service.
@@ -78,16 +76,15 @@
    * Typically, the name of a Creator should be used to indicate the entity.
    */
   public static final String CREATOR = "creator";
-  
+
   /**
    * A date associated with an event in the life cycle of the resource.
-   * Typically, Date will be associated with the creation or availability of
-   * the resource. Recommended best practice for encoding the date value is
-   * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
-   * format.
+   * Typically, Date will be associated with the creation or availability of the
+   * resource. Recommended best practice for encoding the date value is defined
+   * in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD format.
    */
   public static final String DATE = "date";
-  
+
   /**
    * An account of the content of the resource. Description may include but is
    * not limited to: an abstract, table of contents, reference to a graphical
@@ -94,16 +91,16 @@
    * representation of content or a free-text account of the content.
    */
   public static final String DESCRIPTION = "description";
-  
+
   /**
    * A language of the intellectual content of the resource. Recommended best
    * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
-   * [ISO639], defines two- and three-letter primary language tags with
-   * optional subtags. Examples include "en" or "eng" for English, "akk" for
-   * Akkadian, and "en-GB" for English used in the United Kingdom.
+   * [ISO639], defines two- and three-letter primary language tags with optional
+   * subtags. Examples include "en" or "eng" for English, "akk" for Akkadian,
+   * and "en-GB" for English used in the United Kingdom.
    */
   public static final String LANGUAGE = "language";
-  
+
   /**
    * An entity responsible for making the resource available. Examples of a
    * Publisher include a person, an organisation, or a service. Typically, the
@@ -110,7 +107,7 @@
    * name of a Publisher should be used to indicate the entity.
    */
   public static final String PUBLISHER = "publisher";
-  
+
   /**
    * A reference to a related resource. Recommended best practice is to
    * reference the resource by means of a string or number conforming to a
@@ -117,48 +114,48 @@
    * formal identification system.
    */
   public static final String RELATION = "relation";
-  
+
   /**
-   * Information about rights held in and over the resource. Typically, a
-   * Rights element will contain a rights management statement for the
-   * resource, or reference a service providing such information. Rights
-   * information often encompasses Intellectual Property Rights (IPR),
-   * Copyright, and various Property Rights. If the Rights element is absent,
-   * no assumptions can be made about the status of these and other rights
-   * with respect to the resource.
+   * Information about rights held in and over the resource. Typically, a Rights
+   * element will contain a rights management statement for the resource, or
+   * reference a service providing such information. Rights information often
+   * encompasses Intellectual Property Rights (IPR), Copyright, and various
+   * Property Rights. If the Rights element is absent, no assumptions can be
+   * made about the status of these and other rights with respect to the
+   * resource.
    */
   public static final String RIGHTS = "rights";
-  
+
   /**
    * A reference to a resource from which the present resource is derived. The
    * present resource may be derived from the Source resource in whole or in
-   * part. Recommended best practice is to reference the resource by means of
-   * a string or number conforming to a formal identification system.
+   * part. Recommended best practice is to reference the resource by means of a
+   * string or number conforming to a formal identification system.
    */
   public static final String SOURCE = "source";
-  
+
   /**
    * The topic of the content of the resource. Typically, a Subject will be
-   * expressed as keywords, key phrases or classification codes that describe
-   * a topic of the resource. Recommended best practice is to select a value
-   * from a controlled vocabulary or formal classification scheme.
+   * expressed as keywords, key phrases or classification codes that describe a
+   * topic of the resource. Recommended best practice is to select a value from
+   * a controlled vocabulary or formal classification scheme.
    */
   public static final String SUBJECT = "subject";
-  
+
   /**
    * A name given to the resource. Typically, a Title will be a name by which
    * the resource is formally known.
    */
   public static final String TITLE = "title";
-  
+
   /**
    * The nature or genre of the content of the resource. Type includes terms
-   * describing general categories, functions, genres, or aggregation levels
-   * for content. Recommended best practice is to select a value from a
-   * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]).
-   * To describe the physical or digital manifestation of the resource, use
-   * the Format element.
+   * describing general categories, functions, genres, or aggregation levels for
+   * content. Recommended best practice is to select a value from a controlled
+   * vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). To describe
+   * the physical or digital manifestation of the resource, use the Format
+   * element.
    */
   public static final String TYPE = "type";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/HttpHeaders.java
===================================================================
--- src/java/org/apache/nutch/metadata/HttpHeaders.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/HttpHeaders.java	(working copy)
@@ -16,14 +16,12 @@
  */
 package org.apache.nutch.metadata;
 
-
-
 /**
  * A collection of HTTP header names.
- *
- * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer
- *      Protocol -- HTTP/1.1 (RFC 2616)</a>
- *
+ * 
+ * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer Protocol
+ *      -- HTTP/1.1 (RFC 2616)</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/metadata/MetaWrapper.java
===================================================================
--- src/java/org/apache/nutch/metadata/MetaWrapper.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/MetaWrapper.java	(working copy)
@@ -28,28 +28,29 @@
 /**
  * This is a simple decorator that adds metadata to any Writable-s that can be
  * serialized by <tt>NutchWritable</tt>. This is useful when data needs to be
- * temporarily enriched during processing, but this
- * temporary metadata doesn't need to be permanently stored after the job is done.
+ * temporarily enriched during processing, but this temporary metadata doesn't
+ * need to be permanently stored after the job is done.
  * 
  * @author Andrzej Bialecki
  */
 public class MetaWrapper extends NutchWritable {
   private Metadata metadata;
-  
+
   public MetaWrapper() {
     super();
     metadata = new Metadata();
   }
-  
+
   public MetaWrapper(Writable instance, Configuration conf) {
     super(instance);
     metadata = new Metadata();
     setConf(conf);
   }
-  
+
   public MetaWrapper(Metadata metadata, Writable instance, Configuration conf) {
     super(instance);
-    if (metadata == null) metadata = new Metadata();
+    if (metadata == null)
+      metadata = new Metadata();
     this.metadata = metadata;
     setConf(conf);
   }
@@ -60,18 +61,24 @@
   public Metadata getMetadata() {
     return metadata;
   }
-  
+
   /**
-   * Add metadata. See {@link Metadata#add(String, String)} for more information.
-   * @param name metadata name
-   * @param value metadata value
+   * Add metadata. See {@link Metadata#add(String, String)} for more
+   * information.
+   * 
+   * @param name
+   *          metadata name
+   * @param value
+   *          metadata value
    */
   public void addMeta(String name, String value) {
     metadata.add(name, value);
   }
-  
+
   /**
-   * Set metadata. See {@link Metadata#set(String, String)} for more information.
+   * Set metadata. See {@link Metadata#set(String, String)} for more
+   * information.
+   * 
    * @param name
    * @param value
    */
@@ -78,9 +85,10 @@
   public void setMeta(String name, String value) {
     metadata.set(name, value);
   }
-  
+
   /**
    * Get metadata. See {@link Metadata#get(String)} for more information.
+   * 
    * @param name
    * @return metadata value
    */
@@ -87,9 +95,11 @@
   public String getMeta(String name) {
     return metadata.get(name);
   }
-  
+
   /**
-   * Get multiple metadata. See {@link Metadata#getValues(String)} for more information.
+   * Get multiple metadata. See {@link Metadata#getValues(String)} for more
+   * information.
+   * 
    * @param name
    * @return multiple values
    */
@@ -96,7 +106,7 @@
   public String[] getMetaValues(String name) {
     return metadata.getValues(name);
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
     metadata = new Metadata();
Index: src/java/org/apache/nutch/metadata/Metadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/Metadata.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/Metadata.java	(working copy)
@@ -27,16 +27,15 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
-
 /**
  * A multi-valued metadata container.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  */
-public class Metadata implements Writable, CreativeCommons,
-DublinCore, HttpHeaders, Nutch, Feed {
+public class Metadata implements Writable, CreativeCommons, DublinCore,
+    HttpHeaders, Nutch, Feed {
 
   /**
    * A map of all metadata attributes.
@@ -43,7 +42,6 @@
    */
   private Map<String, String[]> metadata = null;
 
-
   /**
    * Constructs a new, empty metadata.
    */
@@ -53,9 +51,10 @@
 
   /**
    * Returns true if named value is multivalued.
-   * @param name name of metadata
-   * @return true is named value is multivalued, false if single
-   * value or null
+   * 
+   * @param name
+   *          name of metadata
+   * @return true if named value is multivalued, false if single value or null
    */
   public boolean isMultiValued(final String name) {
     return metadata.get(name) != null && metadata.get(name).length > 1;
@@ -63,6 +62,7 @@
 
   /**
    * Returns an array of the names contained in the metadata.
+   * 
    * @return Metadata names
    */
   public String[] names() {
@@ -70,11 +70,11 @@
   }
 
   /**
-   * Get the value associated to a metadata name.
-   * If many values are assiociated to the specified name, then the first
-   * one is returned.
-   *
-   * @param name of the metadata.
+   * Get the value associated to a metadata name. If many values are associated
+   * to the specified name, then the first one is returned.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the value associated to the specified metadata name.
    */
   public String get(final String name) {
@@ -88,13 +88,15 @@
 
   /**
    * Get the values associated to a metadata name.
-   * @param name of the metadata.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the values associated to a metadata name.
    */
   public String[] getValues(final String name) {
     return _getValues(name);
   }
-  
+
   private String[] _getValues(final String name) {
     String[] values = metadata.get(name);
     if (values == null) {
@@ -104,12 +106,13 @@
   }
 
   /**
-   * Add a metadata name/value mapping.
-   * Add the specified value to the list of values associated to the
-   * specified metadata name.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Add a metadata name/value mapping. Add the specified value to the list of
+   * values associated to the specified metadata name.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void add(final String name, final String value) {
     String[] values = metadata.get(name);
@@ -125,31 +128,37 @@
 
   /**
    * Copy All key-value pairs from properties.
-   * @param properties properties to copy from
+   * 
+   * @param properties
+   *          properties to copy from
    */
   public void setAll(Properties properties) {
     Enumeration<?> names = properties.propertyNames();
     while (names.hasMoreElements()) {
       String name = (String) names.nextElement();
-      metadata.put(name, new String[]{properties.getProperty(name)});
+      metadata.put(name, new String[] { properties.getProperty(name) });
     }
   }
 
   /**
-   * Set metadata name/value.
-   * Associate the specified value to the specified metadata name. If some
-   * previous values were associated to this name, they are removed.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Set metadata name/value. Associate the specified value to the specified
+   * metadata name. If some previous values were associated to this name, they
+   * are removed.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void set(String name, String value) {
-    metadata.put(name, new String[]{value});
+    metadata.put(name, new String[] { value });
   }
 
   /**
    * Remove a metadata and all its associated values.
-   * @param name metadata name to remove
+   * 
+   * @param name
+   *          metadata name to remove
    */
   public void remove(String name) {
     metadata.remove(name);
@@ -157,12 +166,13 @@
 
   /**
    * Returns the number of metadata names in this metadata.
+   * 
    * @return number of metadata names
    */
   public int size() {
     return metadata.size();
   }
-  
+
   /** Remove all mappings from metadata. */
   public void clear() {
     metadata.clear();
@@ -170,7 +180,9 @@
 
   public boolean equals(Object o) {
 
-    if (o == null) { return false; }
+    if (o == null) {
+      return false;
+    }
 
     Metadata other = null;
     try {
@@ -179,7 +191,9 @@
       return false;
     }
 
-    if (other.size() != size()) { return false; }
+    if (other.size() != size()) {
+      return false;
+    }
 
     String[] names = names();
     for (int i = 0; i < names.length; i++) {
@@ -203,10 +217,7 @@
     for (int i = 0; i < names.length; i++) {
       String[] values = _getValues(names[i]);
       for (int j = 0; j < values.length; j++) {
-        buf.append(names[i])
-           .append("=")
-           .append(values[j])
-           .append(" ");
+        buf.append(names[i]).append("=").append(values[j]).append(" ");
       }
     }
     return buf.toString();
Index: src/java/org/apache/nutch/metadata/Nutch.java
===================================================================
--- src/java/org/apache/nutch/metadata/Nutch.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/Nutch.java	(working copy)
@@ -19,20 +19,17 @@
 import org.apache.avro.util.Utf8;
 import org.apache.hadoop.io.Text;
 
-
 /**
  * A collection of Nutch internal metadata constants.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Nutch {
 
-  public static final String ORIGINAL_CHAR_ENCODING =
-          "OriginalCharEncoding";
+  public static final String ORIGINAL_CHAR_ENCODING = "OriginalCharEncoding";
 
-  public static final String CHAR_ENCODING_FOR_CONVERSION =
-          "CharEncodingForConversion";
+  public static final String CHAR_ENCODING_FOR_CONVERSION = "CharEncodingForConversion";
 
   public static final String SIGNATURE_KEY = "nutch.content.digest";
 
@@ -42,20 +39,26 @@
 
   public static final String GENERATE_TIME_KEY = "_ngt_";
 
-  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(GENERATE_TIME_KEY);
+  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(
+      GENERATE_TIME_KEY);
 
   public static final String PROTO_STATUS_KEY = "_pst_";
 
-  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(PROTO_STATUS_KEY);
+  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(
+      PROTO_STATUS_KEY);
 
   public static final String FETCH_TIME_KEY = "_ftk_";
 
   public static final String FETCH_STATUS_KEY = "_fst_";
 
-  /** Sites may request that search engines don't provide access to cached documents. */
+  /**
+   * Sites may request that search engines don't provide access to cached
+   * documents.
+   */
   public static final String CACHING_FORBIDDEN_KEY = "caching.forbidden";
 
-  public static final Utf8 CACHING_FORBIDDEN_KEY_UTF8 = new Utf8(CACHING_FORBIDDEN_KEY);
+  public static final Utf8 CACHING_FORBIDDEN_KEY_UTF8 = new Utf8(
+      CACHING_FORBIDDEN_KEY);
 
   /** Show both original forbidden content and summaries (default). */
   public static final String CACHING_FORBIDDEN_NONE = "none";
@@ -75,8 +78,7 @@
   public static final Utf8 ALL_CRAWL_ID = new Utf8(ALL_BATCH_ID_STR);
 
   public static final String CRAWL_ID_KEY = "storage.crawl.id";
-  
-  
+
   // short constants for cmd-line args
   /** Batch id to select. */
   public static final String ARG_BATCH = "batch";
@@ -110,7 +112,7 @@
   public static final String ARG_CLASS = "class";
   /** Depth (number of cycles) of a crawl. */
   public static final String ARG_DEPTH = "depth";
-  
+
   // short constants for status / results fields
   /** Status / result message. */
   public static final String STAT_MESSAGE = "msg";
Index: src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(revision 1650444)
+++ src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(working copy)
@@ -33,7 +33,7 @@
 
   /**
    * Treshold divider.
-   *
+   * 
    * <code>threshold = searched.length() / TRESHOLD_DIVIDER;</code>
    */
   private static final int TRESHOLD_DIVIDER = 3;
@@ -52,7 +52,7 @@
 
     // Uses following array to fill the metanames index and the
     // metanames list.
-    Class<?>[] spellthese = {HttpHeaders.class};
+    Class<?>[] spellthese = { HttpHeaders.class };
 
     for (Class<?> spellCheckedNames : spellthese) {
       for (Field field : spellCheckedNames.getFields()) {
@@ -73,7 +73,7 @@
 
   /**
    * Normalizes String.
-   *
+   * 
    * @param str
    *          the string to normalize
    * @return normalized String
@@ -102,7 +102,7 @@
    * </ul>
    * If no matching with a well-known metadata name is found, then the original
    * name is returned.
-   *
+   * 
    * @param name
    *          Name to normalize
    * @return normalized name
Index: src/java/org/apache/nutch/net/URLFilter.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLFilter.java	(working copy)
@@ -23,10 +23,9 @@
 // Nutch imports
 import org.apache.nutch.plugin.Pluggable;
 
-
 /**
- * Interface used to limit which URLs enter Nutch.
- * Used by the injector and the db updater.
+ * Interface used to limit which URLs enter Nutch. Used by the injector and the
+ * db updater.
  */
 
 public interface URLFilter extends Pluggable, Configurable {
@@ -33,7 +32,9 @@
   /** The name of the extension point. */
   public final static String X_POINT_ID = URLFilter.class.getName();
 
-  /* Interface for a filter that transforms a URL: it can pass the
-     original URL through or "delete" the URL by returning null */
+  /*
+   * Interface for a filter that transforms a URL: it can pass the original URL
+   * through or "delete" the URL by returning null
+   */
   public String filter(String urlString);
 }
Index: src/java/org/apache/nutch/net/URLFilterChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilterChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLFilterChecker.java	(working copy)
@@ -38,23 +38,23 @@
   private Configuration conf;
 
   public URLFilterChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String filterName) throws Exception {
     URLFilter filter = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLFilter.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLFilter.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLFilter.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLFilter.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      filter = (URLFilter)extension.getExtensionInstance();
+      filter = (URLFilter) extension.getExtensionInstance();
       if (filter.getClass().getName().equals(filterName)) {
         break;
       } else {
@@ -63,19 +63,19 @@
     }
 
     if (filter == null)
-      throw new RuntimeException("Filter "+filterName+" not found.");
+      throw new RuntimeException("Filter " + filterName + " not found.");
 
     // jerome : should we keep this behavior?
-    //if (LogFormatter.hasLoggedSevere())
-    //  throw new RuntimeException("Severe error encountered.");
+    // if (LogFormatter.hasLoggedSevere())
+    // throw new RuntimeException("Severe error encountered.");
 
-    System.out.println("Checking URLFilter "+filterName);
+    System.out.println("Checking URLFilter " + filterName);
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
-      String out=filter.filter(line);
-      if(out!=null) {
+    while ((line = in.readLine()) != null) {
+      String out = filter.filter(line);
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
@@ -90,10 +90,10 @@
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
+    while ((line = in.readLine()) != null) {
       URLFilters filters = new URLFilters(this.conf);
       String out = filters.filter(line);
-      if(out!=null) {
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
Index: src/java/org/apache/nutch/net/URLFilters.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLFilters.java	(working copy)
@@ -28,7 +28,8 @@
 import org.apache.nutch.util.ObjectCache;
 
 import org.apache.hadoop.conf.Configuration;
-/** Creates and caches {@link URLFilter} implementing plugins.*/
+
+/** Creates and caches {@link URLFilter} implementing plugins. */
 public class URLFilters {
 
   public static final String URLFILTER_ORDER = "urlfilter.order";
@@ -37,7 +38,8 @@
   public URLFilters(Configuration conf) {
     String order = conf.get(URLFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -60,8 +62,8 @@
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(URLFilter.class.getName(), filterMap.values().toArray(
-              new URLFilter[0]));
+          objectCache.setObject(URLFilter.class.getName(), filterMap.values()
+              .toArray(new URLFilter[0]));
         } else {
           ArrayList<URLFilter> filters = new ArrayList<URLFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -70,13 +72,14 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(URLFilter.class.getName(), filters
-              .toArray(new URLFilter[filters.size()]));
+          objectCache.setObject(URLFilter.class.getName(),
+              filters.toArray(new URLFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+          .getName());
     }
   }
 
Index: src/java/org/apache/nutch/net/URLNormalizer.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizer.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLNormalizer.java	(working copy)
@@ -21,13 +21,17 @@
 
 import org.apache.hadoop.conf.Configurable;
 
-/** Interface used to convert URLs to normal form and optionally perform substitutions */
+/**
+ * Interface used to convert URLs to normal form and optionally perform
+ * substitutions
+ */
 public interface URLNormalizer extends Configurable {
-  
+
   /* Extension ID */
   public static final String X_POINT_ID = URLNormalizer.class.getName();
-  
+
   /* Interface for URL normalization */
-  public String normalize(String urlString, String scope) throws MalformedURLException;
+  public String normalize(String urlString, String scope)
+      throws MalformedURLException;
 
 }
Index: src/java/org/apache/nutch/net/URLNormalizerChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizerChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLNormalizerChecker.java	(working copy)
@@ -36,23 +36,23 @@
   private Configuration conf;
 
   public URLNormalizerChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String normalizerName, String scope) throws Exception {
     URLNormalizer normalizer = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLNormalizer.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLNormalizer.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLNormalizer.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLNormalizer.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      normalizer = (URLNormalizer)extension.getExtensionInstance();
+      normalizer = (URLNormalizer) extension.getExtensionInstance();
       if (normalizer.getClass().getName().equals(normalizerName)) {
         break;
       } else {
@@ -61,7 +61,8 @@
     }
 
     if (normalizer == null)
-      throw new RuntimeException("URLNormalizer "+normalizerName+" not found.");
+      throw new RuntimeException("URLNormalizer " + normalizerName
+          + " not found.");
 
     System.out.println("Checking URLNormalizer " + normalizerName);
 
@@ -79,7 +80,7 @@
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
     URLNormalizers normalizers = new URLNormalizers(conf, scope);
-    while((line = in.readLine()) != null) {
+    while ((line = in.readLine()) != null) {
       String out = normalizers.normalize(line, scope);
       System.out.println(out);
     }
@@ -88,7 +89,7 @@
   public static void main(String[] args) throws Exception {
 
     String usage = "Usage: URLNormalizerChecker [-normalizer <normalizerName>] [-scope <scope>]"
-      + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
+        + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
 
     String normalizerName = null;
     String scope = URLNormalizers.SCOPE_DEFAULT;
@@ -103,7 +104,8 @@
       }
     }
 
-    URLNormalizerChecker checker = new URLNormalizerChecker(NutchConfiguration.create());
+    URLNormalizerChecker checker = new URLNormalizerChecker(
+        NutchConfiguration.create());
     if (normalizerName != null) {
       checker.checkOne(normalizerName, scope);
     } else {
Index: src/java/org/apache/nutch/net/URLNormalizers.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizers.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/URLNormalizers.java	(working copy)
@@ -43,40 +43,55 @@
  * contexts where they are used (note however that they need to be activated
  * first through <tt>plugin.include</tt> property).
  * 
- * <p>There is one global scope defined by default, which consists of all
- * active normalizers. The order in which these normalizers
- * are executed may be defined in "urlnormalizer.order" property, which lists
- * space-separated implementation classes (if this property is missing normalizers
- * will be run in random order). If there are more
- * normalizers activated than explicitly named on this list, the remaining ones
- * will be run in random order after the ones specified on the list are executed.</p>
- * <p>You can define a set of contexts (or scopes) in which normalizers may be
+ * <p>
+ * There is one global scope defined by default, which consists of all active
+ * normalizers. The order in which these normalizers are executed may be defined
+ * in "urlnormalizer.order" property, which lists space-separated implementation
+ * classes (if this property is missing normalizers will be run in random
+ * order). If there are more normalizers activated than explicitly named on this
+ * list, the remaining ones will be run in random order after the ones specified
+ * on the list are executed.
+ * </p>
+ * <p>
+ * You can define a set of contexts (or scopes) in which normalizers may be
  * called. Each scope can have its own list of normalizers (defined in
  * "urlnormalizer.scope.<scope_name>" property) and its own order (defined in
  * "urlnormalizer.order.<scope_name>" property). If any of these properties are
- * missing, default settings are used for the global scope.</p>
- * <p>In case no normalizers are required for any given scope, a
- * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should be used.</p>
- * <p>Each normalizer may further select among many configurations, depending on
- * the scope in which it is called, because the scope name is passed as a parameter
- * to each normalizer. You can also use the same normalizer for many scopes.</p>
- * <p>Several scopes have been defined, and various Nutch tools will attempt using
- * scope-specific normalizers first (and fall back to default config if scope-specific
- * configuration is missing).</p>
- * <p>Normalizers may be run several times, to ensure that modifications introduced
+ * missing, default settings are used for the global scope.
+ * </p>
+ * <p>
+ * In case no normalizers are required for any given scope, a
+ * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should
+ * be used.
+ * </p>
+ * <p>
+ * Each normalizer may further select among many configurations, depending on
+ * the scope in which it is called, because the scope name is passed as a
+ * parameter to each normalizer. You can also use the same normalizer for many
+ * scopes.
+ * </p>
+ * <p>
+ * Several scopes have been defined, and various Nutch tools will attempt using
+ * scope-specific normalizers first (and fall back to default config if
+ * scope-specific configuration is missing).
+ * </p>
+ * <p>
+ * Normalizers may be run several times, to ensure that modifications introduced
  * by normalizers at the end of the list can be further reduced by normalizers
- * executed at the beginning. By default this loop is executed just once - if you want
- * to ensure that all possible combinations have been applied you may want to run
- * this loop up to the number of activated normalizers. This loop count can be configured
- * through <tt>urlnormalizer.loop.count</tt> property. As soon as the url is
- * unchanged the loop will stop and return the result.</p>
+ * executed at the beginning. By default this loop is executed just once - if
+ * you want to ensure that all possible combinations have been applied you may
+ * want to run this loop up to the number of activated normalizers. This loop
+ * count can be configured through <tt>urlnormalizer.loop.count</tt> property.
+ * As soon as the url is unchanged the loop will stop and return the result.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
 public final class URLNormalizers {
-  
-  /** Default scope. If no scope properties are defined then the configuration for
-   * this scope will be used.
+
+  /**
+   * Default scope. If no scope properties are defined then the configuration
+   * for this scope will be used.
    */
   public static final String SCOPE_DEFAULT = "default";
   /** Scope used by {@link org.apache.nutch.crawl.URLPartitioner}. */
@@ -83,7 +98,8 @@
   public static final String SCOPE_PARTITION = "partition";
   /** Scope used by {@link org.apache.nutch.crawl.GeneratorJob}. */
   public static final String SCOPE_GENERATE_HOST_COUNT = "generate_host_count";
-  /** Scope used by {@link org.apache.nutch.fetcher.FetcherJob} when processing
+  /**
+   * Scope used by {@link org.apache.nutch.fetcher.FetcherJob} when processing
    * redirect URLs.
    */
   public static final String SCOPE_FETCHER = "fetcher";
@@ -93,15 +109,18 @@
   public static final String SCOPE_LINKDB = "linkdb";
   /** Scope used by {@link org.apache.nutch.crawl.InjectorJob}. */
   public static final String SCOPE_INJECT = "inject";
-  /** Scope used when constructing new {@link org.apache.nutch.parse.Outlink} instances. */
+  /**
+   * Scope used when constructing new {@link org.apache.nutch.parse.Outlink}
+   * instances.
+   */
   public static final String SCOPE_OUTLINK = "outlink";
-  
 
-  public static final Logger LOG = LoggerFactory.getLogger(URLNormalizers.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(URLNormalizers.class);
 
   /* Empty extension list for caching purposes. */
   private final List<Extension> EMPTY_EXTENSION_LIST = Collections.emptyList();
-  
+
   private final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0];
 
   private Configuration conf;
@@ -109,37 +128,39 @@
   private ExtensionPoint extensionPoint;
 
   private URLNormalizer[] normalizers;
-  
+
   private int loopCount;
 
   public URLNormalizers(Configuration conf, String scope) {
     this.conf = conf;
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
-            URLNormalizer.X_POINT_ID);
+        URLNormalizer.X_POINT_ID);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (this.extensionPoint == null) {
       throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID
-              + " not found.");
+          + " not found.");
     }
 
-    normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + scope);
+    normalizers = (URLNormalizer[]) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_" + scope);
     if (normalizers == null) {
       normalizers = getURLNormalizers(scope);
     }
     if (normalizers == EMPTY_NORMALIZERS) {
-      normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
+      normalizers = (URLNormalizer[]) objectCache
+          .getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
       if (normalizers == null) {
         normalizers = getURLNormalizers(SCOPE_DEFAULT);
       }
     }
-    
+
     loopCount = conf.getInt("urlnormalizer.loop.count", 1);
   }
 
   /**
-   * Function returns an array of {@link URLNormalizer}s for a given scope,
-   * with a specified order.
+   * Function returns an array of {@link URLNormalizer}s for a given scope, with
+   * a specified order.
    * 
    * @param scope
    *          The scope to return the <code>Array</code> of
@@ -151,13 +172,14 @@
   URLNormalizer[] getURLNormalizers(String scope) {
     List<Extension> extensions = getExtensions(scope);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (extensions == EMPTY_EXTENSION_LIST) {
       return EMPTY_NORMALIZERS;
     }
-    
-    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());
 
+    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(
+        extensions.size());
+
     Iterator<Extension> it = extensions.iterator();
     while (it.hasNext()) {
       Extension ext = it.next();
@@ -174,14 +196,13 @@
       } catch (PluginRuntimeException e) {
         e.printStackTrace();
         LOG.warn("URLNormalizers:PluginRuntimeException when "
-                + "initializing url normalizer plugin "
-                + ext.getDescriptor().getPluginId()
-                + " instance in getURLNormalizers "
-                + "function: attempting to continue instantiating plugins");
+            + "initializing url normalizer plugin "
+            + ext.getDescriptor().getPluginId()
+            + " instance in getURLNormalizers "
+            + "function: attempting to continue instantiating plugins");
       }
     }
-    return normalizers.toArray(new URLNormalizer[normalizers
-            .size()]);
+    return normalizers.toArray(new URLNormalizer[normalizers.size()]);
   }
 
   /**
@@ -196,9 +217,8 @@
   @SuppressWarnings("unchecked")
   private List<Extension> getExtensions(String scope) {
     ObjectCache objectCache = ObjectCache.get(conf);
-    List<Extension> extensions = 
-      (List<Extension>) objectCache.getObject(URLNormalizer.X_POINT_ID + "_x_"
-                                                + scope);
+    List<Extension> extensions = (List<Extension>) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_x_" + scope);
 
     // Just compare the reference:
     // if this is the empty list, we know we will find no extension.
@@ -209,11 +229,13 @@
     if (extensions == null) {
       extensions = findExtensions(scope);
       if (extensions != null) {
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, extensions);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            extensions);
       } else {
         // Put the empty extension list into cache
         // to remember we don't know any related extension.
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, EMPTY_EXTENSION_LIST);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            EMPTY_EXTENSION_LIST);
         extensions = EMPTY_EXTENSION_LIST;
       }
     }
@@ -233,7 +255,8 @@
 
     String[] orders = null;
     String orderlist = conf.get("urlnormalizer.order." + scope);
-    if (orderlist == null) orderlist = conf.get("urlnormalizer.order");
+    if (orderlist == null)
+      orderlist = conf.get("urlnormalizer.order");
     if (orderlist != null && !orderlist.trim().equals("")) {
       orders = orderlist.split("\\s+");
     }
@@ -271,13 +294,17 @@
 
   /**
    * Normalize
-   * @param urlString The URL string to normalize.
-   * @param scope The given scope.
+   * 
+   * @param urlString
+   *          The URL string to normalize.
+   * @param scope
+   *          The given scope.
    * @return A normalized String, using the given <code>scope</code>
-   * @throws MalformedURLException If the given URL string is malformed.
+   * @throws MalformedURLException
+   *           If the given URL string is malformed.
    */
   public String normalize(String urlString, String scope)
-          throws MalformedURLException {
+      throws MalformedURLException {
     // optionally loop several times, and break if no further changes
     String initialString = urlString;
     for (int k = 0; k < loopCount; k++) {
@@ -286,7 +313,8 @@
           return null;
         urlString = this.normalizers[i].normalize(urlString, scope);
       }
-      if (initialString.equals(urlString)) break;
+      if (initialString.equals(urlString))
+        break;
       initialString = urlString;
     }
     return urlString;
Index: src/java/org/apache/nutch/net/package-info.java
===================================================================
--- src/java/org/apache/nutch/net/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * and {@link org.apache.nutch.net.URLNormalizer normalizers}.
  */
 package org.apache.nutch.net;
+
Index: src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(working copy)
@@ -26,15 +26,15 @@
 
 /**
  * class to handle HTTP dates.
- *
+ * 
  * Modified from FastHttpDateFormat.java in jakarta-tomcat.
- *
+ * 
  * @author John Xing
  */
 public class HttpDateFormat {
 
-  protected static SimpleDateFormat format = 
-    new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
+  protected static SimpleDateFormat format = new SimpleDateFormat(
+      "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
 
   /**
    * HTTP date uses TimeZone GMT
@@ -43,29 +43,29 @@
     format.setTimeZone(TimeZone.getTimeZone("GMT"));
   }
 
-  //HttpDate (long t) {
-  //}
+  // HttpDate (long t) {
+  // }
 
-  //HttpDate (String s) {
-  //}
+  // HttpDate (String s) {
+  // }
 
-//  /**
-//   * Get the current date in HTTP format.
-//   */
-//  public static String getCurrentDate() {
-//
-//    long now = System.currentTimeMillis();
-//    if ((now - currentDateGenerated) > 1000) {
-//        synchronized (format) {
-//            if ((now - currentDateGenerated) > 1000) {
-//                currentDateGenerated = now;
-//                currentDate = format.format(new Date(now));
-//            }
-//        }
-//    }
-//    return currentDate;
-//
-//  }
+  // /**
+  // * Get the current date in HTTP format.
+  // */
+  // public static String getCurrentDate() {
+  //
+  // long now = System.currentTimeMillis();
+  // if ((now - currentDateGenerated) > 1000) {
+  // synchronized (format) {
+  // if ((now - currentDateGenerated) > 1000) {
+  // currentDateGenerated = now;
+  // currentDate = format.format(new Date(now));
+  // }
+  // }
+  // }
+  // return currentDate;
+  //
+  // }
 
   /**
    * Get the HTTP format of the specified date.
Index: src/java/org/apache/nutch/net/protocols/ProtocolException.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/ProtocolException.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/ProtocolException.java	(working copy)
@@ -21,6 +21,7 @@
 
 /**
  * Base exception for all protocol handlers
+ * 
  * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead.
  */
 @Deprecated
@@ -27,7 +28,6 @@
 @SuppressWarnings("serial")
 public class ProtocolException extends Exception implements Serializable {
 
-
   public ProtocolException() {
     super();
   }
Index: src/java/org/apache/nutch/net/protocols/Response.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/Response.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/Response.java	(working copy)
@@ -23,12 +23,11 @@
 import org.apache.nutch.metadata.HttpHeaders;
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
- * A response interface.  Makes all protocols model HTTP.
+ * A response interface. Makes all protocols model HTTP.
  */
 public interface Response extends HttpHeaders {
-  
+
   /** Returns the URL used to retrieve this response. */
   public URL getUrl();
 
@@ -40,7 +39,7 @@
 
   /** Returns all the headers. */
   public Metadata getHeaders();
-  
+
   /** Returns the full content of the response. */
   public byte[] getContent();
 
Index: src/java/org/apache/nutch/net/protocols/package-info.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/net/protocols/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * interface, sea also {@link org.apache.nutch.protocol}.
  */
 package org.apache.nutch.net.protocols;
+
Index: src/java/org/apache/nutch/parse/HTMLMetaTags.java
===================================================================
--- src/java/org/apache/nutch/parse/HTMLMetaTags.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/HTMLMetaTags.java	(working copy)
@@ -24,8 +24,8 @@
 import org.apache.nutch.metadata.Metadata;
 
 /**
- * This class holds the information about HTML "meta" tags extracted from 
- * a page. Some special tags have convenience methods for easy checking.
+ * This class holds the information about HTML "meta" tags extracted from a
+ * page. Some special tags have convenience methods for easy checking.
  */
 public class HTMLMetaTags {
   private boolean noIndex = false;
@@ -156,8 +156,8 @@
   }
 
   /**
-   * A convenience method. Returns the current value of <code>refreshTime</code>.
-   * The value may be invalid if {@link #getRefresh()}returns
+   * A convenience method. Returns the current value of <code>refreshTime</code>
+   * . The value may be invalid if {@link #getRefresh()}returns
    * <code>false</code>.
    */
   public int getRefreshTime() {
@@ -179,16 +179,12 @@
   public Properties getHttpEquivTags() {
     return httpEquivTags;
   }
-  
+
   public String toString() {
     StringBuffer sb = new StringBuffer();
-    sb.append("base=" + baseHref
-            + ", noCache=" + noCache
-            + ", noFollow=" + noFollow
-            + ", noIndex=" + noIndex
-            + ", refresh=" + refresh
-            + ", refreshHref=" + refreshHref + "\n"
-            );
+    sb.append("base=" + baseHref + ", noCache=" + noCache + ", noFollow="
+        + noFollow + ", noIndex=" + noIndex + ", refresh=" + refresh
+        + ", refreshHref=" + refreshHref + "\n");
     sb.append(" * general tags:\n");
     String[] names = generalTags.names();
     for (String name : names) {
@@ -198,7 +194,7 @@
     sb.append(" * http-equiv tags:\n");
     Iterator<Object> it = httpEquivTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + httpEquivTags.get(key) + "\n");
     }
     return sb.toString();
Index: src/java/org/apache/nutch/parse/Outlink.java
===================================================================
--- src/java/org/apache/nutch/parse/Outlink.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/Outlink.java	(working copy)
@@ -28,11 +28,13 @@
   private String toUrl;
   private String anchor;
 
-  public Outlink() {}
+  public Outlink() {
+  }
 
   public Outlink(String toUrl, String anchor) throws MalformedURLException {
     this.toUrl = toUrl;
-    if (anchor == null) anchor = "";
+    if (anchor == null)
+      anchor = "";
     this.anchor = anchor;
   }
 
@@ -43,8 +45,8 @@
 
   /** Skips over one Outlink in the input. */
   public static void skip(DataInput in) throws IOException {
-    Text.skip(in);                                // skip toUrl
-    Text.skip(in);                                // skip anchor
+    Text.skip(in); // skip toUrl
+    Text.skip(in); // skip anchor
   }
 
   public void write(DataOutput out) throws IOException {
@@ -58,21 +60,24 @@
     return outlink;
   }
 
-  public String getToUrl() { return toUrl; }
-  public String getAnchor() { return anchor; }
+  public String getToUrl() {
+    return toUrl;
+  }
 
+  public String getAnchor() {
+    return anchor;
+  }
 
   public boolean equals(Object o) {
     if (!(o instanceof Outlink))
       return false;
-    Outlink other = (Outlink)o;
-    return
-      this.toUrl.equals(other.toUrl) &&
-      this.anchor.equals(other.anchor);
+    Outlink other = (Outlink) o;
+    return this.toUrl.equals(other.toUrl) && this.anchor.equals(other.anchor);
   }
 
   public String toString() {
-    return "toUrl: " + toUrl + " anchor: " + anchor;  // removed "\n". toString, not printLine... WD.
+    return "toUrl: " + toUrl + " anchor: " + anchor; // removed "\n". toString,
+                                                     // not printLine... WD.
   }
 
 }
Index: src/java/org/apache/nutch/parse/OutlinkExtractor.java
===================================================================
--- src/java/org/apache/nutch/parse/OutlinkExtractor.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/OutlinkExtractor.java	(working copy)
@@ -34,8 +34,8 @@
 import org.apache.oro.text.regex.Perl5Matcher;
 
 /**
- * Extractor to extract {@link org.apache.nutch.parse.Outlink}s 
- * / URLs from plain text using Regular Expressions.
+ * Extractor to extract {@link org.apache.nutch.parse.Outlink}s / URLs from
+ * plain text using Regular Expressions.
  * 
  * @see <a
  *      href="http://wiki.java.net/bin/view/Javapedia/RegularExpressions">Comparison
@@ -48,7 +48,8 @@
  * @since 0.7
  */
 public class OutlinkExtractor {
-  private static final Logger LOG = LoggerFactory.getLogger(OutlinkExtractor.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(OutlinkExtractor.class);
 
   /**
    * Regex pattern to get URLs within a plain text.
@@ -55,17 +56,19 @@
    * 
    * @see <a
    *      href="http://www.truerwords.net/articles/ut/urlactivation.html">http://www.truerwords.net/articles/ut/urlactivation.html
+   *
    *      </a>
    */
-  private static final String URL_PATTERN = 
-    "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
+  private static final String URL_PATTERN = "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
 
   /**
-   * Extracts <code>Outlink</code> from given plain text.
-   * Applying this method to non-plain-text can result in extremely lengthy
-   * runtimes for parasitic cases (postscript is a known example).
-   * @param plainText  the plain text from wich URLs should be extracted.
+   * Extracts <code>Outlink</code> from given plain text. Applying this method
+   * to non-plain-text can result in extremely lengthy runtimes for parasitic
+   * cases (postscript is a known example).
    * 
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
   public static Outlink[] getOutlinks(final String plainText, Configuration conf) {
@@ -73,15 +76,18 @@
   }
 
   /**
-   * Extracts <code>Outlink</code> from given plain text and adds anchor
-   * to the extracted <code>Outlink</code>s
+   * Extracts <code>Outlink</code> from given plain text and adds anchor to the
+   * extracted <code>Outlink</code>s
    * 
-   * @param plainText the plain text from wich URLs should be extracted.
-   * @param anchor    the anchor of the url
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * @param anchor
+   *          the anchor of the url
    * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
-  public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
+  public static Outlink[] getOutlinks(final String plainText, String anchor,
+      Configuration conf) {
     long start = System.currentTimeMillis();
     final List<Outlink> outlinks = new ArrayList<Outlink>();
 
@@ -97,11 +103,11 @@
       MatchResult result;
       String url;
 
-      //loop the matches
+      // loop the matches
       while (matcher.contains(input, pattern)) {
         // if this is taking too long, stop matching
-        //   (SHOULD really check cpu time used so that heavily loaded systems
-        //   do not unnecessarily hit this limit.)
+        // (SHOULD really check cpu time used so that heavily loaded systems
+        // do not unnecessarily hit this limit.)
         if (System.currentTimeMillis() - start >= 60000L) {
           if (LOG.isWarnEnabled()) {
             LOG.warn("Time limit exceeded for getOutLinks");
@@ -117,13 +123,16 @@
         }
       }
     } catch (Exception ex) {
-      // if the matcher fails (perhaps a malformed URL) we just log it and move on
-      if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
+      // if the matcher fails (perhaps a malformed URL) we just log it and move
+      // on
+      if (LOG.isErrorEnabled()) {
+        LOG.error("getOutlinks", ex);
+      }
     }
 
     final Outlink[] retval;
 
-    //create array of the Outlinks
+    // create array of the Outlinks
     if (outlinks != null && outlinks.size() > 0) {
       retval = outlinks.toArray(new Outlink[0]);
     } else {
@@ -132,7 +141,6 @@
 
     return retval;
   }
-  
 
   /**
    * Extracts outlinks from a plain text. <br />
@@ -162,7 +170,7 @@
     // url = re.getParen(0);
     //
     // if (LOG.isTraceEnabled()) {
-    //   LOG.trace("Extracted url: " + url);
+    // LOG.trace("Extracted url: " + url);
     // }
     //
     // try {
@@ -192,9 +200,8 @@
   }
 
   /**
-   * Extracts outlinks from a plain text.
-   * </p>
-   * This Method takes the JDK5 Regexp API.
+   * Extracts outlinks from a plain text. This method takes the JDK5 Regexp
+   * API.
    * 
    * @param plainText
    * 
@@ -243,5 +250,5 @@
     //
     // return retval;
   }
- 
+
 }
Index: src/java/org/apache/nutch/parse/Parse.java
===================================================================
--- src/java/org/apache/nutch/parse/Parse.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/Parse.java	(working copy)
@@ -16,7 +16,6 @@
  ******************************************************************************/
 package org.apache.nutch.parse;
 
-
 public class Parse {
 
   private String text;
Index: src/java/org/apache/nutch/parse/ParseCallable.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseCallable.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseCallable.java	(working copy)
@@ -24,7 +24,7 @@
   private Parser p;
   private WebPage content;
   private String url;
-  
+
   public ParseCallable(Parser p, WebPage content, String url) {
     this.p = p;
     this.content = content;
@@ -34,5 +34,5 @@
   @Override
   public Parse call() throws Exception {
     return p.getParse(url, content);
-  }    
+  }
 }
Index: src/java/org/apache/nutch/parse/ParseFilter.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseFilter.java	(working copy)
@@ -22,18 +22,19 @@
 import org.apache.nutch.storage.WebPage;
 import org.w3c.dom.DocumentFragment;
 
-
-/** Extension point for DOM-based parsers.  Permits one to add additional
- * metadata to parses provided by the html or tika plugins.  All plugins found which implement this extension
- * point are run sequentially on the parse.
+/**
+ * Extension point for DOM-based parsers. Permits one to add additional metadata
+ * to parses provided by the html or tika plugins. All plugins found which
+ * implement this extension point are run sequentially on the parse.
  */
 public interface ParseFilter extends FieldPluggable, Configurable {
   /** The name of the extension point. */
   final static String X_POINT_ID = ParseFilter.class.getName();
 
-  /** Adds metadata or otherwise modifies a parse, given
-   * the DOM tree of a page. */
-  Parse filter(String url, WebPage page, Parse parse,
-                    HTMLMetaTags metaTags, DocumentFragment doc);
+  /**
+   * Adds metadata or otherwise modifies a parse, given the DOM tree of a page.
+   */
+  Parse filter(String url, WebPage page, Parse parse, HTMLMetaTags metaTags,
+      DocumentFragment doc);
 
 }
Index: src/java/org/apache/nutch/parse/ParseFilters.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseFilters.java	(working copy)
@@ -31,7 +31,7 @@
 import org.apache.nutch.util.ObjectCache;
 import org.w3c.dom.DocumentFragment;
 
-/** Creates and caches {@link ParseFilter} implementing plugins.*/
+/** Creates and caches {@link ParseFilter} implementing plugins. */
 public class ParseFilters {
 
   private ParseFilter[] parseFilters;
@@ -41,7 +41,8 @@
   public ParseFilters(Configuration conf) {
     String order = conf.get(HTMLPARSEFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class.getName());
+    this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class
+        .getName());
     if (parseFilters == null) {
       /*
        * If ordered filters are required, prepare array of filters based on
@@ -51,21 +52,23 @@
       if (order != null && !order.trim().equals("")) {
         orderedFilters = order.split("\\s+");
       }
-      HashMap<String, ParseFilter> filterMap =
-        new HashMap<String, ParseFilter>();
+      HashMap<String, ParseFilter> filterMap = new HashMap<String, ParseFilter>();
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ParseFilter.X_POINT_ID);
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ParseFilter.X_POINT_ID);
         if (point == null)
           throw new RuntimeException(ParseFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ParseFilter parseFilter = (ParseFilter) extension.getExtensionInstance();
+          ParseFilter parseFilter = (ParseFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(parseFilter.getClass().getName())) {
             filterMap.put(parseFilter.getClass().getName(), parseFilter);
           }
         }
-        ParseFilter[] htmlParseFilters = filterMap.values().toArray(new ParseFilter[filterMap.size()]);
+        ParseFilter[] htmlParseFilters = filterMap.values().toArray(
+            new ParseFilter[filterMap.size()]);
         /*
          * If no ordered filters required, just get the filters in an
          * indeterminate order
@@ -77,19 +80,19 @@
         else {
           ArrayList<ParseFilter> filters = new ArrayList<ParseFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
-            ParseFilter filter = filterMap
-            .get(orderedFilters[i]);
+            ParseFilter filter = filterMap.get(orderedFilters[i]);
             if (filter != null) {
               filters.add(filter);
             }
           }
-          objectCache.setObject(ParseFilter.class.getName(), filters
-              .toArray(new ParseFilter[filters.size()]));
+          objectCache.setObject(ParseFilter.class.getName(),
+              filters.toArray(new ParseFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class.getName());
+      this.parseFilters = (ParseFilter[]) objectCache
+          .getObject(ParseFilter.class.getName());
     }
   }
 
Index: src/java/org/apache/nutch/parse/ParsePluginList.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginList.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParsePluginList.java	(working copy)
@@ -22,25 +22,23 @@
 import java.util.List;
 import java.util.Map;
 
-
 /**
  * This class represents a natural ordering for which parsing plugin should get
  * called for a particular mimeType. It provides methods to store the
  * parse-plugins.xml data, and methods to retreive the name of the appropriate
  * parsing plugin for a contentType.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 public class ParsePluginList {
-  
+
   /* a map to link mimeType to an ordered list of parsing plugins */
   private Map<String, List<String>> fMimeTypeToPluginMap = null;
-  
+
   /* A list of aliases */
   private Map<String, String> aliases = null;
-  
-  
+
   /**
    * Constructs a new ParsePluginList
    */
@@ -48,7 +46,7 @@
     fMimeTypeToPluginMap = new HashMap<String, List<String>>();
     aliases = new HashMap<String, String>();
   }
-  
+
   public List<String> getPluginList(String mimeType) {
     return fMimeTypeToPluginMap.get(mimeType);
   }
@@ -56,18 +54,18 @@
   void setAliases(Map<String, String> aliases) {
     this.aliases = aliases;
   }
-  
+
   public Map<String, String> getAliases() {
     return aliases;
   }
-  
+
   void setPluginList(String mimeType, List<String> l) {
     fMimeTypeToPluginMap.put(mimeType, l);
   }
-  
+
   List<String> getSupportedMimeTypes() {
-    return Arrays.asList(fMimeTypeToPluginMap.keySet().toArray(
-            new String[] {}));
+    return Arrays
+        .asList(fMimeTypeToPluginMap.keySet().toArray(new String[] {}));
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/ParsePluginsReader.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginsReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParsePluginsReader.java	(working copy)
@@ -42,19 +42,19 @@
 // Nutch imports
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /**
  * A reader to load the information stored in the
  * <code>$NUTCH_HOME/conf/parse-plugins.xml</code> file.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 public class ParsePluginsReader {
-  
+
   /* our log stream */
-  public static final Logger LOG = LoggerFactory.getLogger(ParsePluginsReader.class);
-  
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ParsePluginsReader.class);
+
   /** The property name of the parse-plugins location */
   private static final String PP_FILE_PROP = "parse.plugin.file";
 
@@ -61,31 +61,31 @@
   /** the parse-plugins file */
   private String fParsePluginsFile = null;
 
-  
   /**
    * Constructs a new ParsePluginsReader
    */
-  public ParsePluginsReader() { }
-  
+  public ParsePluginsReader() {
+  }
+
   /**
    * Reads the <code>parse-plugins.xml</code> file and returns the
    * {@link #ParsePluginList} defined by it.
-   *
+   * 
    * @return A {@link #ParsePluginList} specified by the
    *         <code>parse-plugins.xml</code> file.
    * @throws Exception
-   *             If any parsing error occurs.
+   *           If any parsing error occurs.
    */
   public ParsePluginList parse(Configuration conf) {
-    
+
     ParsePluginList pList = new ParsePluginList();
-    
+
     // open up the XML file
     DocumentBuilderFactory factory = null;
     DocumentBuilder parser = null;
     Document document = null;
     InputSource inputSource = null;
-    
+
     InputStream ppInputStream = null;
     if (fParsePluginsFile != null) {
       URL parsePluginUrl = null;
@@ -94,18 +94,17 @@
         ppInputStream = parsePluginUrl.openStream();
       } catch (Exception e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Unable to load parse plugins file from URL " +
-                   "[" + fParsePluginsFile + "]. Reason is [" + e + "]");
+          LOG.warn("Unable to load parse plugins file from URL " + "["
+              + fParsePluginsFile + "]. Reason is [" + e + "]");
         }
         return pList;
       }
     } else {
-      ppInputStream = conf.getConfResourceAsInputStream(
-                          conf.get(PP_FILE_PROP));
+      ppInputStream = conf.getConfResourceAsInputStream(conf.get(PP_FILE_PROP));
     }
-    
+
     inputSource = new InputSource(ppInputStream);
-    
+
     try {
       factory = DocumentBuilderFactory.newInstance();
       parser = factory.newDocumentBuilder();
@@ -112,30 +111,30 @@
       document = parser.parse(inputSource);
     } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." +
-                 "Reason is [" + e + "]");
+        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." + "Reason is ["
+            + e + "]");
       }
       return null;
     }
-    
+
     Element parsePlugins = document.getDocumentElement();
-    
+
     // build up the alias hash map
     Map<String, String> aliases = getAliases(parsePlugins);
     // And store it on the parse plugin list
     pList.setAliases(aliases);
-     
+
     // get all the mime type nodes
     NodeList mimeTypes = parsePlugins.getElementsByTagName("mimeType");
-    
+
     // iterate through the mime types
     for (int i = 0; i < mimeTypes.getLength(); i++) {
       Element mimeType = (Element) mimeTypes.item(i);
       String mimeTypeStr = mimeType.getAttribute("name");
-      
+
       // for each mimeType, get the plugin list
       NodeList pluginList = mimeType.getElementsByTagName("plugin");
-      
+
       // iterate through the plugins, add them in order read
       // OR if they have a special order="" attribute, then hold those in
       // a separate list, and then insert them into the final list at the
@@ -142,8 +141,8 @@
       // order specified
       if (pluginList != null && pluginList.getLength() > 0) {
         List<String> plugList = new ArrayList<String>(pluginList.getLength());
-        
-        for (int j = 0; j<pluginList.getLength(); j++) {
+
+        for (int j = 0; j < pluginList.getLength(); j++) {
           Element plugin = (Element) pluginList.item(j);
           String pluginId = plugin.getAttribute("id");
           String extId = aliases.get(pluginId);
@@ -163,65 +162,65 @@
             plugList.add(extId);
           }
         }
-        
+
         // now add the plugin list and map it to this mimeType
         pList.setPluginList(mimeTypeStr, plugList);
-        
+
       } else if (LOG.isWarnEnabled()) {
         LOG.warn("ParsePluginsReader:ERROR:no plugins defined for mime type: "
-                 + mimeTypeStr + ", continuing parse");
+            + mimeTypeStr + ", continuing parse");
       }
     }
     return pList;
   }
-  
+
   /**
    * Tests parsing of the parse-plugins.xml file. An alternative name for the
-   * file can be specified via the <code>--file</code> option, although the
-   * file must be located in the <code>$NUTCH_HOME/conf</code> directory.
-   *
+   * file can be specified via the <code>--file</code> option, although the file
+   * must be located in the <code>$NUTCH_HOME/conf</code> directory.
+   * 
    * @param args
-   *            Currently only the --file argument to specify an alternative
-   *            name for the parse-plugins.xml file is supported.
+   *          Currently only the --file argument to specify an alternative name
+   *          for the parse-plugins.xml file is supported.
    */
   public static void main(String[] args) throws Exception {
     String parsePluginFile = null;
     String usage = "ParsePluginsReader [--file <parse plugin file location>]";
-    
-    if (( args.length != 0 && args.length != 2 )
+
+    if ((args.length != 0 && args.length != 2)
         || (args.length == 2 && !"--file".equals(args[0]))) {
       System.err.println(usage);
       System.exit(1);
     }
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("--file")) {
         parsePluginFile = args[++i];
       }
     }
-    
+
     ParsePluginsReader reader = new ParsePluginsReader();
-    
+
     if (parsePluginFile != null) {
       reader.setFParsePluginsFile(parsePluginFile);
     }
-    
+
     ParsePluginList prefs = reader.parse(NutchConfiguration.create());
-    
+
     for (String mimeType : prefs.getSupportedMimeTypes()) {
-      
+
       System.out.println("MIMETYPE: " + mimeType);
       List<String> plugList = prefs.getPluginList(mimeType);
-      
+
       System.out.println("EXTENSION IDs:");
-      
+
       for (String j : plugList) {
         System.out.println(j);
       }
     }
-    
+
   }
-  
+
   /**
    * @return Returns the fParsePluginsFile.
    */
@@ -228,20 +227,20 @@
   public String getFParsePluginsFile() {
     return fParsePluginsFile;
   }
-  
+
   /**
    * @param parsePluginsFile
-   *            The fParsePluginsFile to set.
+   *          The fParsePluginsFile to set.
    */
   public void setFParsePluginsFile(String parsePluginsFile) {
     fParsePluginsFile = parsePluginsFile;
   }
-  
+
   private Map<String, String> getAliases(Element parsePluginsRoot) {
 
     Map<String, String> aliases = new HashMap<String, String>();
     NodeList aliasRoot = parsePluginsRoot.getElementsByTagName("aliases");
-	  
+
     if (aliasRoot == null || (aliasRoot != null && aliasRoot.getLength() == 0)) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("No aliases defined in parse-plugins.xml!");
@@ -248,7 +247,7 @@
       }
       return aliases;
     }
-	  
+
     if (aliasRoot.getLength() > 1) {
       // log a warning, but try and continue processing
       if (LOG.isWarnEnabled()) {
@@ -255,18 +254,18 @@
         LOG.warn("There should only be one \"aliases\" tag in parse-plugins.xml");
       }
     }
-	  
-    Element aliasRootElem = (Element)aliasRoot.item(0);
+
+    Element aliasRootElem = (Element) aliasRoot.item(0);
     NodeList aliasElements = aliasRootElem.getElementsByTagName("alias");
-	  
+
     if (aliasElements != null && aliasElements.getLength() > 0) {
-      for (int i=0; i<aliasElements.getLength(); i++) {
-        Element aliasElem = (Element)aliasElements.item(i);
-	String parsePluginId = aliasElem.getAttribute("name");
-	String extensionId = aliasElem.getAttribute("extension-id");
+      for (int i = 0; i < aliasElements.getLength(); i++) {
+        Element aliasElem = (Element) aliasElements.item(i);
+        String parsePluginId = aliasElem.getAttribute("name");
+        String extensionId = aliasElem.getAttribute("extension-id");
         if (LOG.isTraceEnabled()) {
-          LOG.trace("Found alias: plugin-id: " + parsePluginId +
-                    ", extension-id: " + extensionId);
+          LOG.trace("Found alias: plugin-id: " + parsePluginId
+              + ", extension-id: " + extensionId);
         }
         if (parsePluginId != null && extensionId != null) {
           aliases.put(parsePluginId, extensionId);
@@ -275,5 +274,5 @@
     }
     return aliases;
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/ParseStatusCodes.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusCodes.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseStatusCodes.java	(working copy)
@@ -22,44 +22,52 @@
   // Primary status codes:
 
   /** Parsing was not performed. */
-  public static final byte NOTPARSED       = 0;
+  public static final byte NOTPARSED = 0;
   /** Parsing succeeded. */
-  public static final byte SUCCESS         = 1;
+  public static final byte SUCCESS = 1;
   /** General failure. There may be a more specific error message in arguments. */
-  public static final byte FAILED          = 2;
+  public static final byte FAILED = 2;
 
-  public static final String[] majorCodes = {
-    "notparsed",
-    "success",
-    "failed"
-  };
+  public static final String[] majorCodes = { "notparsed", "success", "failed" };
 
   // Secondary success codes go here:
 
-  public static final short SUCCESS_OK                = 0;
+  public static final short SUCCESS_OK = 0;
 
-  /** Parsed content contains a directive to redirect to another URL.
-   * The target URL can be retrieved from the arguments.
+  /**
+   * Parsed content contains a directive to redirect to another URL. The target
+   * URL can be retrieved from the arguments.
    */
-  public static final short SUCCESS_REDIRECT          = 100;
+  public static final short SUCCESS_REDIRECT = 100;
 
   // Secondary failure codes go here:
 
-  /** Parsing failed. An Exception occured (which may be retrieved from the arguments). */
-  public static final short FAILED_EXCEPTION          = 200;
-  /** Parsing failed. Content was truncated, but the parser cannot handle incomplete content. */
-  public static final short FAILED_TRUNCATED          = 202;
-  /** Parsing failed. Invalid format - the content may be corrupted or of wrong type. */
-  public static final short FAILED_INVALID_FORMAT     = 203;
-  /** Parsing failed. Other related parts of the content are needed to complete
+  /**
+   * Parsing failed. An Exception occurred (which may be retrieved from the
+   * arguments).
+   */
+  public static final short FAILED_EXCEPTION = 200;
+  /**
+   * Parsing failed. Content was truncated, but the parser cannot handle
+   * incomplete content.
+   */
+  public static final short FAILED_TRUNCATED = 202;
+  /**
+   * Parsing failed. Invalid format - the content may be corrupted or of wrong
+   * type.
+   */
+  public static final short FAILED_INVALID_FORMAT = 203;
+  /**
+   * Parsing failed. Other related parts of the content are needed to complete
    * parsing. The list of URLs to missing parts may be provided in arguments.
    * The Fetcher may decide to fetch these parts at once, then put them into
    * Content.metadata, and supply them for re-parsing.
    */
-  public static final short FAILED_MISSING_PARTS      = 204;
-  /** Parsing failed. There was no content to be parsed - probably caused
-   * by errors at protocol stage.
+  public static final short FAILED_MISSING_PARTS = 204;
+  /**
+   * Parsing failed. There was no content to be parsed - probably caused by
+   * errors at protocol stage.
    */
-  public static final short FAILED_MISSING_CONTENT    = 205;
-  
+  public static final short FAILED_MISSING_CONTENT = 205;
+
 }
Index: src/java/org/apache/nutch/parse/ParseStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseStatusUtils.java	(working copy)
@@ -29,10 +29,10 @@
 public class ParseStatusUtils {
 
   public static ParseStatus STATUS_SUCCESS = ParseStatus.newBuilder().build();
-  public static final HashMap<Short,String> minorCodes = new HashMap<Short,String>();
+  public static final HashMap<Short, String> minorCodes = new HashMap<Short, String>();
 
   static {
-    STATUS_SUCCESS.setMajorCode((int)ParseStatusCodes.SUCCESS);
+    STATUS_SUCCESS.setMajorCode((int) ParseStatusCodes.SUCCESS);
     minorCodes.put(ParseStatusCodes.SUCCESS_OK, "ok");
     minorCodes.put(ParseStatusCodes.SUCCESS_REDIRECT, "redirect");
     minorCodes.put(ParseStatusCodes.FAILED_EXCEPTION, "exception");
@@ -49,8 +49,9 @@
     return status.getMajorCode() == ParseStatusCodes.SUCCESS;
   }
 
-  /** A convenience method. Return a String representation of the first
-   * argument, or null.
+  /**
+   * A convenience method. Return a String representation of the first argument,
+   * or null.
    */
   public static String getMessage(ParseStatus status) {
     List<CharSequence> args = status.getArgs();
@@ -77,29 +78,30 @@
 
   public static Parse getEmptyParse(Exception e, Configuration conf) {
     ParseStatus status = ParseStatus.newBuilder().build();
-    status.setMajorCode((int)ParseStatusCodes.FAILED);
-    status.setMinorCode((int)ParseStatusCodes.FAILED_EXCEPTION);
+    status.setMajorCode((int) ParseStatusCodes.FAILED);
+    status.setMinorCode((int) ParseStatusCodes.FAILED_EXCEPTION);
     status.getArgs().add(new Utf8(e.toString()));
 
     return new Parse("", "", new Outlink[0], status);
   }
 
-  public static Parse getEmptyParse(int minorCode, String message, Configuration conf) {
+  public static Parse getEmptyParse(int minorCode, String message,
+      Configuration conf) {
     ParseStatus status = ParseStatus.newBuilder().build();
-    status.setMajorCode((int)ParseStatusCodes.FAILED);
+    status.setMajorCode((int) ParseStatusCodes.FAILED);
     status.setMinorCode(minorCode);
     status.getArgs().add(new Utf8(message));
 
     return new Parse("", "", new Outlink[0], status);
   }
-  
+
   public static String toString(ParseStatus status) {
     if (status == null) {
       return "(null)";
     }
     StringBuilder sb = new StringBuilder();
-    sb.append(ParseStatusCodes.majorCodes[status.getMajorCode()] +
-        "/" + minorCodes.get(status.getMinorCode().shortValue()));
+    sb.append(ParseStatusCodes.majorCodes[status.getMajorCode()] + "/"
+        + minorCodes.get(status.getMinorCode().shortValue()));
     sb.append(" (" + status.getMajorCode() + "/" + status.getMinorCode() + ")");
     sb.append(", args=[");
     List<CharSequence> args = status.getArgs();
@@ -107,7 +109,8 @@
       int i = 0;
       Iterator<CharSequence> it = args.iterator();
       while (it.hasNext()) {
-        if (i > 0) sb.append(',');
+        if (i > 0)
+          sb.append(',');
         sb.append(it.next());
         i++;
       }
Index: src/java/org/apache/nutch/parse/ParseUtil.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParseUtil.java	(working copy)
@@ -49,7 +49,7 @@
  * A Utility class containing methods to simply perform parsing utilities such
  * as iterating through a preferred list of {@link Parser}s to obtain
  * {@link Parse} objects.
- *
+ * 
  * @author mattmann
  * @author J&eacute;r&ocirc;me Charron
  * @author S&eacute;bastien Le Callonnec
@@ -60,7 +60,7 @@
   public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);
 
   private static final int DEFAULT_MAX_PARSE_TIME = 30;
-  
+
   private Configuration conf;
   private Signature sig;
   private URLFilters filters;
@@ -71,9 +71,9 @@
   /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
   private int maxParseTime;
   private ExecutorService executorService;
-  
+
   /**
-   *
+   * 
    * @param conf
    */
   public ParseUtil(Configuration conf) {
@@ -90,15 +90,16 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
     parserFactory = new ParserFactory(conf);
-    maxParseTime=conf.getInt("parser.timeout", DEFAULT_MAX_PARSE_TIME);
+    maxParseTime = conf.getInt("parser.timeout", DEFAULT_MAX_PARSE_TIME);
     sig = SignatureFactory.getSignature(conf);
     filters = new URLFilters(conf);
     normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_OUTLINK);
     int maxOutlinksPerPage = conf.getInt("db.max.outlinks.per.page", 100);
-    maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE : maxOutlinksPerPage;
+    maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
+        : maxOutlinksPerPage;
     ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
     executorService = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
-      .setNameFormat("parse-%d").setDaemon(true).build());
+        .setNameFormat("parse-%d").setDaemon(true).build());
   }
 
   /**
@@ -106,11 +107,13 @@
    * until a successful parse is performed and a {@link Parse} object is
    * returned. If the parse is unsuccessful, a message is logged to the
    * <code>WARNING</code> level, and an empty parse is returned.
-   *
-   * @throws ParserNotFound If there is no suitable parser found. 
-   * @throws ParseException If there is an error parsing.
+   * 
+   * @throws ParserNotFound
+   *           If there is no suitable parser found.
+   * @throws ParseException
+   *           If there is an error parsing.
    */
-  public Parse parse(String url, WebPage page) throws ParserNotFound, 
+  public Parse parse(String url, WebPage page) throws ParserNotFound,
       ParseException {
     Parser[] parsers = null;
 
@@ -118,28 +121,29 @@
 
     parsers = this.parserFactory.getParsers(contentType, url);
 
-    for (int i=0; i<parsers.length; i++) {
+    for (int i = 0; i < parsers.length; i++) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Parsing [" + url + "] with [" + parsers[i] + "]");
       }
       Parse parse = null;
-      
-      if (maxParseTime!=-1)
-    	  parse = runParser(parsers[i], url, page);
-      else 
-    	  parse = parsers[i].getParse(url, page);
-      
-      if (parse!=null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
+
+      if (maxParseTime != -1)
+        parse = runParser(parsers[i], url, page);
+      else
+        parse = parsers[i].getParse(url, page);
+
+      if (parse != null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
         return parse;
       }
     }
 
-    LOG.warn("Unable to successfully parse content " + url +
-        " of type " + contentType);
-    return ParseStatusUtils.getEmptyParse(new ParseException("Unable to successfully parse content"), null);
+    LOG.warn("Unable to successfully parse content " + url + " of type "
+        + contentType);
+    return ParseStatusUtils.getEmptyParse(new ParseException(
+        "Unable to successfully parse content"), null);
   }
-  
-  private Parse runParser(Parser p, String url, WebPage page) {    
+
+  private Parse runParser(Parser p, String url, WebPage page) {
     ParseCallable pc = new ParseCallable(p, page, url);
     Future<Parse> task = executorService.submit(pc);
     Parse res = null;
@@ -155,8 +159,9 @@
   }
 
   /**
-   * Parses given web page and stores parsed content within page. Puts
-   * a meta-redirect to outlinks.
+   * Parses given web page and stores parsed content within page. Puts a
+   * meta-redirect to outlinks.
+   * 
    * @param key
    * @param page
    */
@@ -165,7 +170,8 @@
     byte status = page.getStatus().byteValue();
     if (status != CrawlStatus.STATUS_FETCHED) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Skipping " + url + " as status is: " + CrawlStatus.getName(status));
+        LOG.debug("Skipping " + url + " as status is: "
+            + CrawlStatus.getName(status));
       }
       return;
     }
@@ -213,7 +219,8 @@
           return;
         }
         page.getOutlinks().put(new Utf8(newUrl), new Utf8());
-        page.getMetadata().put(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
+        page.getMetadata().put(FetcherJob.REDIRECT_DISCOVERED,
+            TableUtil.YES_VAL);
         if (newUrl == null || newUrl.equals(url)) {
           String reprUrl = URLUtil.chooseRepr(url, newUrl,
               refreshTime < FetcherJob.PERM_REFRESH_TIME);
Index: src/java/org/apache/nutch/parse/Parser.java
===================================================================
--- src/java/org/apache/nutch/parse/Parser.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/Parser.java	(working copy)
@@ -22,9 +22,10 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-/** A parser for content generated by a {@link org.apache.nutch.protocol.Protocol}
- * implementation.  This interface is implemented by extensions.  Nutch's core
- * contains no page parsing code.
+/**
+ * A parser for content generated by a
+ * {@link org.apache.nutch.protocol.Protocol} implementation. This interface is
+ * implemented by extensions. Nutch's core contains no page parsing code.
  */
 public interface Parser extends FieldPluggable, Configurable {
   /** The name of the extension point. */
@@ -34,8 +35,9 @@
    * <p>
    * This method parses content in WebPage instance
    * </p>
-   *
-   * @param url Page's URL
+   * 
+   * @param url
+   *          Page's URL
    * @param page
    */
   Parse getParse(String url, WebPage page);
Index: src/java/org/apache/nutch/parse/ParserChecker.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserChecker.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserChecker.java	(working copy)
@@ -37,16 +37,16 @@
 import java.util.Map.Entry;
 
 /**
- * Parser checker, useful for testing parser.
- * It also accurately reports possible fetching and 
- * parsing failures and presents protocol status signals to aid 
- * debugging. The tool enables us to retrieve the following data from 
- * any url:
+ * Parser checker, useful for testing parser. It also accurately reports
+ * possible fetching and parsing failures and presents protocol status signals
+ * to aid debugging. The tool enables us to retrieve the following data from any
+ * url:
  * <ol>
- * <li><tt>contentType</tt>: The URL {@link org.apache.nutch.protocol.Content} type.</li>
- * <li><tt>signature</tt>: Digest is used to identify pages (like unique ID) and is used to remove
- * duplicates during the dedup procedure. 
- * It is calculated using {@link org.apache.nutch.crawl.MD5Signature} or
+ * <li><tt>contentType</tt>: The URL {@link org.apache.nutch.protocol.Content}
+ * type.</li>
+ * <li><tt>signature</tt>: Digest is used to identify pages (like unique ID) and
+ * is used to remove duplicates during the dedup procedure. It is calculated
+ * using {@link org.apache.nutch.crawl.MD5Signature} or
  * {@link org.apache.nutch.crawl.TextProfileSignature}.</li>
  * <li><tt>Version</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
  * <li><tt>Status</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
@@ -53,12 +53,14 @@
  * <li><tt>Title</tt>: of the URL</li>
  * <li><tt>Outlinks</tt>: associated with the URL</li>
  * <li><tt>Content Metadata</tt>: such as <i>X-AspNet-Version</i>, <i>Date</i>,
- * <i>Content-length</i>, <i>servedBy</i>, <i>Content-Type</i>, <i>Cache-Control</>, etc.</li>
+ * <i>Content-length</i>, <i>servedBy</i>, <i>Content-Type</i>,
+ * <i>Cache-Control</i>, etc.</li>
  * <li><tt>Parse Metadata</tt>: such as <i>CharEncodingForConversion</i>,
  * <i>OriginalCharEncoding</i>, <i>language</i>, etc.</li>
- * <li><tt>ParseText</tt>: The page parse text which varies in length depdnecing on 
- * <code>content.length</code> configuration.</li>
+ * <li><tt>ParseText</tt>: The page parse text which varies in length depending
+ * on <code>content.length</code> configuration.</li>
  * </ol>
+ * 
  * @author John Xing
  */
 
@@ -107,7 +109,7 @@
 
     ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, page);
 
-    if(!protocolOutput.getStatus().isSuccess()) {
+    if (!protocolOutput.getStatus().isSuccess()) {
       LOG.error("Fetch failed with protocol status: "
           + ProtocolStatusUtils.getName(protocolOutput.getStatus().getCode())
           + ": " + ProtocolStatusUtils.getMessage(protocolOutput.getStatus()));
@@ -155,7 +157,6 @@
       LOG.info("signature: " + StringUtil.toHexString(signature));
     }
 
-
     LOG.info("---------\nUrl\n---------------\n");
     System.out.print(url + "\n");
     LOG.info("---------\nMetadata\n---------\n");
@@ -167,7 +168,7 @@
       while (iterator.hasNext()) {
         Entry<CharSequence, ByteBuffer> entry = iterator.next();
         sb.append(entry.getKey().toString()).append(" : \t")
-        .append(Bytes.toString(entry.getValue())).append("\n");
+            .append(Bytes.toString(entry.getValue())).append("\n");
       }
       System.out.print(sb.toString());
     }
@@ -182,12 +183,12 @@
       Map<CharSequence, CharSequence> headers = page.getHeaders();
       StringBuffer headersb = new StringBuffer();
       if (metadata != null) {
-        Iterator<Entry<CharSequence, CharSequence>> iterator = headers.entrySet()
-            .iterator();
+        Iterator<Entry<CharSequence, CharSequence>> iterator = headers
+            .entrySet().iterator();
         while (iterator.hasNext()) {
           Entry<CharSequence, CharSequence> entry = iterator.next();
           headersb.append(entry.getKey().toString()).append(" : \t")
-          .append(entry.getValue()).append("\n");
+              .append(entry.getValue()).append("\n");
         }
         System.out.print(headersb.toString());
       }
Index: src/java/org/apache/nutch/parse/ParserFactory.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserFactory.java	(working copy)
@@ -34,8 +34,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.ObjectCache;
 
-
-/** Creates and caches {@link Parser} plugins.*/
+/** Creates and caches {@link Parser} plugins. */
 public final class ParserFactory {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParserFactory.class);
@@ -44,8 +43,7 @@
   public static final String DEFAULT_PLUGIN = "*";
 
   /** Empty extension list for caching purposes. */
-  private final List<Extension> EMPTY_EXTENSION_LIST =
-    new ArrayList<Extension>();
+  private final List<Extension> EMPTY_EXTENSION_LIST = new ArrayList<Extension>();
 
   private final Configuration conf;
   private final ExtensionPoint extensionPoint;
@@ -56,10 +54,12 @@
     ObjectCache objectCache = ObjectCache.get(conf);
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
         Parser.X_POINT_ID);
-    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
+    this.parsePluginList = (ParsePluginList) objectCache
+        .getObject(ParsePluginList.class.getName());
     if (this.parsePluginList == null) {
       this.parsePluginList = new ParsePluginsReader().parse(conf);
-      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
+      objectCache.setObject(ParsePluginList.class.getName(),
+          this.parsePluginList);
     }
 
     if (this.extensionPoint == null) {
@@ -71,33 +71,34 @@
     }
   }
 
-
   /**
    * Function returns an array of {@link Parser}s for a given content type.
-   *
+   * 
    * The function consults the internal list of parse plugins for the
-   * ParserFactory to determine the list of pluginIds, then gets the
-   * appropriate extension points to instantiate as {@link Parser}s.
-   *
-   * @param contentType The contentType to return the <code>Array</code>
-   *                    of {@link Parser}s for.
-   * @param url The url for the content that may allow us to get the type from
-   *            the file suffix.
+   * ParserFactory to determine the list of pluginIds, then gets the appropriate
+   * extension points to instantiate as {@link Parser}s.
+   * 
+   * @param contentType
+   *          The contentType to return the <code>Array</code> of
+   *          {@link Parser}s for.
+   * @param url
+   *          The url for the content that may allow us to get the type from the
+   *          file suffix.
    * @return An <code>Array</code> of {@link Parser}s for the given contentType.
    *         If there were plugins mapped to a contentType via the
-   *         <code>parse-plugins.xml</code> file, but never enabled via
-   *         the <code>plugin.includes</code> Nutch conf, then those plugins
-   *         won't be part of this array, i.e., they will be skipped.
-   *         So, if the ordered list of parsing plugins for
-   *         <code>text/plain</code> was <code>[parse-text,parse-html,
+   *         <code>parse-plugins.xml</code> file, but never enabled via the
+   *         <code>plugin.includes</code> Nutch conf, then those plugins won't
+   *         be part of this array, i.e., they will be skipped. So, if the
+   *         ordered list of parsing plugins for <code>text/plain</code> was
+   *         <code>[parse-text,parse-html,
    *         parse-rtf]</code>, and only <code>parse-html</code> and
    *         <code>parse-rtf</code> were enabled via
-   *         <code>plugin.includes</code>, then this ordered Array would
-   *         consist of two {@link Parser} interfaces,
+   *         <code>plugin.includes</code>, then this ordered Array would consist
+   *         of two {@link Parser} interfaces,
    *         <code>[parse-html, parse-rtf]</code>.
    */
   public Parser[] getParsers(String contentType, String url)
-  throws ParserNotFound {
+      throws ParserNotFound {
 
     List<Parser> parsers = null;
     List<Extension> parserExts = null;
@@ -107,7 +108,7 @@
     // TODO once the MimeTypes is available
     // parsers = getExtensions(MimeUtils.map(contentType));
     // if (parsers != null) {
-    //   return parsers;
+    // return parsers;
     // }
     // Last Chance: Guess content-type from file url...
     // parsers = getExtensions(MimeUtils.getMimeType(url));
@@ -118,49 +119,50 @@
     }
 
     parsers = new ArrayList<Parser>(parserExts.size());
-    for (Extension ext : parserExts){
+    for (Extension ext : parserExts) {
       Parser p = null;
       try {
-        //check to see if we've cached this parser instance yet
+        // check to see if we've cached this parser instance yet
         p = (Parser) objectCache.getObject(ext.getId());
         if (p == null) {
           // go ahead and instantiate it and then cache it
           p = (Parser) ext.getExtensionInstance();
-          objectCache.setObject(ext.getId(),p);
+          objectCache.setObject(ext.getId(), p);
         }
         parsers.add(p);
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
           LOG.warn("ParserFactory:PluginRuntimeException when "
-                 + "initializing parser plugin "
-                 + ext.getDescriptor().getPluginId()
-                 + " instance in getParsers "
-                 + "function: attempting to continue instantiating parsers: ", e);
+              + "initializing parser plugin "
+              + ext.getDescriptor().getPluginId() + " instance in getParsers "
+              + "function: attempting to continue instantiating parsers: ", e);
         }
       }
     }
-    return parsers.toArray(new Parser[]{});
+    return parsers.toArray(new Parser[] {});
   }
 
   /**
    * Function returns a {@link Parser} instance with the specified
-   * <code>extId</code>, representing its extension ID. If the Parser
-   * instance isn't found, then the function throws a
-   * <code>ParserNotFound</code> exception. If the function is able to find
-   * the {@link Parser} in the internal <code>PARSER_CACHE</code> then it
-   * will return the already instantiated Parser. Otherwise, if it has to
-   * instantiate the Parser itself , then this function will cache that Parser
-   * in the internal <code>PARSER_CACHE</code>.
-   *
-   * @param id The string extension ID (e.g.,
-   *        "org.apache.nutch.parse.rss.RSSParser",
-   *        "org.apache.nutch.parse.rtf.RTFParseFactory") of the {@link Parser}
-   *        implementation to return.
+   * <code>extId</code>, representing its extension ID. If the Parser instance
+   * isn't found, then the function throws a <code>ParserNotFound</code>
+   * exception. If the function is able to find the {@link Parser} in the
+   * internal <code>PARSER_CACHE</code> then it will return the already
+   * instantiated Parser. Otherwise, if it has to instantiate the Parser
+   * itself, then this function will cache that Parser in the internal
+   * <code>PARSER_CACHE</code>.
+   * 
+   * @param id
+   *          The string extension ID (e.g.,
+   *          "org.apache.nutch.parse.rss.RSSParser",
+   *          "org.apache.nutch.parse.rtf.RTFParseFactory") of the
+   *          {@link Parser} implementation to return.
    * @return A {@link Parser} implementation specified by the parameter
    *         <code>id</code>.
-   * @throws ParserNotFound If the Parser is not found (i.e., registered with
-   *         the extension point), or if the there a
-   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
+   * @throws ParserNotFound
+   *           If the Parser is not found (i.e., not registered with the
+   *           extension point), or if there is a {@link PluginRuntimeException}
+   *           instantiating the {@link Parser}.
    */
   public Parser getParserById(String id) throws ParserNotFound {
 
@@ -184,7 +186,7 @@
     if (objectCache.getObject(parserExt.getId()) != null) {
       return (Parser) objectCache.getObject(parserExt.getId());
 
-    // if not found in cache, instantiate the Parser
+      // if not found in cache, instantiate the Parser
     } else {
       try {
         Parser p = (Parser) parserExt.getExtensionInstance();
@@ -192,9 +194,9 @@
         return p;
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Canno initialize parser " +
-                   parserExt.getDescriptor().getPluginId() +
-                   " (cause: " + e.toString());
+          LOG.warn("Cannot initialize parser "
+              + parserExt.getDescriptor().getPluginId() + " (cause: "
+              + e.toString());
         }
         throw new ParserNotFound("Cannot init parser for id [" + id + "]");
       }
@@ -212,7 +214,7 @@
           columns.addAll(pluginFields);
         }
       } catch (PluginRuntimeException e) {
-        LOG.error("PluginRuntimeException",e);
+        LOG.error("PluginRuntimeException", e);
       }
     }
     return columns;
@@ -220,10 +222,11 @@
 
   /**
    * Finds the best-suited parse plugin for a given contentType.
-   *
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return a list of extensions to be used for this contentType.
-   *         If none, returns <code>null</code>.
+   * 
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return a list of extensions to be used for this contentType. If none,
+   *         returns <code>null</code>.
    */
   @SuppressWarnings("unchecked")
   protected List<Extension> getExtensions(String contentType) {
@@ -246,8 +249,8 @@
       if (extensions != null) {
         objectCache.setObject(type, extensions);
       } else {
-      	// Put the empty extension list into cache
-      	// to remember we don't know any related extension.
+        // Put the empty extension list into cache
+        // to remember we don't know any related extension.
         objectCache.setObject(type, EMPTY_EXTENSION_LIST);
       }
     }
@@ -256,12 +259,14 @@
 
   /**
    * searches a list of suitable parse plugins for the given contentType.
-   * <p>It first looks for a preferred plugin defined in the parse-plugin
-   * file.  If none is found, it returns a list of default plugins.
-   *
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * <p>
+   * It first looks for a preferred plugin defined in the parse-plugin file. If
+   * none is found, it returns a list of default plugins.
+   * 
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> findExtensions(String contentType) {
 
@@ -268,10 +273,10 @@
     Extension[] extensions = this.extensionPoint.getExtensions();
 
     // Look for a preferred plugin.
-    List<String> parsePluginList =
-      this.parsePluginList.getPluginList(contentType);
-    List<Extension> extensionList =
-      matchExtensions(parsePluginList, extensions, contentType);
+    List<String> parsePluginList = this.parsePluginList
+        .getPluginList(contentType);
+    List<Extension> extensionList = matchExtensions(parsePluginList,
+        extensions, contentType);
     if (extensionList != null) {
       return extensionList;
     }
@@ -284,20 +289,23 @@
   /**
    * Tries to find a suitable parser for the given contentType.
    * <ol>
-   * <li>It checks if a parser which accepts the contentType
-   * can be found in the <code>plugins</code> list;</li>
-   * <li>If this list is empty, it tries to find amongst the loaded
-   * extensions whether some of them might suit and warns the user.</li>
+   * <li>It checks if a parser which accepts the contentType can be found in the
+   * <code>plugins</code> list;</li>
+   * <li>If this list is empty, it tries to find amongst the loaded extensions
+   * whether some of them might suit and warns the user.</li>
    * </ol>
-   * @param plugins List of candidate plugins.
-   * @param extensions Array of loaded extensions.
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * 
+   * @param plugins
+   *          List of candidate plugins.
+   * @param extensions
+   *          Array of loaded extensions.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> matchExtensions(List<String> plugins,
-                               Extension[] extensions,
-                               String contentType) {
+      Extension[] extensions, String contentType) {
 
     List<Extension> extList = new ArrayList<Extension>();
     if (plugins != null) {
@@ -315,7 +323,7 @@
         // in either case, LOG the appropriate error message to WARN level
 
         if (ext == null) {
-          //try to get it just by its pluginId
+          // try to get it just by its pluginId
           ext = getExtension(extensions, parsePluginId);
 
           if (LOG.isWarnEnabled()) {
@@ -323,17 +331,17 @@
               // plugin was enabled via plugin.includes
               // its plugin.xml just doesn't claim to support that
               // particular mimeType
-              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but " + "its plugin.xml " +
-                       "file does not claim to support contentType: " +
-                       contentType);
+              LOG.warn("ParserFactory:Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but " + "its plugin.xml "
+                  + "file does not claim to support contentType: "
+                  + contentType);
             } else {
               // plugin wasn't enabled via plugin.includes
-              LOG.warn("ParserFactory: Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but not enabled via " +
-                       "plugin.includes in nutch-default.xml");
+              LOG.warn("ParserFactory: Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but not enabled via "
+                  + "plugin.includes in nutch-default.xml");
             }
           }
         }
@@ -353,12 +361,12 @@
       // any extensions where this is the case, throw a
       // NotMappedParserException
 
-      for (int i=0; i<extensions.length; i++) {
-      	if ("*".equals(extensions[i].getAttribute("contentType"))){
+      for (int i = 0; i < extensions.length; i++) {
+        if ("*".equals(extensions[i].getAttribute("contentType"))) {
           extList.add(0, extensions[i]);
-        }
-      	else if (extensions[i].getAttribute("contentType") != null
-            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType")))) {
+        } else if (extensions[i].getAttribute("contentType") != null
+            && contentType.matches(escapeContentType(extensions[i]
+                .getAttribute("contentType")))) {
           extList.add(extensions[i]);
         }
       }
@@ -367,21 +375,23 @@
         if (LOG.isInfoEnabled()) {
           StringBuffer extensionsIDs = new StringBuffer("[");
           boolean isFirst = true;
-          for (Extension ext : extList){
-        	  if (!isFirst) extensionsIDs.append(" - ");
-        	  else isFirst=false;
-        	  extensionsIDs.append(ext.getId());
+          for (Extension ext : extList) {
+            if (!isFirst)
+              extensionsIDs.append(" - ");
+            else
+              isFirst = false;
+            extensionsIDs.append(ext.getId());
           }
-    	  extensionsIDs.append("]");
-          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
-                   " are enabled via the plugin.includes system " +
-                   "property, and all claim to support the content type " +
-                   contentType + ", but they are not mapped to it  in the " +
-                   "parse-plugins.xml file");
+          extensionsIDs.append("]");
+          LOG.info("The parsing plugins: " + extensionsIDs.toString()
+              + " are enabled via the plugin.includes system "
+              + "property, and all claim to support the content type "
+              + contentType + ", but they are not mapped to it  in the "
+              + "parse-plugins.xml file");
         }
       } else if (LOG.isDebugEnabled()) {
-        LOG.debug("ParserFactory:No parse plugins mapped or enabled for " +
-                  "contentType " + contentType);
+        LOG.debug("ParserFactory:No parse plugins mapped or enabled for "
+            + "contentType " + contentType);
       }
     }
 
@@ -389,23 +399,22 @@
   }
 
   private String escapeContentType(String contentType) {
-  	// Escapes contentType in order to use as a regex 
-  	// (and keep backwards compatibility).
-  	// This enables to accept multiple types for a single parser. 
-  	return contentType.replace("+", "\\+").replace(".", "\\.");
-	}
+    // Escapes contentType in order to use as a regex
+    // (and keep backwards compatibility).
+    // This enables to accept multiple types for a single parser.
+    return contentType.replace("+", "\\+").replace(".", "\\.");
+  }
 
-
-	private boolean match(Extension extension, String id, String type) {
-    return (id.equals(extension.getId())) &&
-            (extension.getAttribute("contentType").equals("*") ||
-             type.matches(escapeContentType(extension.getAttribute("contentType"))) ||
-             type.equals(DEFAULT_PLUGIN));
+  private boolean match(Extension extension, String id, String type) {
+    return (id.equals(extension.getId()))
+        && (extension.getAttribute("contentType").equals("*")
+            || type.matches(escapeContentType(extension
+                .getAttribute("contentType"))) || type.equals(DEFAULT_PLUGIN));
   }
 
   /** Get an extension from its id and supported content-type. */
   private Extension getExtension(Extension[] list, String id, String type) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (match(list[i], id, type)) {
         return list[i];
       }
@@ -414,7 +423,7 @@
   }
 
   private Extension getExtension(Extension[] list, String id) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (id.equals(list[i].getId())) {
         return list[i];
       }
Index: src/java/org/apache/nutch/parse/ParserJob.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserJob.java	(working copy)
@@ -58,9 +58,9 @@
 
   private static final String RESUME_KEY = "parse.job.resume";
   private static final String FORCE_KEY = "parse.job.force";
-  
+
   public static final String SKIP_TRUNCATED = "parser.skip.truncated";
-  
+
   private static final Utf8 REPARSE = new Utf8("-reparse");
 
   private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
@@ -79,9 +79,8 @@
     FIELDS.add(WebPage.Field.HEADERS);
   }
 
-
-  public static class ParserMapper 
-      extends GoraMapper<String, WebPage, String, WebPage> {
+  public static class ParserMapper extends
+      GoraMapper<String, WebPage, String, WebPage> {
     private ParseUtil parseUtil;
 
     private boolean shouldResume;
@@ -91,7 +90,7 @@
     private Utf8 batchId;
 
     private boolean skipTruncated;
-    
+
     @Override
     public void setup(Context context) throws IOException {
       Configuration conf = context.getConfiguration();
@@ -98,8 +97,9 @@
       parseUtil = new ParseUtil(conf);
       shouldResume = conf.getBoolean(RESUME_KEY, false);
       force = conf.getBoolean(FORCE_KEY, false);
-      batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
-      skipTruncated=conf.getBoolean(SKIP_TRUNCATED, true);
+      batchId = new Utf8(
+          conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
+      skipTruncated = conf.getBoolean(SKIP_TRUNCATED, true);
     }
 
     @Override
@@ -131,7 +131,6 @@
       if (skipTruncated && isTruncated(unreverseKey, page)) {
         return;
       }
-      
 
       parseUtil.process(key, page);
       ParseStatus pstatus = page.getParseStatus();
@@ -141,9 +140,9 @@
       }
 
       context.write(key, page);
-    }    
+    }
   }
-  
+
   public ParserJob() {
 
   }
@@ -151,13 +150,14 @@
   public ParserJob(Configuration conf) {
     setConf(conf);
   }
-  
+
   /**
    * Checks if the page's content is truncated.
-   * @param url 
+   * 
+   * @param url
    * @param page
-   * @return If the page is truncated <code>true</code>. When it is not,
-   * or when it could be determined, <code>false</code>. 
+   * @return If the page is truncated <code>true</code>. When it is not, or when
+   *         it could not be determined, <code>false</code>.
    */
   public static boolean isTruncated(String url, WebPage page) {
     ByteBuffer content = page.getContent();
@@ -164,7 +164,8 @@
     if (content == null) {
       return false;
     }
-    CharSequence lengthUtf8 = page.getHeaders().get(new Utf8(HttpHeaders.CONTENT_LENGTH));
+    CharSequence lengthUtf8 = page.getHeaders().get(
+        new Utf8(HttpHeaders.CONTENT_LENGTH));
     if (lengthUtf8 == null) {
       return false;
     }
@@ -186,7 +187,8 @@
       return true;
     }
     if (LOG.isDebugEnabled()) {
-      LOG.debug(url + " actualSize=" + actualSize + " inHeaderSize=" + inHeaderSize);
+      LOG.debug(url + " actualSize=" + actualSize + " inHeaderSize="
+          + inHeaderSize);
     }
     return false;
   }
@@ -198,8 +200,8 @@
     ParseFilters parseFilters = new ParseFilters(conf);
 
     Collection<WebPage.Field> parsePluginFields = parserFactory.getFields();
-    Collection<WebPage.Field> signaturePluginFields =
-      SignatureFactory.getFields(conf);
+    Collection<WebPage.Field> signaturePluginFields = SignatureFactory
+        .getFields(conf);
     Collection<WebPage.Field> htmlParsePluginFields = parseFilters.getFields();
 
     if (parsePluginFields != null) {
@@ -226,11 +228,11 @@
   }
 
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
-    Boolean shouldResume = (Boolean)args.get(Nutch.ARG_RESUME);
-    Boolean force = (Boolean)args.get(Nutch.ARG_FORCE);
-    
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
+    Boolean shouldResume = (Boolean) args.get(Nutch.ARG_RESUME);
+    Boolean force = (Boolean) args.get(Nutch.ARG_FORCE);
+
     if (batchId != null) {
       getConf().set(GeneratorJob.BATCH_ID, batchId);
     }
@@ -241,7 +243,8 @@
       getConf().setBoolean(FORCE_KEY, force);
     }
     LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
-    LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
+    LOG.info("ParserJob: forced reparse:\t"
+        + getConf().getBoolean(FORCE_KEY, false));
     if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
       LOG.info("ParserJob: parsing all");
     } else {
@@ -248,10 +251,10 @@
       LOG.info("ParserJob: batchId:\t" + batchId);
     }
     currentJob = new NutchJob(getConf(), "parse");
-    
+
     Collection<WebPage.Field> fields = getFields(currentJob);
     MapFieldValueFilter<String, WebPage> batchIdFilter = getBatchIdFilter(batchId);
-	StorageUtils.initMapperJob(currentJob, fields, String.class, WebPage.class,
+    StorageUtils.initMapperJob(currentJob, fields, String.class, WebPage.class,
         ParserMapper.class, batchIdFilter);
     StorageUtils.initReducerJob(currentJob, IdentityPageReducer.class);
     currentJob.setNumReduceTasks(0);
@@ -275,20 +278,20 @@
     return filter;
   }
 
-  public int parse(String batchId, boolean shouldResume, boolean force) throws Exception {
-    
+  public int parse(String batchId, boolean shouldResume, boolean force)
+      throws Exception {
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("ParserJob: starting at " + sdf.format(start));
 
-    run(ToolUtil.toArgMap(
-        Nutch.ARG_BATCH, batchId,
-        Nutch.ARG_RESUME, shouldResume,
-        Nutch.ARG_FORCE, force));
+    run(ToolUtil.toArgMap(Nutch.ARG_BATCH, batchId, Nutch.ARG_RESUME,
+        shouldResume, Nutch.ARG_FORCE, force));
     LOG.info("ParserJob: success");
-    
+
     long finish = System.currentTimeMillis();
-    LOG.info("ParserJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
+    LOG.info("ParserJob: finished at " + sdf.format(finish)
+        + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
     return 0;
   }
 
@@ -298,12 +301,18 @@
     String batchId = null;
 
     if (args.length < 1) {
-      System.err.println("Usage: ParserJob (<batchId> | -all) [-crawlId <id>] [-resume] [-force]");
-      System.err.println("    <batchId>     - symbolic batch ID created by Generator");
-      System.err.println("    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)");
-      System.err.println("    -all          - consider pages from all crawl jobs");
-      System.err.println("    -resume       - resume a previous incomplete job");
-      System.err.println("    -force        - force re-parsing even if a page is already parsed");
+      System.err
+          .println("Usage: ParserJob (<batchId> | -all) [-crawlId <id>] [-resume] [-force]");
+      System.err
+          .println("    <batchId>     - symbolic batch ID created by Generator");
+      System.err
+          .println("    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)");
+      System.err
+          .println("    -all          - consider pages from all crawl jobs");
+      System.err
+          .println("    -resume       - resume a previous incomplete job");
+      System.err
+          .println("    -force        - force re-parsing even if a page is already parsed");
       return -1;
     }
     for (int i = 0; i < args.length; i++) {
Index: src/java/org/apache/nutch/parse/ParserNotFound.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserNotFound.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/ParserNotFound.java	(working copy)
@@ -18,17 +18,17 @@
 
 public class ParserNotFound extends ParseException {
 
-  private static final long serialVersionUID=23993993939L;
+  private static final long serialVersionUID = 23993993939L;
   private String url;
   private String contentType;
 
-  public ParserNotFound(String message){
-    super(message);    
+  public ParserNotFound(String message) {
+    super(message);
   }
-  
+
   public ParserNotFound(String url, String contentType) {
-    this(url, contentType,
-         "parser not found for contentType="+contentType+" url="+url);
+    this(url, contentType, "parser not found for contentType=" + contentType
+        + " url=" + url);
   }
 
   public ParserNotFound(String url, String contentType, String message) {
@@ -37,6 +37,11 @@
     this.contentType = contentType;
   }
 
-  public String getUrl() { return url; }
-  public String getContentType() { return contentType; }
+  public String getUrl() {
+    return url;
+  }
+
+  public String getContentType() {
+    return contentType;
+  }
 }
Index: src/java/org/apache/nutch/parse/package-info.java
===================================================================
--- src/java/org/apache/nutch/parse/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/parse/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * The {@link org.apache.nutch.parse.Parse Parse} interface and related classes.
  */
 package org.apache.nutch.parse;
+
Index: src/java/org/apache/nutch/plugin/CircularDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/CircularDependencyException.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/CircularDependencyException.java	(working copy)
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.plugin;
 
-
 /**
  * <code>CircularDependencyException</code> will be thrown if a circular
  * dependency is detected.
Index: src/java/org/apache/nutch/plugin/Extension.java
===================================================================
--- src/java/org/apache/nutch/plugin/Extension.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/Extension.java	(working copy)
@@ -94,8 +94,10 @@
    * Adds a attribute and is only used until model creation at plugin system
    * start up.
    * 
-   * @param pKey a key
-   * @param pValue a value
+   * @param pKey
+   *          a key
+   * @param pValue
+   *          a value
    */
   public void addAttribute(String pKey, String pValue) {
     fAttributes.put(pKey, pValue);
@@ -105,7 +107,8 @@
    * Sets the Class that implement the concret extension and is only used until
    * model creation at system start up.
    * 
-   * @param extensionClazz The extensionClasname to set
+   * @param extensionClazz
+   *          The extensionClasname to set
    */
   public void setClazz(String extensionClazz) {
     fClazz = extensionClazz;
@@ -115,7 +118,8 @@
    * Sets the unique extension Id and is only used until model creation at
    * system start up.
    * 
-   * @param extensionID The extensionID to set
+   * @param extensionID
+   *          The extensionID to set
    */
   public void setId(String extensionID) {
     fId = extensionID;
@@ -147,10 +151,10 @@
     // The same is in PluginRepository.getPluginInstance().
     // Suggested by Stefan Groschupf <sg@media-style.com>
     synchronized (getId()) {
-      try {      
+      try {
         PluginRepository pluginRepository = PluginRepository.get(conf);
-        Class extensionClazz = 
-          pluginRepository.getCachedClass(fDescriptor, getClazz());
+        Class extensionClazz = pluginRepository.getCachedClass(fDescriptor,
+            getClazz());
         // lazy loading of Plugin in case there is no instance of the plugin
         // already.
         pluginRepository.getPluginInstance(getDescriptor());
Index: src/java/org/apache/nutch/plugin/ExtensionPoint.java
===================================================================
--- src/java/org/apache/nutch/plugin/ExtensionPoint.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/ExtensionPoint.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 import java.util.ArrayList;
 
 /**
@@ -76,7 +77,8 @@
   /**
    * Sets the extensionPointId.
    * 
-   * @param pId extension point id
+   * @param pId
+   *          extension point id
    */
   private void setId(String pId) {
     ftId = pId;
Index: src/java/org/apache/nutch/plugin/MissingDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/MissingDependencyException.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/MissingDependencyException.java	(working copy)
@@ -17,8 +17,8 @@
 package org.apache.nutch.plugin;
 
 /**
- * <code>MissingDependencyException</code> will be thrown if a plugin
- * dependency cannot be found.
+ * <code>MissingDependencyException</code> will be thrown if a plugin dependency
+ * cannot be found.
  * 
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/plugin/Pluggable.java
===================================================================
--- src/java/org/apache/nutch/plugin/Pluggable.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/Pluggable.java	(working copy)
@@ -17,15 +17,14 @@
 package org.apache.nutch.plugin;
 
 /**
- * Defines the capability of a class to be plugged into Nutch.
- * This is a common interface that must be implemented by all
- * Nutch Extension Points.
- *
+ * Defines the capability of a class to be plugged into Nutch. This is a common
+ * interface that must be implemented by all Nutch Extension Points.
+ * 
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  * @see <a href="http://wiki.apache.org/nutch/AboutPlugins">About Plugins</a>
- * @see <a href="package-summary.html#package_description">
- *      plugin package description</a>
+ * @see <a href="package-summary.html#package_description"> plugin package
+ *      description</a>
  */
 public interface Pluggable {
 }
Index: src/java/org/apache/nutch/plugin/Plugin.java
===================================================================
--- src/java/org/apache/nutch/plugin/Plugin.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/Plugin.java	(working copy)
@@ -33,8 +33,8 @@
  * The <code>Plugin</code> will be startuped and shutdown by the nutch plugin
  * management system.
  * 
- * A possible usecase of the <code>Plugin</code> implementation is to create
- * or close a database connection.
+ * A possible use case of the <code>Plugin</code> implementation is to create or
+ * close a database connection.
  * 
  * @author joa23
  */
@@ -81,7 +81,8 @@
   }
 
   /**
-   * @param descriptor The descriptor to set
+   * @param descriptor
+   *          The descriptor to set
    */
   private void setDescriptor(PluginDescriptor descriptor) {
     fDescriptor = descriptor;
Index: src/java/org/apache/nutch/plugin/PluginClassLoader.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginClassLoader.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginClassLoader.java	(working copy)
@@ -45,11 +45,11 @@
    */
   public PluginClassLoader(URL[] urls, ClassLoader parent) {
     super(urls, parent);
-    
+
     this.urls = urls;
     this.parent = parent;
   }
-  
+
   @Override
   public int hashCode() {
     final int PRIME = 31;
Index: src/java/org/apache/nutch/plugin/PluginDescriptor.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginDescriptor.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginDescriptor.java	(working copy)
@@ -30,12 +30,11 @@
 import org.apache.hadoop.conf.Configuration;
 
 /**
- * The <code>PluginDescriptor</code> provide access to all meta information of
- * a nutch-plugin, as well to the internationalizable resources and the plugin
- * own classloader. There are meta information about <code>Plugin</code>,
- * <code>ExtensionPoint</code> and <code>Extension</code>. To provide
- * access to the meta data of a plugin via a descriptor allow a lazy loading
- * mechanism.
+ * The <code>PluginDescriptor</code> provides access to all meta information
+ * of a nutch-plugin, as well as to the internationalizable resources and the
+ * plugin's own classloader. There is meta information about
+ * <code>Plugin</code>, <code>ExtensionPoint</code> and <code>Extension</code>.
+ * Providing access to plugin meta data via a descriptor allows lazy loading.
  */
 public class PluginDescriptor {
   private String fPluginPath;
@@ -51,7 +50,8 @@
   private ArrayList<URL> fNotExportedLibs = new ArrayList<URL>();
   private ArrayList<Extension> fExtensions = new ArrayList<Extension>();
   private PluginClassLoader fClassLoader;
-  public static final Logger LOG = LoggerFactory.getLogger(PluginDescriptor.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginDescriptor.class);
   private Configuration fConf;
 
   /**
@@ -204,7 +204,8 @@
   /**
    * Adds a dependency
    * 
-   * @param pId id of the dependent plugin
+   * @param pId
+   *          id of the dependent plugin
    */
   public void addDependency(String pId) {
     fDependencies.add(pId);
@@ -217,7 +218,8 @@
    */
   public void addExportedLibRelative(String pLibPath)
       throws MalformedURLException {
-    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL();
+    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI()
+        .toURL();
     fExportedLibs.add(url);
   }
 
@@ -246,7 +248,8 @@
    */
   public void addNotExportedLibRelative(String pLibPath)
       throws MalformedURLException {
-    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL();
+    URL url = new File(getPluginPath() + File.separator + pLibPath).toURI()
+        .toURL();
     fNotExportedLibs.add(url);
   }
 
@@ -283,8 +286,8 @@
       LOG.debug(getPluginId() + " " + e.toString());
     }
     URL[] urls = arrayList.toArray(new URL[arrayList.size()]);
-    fClassLoader = new PluginClassLoader(urls, PluginDescriptor.class
-        .getClassLoader());
+    fClassLoader = new PluginClassLoader(urls,
+        PluginDescriptor.class.getClassLoader());
     return fClassLoader;
   }
 
@@ -306,7 +309,7 @@
     for (String id : pDescriptor.getDependencies()) {
       PluginDescriptor descriptor = PluginRepository.get(fConf)
           .getPluginDescriptor(id);
-      for (URL url: descriptor.getExportedLibUrls()) {
+      for (URL url : descriptor.getExportedLibUrls()) {
         pLibs.add(url);
       }
       collectLibs(pLibs, descriptor);
Index: src/java/org/apache/nutch/plugin/PluginManifestParser.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginManifestParser.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginManifestParser.java	(working copy)
@@ -39,8 +39,8 @@
 import org.xml.sax.SAXException;
 
 /**
- * The <code>PluginManifestParser</code> parser just parse the manifest file
- * in all plugin directories.
+ * The <code>PluginManifestParser</code> parser just parses the manifest file
+ * in all plugin directories.
  * 
  * @author joa23
  */
@@ -93,7 +93,8 @@
             PluginDescriptor p = parseManifestFile(manifestPath);
             map.put(p.getPluginId(), p);
           } catch (Exception e) {
-            LOG.warn("Error while loading plugin `" + manifestPath + "` " + e.toString());
+            LOG.warn("Error while loading plugin `" + manifestPath + "` "
+                + e.toString());
           }
         }
       }
@@ -182,7 +183,7 @@
     PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name,
         providerName, pluginClazz, pPath, this.conf);
     LOG.debug("plugin: id=" + id + " name=" + name + " version=" + version
-          + " provider=" + providerName + "class=" + pluginClazz);
+        + " provider=" + providerName + " class=" + pluginClazz);
     parseExtension(rootElement, pluginDescriptor);
     parseExtensionPoints(rootElement, pluginDescriptor);
     parseLibraries(rootElement, pluginDescriptor);
@@ -289,8 +290,8 @@
             if (parameters != null) {
               for (int k = 0; k < parameters.getLength(); k++) {
                 Element param = (Element) parameters.item(k);
-                extension.addAttribute(param.getAttribute(ATTR_NAME), param
-                    .getAttribute("value"));
+                extension.addAttribute(param.getAttribute(ATTR_NAME),
+                    param.getAttribute("value"));
               }
             }
             pPluginDescriptor.addExtension(extension);
Index: src/java/org/apache/nutch/plugin/PluginRepository.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRepository.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginRepository.java	(working copy)
@@ -50,13 +50,13 @@
   private HashMap<String, ExtensionPoint> fExtensionPoints;
 
   private HashMap<String, Plugin> fActivatedPlugins;
-  
-  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE =
-    new HashMap<String, Map<PluginClassLoader,Class>>();
 
+  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<String, Map<PluginClassLoader, Class>>();
+
   private Configuration conf;
 
-  public static final Logger LOG = LoggerFactory.getLogger(PluginRepository.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginRepository.class);
 
   /**
    * @throws PluginRuntimeException
@@ -68,7 +68,8 @@
     this.conf = new Configuration(conf);
     this.auto = conf.getBoolean("plugin.auto-activation", true);
     String[] pluginFolders = conf.getStrings("plugin.folders");
-    PluginManifestParser manifestParser = new PluginManifestParser(this.conf, this);
+    PluginManifestParser manifestParser = new PluginManifestParser(this.conf,
+        this);
     Map<String, PluginDescriptor> allPlugins = manifestParser
         .parsePluginFolder(pluginFolders);
     if (allPlugins.isEmpty()) {
@@ -85,7 +86,7 @@
     try {
       installExtensions(fRegisteredPlugins);
     } catch (PluginRuntimeException e) {
-        LOG.error(e.toString());
+      LOG.error(e.toString());
       throw new RuntimeException(e.getMessage());
     }
     displayStatus();
@@ -112,8 +113,8 @@
       return;
     }
 
-    for (PluginDescriptor plugin: plugins) {
-      for(ExtensionPoint point:plugin.getExtenstionPoints()) {
+    for (PluginDescriptor plugin : plugins) {
+      for (ExtensionPoint point : plugin.getExtenstionPoints()) {
         String xpId = point.getId();
         LOG.debug("Adding extension point " + xpId);
         fExtensionPoints.put(xpId, point);
@@ -128,7 +129,7 @@
       throws PluginRuntimeException {
 
     for (PluginDescriptor descriptor : pRegisteredPlugins) {
-      for(Extension extension:descriptor.getExtensions()) {
+      for (Extension extension : descriptor.getExtensions()) {
         String xpId = extension.getTargetPoint();
         ExtensionPoint point = getExtensionPoint(xpId);
         if (point == null) {
@@ -156,7 +157,7 @@
     branch.put(plugin.getPluginId(), plugin);
 
     // Otherwise, checks each dependency
-    for(String id:plugin.getDependencies()) {
+    for (String id : plugin.getDependencies()) {
       PluginDescriptor dependency = plugins.get(id);
       if (dependency == null) {
         throw new MissingDependencyException("Missing dependency " + id
@@ -271,7 +272,8 @@
       // The same is in Extension.getExtensionInstance().
       // Suggested by Stefan Groschupf <sg@media-style.com>
       synchronized (pDescriptor) {
-        Class<?> pluginClass = getCachedClass(pDescriptor, pDescriptor.getPluginClass());
+        Class<?> pluginClass = getCachedClass(pDescriptor,
+            pDescriptor.getPluginClass());
         Constructor<?> constructor = pluginClass.getConstructor(new Class<?>[] {
             PluginDescriptor.class, Configuration.class });
         Plugin plugin = (Plugin) constructor.newInstance(new Object[] {
@@ -312,9 +314,9 @@
       plugin.shutDown();
     }
   }
-  
+
   public Class getCachedClass(PluginDescriptor pDescriptor, String className)
-  throws ClassNotFoundException {
+      throws ClassNotFoundException {
     Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
     if (descMap == null) {
       descMap = new HashMap<PluginClassLoader, Class>();
Index: src/java/org/apache/nutch/plugin/PluginRuntimeException.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(revision 1650444)
+++ src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(working copy)
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * <code>PluginRuntimeException</code> will be thrown until a exception in the
  * plugin managemnt occurs.
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -41,7 +41,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-public final class Content implements Writable{
+public final class Content implements Writable {
 
   public static final String DIR_NAME = "content";
 
@@ -85,7 +85,7 @@
     this.mimeTypes = new MimeUtil(conf);
     this.contentType = getContentType(contentType, url, content);
   }
-  
+
   public Content(String url, String base, byte[] content, String contentType,
       Metadata metadata, MimeUtil mimeTypes) {
 
@@ -141,11 +141,11 @@
       metadata.readFields(in); // read meta data
       break;
     default:
-      throw new VersionMismatchException((byte)2, oldVersion);
+      throw new VersionMismatchException((byte) 2, oldVersion);
     }
 
   }
-  
+
   public final void readFields(DataInput in) throws IOException {
     metadata.clear();
     int sizeOrVersion = in.readInt();
@@ -163,14 +163,14 @@
         metadata.readFields(in);
         break;
       default:
-        throw new VersionMismatchException((byte)VERSION, (byte)version);
+        throw new VersionMismatchException((byte) VERSION, (byte) version);
       }
     } else { // size
       byte[] compressed = new byte[sizeOrVersion];
       in.readFully(compressed, 0, compressed.length);
       ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
-      DataInput inflater =
-        new DataInputStream(new InflaterInputStream(deflated));
+      DataInput inflater = new DataInputStream(
+          new InflaterInputStream(deflated));
       readFieldsCompressed(inflater);
     }
   }
@@ -204,8 +204,9 @@
     return url;
   }
 
-  /** The base url for relative links contained in the content.
-   * Maybe be different from url if the request redirected.
+  /**
+   * The base url for relative links contained in the content. May be
+   * different from url if the request redirected.
    */
   public String getBaseUrl() {
     return base;
@@ -220,7 +221,9 @@
     this.content = content;
   }
 
-  /** The media type of the retrieved content.
+  /**
+   * The media type of the retrieved content.
+   * 
    * @see <a href="http://www.iana.org/assignments/media-types/">
    *      http://www.iana.org/assignments/media-types/</a>
    */
@@ -276,9 +279,9 @@
       System.out.println("usage:" + usage);
       return;
     }
-    
-    GenericOptionsParser optParser =
-      new GenericOptionsParser(NutchConfiguration.create(), args);
+
+    GenericOptionsParser optParser = new GenericOptionsParser(
+        NutchConfiguration.create(), args);
     String[] argv = optParser.getRemainingArgs();
     Configuration conf = optParser.getConfiguration();
 
Index: src/java/org/apache/nutch/protocol/Protocol.java
===================================================================
--- src/java/org/apache/nutch/protocol/Protocol.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/Protocol.java	(working copy)
@@ -25,7 +25,7 @@
 
 import crawlercommons.robots.BaseRobotRules;
 
-/** A retriever of url content.  Implemented by protocol extensions. */
+/** A retriever of url content. Implemented by protocol extensions. */
 public interface Protocol extends FieldPluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Protocol.class.getName();
@@ -55,7 +55,9 @@
 
   /**
    * Retrieve robot rules applicable for this url.
-   * @param url url to check
+   * 
+   * @param url
+   *          url to check
    * @param page
    * @return robot rules (specific for this url or default), never null
    */
Index: src/java/org/apache/nutch/protocol/ProtocolNotFound.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(working copy)
@@ -22,7 +22,7 @@
   private String url;
 
   public ProtocolNotFound(String url) {
-    this(url, "protocol not found for url="+url);
+    this(url, "protocol not found for url=" + url);
   }
 
   public ProtocolNotFound(String url, String message) {
@@ -30,5 +30,7 @@
     this.url = url;
   }
 
-  public String getUrl() { return url; }
+  public String getUrl() {
+    return url;
+  }
 }
Index: src/java/org/apache/nutch/protocol/ProtocolOutput.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolOutput.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolOutput.java	(working copy)
@@ -17,10 +17,10 @@
 
 package org.apache.nutch.protocol;
 
-
 /**
- * Simple aggregate to pass from protocol plugins both content and
- * protocol status.
+ * Simple aggregate to pass from protocol plugins both content and protocol
+ * status.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class ProtocolOutput {
Index: src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java	(working copy)
@@ -19,38 +19,42 @@
 public interface ProtocolStatusCodes {
 
   /** Content was retrieved without errors. */
-  public static final int SUCCESS              = 1;
+  public static final int SUCCESS = 1;
   /** Content was not retrieved. Any further errors may be indicated in args. */
-  public static final int FAILED               = 2;
+  public static final int FAILED = 2;
 
-  /** This protocol was not found.  Application may attempt to retry later. */
-  public static final int PROTO_NOT_FOUND      = 10;
+  /** This protocol was not found. Application may attempt to retry later. */
+  public static final int PROTO_NOT_FOUND = 10;
   /** Resource is gone. */
-  public static final int GONE                 = 11;
+  public static final int GONE = 11;
   /** Resource has moved permanently. New url should be found in args. */
-  public static final int MOVED                = 12;
+  public static final int MOVED = 12;
   /** Resource has moved temporarily. New url should be found in args. */
-  public static final int TEMP_MOVED           = 13;
+  public static final int TEMP_MOVED = 13;
   /** Resource was not found. */
-  public static final int NOTFOUND             = 14;
+  public static final int NOTFOUND = 14;
   /** Temporary failure. Application may retry immediately. */
-  public static final int RETRY                = 15;
-  /** Unspecified exception occured. Further information may be provided in args. */
-  public static final int EXCEPTION            = 16;
+  public static final int RETRY = 15;
+  /**
+   * Unspecified exception occurred. Further information may be provided in args.
+   */
+  public static final int EXCEPTION = 16;
   /** Access denied - authorization required, but missing/incorrect. */
-  public static final int ACCESS_DENIED        = 17;
+  public static final int ACCESS_DENIED = 17;
   /** Access denied by robots.txt rules. */
-  public static final int ROBOTS_DENIED        = 18;
+  public static final int ROBOTS_DENIED = 18;
   /** Too many redirects. */
-  public static final int REDIR_EXCEEDED       = 19;
+  public static final int REDIR_EXCEEDED = 19;
   /** Not fetching. */
-  public static final int NOTFETCHING          = 20;
+  public static final int NOTFETCHING = 20;
   /** Unchanged since the last fetch. */
-  public static final int NOTMODIFIED          = 21;
-  /** Request was refused by protocol plugins, because it would block.
-   * The expected number of milliseconds to wait before retry may be provided
-   * in args. */
-  public static final int WOULDBLOCK           = 22;
+  public static final int NOTMODIFIED = 21;
+  /**
+   * Request was refused by protocol plugins, because it would block. The
+   * expected number of milliseconds to wait before retry may be provided in
+   * args.
+   */
+  public static final int WOULDBLOCK = 22;
   /** Thread was blocked http.max.delays times during fetching. */
-  public static final int BLOCKED              = 23;
+  public static final int BLOCKED = 23;
 }
Index: src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java	(working copy)
@@ -100,7 +100,7 @@
     }
     return TableUtil.toString(args.iterator().next());
   }
-  
+
   public static String toString(ProtocolStatus status) {
     if (status == null) {
       return "(null)";
@@ -113,7 +113,8 @@
       int i = 0;
       Iterator<CharSequence> it = args.iterator();
       while (it.hasNext()) {
-        if (i > 0) sb.append(',');
+        if (i > 0)
+          sb.append(',');
         sb.append(it.next());
         i++;
       }
Index: src/java/org/apache/nutch/protocol/RobotRules.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRules.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/RobotRules.java	(working copy)
@@ -35,9 +35,8 @@
   public long getCrawlDelay();
 
   /**
-   * Returns <code>false</code> if the <code>robots.txt</code> file
-   * prohibits us from accessing the given <code>url</code>, or
-   * <code>true</code> otherwise.
+   * Returns <code>false</code> if the <code>robots.txt</code> file prohibits us
+   * from accessing the given <code>url</code>, or <code>true</code> otherwise.
    */
   public boolean isAllowed(URL url);
 
Index: src/java/org/apache/nutch/protocol/RobotRulesParser.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRulesParser.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/RobotRulesParser.java	(working copy)
@@ -43,35 +43,38 @@
 import crawlercommons.robots.SimpleRobotRulesParser;
 
 /**
- * This class uses crawler-commons for handling the parsing of {@code robots.txt} files.
- * It emits SimpleRobotRules objects, which describe the download permissions
- * as described in SimpleRobotRulesParser.
+ * This class uses crawler-commons for handling the parsing of
+ * {@code robots.txt} files. It emits SimpleRobotRules objects, which describe
+ * the download permissions as described in SimpleRobotRulesParser.
  */
 public abstract class RobotRulesParser implements Configurable {
 
-  public static final Logger LOG = LoggerFactory.getLogger(RobotRulesParser.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(RobotRulesParser.class);
 
-  protected static final Hashtable<String, BaseRobotRules> CACHE = new Hashtable<String, BaseRobotRules> ();
+  protected static final Hashtable<String, BaseRobotRules> CACHE = new Hashtable<String, BaseRobotRules>();
 
   /**
-   *  A {@link BaseRobotRules} object appropriate for use
-   *  when the {@code robots.txt} file is empty or missing;
-   *  all requests are allowed.
+   * A {@link BaseRobotRules} object appropriate for use when the
+   * {@code robots.txt} file is empty or missing; all requests are allowed.
    */
-  public static final BaseRobotRules EMPTY_RULES = new SimpleRobotRules(RobotRulesMode.ALLOW_ALL);
+  public static final BaseRobotRules EMPTY_RULES = new SimpleRobotRules(
+      RobotRulesMode.ALLOW_ALL);
 
   /**
-   *  A {@link BaseRobotRules} object appropriate for use when the 
-   *  {@code robots.txt} file is not fetched due to a {@code 403/Forbidden}
-   *  response; all requests are disallowed. 
+   * A {@link BaseRobotRules} object appropriate for use when the
+   * {@code robots.txt} file is not fetched due to a {@code 403/Forbidden}
+   * response; all requests are disallowed.
    */
-  public static BaseRobotRules FORBID_ALL_RULES = new SimpleRobotRules(RobotRulesMode.ALLOW_NONE);
+  public static BaseRobotRules FORBID_ALL_RULES = new SimpleRobotRules(
+      RobotRulesMode.ALLOW_NONE);
 
   private static SimpleRobotRulesParser robotParser = new SimpleRobotRulesParser();
   private Configuration conf;
   protected String agentNames;
 
-  public RobotRulesParser() { }
+  public RobotRulesParser() {
+  }
 
   public RobotRulesParser(Configuration conf) {
     setConf(conf);
@@ -90,9 +93,10 @@
     }
     agentNames = agentName;
 
-    // If there are any other agents specified, append those to the list of agents
+    // If there are any other agents specified, append those to the list of
+    // agents
     String otherAgents = conf.get("http.robots.agents");
-    if(otherAgents != null && !otherAgents.trim().isEmpty()) {
+    if (otherAgents != null && !otherAgents.trim().isEmpty()) {
       StringTokenizer tok = new StringTokenizer(otherAgents, ",");
       StringBuilder sb = new StringBuilder(agentNames);
       while (tok.hasMoreTokens()) {
@@ -99,7 +103,8 @@
         String str = tok.nextToken().trim();
         if (str.equals("*") || str.equals(agentName)) {
           // skip wildcard "*" or agent name itself
-          // (required for backward compatibility, cf. NUTCH-1715 and NUTCH-1718)
+          // (required for backward compatibility, cf. NUTCH-1715 and
+          // NUTCH-1718)
         } else {
           sb.append(",").append(str);
         }
@@ -117,16 +122,23 @@
   }
 
   /**
-   * Parses the robots content using the {@link SimpleRobotRulesParser} from crawler commons
-   *    
-   * @param url A string containing url
-   * @param content Contents of the robots file in a byte array 
-   * @param contentType The content type of the robots file
-   * @param robotName A string containing all the robots agent names used by parser for matching
-   * @return BaseRobotRules object 
+   * Parses the robots content using the {@link SimpleRobotRulesParser} from
+   * crawler commons
+   * 
+   * @param url
+   *          A string containing url
+   * @param content
+   *          Contents of the robots file in a byte array
+   * @param contentType
+   *          The content type of the robots file
+   * @param robotName
+   *          A string containing all the robots agent names used by parser for
+   *          matching
+   * @return BaseRobotRules object
    */
-  public BaseRobotRules parseRules (String url, byte[] content, String contentType, String robotName) {
-    return robotParser.parseContent(url, content, contentType, robotName); 
+  public BaseRobotRules parseRules(String url, byte[] content,
+      String contentType, String robotName) {
+    return robotParser.parseContent(url, content, contentType, robotName);
   }
 
   public BaseRobotRules getRobotRulesSet(Protocol protocol, String url) {
@@ -145,23 +157,29 @@
   public static void main(String[] argv) {
 
     if (argv.length != 3) {
-      System.err.println("Usage: RobotRulesParser <robots-file> <url-file> <agent-names>\n");
-      System.err.println("    <robots-file> - Input robots.txt file which will be parsed.");
-      System.err.println("    <url-file>    - Contains input URLs (1 per line) which are tested against the rules.");
-      System.err.println("    <agent-names> - Input agent names. Multiple agent names can be provided using");
-      System.err.println("                    comma as a delimiter without any spaces.");
+      System.err
+          .println("Usage: RobotRulesParser <robots-file> <url-file> <agent-names>\n");
+      System.err
+          .println("    <robots-file> - Input robots.txt file which will be parsed.");
+      System.err
+          .println("    <url-file>    - Contains input URLs (1 per line) which are tested against the rules.");
+      System.err
+          .println("    <agent-names> - Input agent names. Multiple agent names can be provided using");
+      System.err
+          .println("                    comma as a delimiter without any spaces.");
       System.exit(-1);
     }
 
     try {
       byte[] robotsBytes = Files.toByteArray(new File(argv[0]));
-      BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes, "text/plain", argv[2]);
+      BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes,
+          "text/plain", argv[2]);
 
       LineNumberReader testsIn = new LineNumberReader(new FileReader(argv[1]));
       String testPath = testsIn.readLine().trim();
       while (testPath != null) {
-        System.out.println( (rules.isAllowed(testPath) ? "allowed" : "not allowed") +
-            ":\t" + testPath);
+        System.out.println((rules.isAllowed(testPath) ? "allowed"
+            : "not allowed") + ":\t" + testPath);
         testPath = testsIn.readLine();
       }
       testsIn.close();
Index: src/java/org/apache/nutch/protocol/package-info.java
===================================================================
--- src/java/org/apache/nutch/protocol/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/protocol/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * see also {@link org.apache.nutch.net.protocols}.
  */
 package org.apache.nutch.protocol;
+
Index: src/java/org/apache/nutch/scoring/ScoreDatum.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoreDatum.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoreDatum.java	(working copy)
@@ -35,9 +35,10 @@
   private String anchor;
   private int distance;
   private Map<String, byte[]> metaData = new HashMap<String, byte[]>();
-  
-  public ScoreDatum() { }
-  
+
+  public ScoreDatum() {
+  }
+
   public ScoreDatum(float score, String url, String anchor, int depth) {
     this.score = score;
     this.url = url;
@@ -52,13 +53,13 @@
     anchor = Text.readString(in);
     distance = WritableUtils.readVInt(in);
     metaData.clear();
-    
+
     int size = WritableUtils.readVInt(in);
     for (int i = 0; i < size; i++) {
       String key = Text.readString(in);
       byte[] value = Bytes.readByteArray(in);
       metaData.put(key, value);
-    }    
+    }
   }
 
   @Override
@@ -67,7 +68,7 @@
     Text.writeString(out, url);
     Text.writeString(out, anchor);
     WritableUtils.writeVInt(out, distance);
-    
+
     WritableUtils.writeVInt(out, metaData.size());
     for (Entry<String, byte[]> e : metaData.entrySet()) {
       Text.writeString(out, e.getKey());
@@ -74,23 +75,23 @@
       Bytes.writeByteArray(out, e.getValue());
     }
   }
-  
+
   public byte[] getMeta(String key) {
     return metaData.get(key);
   }
-  
+
   public void setMeta(String key, byte[] value) {
     metaData.put(key, value);
   }
-  
+
   public byte[] deleteMeta(String key) {
     return metaData.remove(key);
   }
-  
+
   public float getScore() {
     return score;
   }
-  
+
   public void setScore(float score) {
     this.score = score;
   }
@@ -98,7 +99,7 @@
   public String getUrl() {
     return url;
   }
-  
+
   public void setUrl(String url) {
     this.url = url;
   }
@@ -106,7 +107,7 @@
   public String getAnchor() {
     return anchor;
   }
-  
+
   public int getDistance() {
     return distance;
   }
@@ -114,8 +115,7 @@
   @Override
   public String toString() {
     return "ScoreDatum [score=" + score + ", url=" + url + ", anchor=" + anchor
-        + ", distance="+distance + ", metaData=" + metaData + "]";
+        + ", distance=" + distance + ", metaData=" + metaData + "]";
   }
-  
-  
+
 }
Index: src/java/org/apache/nutch/scoring/ScoringFilter.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilter.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoringFilter.java	(working copy)
@@ -26,11 +26,11 @@
 
 /**
  * A contract defining behavior of scoring plugins.
- *
- * A scoring filter will manipulate scoring variables in CrawlDatum and
- * in resulting search indexes. Filters can be chained in a specific order,
- * to provide multi-stage scoring adjustments.
- *
+ * 
+ * A scoring filter will manipulate scoring variables in CrawlDatum and in
+ * resulting search indexes. Filters can be chained in a specific order, to
+ * provide multi-stage scoring adjustments.
+ * 
  * @author Andrzej Bialecki
  */
 public interface ScoringFilter extends Configurable, FieldPluggable {
@@ -39,74 +39,101 @@
 
   /**
    * Set an initial score for newly injected pages. Note: newly injected pages
-   * may have no inlinks, so filter implementations may wish to set this
-   * score to a non-zero value, to give newly injected pages some initial
-   * credit.
-   * @param url url of the page
-   * @param page new page. Filters will modify it in-place.
+   * may have no inlinks, so filter implementations may wish to set this score
+   * to a non-zero value, to give newly injected pages some initial credit.
+   * 
+   * @param url
+   *          url of the page
+   * @param page
+   *          new page. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void injectedScore(String url, WebPage page) throws ScoringFilterException;
+  public void injectedScore(String url, WebPage page)
+      throws ScoringFilterException;
 
   /**
-   * Set an initial score for newly discovered pages. Note: newly discovered pages
-   * have at least one inlink with its score contribution, so filter implementations
-   * may choose to set initial score to zero (unknown value), and then the inlink
-   * score contribution will set the "real" value of the new page.
-   * @param url url of the page
+   * Set an initial score for newly discovered pages. Note: newly discovered
+   * pages have at least one inlink with its score contribution, so filter
+   * implementations may choose to set initial score to zero (unknown value),
+   * and then the inlink score contribution will set the "real" value of the new
+   * page.
+   * 
+   * @param url
+   *          url of the page
    * @param page
    * @throws ScoringFilterException
    */
-  public void initialScore(String url, WebPage page) throws ScoringFilterException;
+  public void initialScore(String url, WebPage page)
+      throws ScoringFilterException;
 
   /**
-   * This method prepares a sort value for the purpose of sorting and
-   * selecting top N scoring pages during fetchlist generation.
-   * @param url url of the page
-   * @param datum page row. Modifications will be persisted.
-   * @param initSort initial sort value, or a value from previous filters in chain
+   * This method prepares a sort value for the purpose of sorting and selecting
+   * top N scoring pages during fetchlist generation.
+   * 
+   * @param url
+   *          url of the page
+   * @param page
+   *          page row. Modifications will be persisted.
+   * @param initSort
+   *          initial sort value, or a value from previous filters in chain
    */
-  public float generatorSortValue(String url, WebPage page, float initSort) throws ScoringFilterException;
+  public float generatorSortValue(String url, WebPage page, float initSort)
+      throws ScoringFilterException;
 
   /**
    * Distribute score value from the current page to all its outlinked pages.
-   * @param fromUrl url of the source page
-   * @param row page row
-   * @param scoreData A list of {@link OutlinkedScoreDatum}s for every outlink.
-   * These {@link OutlinkedScoreDatum}s will be passed to
-   * {@link #updateScore(String, OldWebTableRow, List)}
-   * for every outlinked URL.
-   * @param allCount number of all collected outlinks from the source page
+   * 
+   * @param fromUrl
+   *          url of the source page
+   * @param page
+   *          page row
+   * @param scoreData
+   *          A list of {@link ScoreDatum}s for every outlink. These
+   *          {@link ScoreDatum}s will be passed to
+   *          {@link #updateScore(String, WebPage, List)} for every
+   *          outlinked URL.
+   * @param allCount
+   *          number of all collected outlinks from the source page
    * @throws ScoringFilterException
    */
-  public void distributeScoreToOutlinks(String fromUrl,
-      WebPage page, Collection<ScoreDatum> scoreData,
-      int allCount) throws ScoringFilterException;
+  public void distributeScoreToOutlinks(String fromUrl, WebPage page,
+      Collection<ScoreDatum> scoreData, int allCount)
+      throws ScoringFilterException;
 
   /**
-   * This method calculates a new score during table update, based on the values contributed
-   * by inlinked pages.
-   * @param url url of the page
+   * This method calculates a new score during table update, based on the values
+   * contributed by inlinked pages.
+   * 
+   * @param url
+   *          url of the page
    * @param page
-   * @param inlinked list of {@link OutlinkedScoreDatum}s for all inlinks pointing to this URL.
+   * @param inlinkedScoreData
+   *          list of {@link ScoreDatum}s for all inlinks pointing to
+   *          this URL.
    * @throws ScoringFilterException
    */
-  public void updateScore(String url, WebPage page, List<ScoreDatum> inlinkedScoreData)
-  throws ScoringFilterException;
+  public void updateScore(String url, WebPage page,
+      List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException;
 
   /**
    * This method calculates a Lucene document boost.
-   * @param url url of the page
-   * @param doc document. NOTE: this already contains all information collected
-   * by indexing filters. Implementations may modify this instance, in order to store/remove
-   * some information.
-   * @param row page row
-   * @param initScore initial boost value for the Lucene document.
-   * @return boost value for the Lucene document. This value is passed as an argument
-   * to the next scoring filter in chain. NOTE: implementations may also express
-   * other scoring strategies by modifying Lucene document directly.
+   * 
+   * @param url
+   *          url of the page
+   * @param doc
+   *          document. NOTE: this already contains all information collected by
+   *          indexing filters. Implementations may modify this instance, in
+   *          order to store/remove some information.
+   * @param row
+   *          page row
+   * @param initScore
+   *          initial boost value for the Lucene document.
+   * @return boost value for the Lucene document. This value is passed as an
+   *         argument to the next scoring filter in chain. NOTE: implementations
+   *         may also express other scoring strategies by modifying Lucene
+   *         document directly.
    * @throws ScoringFilterException
    */
-  public float indexerScore(String url, NutchDocument doc, WebPage page, float initScore)
-  throws ScoringFilterException;
+  public float indexerScore(String url, NutchDocument doc, WebPage page,
+      float initScore) throws ScoringFilterException;
 }
Index: src/java/org/apache/nutch/scoring/ScoringFilterException.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilterException.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoringFilterException.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.scoring;
 
 /**
Index: src/java/org/apache/nutch/scoring/ScoringFilters.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilters.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/ScoringFilters.java	(working copy)
@@ -35,7 +35,7 @@
 
 /**
  * Creates and caches {@link ScoringFilter} implementing plugins.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class ScoringFilters extends Configured implements ScoringFilter {
@@ -46,7 +46,8 @@
     super(conf);
     ObjectCache objectCache = ObjectCache.get(conf);
     String order = conf.get("scoring.filter.order");
-    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -55,20 +56,23 @@
       }
 
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ScoringFilter.X_POINT_ID);
-        if (point == null) throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ScoringFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, ScoringFilter> filterMap =
-          new HashMap<String, ScoringFilter>();
+        HashMap<String, ScoringFilter> filterMap = new HashMap<String, ScoringFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ScoringFilter filter = (ScoringFilter) extension.getExtensionInstance();
+          ScoringFilter filter = (ScoringFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(filter.getClass().getName())) {
             filterMap.put(filter.getClass().getName(), filter);
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(ScoringFilter.class.getName(), filterMap.values().toArray(new ScoringFilter[0]));
+          objectCache.setObject(ScoringFilter.class.getName(), filterMap
+              .values().toArray(new ScoringFilter[0]));
         } else {
           ScoringFilter[] filter = new ScoringFilter[orderedFilters.length];
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -79,7 +83,8 @@
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+      this.filters = (ScoringFilter[]) objectCache
+          .getObject(ScoringFilter.class.getName());
     }
   }
 
@@ -86,7 +91,7 @@
   /** Calculate a sort value for Generate. */
   @Override
   public float generatorSortValue(String url, WebPage row, float initSort)
-  throws ScoringFilterException {
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       initSort = filter.generatorSortValue(url, row, initSort);
     }
@@ -95,7 +100,8 @@
 
   /** Calculate a new initial score, used when adding newly discovered pages. */
   @Override
-  public void initialScore(String url, WebPage row) throws ScoringFilterException {
+  public void initialScore(String url, WebPage row)
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       filter.initialScore(url, row);
     }
@@ -103,7 +109,8 @@
 
   /** Calculate a new initial score, used when injecting new pages. */
   @Override
-  public void injectedScore(String url, WebPage row) throws ScoringFilterException {
+  public void injectedScore(String url, WebPage row)
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       filter.injectedScore(url, row);
     }
Index: src/java/org/apache/nutch/scoring/package-info.java
===================================================================
--- src/java/org/apache/nutch/scoring/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/scoring/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * The {@link org.apache.nutch.scoring.ScoringFilter ScoringFilter} interface.
  */
 package org.apache.nutch.scoring;
+
Index: src/java/org/apache/nutch/storage/Host.java
===================================================================
--- src/java/org/apache/nutch/storage/Host.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/Host.java	(working copy)
@@ -1,25 +1,25 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage; 
+package org.apache.nutch.storage;
 
 import org.apache.avro.util.Utf8;
 import org.apache.nutch.util.Bytes;
@@ -26,15 +26,15 @@
 
 @SuppressWarnings("all")
 /** Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet */
-public class Host extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet\",\"fields\":[{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}}]}");
+public class Host extends org.apache.gora.persistency.impl.PersistentBase
+    implements org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet\",\"fields\":[{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    METADATA(0, "metadata"),
-    OUTLINKS(1, "outlinks"),
-    INLINKS(2, "inlinks"),
-    ;
+    METADATA(0, "metadata"), OUTLINKS(1, "outlinks"), INLINKS(2, "inlinks"), ;
     /**
      * Field's index.
      */
@@ -47,38 +47,51 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "metadata",
-  "outlinks",
-  "inlinks",
-  };
+  public static final String[] _ALL_FIELDS = { "metadata", "outlinks",
+      "inlinks", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -85,103 +98,160 @@
     return Host._ALL_FIELDS.length;
   }
 
-  /** A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc */
-  private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
-  /** Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
-  /** Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+  /**
+   * A multivalued metadata container used for storing a wide variety of host
+   * metadata such as structured web server characterists etc
+   */
+  private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
+  /**
+   * Hyperlinks which direct outside of the current host domain these can used
+   * in a histogram style manner to generate host statistics
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
+  /**
+   * Hyperlinks which link to pages within the current host domain these can
+   * used in a histogram style manner to generate host statistics
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return metadata;
-    case 1: return outlinks;
-    case 2: return inlinks;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return metadata;
+    case 1:
+      return outlinks;
+    case 2:
+      return inlinks;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 1: outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 2: inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 1:
+      outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 2:
+      inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'metadata' field.
-   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc   */
-  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+   * Gets the value of the 'metadata' field. A multivalued metadata container
+   * used for storing a wide variety of host metadata such as structured web
+   * server characterists etc
+   */
+  public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
     return metadata;
   }
 
   /**
-   * Sets the value of the 'metadata' field.
-   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc   * @param value the value to set.
+   * Sets the value of the 'metadata' field. A multivalued metadata container
+   * used for storing a wide variety of host metadata such as structured web
+   * server characterists etc * @param value the value to set.
    */
-  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setMetadata(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc   * @param value the value to set.
+   * Checks the dirty status of the 'metadata' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * multivalued metadata container used for storing a wide variety of host
+   * metadata such as structured web server characterists etc * @param value the
+   * value to set.
    */
-  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+  public boolean isMetadataDirty(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
     return isDirty(0);
   }
 
   /**
-   * Gets the value of the 'outlinks' field.
-   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+   * Gets the value of the 'outlinks' field. Hyperlinks which direct outside of
+   * the current host domain these can used in a histogram style manner to
+   * generate host statistics
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
     return outlinks;
   }
 
   /**
-   * Sets the value of the 'outlinks' field.
-   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Sets the value of the 'outlinks' field. Hyperlinks which direct outside of
+   * the current host domain these can used in a histogram style manner to
+   * generate host statistics * @param value the value to set.
    */
-  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setOutlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database.
+   * Hyperlinks which direct outside of the current host domain these can used
+   * in a histogram style manner to generate host statistics * @param value the
+   * value to set.
    */
-  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isOutlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(1);
   }
 
   /**
-   * Gets the value of the 'inlinks' field.
-   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+   * Gets the value of the 'inlinks' field. Hyperlinks which link to pages
+   * within the current host domain these can used in a histogram style manner
+   * to generate host statistics
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
     return inlinks;
   }
 
   /**
-   * Sets the value of the 'inlinks' field.
-   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Sets the value of the 'inlinks' field. Hyperlinks which link to pages
+   * within the current host domain these can used in a histogram style manner
+   * to generate host statistics * @param value the value to set.
    */
-  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setInlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics   * @param value the value to set.
+   * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database.
+   * Hyperlinks which link to pages within the current host domain these can
+   * used in a histogram style manner to generate host statistics * @param value
+   * the value to set.
    */
-  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isInlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(2);
   }
 
@@ -189,17 +259,19 @@
   public static org.apache.nutch.storage.Host.Builder newBuilder() {
     return new org.apache.nutch.storage.Host.Builder();
   }
-  
+
   /** Creates a new Host RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.Host.Builder newBuilder(org.apache.nutch.storage.Host.Builder other) {
+  public static org.apache.nutch.storage.Host.Builder newBuilder(
+      org.apache.nutch.storage.Host.Builder other) {
     return new org.apache.nutch.storage.Host.Builder(other);
   }
-  
+
   /** Creates a new Host RecordBuilder by copying an existing Host instance */
-  public static org.apache.nutch.storage.Host.Builder newBuilder(org.apache.nutch.storage.Host other) {
+  public static org.apache.nutch.storage.Host.Builder newBuilder(
+      org.apache.nutch.storage.Host other) {
     return new org.apache.nutch.storage.Host.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -222,62 +294,67 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for Host instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Host>
-    implements org.apache.avro.data.RecordBuilder<Host> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<Host> implements
+      org.apache.avro.data.RecordBuilder<Host> {
 
-    private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
+    private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
 
     /** Creates a new Builder */
     private Builder() {
       super(org.apache.nutch.storage.Host.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.Host.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing Host instance */
     private Builder(org.apache.nutch.storage.Host other) {
-            super(org.apache.nutch.storage.Host.SCHEMA$);
+      super(org.apache.nutch.storage.Host.SCHEMA$);
       if (isValidValue(fields()[0], other.metadata)) {
-        this.metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) data().deepCopy(fields()[0].schema(), other.metadata);
+        this.metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) data()
+            .deepCopy(fields()[0].schema(), other.metadata);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.outlinks)) {
-        this.outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[1].schema(), other.outlinks);
+        this.outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[1].schema(), other.outlinks);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.inlinks)) {
-        this.inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[2].schema(), other.inlinks);
+        this.inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[2].schema(), other.inlinks);
         fieldSetFlags()[2] = true;
       }
     }
 
     /** Gets the value of the 'metadata' field */
-    public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
       return metadata;
     }
-    
+
     /** Sets the value of the 'metadata' field */
-    public org.apache.nutch.storage.Host.Builder setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+    public org.apache.nutch.storage.Host.Builder setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
       validate(fields()[0], value);
       this.metadata = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'metadata' field has been set */
     public boolean hasMetadata() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'metadata' field */
     public org.apache.nutch.storage.Host.Builder clearMetadata() {
       metadata = null;
@@ -284,25 +361,26 @@
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'outlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
       return outlinks;
     }
-    
+
     /** Sets the value of the 'outlinks' field */
-    public org.apache.nutch.storage.Host.Builder setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.Host.Builder setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[1], value);
       this.outlinks = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'outlinks' field has been set */
     public boolean hasOutlinks() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'outlinks' field */
     public org.apache.nutch.storage.Host.Builder clearOutlinks() {
       outlinks = null;
@@ -309,25 +387,26 @@
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'inlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
       return inlinks;
     }
-    
+
     /** Sets the value of the 'inlinks' field */
-    public org.apache.nutch.storage.Host.Builder setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.Host.Builder setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[2], value);
       this.inlinks = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'inlinks' field has been set */
     public boolean hasInlinks() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'inlinks' field */
     public org.apache.nutch.storage.Host.Builder clearInlinks() {
       inlinks = null;
@@ -334,14 +413,20 @@
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     @Override
     public Host build() {
       try {
         Host record = new Host();
-        record.metadata = fieldSetFlags()[0] ? this.metadata : (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[0]));
-        record.outlinks = fieldSetFlags()[1] ? this.outlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[1]));
-        record.inlinks = fieldSetFlags()[2] ? this.inlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[2]));
+        record.metadata = fieldSetFlags()[0] ? this.metadata
+            : (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[0]));
+        record.outlinks = fieldSetFlags()[1] ? this.outlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[1]));
+        record.inlinks = fieldSetFlags()[2] ? this.inlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[2]));
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -348,115 +433,155 @@
       }
     }
   }
-  
-  public Host.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public Host.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public Host newInstance(){
+  public Host newInstance() {
     return newBuilder().build();
   }
 
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public boolean contains(String key) {
     return metadata.containsKey(new Utf8(key));
   }
-  
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public String getValue(String key, String defaultValue) {
-    if (!contains(key)) return defaultValue;
+    if (!contains(key))
+      return defaultValue;
     return Bytes.toString(metadata.get(new Utf8(key)));
   }
-  
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public int getInt(String key, int defaultValue) {
-    if (!contains(key)) return defaultValue;
-    return Integer.parseInt(getValue(key,null));
+    if (!contains(key))
+      return defaultValue;
+    return Integer.parseInt(getValue(key, null));
   }
 
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   public long getLong(String key, long defaultValue) {
-    if (!contains(key)) return defaultValue;
-    return Long.parseLong(getValue(key,null));
+    if (!contains(key))
+      return defaultValue;
+    return Long.parseLong(getValue(key, null));
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends Host implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'metadata' field.
-	   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc	   */
-	  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'metadata' field.
-	   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc	   * @param value the value to set.
-	   */
-	  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc	   * @param value the value to set.
-	   */
-	  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'outlinks' field.
-	   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'outlinks' field.
-	   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'inlinks' field.
-	   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'inlinks' field.
-	   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics	   * @param value the value to set.
-	   */
-	  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends Host implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'metadata' field. A multivalued metadata container
+     * used for storing a wide variety of host metadata such as structured web
+     * server characteristics etc
+     */
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'metadata' field. A multivalued metadata container
+     * used for storing a wide variety of host metadata such as structured web
+     * server characteristics etc * @param value the value to set.
+     */
+    public void setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'metadata' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * multivalued metadata container used for storing a wide variety of host
+     * metadata such as structured web server characteristics etc * @param value
+     * the value to set.
+     */
+    public boolean isMetadataDirty(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'outlinks' field. Hyperlinks which direct outside
+     * of the current host domain these can be used in a histogram style manner to
+     * generate host statistics
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'outlinks' field. Hyperlinks which direct outside
+     * of the current host domain these can be used in a histogram style manner to
+     * generate host statistics * @param value the value to set.
+     */
+    public void setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Hyperlinks which direct outside of the current host domain these can be used
+     * in a histogram style manner to generate host statistics * @param value
+     * the value to set.
+     */
+    public boolean isOutlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'inlinks' field. Hyperlinks which link to pages
+     * within the current host domain these can be used in a histogram style manner
+     * to generate host statistics
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'inlinks' field. Hyperlinks which link to pages
+     * within the current host domain these can be used in a histogram style manner
+     * to generate host statistics * @param value the value to set.
+     */
+    public void setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Hyperlinks which link to pages within the current host domain these can be
+     * used in a histogram style manner to generate host statistics * @param
+     * value the value to set.
+     */
+    public boolean isInlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/Mark.java
===================================================================
--- src/java/org/apache/nutch/storage/Mark.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/Mark.java	(working copy)
@@ -19,8 +19,8 @@
 import org.apache.avro.util.Utf8;
 
 public enum Mark {
-  INJECT_MARK("_injmrk_"), GENERATE_MARK("_gnmrk_"), FETCH_MARK("_ftcmrk_"),
-  PARSE_MARK("__prsmrk__"), UPDATEDB_MARK("_updmrk_"), INDEX_MARK("_idxmrk_");
+  INJECT_MARK("_injmrk_"), GENERATE_MARK("_gnmrk_"), FETCH_MARK("_ftcmrk_"), PARSE_MARK(
+      "__prsmrk__"), UPDATEDB_MARK("_updmrk_"), INDEX_MARK("_idxmrk_");
 
   private Utf8 name;
 
@@ -29,7 +29,7 @@
   }
 
   public void putMark(WebPage page, Utf8 markValue) {
-      page.getMarkers().put(name, markValue);
+    page.getMarkers().put(name, markValue);
   }
 
   public void putMark(WebPage page, String markValue) {
@@ -46,7 +46,9 @@
 
   /**
    * Remove the mark only if the mark is present on the page.
-   * @param page The page to remove the mark from.
+   * 
+   * @param page
+   *          The page to remove the mark from.
    * @return If the mark was present.
    */
   public Utf8 removeMarkIfExist(WebPage page) {
@@ -55,8 +57,8 @@
     }
     return null;
   }
-  
+
   public Utf8 getName() {
-	return name;
+    return name;
   }
 }
Index: src/java/org/apache/nutch/storage/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ParseStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/ParseStatus.java	(working copy)
@@ -1,36 +1,38 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage;  
+package org.apache.nutch.storage;
+
 @SuppressWarnings("all")
 /** A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage */
-public class ParseStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}");
+public class ParseStatus extends
+    org.apache.gora.persistency.impl.PersistentBase implements
+    org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    MAJOR_CODE(0, "majorCode"),
-    MINOR_CODE(1, "minorCode"),
-    ARGS(2, "args"),
-    ;
+    MAJOR_CODE(0, "majorCode"), MINOR_CODE(1, "minorCode"), ARGS(2, "args"), ;
     /**
      * Field's index.
      */
@@ -43,38 +45,51 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "majorCode",
-  "minorCode",
-  "args",
-  };
+  public static final String[] _ALL_FIELDS = { "majorCode", "minorCode",
+      "args", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -81,53 +96,98 @@
     return ParseStatus._ALL_FIELDS.length;
   }
 
-  /** Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) */
+  /**
+   * Major parsing status' including NOTPARSED (Parsing was not performed),
+   * SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more
+   * specific error message in arguments.)
+   */
   private int majorCode;
-  /** Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. */
+  /**
+   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of
+   * anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive
+   * to redirect to another URL. The target URL can be retrieved from the
+   * arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which
+   * may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed.
+   * Content was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other
+   * related parts of the content are needed to complete parsing. The list of
+   * URLs to missing parts may be provided in arguments. The Fetcher may decide
+   * to fetch these parts at once, then put them into Content.metadata, and
+   * supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There
+   * was no content to be parsed - probably caused by errors at protocol stage.
+   */
   private int minorCode;
-  /** Optional arguments supplied to compliment and/or justify the parse status code. */
+  /**
+   * Optional arguments supplied to compliment and/or justify the parse status
+   * code.
+   */
   private java.util.List<java.lang.CharSequence> args;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return majorCode;
-    case 1: return minorCode;
-    case 2: return args;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return majorCode;
+    case 1:
+      return minorCode;
+    case 2:
+      return args;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: majorCode = (java.lang.Integer)(value); break;
-    case 1: minorCode = (java.lang.Integer)(value); break;
-    case 2: args = (java.util.List<java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      majorCode = (java.lang.Integer) (value);
+      break;
+    case 1:
+      minorCode = (java.lang.Integer) (value);
+      break;
+    case 2:
+      args = (java.util.List<java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyListWrapper(
+              (java.util.List) value));
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'majorCode' field.
-   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)   */
+   * Gets the value of the 'majorCode' field. Major parsing status' including
+   * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED
+   * (General failure. There may be a more specific error message in arguments.)
+   */
   public java.lang.Integer getMajorCode() {
     return majorCode;
   }
 
   /**
-   * Sets the value of the 'majorCode' field.
-   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)   * @param value the value to set.
+   * Sets the value of the 'majorCode' field. Major parsing status' including
+   * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED
+   * (General failure. There may be a more specific error message in arguments.)
+   * * @param value the value to set.
    */
   public void setMajorCode(java.lang.Integer value) {
     this.majorCode = value;
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)   * @param value the value to set.
+   * Checks the dirty status of the 'majorCode' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Major
+   * parsing status' including NOTPARSED (Parsing was not performed), SUCCESS
+   * (Parsing succeeded), FAILED (General failure. There may be a more specific
+   * error message in arguments.) * @param value the value to set.
    */
   public boolean isMajorCodeDirty(java.lang.Integer value) {
     return isDirty(0);
@@ -134,24 +194,65 @@
   }
 
   /**
-   * Gets the value of the 'minorCode' field.
-   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.   */
+   * Gets the value of the 'minorCode' field. Minor parsing status' including
+   * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+   * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+   * another URL. The target URL can be retrieved from the arguments.,
+   * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+   * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+   * was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
+   * Other related parts of the content are needed to complete parsing. The list
+   * of URLs to missing parts may be provided in arguments. The Fetcher may
+   * decide to fetch these parts at once, then put them into Content.metadata,
+   * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
+   * There was no content to be parsed - probably caused by errors at protocol
+   * stage.
+   */
   public java.lang.Integer getMinorCode() {
     return minorCode;
   }
 
   /**
-   * Sets the value of the 'minorCode' field.
-   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.   * @param value the value to set.
+   * Sets the value of the 'minorCode' field. Minor parsing status' including
+   * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+   * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+   * another URL. The target URL can be retrieved from the arguments.,
+   * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+   * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+   * was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
+   * Other related parts of the content are needed to complete parsing. The list
+   * of URLs to missing parts may be provided in arguments. The Fetcher may
+   * decide to fetch these parts at once, then put them into Content.metadata,
+   * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
+   * There was no content to be parsed - probably caused by errors at protocol
+   * stage. * @param value the value to set.
    */
   public void setMinorCode(java.lang.Integer value) {
     this.minorCode = value;
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.   * @param value the value to set.
+   * Checks the dirty status of the 'minorCode' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Minor
+   * parsing status' including SUCCESS_OK - Successful parse devoid of anomalies
+   * or issues, SUCCESS_REDIRECT - Parsed content contains a directive to
+   * redirect to another URL. The target URL can be retrieved from the
+   * arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which
+   * may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed.
+   * Content was truncated, but the parser cannot handle incomplete content.,
+   * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
+   * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
+   * Other related parts of the content are needed to complete parsing. The list
+   * of URLs to missing parts may be provided in arguments. The Fetcher may
+   * decide to fetch these parts at once, then put them into Content.metadata,
+   * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
+   * There was no content to be parsed - probably caused by errors at protocol
+   * stage. * @param value the value to set.
    */
   public boolean isMinorCodeDirty(java.lang.Integer value) {
     return isDirty(1);
@@ -158,24 +259,29 @@
   }
 
   /**
-   * Gets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the parse status code.   */
+   * Gets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the parse status code.
+   */
   public java.util.List<java.lang.CharSequence> getArgs() {
     return args;
   }
 
   /**
-   * Sets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the parse status code.   * @param value the value to set.
+   * Sets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the parse status code. * @param value the value
+   * to set.
    */
   public void setArgs(java.util.List<java.lang.CharSequence> value) {
-    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Optional arguments supplied to compliment and/or justify the parse status code.   * @param value the value to set.
+   * Checks the dirty status of the 'args' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Optional
+   * arguments supplied to compliment and/or justify the parse status code. * @param
+   * value the value to set.
    */
   public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
     return isDirty(2);
@@ -185,17 +291,22 @@
   public static org.apache.nutch.storage.ParseStatus.Builder newBuilder() {
     return new org.apache.nutch.storage.ParseStatus.Builder();
   }
-  
+
   /** Creates a new ParseStatus RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(org.apache.nutch.storage.ParseStatus.Builder other) {
+  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(
+      org.apache.nutch.storage.ParseStatus.Builder other) {
     return new org.apache.nutch.storage.ParseStatus.Builder(other);
   }
-  
-  /** Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus instance */
-  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(org.apache.nutch.storage.ParseStatus other) {
+
+  /**
+   * Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus
+   * instance
+   */
+  public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(
+      org.apache.nutch.storage.ParseStatus other) {
     return new org.apache.nutch.storage.ParseStatus.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -218,12 +329,13 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for ParseStatus instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<ParseStatus>
-    implements org.apache.avro.data.RecordBuilder<ParseStatus> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<ParseStatus> implements
+      org.apache.avro.data.RecordBuilder<ParseStatus> {
 
     private int majorCode;
     private int minorCode;
@@ -233,25 +345,28 @@
     private Builder() {
       super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.ParseStatus.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing ParseStatus instance */
     private Builder(org.apache.nutch.storage.ParseStatus other) {
-            super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
+      super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
       if (isValidValue(fields()[0], other.majorCode)) {
-        this.majorCode = (java.lang.Integer) data().deepCopy(fields()[0].schema(), other.majorCode);
+        this.majorCode = (java.lang.Integer) data().deepCopy(
+            fields()[0].schema(), other.majorCode);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.minorCode)) {
-        this.minorCode = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.minorCode);
+        this.minorCode = (java.lang.Integer) data().deepCopy(
+            fields()[1].schema(), other.minorCode);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.args)) {
-        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(fields()[2].schema(), other.args);
+        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(
+            fields()[2].schema(), other.args);
         fieldSetFlags()[2] = true;
       }
     }
@@ -260,68 +375,69 @@
     public java.lang.Integer getMajorCode() {
       return majorCode;
     }
-    
+
     /** Sets the value of the 'majorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder setMajorCode(int value) {
       validate(fields()[0], value);
       this.majorCode = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'majorCode' field has been set */
     public boolean hasMajorCode() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'majorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder clearMajorCode() {
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'minorCode' field */
     public java.lang.Integer getMinorCode() {
       return minorCode;
     }
-    
+
     /** Sets the value of the 'minorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder setMinorCode(int value) {
       validate(fields()[1], value);
       this.minorCode = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'minorCode' field has been set */
     public boolean hasMinorCode() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'minorCode' field */
     public org.apache.nutch.storage.ParseStatus.Builder clearMinorCode() {
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'args' field */
     public java.util.List<java.lang.CharSequence> getArgs() {
       return args;
     }
-    
+
     /** Sets the value of the 'args' field */
-    public org.apache.nutch.storage.ParseStatus.Builder setArgs(java.util.List<java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.ParseStatus.Builder setArgs(
+        java.util.List<java.lang.CharSequence> value) {
       validate(fields()[2], value);
       this.args = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'args' field has been set */
     public boolean hasArgs() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'args' field */
     public org.apache.nutch.storage.ParseStatus.Builder clearArgs() {
       args = null;
@@ -328,14 +444,18 @@
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     @Override
     public ParseStatus build() {
       try {
         ParseStatus record = new ParseStatus();
-        record.majorCode = fieldSetFlags()[0] ? this.majorCode : (java.lang.Integer) defaultValue(fields()[0]);
-        record.minorCode = fieldSetFlags()[1] ? this.minorCode : (java.lang.Integer) defaultValue(fields()[1]);
-        record.args = fieldSetFlags()[2] ? this.args : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[2]));
+        record.majorCode = fieldSetFlags()[0] ? this.majorCode
+            : (java.lang.Integer) defaultValue(fields()[0]);
+        record.minorCode = fieldSetFlags()[1] ? this.minorCode
+            : (java.lang.Integer) defaultValue(fields()[1]);
+        record.args = fieldSetFlags()[2] ? this.args
+            : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper(
+                (java.util.List) defaultValue(fields()[2]));
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -342,92 +462,155 @@
       }
     }
   }
-  
-  public ParseStatus.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public ParseStatus.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public ParseStatus newInstance(){
+  public ParseStatus newInstance() {
     return newBuilder().build();
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends ParseStatus implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'majorCode' field.
-	   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)	   */
-	  public java.lang.Integer getMajorCode() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'majorCode' field.
-	   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)	   * @param value the value to set.
-	   */
-	  public void setMajorCode(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)	   * @param value the value to set.
-	   */
-	  public boolean isMajorCodeDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'minorCode' field.
-	   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.	   */
-	  public java.lang.Integer getMinorCode() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'minorCode' field.
-	   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.	   * @param value the value to set.
-	   */
-	  public void setMinorCode(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.	   * @param value the value to set.
-	   */
-	  public boolean isMinorCodeDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the parse status code.	   */
-	  public java.util.List<java.lang.CharSequence> getArgs() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the parse status code.	   * @param value the value to set.
-	   */
-	  public void setArgs(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Optional arguments supplied to compliment and/or justify the parse status code.	   * @param value the value to set.
-	   */
-	  public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends ParseStatus implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'majorCode' field. Major parsing status' including
+     * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded),
+     * FAILED (General failure. There may be a more specific error message in
+     * arguments.)
+     */
+    public java.lang.Integer getMajorCode() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'majorCode' field. Major parsing status' including
+     * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded),
+     * FAILED (General failure. There may be a more specific error message in
+     * arguments.) * @param value the value to set.
+     */
+    public void setMajorCode(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'majorCode' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. Major
+     * parsing status' including NOTPARSED (Parsing was not performed), SUCCESS
+     * (Parsing succeeded), FAILED (General failure. There may be a more
+     * specific error message in arguments.) * @param value the value to set.
+     */
+    public boolean isMajorCodeDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'minorCode' field. Minor parsing status' including
+     * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+     * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+     * another URL. The target URL can be retrieved from the arguments.,
+     * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+     * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+     * was truncated, but the parser cannot handle incomplete content.,
+     * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content
+     * may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing
+     * failed. Other related parts of the content are needed to complete
+     * parsing. The list of URLs to missing parts may be provided in arguments.
+     * The Fetcher may decide to fetch these parts at once, then put them into
+     * Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT
+     * - Parsing failed. There was no content to be parsed - probably caused by
+     * errors at protocol stage.
+     */
+    public java.lang.Integer getMinorCode() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'minorCode' field. Minor parsing status' including
+     * SUCCESS_OK - Successful parse devoid of anomalies or issues,
+     * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
+     * another URL. The target URL can be retrieved from the arguments.,
+     * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
+     * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
+     * was truncated, but the parser cannot handle incomplete content.,
+     * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content
+     * may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing
+     * failed. Other related parts of the content are needed to complete
+     * parsing. The list of URLs to missing parts may be provided in arguments.
+     * The Fetcher may decide to fetch these parts at once, then put them into
+     * Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT
+     * - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.
+     * @param value the value to set.
+     */
+    public void setMinorCode(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'minorCode' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. Minor
+     * parsing status' including SUCCESS_OK - Successful parse devoid of
+     * anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a
+     * directive to redirect to another URL. The target URL can be retrieved
+     * from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception
+     * occured which may be retrieved from the arguments., FAILED_TRUNCATED -
+     * Parsing failed. Content was truncated, but the parser cannot handle
+     * incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid
+     * format e.g. the content may be corrupted or of wrong type.,
+     * FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content
+     * are needed to complete parsing. The list of URLs to missing parts may be
+     * provided in arguments. The Fetcher may decide to fetch these parts at
+     * once, then put them into Content.metadata, and supply them for
+     * re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no
+     * content to be parsed - probably caused by errors at protocol stage.
+     * @param value the value to set.
+     */
+    public boolean isMinorCodeDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'args' field. Optional arguments supplied to
+     * compliment and/or justify the parse status code.
+     */
+    public java.util.List<java.lang.CharSequence> getArgs() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'args' field. Optional arguments supplied to
+     * compliment and/or justify the parse status code.
+     * @param value the value to set.
+     */
+    public void setArgs(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'args' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Optional arguments supplied to compliment and/or justify the parse status code.
+     * @param value the value to set.
+     */
+    public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/ProtocolStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ProtocolStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/ProtocolStatus.java	(working copy)
@@ -1,39 +1,40 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage;  
+package org.apache.nutch.storage;
 
 import org.apache.nutch.protocol.ProtocolStatusUtils;
 
 @SuppressWarnings("all")
 /** A nested container representing data captured from web server responses. */
-public class ProtocolStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}");
+public class ProtocolStatus extends
+    org.apache.gora.persistency.impl.PersistentBase implements
+    org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    CODE(0, "code"),
-    ARGS(1, "args"),
-    LAST_MODIFIED(2, "lastModified"),
-    ;
+    CODE(0, "code"), ARGS(1, "args"), LAST_MODIFIED(2, "lastModified"), ;
     /**
      * Field's index.
      */
@@ -46,38 +47,50 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "code",
-  "args",
-  "lastModified",
-  };
+  public static final String[] _ALL_FIELDS = { "code", "args", "lastModified", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -84,53 +97,140 @@
     return ProtocolStatus._ALL_FIELDS.length;
   }
 
-  /** A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching. */
+  /**
+   * A protocol response code which can be one of SUCCESS - content was
+   * retrieved without errors, FAILED - Content was not retrieved. Any further
+   * errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not
+   * found. Application may attempt to retry later, GONE - Resource is gone,
+   * MOVED - Resource has moved permanently. New url should be found in args,
+   * TEMP_MOVED - Resource has moved temporarily. New url should be found in
+   * args., NOTFOUND - Resource was not found, RETRY - Temporary failure.
+   * Application may retry immediately., EXCEPTION - Unspecified exception
+   * occured. Further information may be provided in args., ACCESS_DENIED -
+   * Access denied - authorization required, but missing/incorrect.,
+   * ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too
+   * many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since
+   * the last fetch., WOULDBLOCK - Request was refused by protocol plugins,
+   * because it would block. The expected number of milliseconds to wait before
+   * retry may be provided in args., BLOCKED - Thread was blocked http.max.delays
+   * times during fetching.
+   */
   private int code;
-  /** Optional arguments supplied to compliment and/or justify the response code. */
+  /**
+   * Optional arguments supplied to compliment and/or justify the response code.
+   */
   private java.util.List<java.lang.CharSequence> args;
-  /** A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself. */
+  /**
+   * A server reponse indicating when this page was last modified, this can be
+   * unreliable at times hence this is used as a default fall back value for the
+   * preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage
+   * itself.
+   */
   private long lastModified;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return code;
-    case 1: return args;
-    case 2: return lastModified;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return code;
+    case 1:
+      return args;
+    case 2:
+      return lastModified;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: code = (java.lang.Integer)(value); break;
-    case 1: args = (java.util.List<java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
-    case 2: lastModified = (java.lang.Long)(value); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      code = (java.lang.Integer) (value);
+      break;
+    case 1:
+      args = (java.util.List<java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyListWrapper(
+              (java.util.List) value));
+      break;
+    case 2:
+      lastModified = (java.lang.Long) (value);
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'code' field.
-   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.   */
+   * Gets the value of the 'code' field. A protocol response code which can be
+   * one of SUCCESS - content was retrieved without errors, FAILED - Content was
+   * not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND
+   * - This protocol was not found. Application may attempt to retry later, GONE
+   * - Resource is gone, MOVED - Resource has moved permanently. New url should
+   * be found in args, TEMP_MOVED - Resource has moved temporarily. New url
+   * should be found in args., NOTFOUND - Resource was not found, RETRY -
+   * Temporary failure. Application may retry immediately., EXCEPTION -
+   * Unspecified exception occured. Further information may be provided in
+   * args., ACCESS_DENIED - Access denied - authorization required, but
+   * missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules.,
+   * REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching.,
+   * NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was
+   * refused by protocol plugins, because it would block. The expected number of
+   * milliseconds to wait before retry may be provided in args., BLOCKED -
+   * Thread was blocked http.max.delays times during fetching.
+   */
   public java.lang.Integer getCode() {
     return code;
   }
 
   /**
-   * Sets the value of the 'code' field.
-   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.   * @param value the value to set.
+   * Sets the value of the 'code' field. A protocol response code which can be
+   * one of SUCCESS - content was retrieved without errors, FAILED - Content was
+   * not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND
+   * - This protocol was not found. Application may attempt to retry later, GONE
+   * - Resource is gone, MOVED - Resource has moved permanently. New url should
+   * be found in args, TEMP_MOVED - Resource has moved temporarily. New url
+   * should be found in args., NOTFOUND - Resource was not found, RETRY -
+   * Temporary failure. Application may retry immediately., EXCEPTION -
+   * Unspecified exception occured. Further information may be provided in
+   * args., ACCESS_DENIED - Access denied - authorization required, but
+   * missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules.,
+   * REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching.,
+   * NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was
+   * refused by protocol plugins, because it would block. The expected number of
+   * milliseconds to wait before retry may be provided in args., BLOCKED -
+   * Thread was blocked http.max.delays times during fetching.
+   * @param value the value to set.
    */
   public void setCode(java.lang.Integer value) {
     this.code = value;
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.   * @param value the value to set.
+   * Checks the dirty status of the 'code' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * protocol response code which can be one of SUCCESS - content was retrieved
+   * without errors, FAILED - Content was not retrieved. Any further errors may
+   * be indicated in args, PROTO_NOT_FOUND - This protocol was not found.
+   * Application may attempt to retry later, GONE - Resource is gone, MOVED -
+   * Resource has moved permanently. New url should be found in args, TEMP_MOVED
+   * - Resource has moved temporarily. New url should be found in args.,
+   * NOTFOUND - Resource was not found, RETRY - Temporary failure. Application
+   * may retry immediately., EXCEPTION - Unspecified exception occured. Further
+   * information may be provided in args., ACCESS_DENIED - Access denied -
+   * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
+   * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
+   * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch.,
+   * WOULDBLOCK - Request was refused by protocol plugins, because it would
+   * block. The expected number of milliseconds to wait before retry may be
+   * provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+   * @param value the value to set.
    */
   public boolean isCodeDirty(java.lang.Integer value) {
     return isDirty(0);
@@ -137,24 +237,29 @@
   }
 
   /**
-   * Gets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the response code.   */
+   * Gets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the response code.
+   */
   public java.util.List<java.lang.CharSequence> getArgs() {
     return args;
   }
 
   /**
-   * Sets the value of the 'args' field.
-   * Optional arguments supplied to compliment and/or justify the response code.   * @param value the value to set.
+   * Sets the value of the 'args' field. Optional arguments supplied to
+   * compliment and/or justify the response code.
+   * @param value the value to set.
    */
   public void setArgs(java.util.List<java.lang.CharSequence> value) {
-    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+    this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Optional arguments supplied to compliment and/or justify the response code.   * @param value the value to set.
+   * Checks the dirty status of the 'args' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Optional
+   * arguments supplied to compliment and/or justify the response code.
+   * @param value the value to set.
    */
   public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
     return isDirty(1);
@@ -161,24 +266,34 @@
   }
 
   /**
-   * Gets the value of the 'lastModified' field.
-   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.   */
+   * Gets the value of the 'lastModified' field. A server reponse indicating
+   * when this page was last modified, this can be unreliable at times hence
+   * this is used as a default fall back value for the preferred 'modifiedTime'
+   * and 'preModifiedTime' obtained from the WebPage itself.
+   */
   public java.lang.Long getLastModified() {
     return lastModified;
   }
 
   /**
-   * Sets the value of the 'lastModified' field.
-   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.   * @param value the value to set.
+   * Sets the value of the 'lastModified' field. A server reponse indicating
+   * when this page was last modified, this can be unreliable at times hence
+   * this is used as a default fall back value for the preferred 'modifiedTime'
+   * and 'preModifiedTime' obtained from the WebPage itself.
+   * @param value the value to set.
    */
   public void setLastModified(java.lang.Long value) {
     this.lastModified = value;
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.   * @param value the value to set.
+   * Checks the dirty status of the 'lastModified' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A server
+   * reponse indicating when this page was last modified, this can be unreliable
+   * at times hence this is used as a default fall back value for the preferred
+   * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+   * @param value the value to set.
    */
   public boolean isLastModifiedDirty(java.lang.Long value) {
     return isDirty(2);
@@ -188,17 +303,22 @@
   public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder() {
     return new org.apache.nutch.storage.ProtocolStatus.Builder();
   }
-  
+
   /** Creates a new ProtocolStatus RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(org.apache.nutch.storage.ProtocolStatus.Builder other) {
+  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(
+      org.apache.nutch.storage.ProtocolStatus.Builder other) {
     return new org.apache.nutch.storage.ProtocolStatus.Builder(other);
   }
-  
-  /** Creates a new ProtocolStatus RecordBuilder by copying an existing ProtocolStatus instance */
-  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(org.apache.nutch.storage.ProtocolStatus other) {
+
+  /**
+   * Creates a new ProtocolStatus RecordBuilder by copying an existing
+   * ProtocolStatus instance
+   */
+  public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(
+      org.apache.nutch.storage.ProtocolStatus other) {
     return new org.apache.nutch.storage.ProtocolStatus.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -221,12 +341,13 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for ProtocolStatus instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<ProtocolStatus>
-    implements org.apache.avro.data.RecordBuilder<ProtocolStatus> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<ProtocolStatus>
+      implements org.apache.avro.data.RecordBuilder<ProtocolStatus> {
 
     private int code;
     private java.util.List<java.lang.CharSequence> args;
@@ -236,25 +357,28 @@
     private Builder() {
       super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.ProtocolStatus.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing ProtocolStatus instance */
     private Builder(org.apache.nutch.storage.ProtocolStatus other) {
-            super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
+      super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
       if (isValidValue(fields()[0], other.code)) {
-        this.code = (java.lang.Integer) data().deepCopy(fields()[0].schema(), other.code);
+        this.code = (java.lang.Integer) data().deepCopy(fields()[0].schema(),
+            other.code);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.args)) {
-        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(fields()[1].schema(), other.args);
+        this.args = (java.util.List<java.lang.CharSequence>) data().deepCopy(
+            fields()[1].schema(), other.args);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.lastModified)) {
-        this.lastModified = (java.lang.Long) data().deepCopy(fields()[2].schema(), other.lastModified);
+        this.lastModified = (java.lang.Long) data().deepCopy(
+            fields()[2].schema(), other.lastModified);
         fieldSetFlags()[2] = true;
       }
     }
@@ -263,44 +387,45 @@
     public java.lang.Integer getCode() {
       return code;
     }
-    
+
     /** Sets the value of the 'code' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder setCode(int value) {
       validate(fields()[0], value);
       this.code = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'code' field has been set */
     public boolean hasCode() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'code' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder clearCode() {
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'args' field */
     public java.util.List<java.lang.CharSequence> getArgs() {
       return args;
     }
-    
+
     /** Sets the value of the 'args' field */
-    public org.apache.nutch.storage.ProtocolStatus.Builder setArgs(java.util.List<java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.ProtocolStatus.Builder setArgs(
+        java.util.List<java.lang.CharSequence> value) {
       validate(fields()[1], value);
       this.args = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'args' field has been set */
     public boolean hasArgs() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'args' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder clearArgs() {
       args = null;
@@ -307,38 +432,43 @@
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'lastModified' field */
     public java.lang.Long getLastModified() {
       return lastModified;
     }
-    
+
     /** Sets the value of the 'lastModified' field */
-    public org.apache.nutch.storage.ProtocolStatus.Builder setLastModified(long value) {
+    public org.apache.nutch.storage.ProtocolStatus.Builder setLastModified(
+        long value) {
       validate(fields()[2], value);
       this.lastModified = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'lastModified' field has been set */
     public boolean hasLastModified() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'lastModified' field */
     public org.apache.nutch.storage.ProtocolStatus.Builder clearLastModified() {
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     @Override
     public ProtocolStatus build() {
       try {
         ProtocolStatus record = new ProtocolStatus();
-        record.code = fieldSetFlags()[0] ? this.code : (java.lang.Integer) defaultValue(fields()[0]);
-        record.args = fieldSetFlags()[1] ? this.args : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[1]));
-        record.lastModified = fieldSetFlags()[2] ? this.lastModified : (java.lang.Long) defaultValue(fields()[2]);
+        record.code = fieldSetFlags()[0] ? this.code
+            : (java.lang.Integer) defaultValue(fields()[0]);
+        record.args = fieldSetFlags()[1] ? this.args
+            : (java.util.List<java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyListWrapper(
+                (java.util.List) defaultValue(fields()[1]));
+        record.lastModified = fieldSetFlags()[2] ? this.lastModified
+            : (java.lang.Long) defaultValue(fields()[2]);
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -345,101 +475,173 @@
       }
     }
   }
-  
-  public ProtocolStatus.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public ProtocolStatus.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public ProtocolStatus newInstance(){
+  public ProtocolStatus newInstance() {
     return newBuilder().build();
   }
 
-  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
+  // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
+  // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
   /**
    * A convenience method which returns a successful {@link ProtocolStatus}.
+   * 
    * @return the {@link ProtocolStatus} value for 200 (success).
    */
   public boolean isSuccess() {
-    return code == ProtocolStatusUtils.SUCCESS; 
+    return code == ProtocolStatusUtils.SUCCESS;
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends ProtocolStatus implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'code' field.
-	   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.	   */
-	  public java.lang.Integer getCode() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'code' field.
-	   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.	   * @param value the value to set.
-	   */
-	  public void setCode(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.	   * @param value the value to set.
-	   */
-	  public boolean isCodeDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the response code.	   */
-	  public java.util.List<java.lang.CharSequence> getArgs() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'args' field.
-	   * Optional arguments supplied to compliment and/or justify the response code.	   * @param value the value to set.
-	   */
-	  public void setArgs(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Optional arguments supplied to compliment and/or justify the response code.	   * @param value the value to set.
-	   */
-	  public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'lastModified' field.
-	   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.	   */
-	  public java.lang.Long getLastModified() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'lastModified' field.
-	   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.	   * @param value the value to set.
-	   */
-	  public void setLastModified(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.	   * @param value the value to set.
-	   */
-	  public boolean isLastModifiedDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends ProtocolStatus implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'code' field. A protocol response code which can be
+     * one of SUCCESS - content was retrieved without errors, FAILED - Content
+     * was not retrieved. Any further errors may be indicated in args,
+     * PROTO_NOT_FOUND - This protocol was not found. Application may attempt to
+     * retry later, GONE - Resource is gone, MOVED - Resource has moved
+     * permanently. New url should be found in args, TEMP_MOVED - Resource has
+     * moved temporarily. New url should be found in args., NOTFOUND - Resource
+     * was not found, RETRY - Temporary failure. Application may retry
+     * immediately., EXCEPTION - Unspecified exception occurred. Further
+     * information may be provided in args., ACCESS_DENIED - Access denied -
+     * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
+     * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
+     * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last
+     * fetch., WOULDBLOCK - Request was refused by protocol plugins, because it
+     * would block. The expected number of milliseconds to wait before retry may
+     * be provided in args., BLOCKED - Thread was blocked http.max.delays times
+     * during fetching.
+     */
+    public java.lang.Integer getCode() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'code' field. A protocol response code which can be
+     * one of SUCCESS - content was retrieved without errors, FAILED - Content
+     * was not retrieved. Any further errors may be indicated in args,
+     * PROTO_NOT_FOUND - This protocol was not found. Application may attempt to
+     * retry later, GONE - Resource is gone, MOVED - Resource has moved
+     * permanently. New url should be found in args, TEMP_MOVED - Resource has
+     * moved temporarily. New url should be found in args., NOTFOUND - Resource
+     * was not found, RETRY - Temporary failure. Application may retry
+     * immediately., EXCEPTION - Unspecified exception occurred. Further
+     * information may be provided in args., ACCESS_DENIED - Access denied -
+     * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
+     * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
+     * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last
+     * fetch., WOULDBLOCK - Request was refused by protocol plugins, because it
+     * would block. The expected number of milliseconds to wait before retry may
+     * be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+     * @param value the value to set.
+     */
+    public void setCode(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'code' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * protocol response code which can be one of SUCCESS - content was
+     * retrieved without errors, FAILED - Content was not retrieved. Any further
+     * errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not
+     * found. Application may attempt to retry later, GONE - Resource is gone,
+     * MOVED - Resource has moved permanently. New url should be found in args,
+     * TEMP_MOVED - Resource has moved temporarily. New url should be found in
+     * args., NOTFOUND - Resource was not found, RETRY - Temporary failure.
+     * Application may retry immediately., EXCEPTION - Unspecified exception
+     * occurred. Further information may be provided in args., ACCESS_DENIED -
+     * Access denied - authorization required, but missing/incorrect.,
+     * ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too
+     * many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged
+     * since the last fetch., WOULDBLOCK - Request was refused by protocol
+     * plugins, because it would block. The expected number of milliseconds to
+     * wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+     * @param value the value to set.
+     */
+    public boolean isCodeDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'args' field. Optional arguments supplied to
+     * complement and/or justify the response code.
+     */
+    public java.util.List<java.lang.CharSequence> getArgs() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'args' field. Optional arguments supplied to
+     * complement and/or justify the response code.
+     * @param value the value to set.
+     */
+    public void setArgs(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'args' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Optional arguments supplied to complement and/or justify the response code.
+     * @param value the value to set.
+     */
+    public boolean isArgsDirty(java.util.List<java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'lastModified' field. A server response indicating
+     * when this page was last modified, this can be unreliable at times hence
+     * this is used as a default fall back value for the preferred
+     * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+     */
+    public java.lang.Long getLastModified() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'lastModified' field. A server response indicating
+     * when this page was last modified, this can be unreliable at times hence
+     * this is used as a default fall back value for the preferred
+     * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+     * @param value the value to set.
+     */
+    public void setLastModified(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'lastModified' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. A
+     * server response indicating when this page was last modified, this can be
+     * unreliable at times hence this is used as a default fall back value for
+     * the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+     * @param value the value to set.
+     */
+    public boolean isLastModifiedDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/StorageUtils.java
===================================================================
--- src/java/org/apache/nutch/storage/StorageUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/StorageUtils.java	(working copy)
@@ -35,13 +35,14 @@
 import java.util.Iterator;
 
 /**
- * Entry point to Gora store/mapreduce functionality.
- * Translates the concept of "crawlid" to the corresponding Gora support.
+ * Entry point to Gora store/mapreduce functionality. Translates the concept of
+ * "crawlid" to the corresponding Gora support.
  */
 public class StorageUtils {
 
-  /** Creates a store for the given persistentClass.
-   * Currently supports Webpage and Host stores.
+  /**
+   * Creates a store for the given persistentClass. Currently supports Webpage
+   * and Host stores.
    * 
    * @param conf
    * @param keyClass
@@ -51,8 +52,9 @@
    * @throws GoraException
    */
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> DataStore<K, V> createWebStore(Configuration conf,
-      Class<K> keyClass, Class<V> persistentClass) throws ClassNotFoundException, GoraException {
+  public static <K, V extends Persistent> DataStore<K, V> createWebStore(
+      Configuration conf, Class<K> keyClass, Class<V> persistentClass)
+      throws ClassNotFoundException, GoraException {
 
     String crawlId = conf.get(Nutch.CRAWL_ID_KEY, "");
     String schemaPrefix = "";
@@ -59,7 +61,7 @@
     if (!crawlId.isEmpty()) {
       schemaPrefix = crawlId + "_";
     }
-      
+
     String schema;
     if (WebPage.class.equals(persistentClass)) {
       schema = conf.get("storage.schema.webpage", "webpage");
@@ -68,52 +70,52 @@
       schema = conf.get("storage.schema.host", "host");
       conf.set("preferred.schema.name", schemaPrefix + "host");
     } else {
-      throw new UnsupportedOperationException("Unable to create store for class " + persistentClass);
+      throw new UnsupportedOperationException(
+          "Unable to create store for class " + persistentClass);
     }
 
-    Class<? extends DataStore<K, V>> dataStoreClass =
-      (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
-    return DataStoreFactory.createDataStore(dataStoreClass,
-            keyClass, persistentClass, conf, schema);
+    Class<? extends DataStore<K, V>> dataStoreClass = (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
+    return DataStoreFactory.createDataStore(dataStoreClass, keyClass,
+        persistentClass, conf, schema);
   }
-  
+
   /**
    * Return the Persistent Gora class used to persist Nutch Web data.
    * 
-   * @param the Nutch configuration 
+   * @param conf
+   *          the Nutch configuration
    * @return the Gora DataStore persistent class
    * @throws ClassNotFoundException
    */
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> Class<? extends DataStore<K, V>>
-  getDataStoreClass(Configuration conf)  throws ClassNotFoundException {
-    return (Class<? extends DataStore<K, V>>)
-      Class.forName(conf.get("storage.data.store.class",
-          "org.apache.gora.sql.store.SqlStore"));
+  public static <K, V extends Persistent> Class<? extends DataStore<K, V>> getDataStoreClass(
+      Configuration conf) throws ClassNotFoundException {
+    return (Class<? extends DataStore<K, V>>) Class.forName(conf.get(
+        "storage.data.store.class", "org.apache.gora.sql.store.SqlStore"));
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, null, true);
+      throws ClassNotFoundException, IOException {
+    initMapperJob(job, fields, outKeyClass, outValueClass, mapperClass, null,
+        true);
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass,
       Class<? extends Partitioner<K, V>> partitionerClass)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, partitionerClass, true);
+      throws ClassNotFoundException, IOException {
+    initMapperJob(job, fields, outKeyClass, outValueClass, mapperClass,
+        partitionerClass, true);
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass,
       Class<? extends Partitioner<K, V>> partitionerClass, boolean reuseObjects)
       throws ClassNotFoundException, IOException {
@@ -154,10 +156,10 @@
 
   public static <K, V> void initReducerJob(Job job,
       Class<? extends GoraReducer<K, V, String, WebPage>> reducerClass)
-  throws ClassNotFoundException, GoraException {
+      throws ClassNotFoundException, GoraException {
     Configuration conf = job.getConfiguration();
-    DataStore<String, WebPage> store =
-      StorageUtils.createWebStore(conf, String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(conf,
+        String.class, WebPage.class);
     GoraReducer.initReducerJob(job, store, reducerClass);
     GoraOutputFormat.setOutput(job, store, true);
   }
Index: src/java/org/apache/nutch/storage/WebPage.java
===================================================================
--- src/java/org/apache/nutch/storage/WebPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/WebPage.java	(working copy)
@@ -1,57 +1,46 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage;  
+package org.apache.nutch.storage;
+
 @SuppressWarnings("all")
 /** WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time */
-public class WebPage extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The original associated with this WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The number of retried attempts at fetching the WebPage since it was last successfully fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. 
This is important to understand the changing nature of the WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. 
The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The entire raw document content e.g. raw XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. 
This is essentially the WebPage's fingerprint represnting its state for any point in time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The title of the WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The textual content of the WebPage devoid from native markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. 
There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler\",\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which direct outside of the current domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which link to pages within the current domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. 
They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.\",\"default\":null}]}");
+public class WebPage extends org.apache.gora.persistency.impl.PersistentBase
+    implements org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      .parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The original associated with this WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The number of retried attempts at fetching the WebPage since it was last successfully fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. 
This is important to understand the changing nature of the WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. 
The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The entire raw document content e.g. raw XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. 
This is essentially the WebPage's fingerprint represnting its state for any point in time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The title of the WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The textual content of the WebPage devoid from native markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. 
There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler\",\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which direct outside of the current domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which link to pages within the current domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. 
They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.\",\"default\":null}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    BASE_URL(0, "baseUrl"),
-    STATUS(1, "status"),
-    FETCH_TIME(2, "fetchTime"),
-    PREV_FETCH_TIME(3, "prevFetchTime"),
-    FETCH_INTERVAL(4, "fetchInterval"),
-    RETRIES_SINCE_FETCH(5, "retriesSinceFetch"),
-    MODIFIED_TIME(6, "modifiedTime"),
-    PREV_MODIFIED_TIME(7, "prevModifiedTime"),
-    PROTOCOL_STATUS(8, "protocolStatus"),
-    CONTENT(9, "content"),
-    CONTENT_TYPE(10, "contentType"),
-    PREV_SIGNATURE(11, "prevSignature"),
-    SIGNATURE(12, "signature"),
-    TITLE(13, "title"),
-    TEXT(14, "text"),
-    PARSE_STATUS(15, "parseStatus"),
-    SCORE(16, "score"),
-    REPR_URL(17, "reprUrl"),
-    HEADERS(18, "headers"),
-    OUTLINKS(19, "outlinks"),
-    INLINKS(20, "inlinks"),
-    MARKERS(21, "markers"),
-    METADATA(22, "metadata"),
-    BATCH_ID(23, "batchId"),
-    ;
+    BASE_URL(0, "baseUrl"), STATUS(1, "status"), FETCH_TIME(2, "fetchTime"), PREV_FETCH_TIME(
+        3, "prevFetchTime"), FETCH_INTERVAL(4, "fetchInterval"), RETRIES_SINCE_FETCH(
+        5, "retriesSinceFetch"), MODIFIED_TIME(6, "modifiedTime"), PREV_MODIFIED_TIME(
+        7, "prevModifiedTime"), PROTOCOL_STATUS(8, "protocolStatus"), CONTENT(
+        9, "content"), CONTENT_TYPE(10, "contentType"), PREV_SIGNATURE(11,
+        "prevSignature"), SIGNATURE(12, "signature"), TITLE(13, "title"), TEXT(
+        14, "text"), PARSE_STATUS(15, "parseStatus"), SCORE(16, "score"), REPR_URL(
+        17, "reprUrl"), HEADERS(18, "headers"), OUTLINKS(19, "outlinks"), INLINKS(
+        20, "inlinks"), MARKERS(21, "markers"), METADATA(22, "metadata"), BATCH_ID(
+        23, "batchId"), ;
     /**
      * Field's index.
      */
@@ -64,59 +53,55 @@
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "baseUrl",
-  "status",
-  "fetchTime",
-  "prevFetchTime",
-  "fetchInterval",
-  "retriesSinceFetch",
-  "modifiedTime",
-  "prevModifiedTime",
-  "protocolStatus",
-  "content",
-  "contentType",
-  "prevSignature",
-  "signature",
-  "title",
-  "text",
-  "parseStatus",
-  "score",
-  "reprUrl",
-  "headers",
-  "outlinks",
-  "inlinks",
-  "markers",
-  "metadata",
-  "batchId",
-  };
+  public static final String[] _ALL_FIELDS = { "baseUrl", "status",
+      "fetchTime", "prevFetchTime", "fetchInterval", "retriesSinceFetch",
+      "modifiedTime", "prevModifiedTime", "protocolStatus", "content",
+      "contentType", "prevSignature", "signature", "title", "text",
+      "parseStatus", "score", "reprUrl", "headers", "outlinks", "inlinks",
+      "markers", "metadata", "batchId", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -125,28 +110,72 @@
 
   /** The original associated with this WebPage. */
   private java.lang.CharSequence baseUrl;
-  /** A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified */
+  /**
+   * A crawl status associated with the WebPage, can be of value
+   * STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage
+   * was successfully fetched, STATUS_GONE - WebPage no longer exists,
+   * STATUS_REDIR_TEMP - WebPage temporarily redirects to other page,
+   * STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   */
   private int status;
   /** The system time in milliseconds for when the page was fetched. */
   private long fetchTime;
-  /** The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation */
+  /**
+   * The system time in milliseconds for when the page was last fetched if it
+   * was previously fetched which can be used to calculate time delta within a
+   * fetching schedule implementation
+   */
   private long prevFetchTime;
-  /** The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented. */
+  /**
+   * The default number of seconds between re-fetches of a page. The default is
+   * considered as 30 days unless a custom fetch schedule is implemented.
+   */
   private int fetchInterval;
-  /** The number of retried attempts at fetching the WebPage since it was last successfully fetched. */
+  /**
+   * The number of retried attempts at fetching the WebPage since it was last
+   * successfully fetched.
+   */
   private int retriesSinceFetch;
-  /** The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage. */
+  /**
+   * The system time in milliseconds for when this WebPage was modified by the
+   * WebPage author, if this is not available we default to the server for this
+   * information. This is important to understand the changing nature of the
+   * WebPage.
+   */
   private long modifiedTime;
-  /** The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage. */
+  /**
+   * The system time in milliseconds for when this WebPage was previously
+   * modified by the author, if this is not available then we default to the
+   * server for this information. This is important to understand the changing
+   * nature of a WebPage.
+   */
   private long prevModifiedTime;
   private org.apache.nutch.storage.ProtocolStatus protocolStatus;
   /** The entire raw document content e.g. raw XHTML */
   private java.nio.ByteBuffer content;
-  /** The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. */
+  /**
+   * The type of the content contained within the document itself. ContentType
+   * is an alias for MimeType. Historically, this parameter was only called
+   * MimeType, but since this is actually the value included in the HTTP
+   * Content-Type header, it can also include the character set encoding, which
+   * makes it more than just a MimeType specification. If MimeType is specified
+   * e.g. not None, that value is used. Otherwise, ContentType is used. If
+   * neither is given, the DEFAULT_CONTENT_TYPE setting is used.
+   */
   private java.lang.CharSequence contentType;
-  /** An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints. */
+  /**
+   * An implementation of a WebPage's previous signature from which it can be
+   * identified and referenced at any point in time. This can be used to
+   * uniquely identify WebPage deltas based on page fingerprints.
+   */
   private java.nio.ByteBuffer prevSignature;
-  /** An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time. */
+  /**
+   * An implementation of a WebPage's signature from which it can be identified
+   * and referenced at any point in time. This is essentially the WebPage's
+   * fingerprint representing its state for any point in time.
+   */
   private java.nio.ByteBuffer signature;
   /** The title of the WebPage. */
   private java.lang.CharSequence title;
@@ -153,105 +182,223 @@
   /** The textual content of the WebPage devoid from native markup. */
   private java.lang.CharSequence text;
   private org.apache.nutch.storage.ParseStatus parseStatus;
-  /** A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics. */
+  /**
+   * A score used to determine a WebPage's relevance within the web graph it is
+   * part of. This score may change over time based on graph characteristics.
+   */
   private float score;
-  /** In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler */
+  /**
+   * In the case where we are given two urls, a source and a destination of a
+   * redirect, we should determine and persist the representative url. The logic
+   * used to determine this is based largely on Yahoo!'s Slurp Crawler
+   */
   private java.lang.CharSequence reprUrl;
-  /** Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> headers;
+  /**
+   * Header information returned from the web server used to server the content
+   * which is subsequently fetched from. This includes keys such as
+   * TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+   * CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE,
+   * LAST_MODIFIED and LOCATION.
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> headers;
   /** Embedded hyperlinks which direct outside of the current domain. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
   /** Embedded hyperlinks which link to pages within the current domain. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
-  /** Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> markers;
-  /** A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage. */
-  private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
-  /** A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId. */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
+  /**
+   * Markers flags which represent user and machine decisions which have
+   * affected influenced a WebPage's current state. Markers can be system
+   * specific and user machine driven in nature. They are assigned to a WebPage
+   * on a job-by-job basis and thier values indicative of what actions should be
+   * associated with a WebPage.
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> markers;
+  /**
+   * A multi-valued metadata container used for storing everything from
+   * structured WebPage characterists, to ad-hoc extraction and metadata
+   * augmentation for any given WebPage.
+   */
+  private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
+  /**
+   * A batchId that this WebPage is assigned to. WebPage's are fetched in
+   * batches, called fetchlists. Pages are partitioned but can always be
+   * associated and fetched alongside pages of similar value (within a crawl
+   * cycle) based on batchId.
+   */
   private java.lang.CharSequence batchId;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return baseUrl;
-    case 1: return status;
-    case 2: return fetchTime;
-    case 3: return prevFetchTime;
-    case 4: return fetchInterval;
-    case 5: return retriesSinceFetch;
-    case 6: return modifiedTime;
-    case 7: return prevModifiedTime;
-    case 8: return protocolStatus;
-    case 9: return content;
-    case 10: return contentType;
-    case 11: return prevSignature;
-    case 12: return signature;
-    case 13: return title;
-    case 14: return text;
-    case 15: return parseStatus;
-    case 16: return score;
-    case 17: return reprUrl;
-    case 18: return headers;
-    case 19: return outlinks;
-    case 20: return inlinks;
-    case 21: return markers;
-    case 22: return metadata;
-    case 23: return batchId;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      return baseUrl;
+    case 1:
+      return status;
+    case 2:
+      return fetchTime;
+    case 3:
+      return prevFetchTime;
+    case 4:
+      return fetchInterval;
+    case 5:
+      return retriesSinceFetch;
+    case 6:
+      return modifiedTime;
+    case 7:
+      return prevModifiedTime;
+    case 8:
+      return protocolStatus;
+    case 9:
+      return content;
+    case 10:
+      return contentType;
+    case 11:
+      return prevSignature;
+    case 12:
+      return signature;
+    case 13:
+      return title;
+    case 14:
+      return text;
+    case 15:
+      return parseStatus;
+    case 16:
+      return score;
+    case 17:
+      return reprUrl;
+    case 18:
+      return headers;
+    case 19:
+      return outlinks;
+    case 20:
+      return inlinks;
+    case 21:
+      return markers;
+    case 22:
+      return metadata;
+    case 23:
+      return batchId;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: baseUrl = (java.lang.CharSequence)(value); break;
-    case 1: status = (java.lang.Integer)(value); break;
-    case 2: fetchTime = (java.lang.Long)(value); break;
-    case 3: prevFetchTime = (java.lang.Long)(value); break;
-    case 4: fetchInterval = (java.lang.Integer)(value); break;
-    case 5: retriesSinceFetch = (java.lang.Integer)(value); break;
-    case 6: modifiedTime = (java.lang.Long)(value); break;
-    case 7: prevModifiedTime = (java.lang.Long)(value); break;
-    case 8: protocolStatus = (org.apache.nutch.storage.ProtocolStatus)(value); break;
-    case 9: content = (java.nio.ByteBuffer)(value); break;
-    case 10: contentType = (java.lang.CharSequence)(value); break;
-    case 11: prevSignature = (java.nio.ByteBuffer)(value); break;
-    case 12: signature = (java.nio.ByteBuffer)(value); break;
-    case 13: title = (java.lang.CharSequence)(value); break;
-    case 14: text = (java.lang.CharSequence)(value); break;
-    case 15: parseStatus = (org.apache.nutch.storage.ParseStatus)(value); break;
-    case 16: score = (java.lang.Float)(value); break;
-    case 17: reprUrl = (java.lang.CharSequence)(value); break;
-    case 18: headers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 19: outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 20: inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 21: markers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 22: metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 23: batchId = (java.lang.CharSequence)(value); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      baseUrl = (java.lang.CharSequence) (value);
+      break;
+    case 1:
+      status = (java.lang.Integer) (value);
+      break;
+    case 2:
+      fetchTime = (java.lang.Long) (value);
+      break;
+    case 3:
+      prevFetchTime = (java.lang.Long) (value);
+      break;
+    case 4:
+      fetchInterval = (java.lang.Integer) (value);
+      break;
+    case 5:
+      retriesSinceFetch = (java.lang.Integer) (value);
+      break;
+    case 6:
+      modifiedTime = (java.lang.Long) (value);
+      break;
+    case 7:
+      prevModifiedTime = (java.lang.Long) (value);
+      break;
+    case 8:
+      protocolStatus = (org.apache.nutch.storage.ProtocolStatus) (value);
+      break;
+    case 9:
+      content = (java.nio.ByteBuffer) (value);
+      break;
+    case 10:
+      contentType = (java.lang.CharSequence) (value);
+      break;
+    case 11:
+      prevSignature = (java.nio.ByteBuffer) (value);
+      break;
+    case 12:
+      signature = (java.nio.ByteBuffer) (value);
+      break;
+    case 13:
+      title = (java.lang.CharSequence) (value);
+      break;
+    case 14:
+      text = (java.lang.CharSequence) (value);
+      break;
+    case 15:
+      parseStatus = (org.apache.nutch.storage.ParseStatus) (value);
+      break;
+    case 16:
+      score = (java.lang.Float) (value);
+      break;
+    case 17:
+      reprUrl = (java.lang.CharSequence) (value);
+      break;
+    case 18:
+      headers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 19:
+      outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 20:
+      inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 21:
+      markers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 22:
+      metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 23:
+      batchId = (java.lang.CharSequence) (value);
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'baseUrl' field.
-   * The original associated with this WebPage.   */
+   * Gets the value of the 'baseUrl' field. The original associated with this
+   * WebPage.
+   */
   public java.lang.CharSequence getBaseUrl() {
     return baseUrl;
   }
 
   /**
-   * Sets the value of the 'baseUrl' field.
-   * The original associated with this WebPage.   * @param value the value to set.
+   * Sets the value of the 'baseUrl' field. The original associated with this
+   * WebPage. * @param value the value to set.
    */
   public void setBaseUrl(java.lang.CharSequence value) {
     this.baseUrl = value;
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The original associated with this WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * original associated with this WebPage. * @param value the value to set.
    */
   public boolean isBaseUrlDirty(java.lang.CharSequence value) {
     return isDirty(0);
@@ -258,24 +405,43 @@
   }
 
   /**
-   * Gets the value of the 'status' field.
-   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   */
+   * Gets the value of the 'status' field. A crawl status associated with the
+   * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+   * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
+   * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
+   * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   */
   public java.lang.Integer getStatus() {
     return status;
   }
 
   /**
-   * Sets the value of the 'status' field.
-   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   * @param value the value to set.
+   * Sets the value of the 'status' field. A crawl status associated with the
+   * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+   * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
+   * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
+   * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   * * @param value the value to set.
    */
   public void setStatus(java.lang.Integer value) {
     this.status = value;
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'status' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   * @param value the value to set.
+   * Checks the dirty status of the 'status' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A crawl
+   * status associated with the WebPage, can be of value STATUS_UNFETCHED -
+   * WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully
+   * fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP -
+   * WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage
+   * permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful,
+   * needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching
+   * successful - page is not modified * @param value the value to set.
    */
   public boolean isStatusDirty(java.lang.Integer value) {
     return isDirty(1);
@@ -282,24 +448,27 @@
   }
 
   /**
-   * Gets the value of the 'fetchTime' field.
-   * The system time in milliseconds for when the page was fetched.   */
+   * Gets the value of the 'fetchTime' field. The system time in milliseconds
+   * for when the page was fetched.
+   */
   public java.lang.Long getFetchTime() {
     return fetchTime;
   }
 
   /**
-   * Sets the value of the 'fetchTime' field.
-   * The system time in milliseconds for when the page was fetched.   * @param value the value to set.
+   * Sets the value of the 'fetchTime' field. The system time in milliseconds
+   * for when the page was fetched. * @param value the value to set.
    */
   public void setFetchTime(java.lang.Long value) {
     this.fetchTime = value;
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when the page was fetched.   * @param value the value to set.
+   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when the page was fetched. * @param value
+   * the value to set.
    */
   public boolean isFetchTimeDirty(java.lang.Long value) {
     return isDirty(2);
@@ -306,24 +475,32 @@
   }
 
   /**
-   * Gets the value of the 'prevFetchTime' field.
-   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   */
+   * Gets the value of the 'prevFetchTime' field. The system time in
+   * milliseconds for when the page was last fetched if it was previously
+   * fetched which can be used to calculate time delta within a fetching
+   * schedule implementation
+   */
   public java.lang.Long getPrevFetchTime() {
     return prevFetchTime;
   }
 
   /**
-   * Sets the value of the 'prevFetchTime' field.
-   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   * @param value the value to set.
+   * Sets the value of the 'prevFetchTime' field. The system time in
+   * milliseconds for when the page was last fetched if it was previously
+   * fetched which can be used to calculate time delta within a fetching
+   * schedule implementation * @param value the value to set.
    */
   public void setPrevFetchTime(java.lang.Long value) {
     this.prevFetchTime = value;
     setDirty(3);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   * @param value the value to set.
+   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when the page was last fetched if it was
+   * previously fetched which can be used to calculate time delta within a
+   * fetching schedule implementation * @param value the value to set.
    */
   public boolean isPrevFetchTimeDirty(java.lang.Long value) {
     return isDirty(3);
@@ -330,24 +507,30 @@
   }
 
   /**
-   * Gets the value of the 'fetchInterval' field.
-   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.   */
+   * Gets the value of the 'fetchInterval' field. The default number of seconds
+   * between re-fetches of a page. The default is considered as 30 days unless a
+   * custom fetch schedle is implemented.
+   */
   public java.lang.Integer getFetchInterval() {
     return fetchInterval;
   }
 
   /**
-   * Sets the value of the 'fetchInterval' field.
-   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.   * @param value the value to set.
+   * Sets the value of the 'fetchInterval' field. The default number of seconds
+   * between re-fetches of a page. The default is considered as 30 days unless a
+   * custom fetch schedle is implemented. * @param value the value to set.
    */
   public void setFetchInterval(java.lang.Integer value) {
     this.fetchInterval = value;
     setDirty(4);
   }
-  
+
   /**
-   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.   * @param value the value to set.
+   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. The
+   * default number of seconds between re-fetches of a page. The default is
+   * considered as 30 days unless a custom fetch schedle is implemented. * @param
+   * value the value to set.
    */
   public boolean isFetchIntervalDirty(java.lang.Integer value) {
     return isDirty(4);
@@ -354,24 +537,28 @@
   }
 
   /**
-   * Gets the value of the 'retriesSinceFetch' field.
-   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   */
+   * Gets the value of the 'retriesSinceFetch' field. The number of retried
+   * attempts at fetching the WebPage since it was last successfully fetched.
+   */
   public java.lang.Integer getRetriesSinceFetch() {
     return retriesSinceFetch;
   }
 
   /**
-   * Sets the value of the 'retriesSinceFetch' field.
-   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   * @param value the value to set.
+   * Sets the value of the 'retriesSinceFetch' field. The number of retried
+   * attempts at fetching the WebPage since it was last successfully fetched. * @param
+   * value the value to set.
    */
   public void setRetriesSinceFetch(java.lang.Integer value) {
     this.retriesSinceFetch = value;
     setDirty(5);
   }
-  
+
   /**
-   * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   * @param value the value to set.
+   * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty
+   * if it represents a change that has not yet been written to the database.
+   * The number of retried attempts at fetching the WebPage since it was last
+   * successfully fetched. * @param value the value to set.
    */
   public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
     return isDirty(5);
@@ -378,24 +565,34 @@
   }
 
   /**
-   * Gets the value of the 'modifiedTime' field.
-   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   */
+   * Gets the value of the 'modifiedTime' field. The system time in milliseconds
+   * for when this WebPage was modified by the WebPage author, if this is not
+   * available we default to the server for this information. This is important
+   * to understand the changing nature of the WebPage.
+   */
   public java.lang.Long getModifiedTime() {
     return modifiedTime;
   }
 
   /**
-   * Sets the value of the 'modifiedTime' field.
-   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   * @param value the value to set.
+   * Sets the value of the 'modifiedTime' field. The system time in milliseconds
+   * for when this WebPage was modified by the WebPage author, if this is not
+   * available we default to the server for this information. This is important
+   * to understand the changing nature of the WebPage. * @param value the value
+   * to set.
    */
   public void setModifiedTime(java.lang.Long value) {
     this.modifiedTime = value;
     setDirty(6);
   }
-  
+
   /**
-   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when this WebPage was modified by the
+   * WebPage author, if this is not available we default to the server for this
+   * information. This is important to understand the changing nature of the
+   * WebPage. * @param value the value to set.
    */
   public boolean isModifiedTimeDirty(java.lang.Long value) {
     return isDirty(6);
@@ -402,24 +599,35 @@
   }
 
   /**
-   * Gets the value of the 'prevModifiedTime' field.
-   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   */
+   * Gets the value of the 'prevModifiedTime' field. The system time in
+   * milliseconds for when this WebPage was previously modified by the author,
+   * if this is not available then we default to the server for this
+   * information. This is important to understand the changing nature of a
+   * WebPage.
+   */
   public java.lang.Long getPrevModifiedTime() {
     return prevModifiedTime;
   }
 
   /**
-   * Sets the value of the 'prevModifiedTime' field.
-   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   * @param value the value to set.
+   * Sets the value of the 'prevModifiedTime' field. The system time in
+   * milliseconds for when this WebPage was previously modified by the author,
+   * if this is not available then we default to the server for this
+   * information. This is important to understand the changing nature of a
+   * WebPage. * @param value the value to set.
    */
   public void setPrevModifiedTime(java.lang.Long value) {
     this.prevModifiedTime = value;
     setDirty(7);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty
+   * if it represents a change that has not yet been written to the database.
+   * The system time in milliseconds for when this WebPage was previously
+   * modified by the author, if this is not available then we default to the
+   * server for this information. This is important to understand the changing
+   * nature of a WebPage. * @param value the value to set.
    */
   public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
     return isDirty(7);
@@ -434,40 +642,48 @@
 
   /**
    * Sets the value of the 'protocolStatus' field.
-   * @param value the value to set.
+   * 
+   * @param value
+   *          the value to set.
    */
   public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
     this.protocolStatus = value;
     setDirty(8);
   }
-  
+
   /**
-   * Checks the dirty status of the 'protocolStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Checks the dirty status of the 'protocolStatus' field. A field is dirty if
+   * it represents a change that has not yet been written to the database.
+   * 
+   * @param value
+   *          the value to set.
    */
-  public boolean isProtocolStatusDirty(org.apache.nutch.storage.ProtocolStatus value) {
+  public boolean isProtocolStatusDirty(
+      org.apache.nutch.storage.ProtocolStatus value) {
     return isDirty(8);
   }
 
   /**
-   * Gets the value of the 'content' field.
-   * The entire raw document content e.g. raw XHTML   */
+   * Gets the value of the 'content' field. The entire raw document content e.g.
+   * raw XHTML
+   */
   public java.nio.ByteBuffer getContent() {
     return content;
   }
 
   /**
-   * Sets the value of the 'content' field.
-   * The entire raw document content e.g. raw XHTML   * @param value the value to set.
+   * Sets the value of the 'content' field. The entire raw document content e.g.
+   * raw XHTML * @param value the value to set.
    */
   public void setContent(java.nio.ByteBuffer value) {
     this.content = value;
     setDirty(9);
   }
-  
+
   /**
-   * Checks the dirty status of the 'content' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The entire raw document content e.g. raw XHTML   * @param value the value to set.
+   * Checks the dirty status of the 'content' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * entire raw document content e.g. raw XHTML * @param value the value to set.
    */
   public boolean isContentDirty(java.nio.ByteBuffer value) {
     return isDirty(9);
@@ -474,24 +690,44 @@
   }
 
   /**
-   * Gets the value of the 'contentType' field.
-   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   */
+   * Gets the value of the 'contentType' field. The type of the content
+   * contained within the document itself. ContentType is an alias for MimeType.
+   * Historically, this parameter was only called MimeType, but since this is
+   * actually the value included in the HTTP Content-Type header, it can also
+   * include the character set encoding, which makes it more than just a
+   * MimeType specification. If MimeType is specified e.g. not None, that value
+   * is used. Otherwise, ContentType is used. If neither is given, the
+   * DEFAULT_CONTENT_TYPE setting is used.
+   */
   public java.lang.CharSequence getContentType() {
     return contentType;
   }
 
   /**
-   * Sets the value of the 'contentType' field.
-   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to set.
+   * Sets the value of the 'contentType' field. The type of the content
+   * contained within the document itself. ContentType is an alias for MimeType.
+   * Historically, this parameter was only called MimeType, but since this is
+   * actually the value included in the HTTP Content-Type header, it can also
+   * include the character set encoding, which makes it more than just a
+   * MimeType specification. If MimeType is specified e.g. not None, that value
+   * is used; else ContentType; else the DEFAULT_CONTENT_TYPE setting applies.
+   * @param value the value to set.
    */
   public void setContentType(java.lang.CharSequence value) {
     this.contentType = value;
     setDirty(10);
   }
-  
+
   /**
-   * Checks the dirty status of the 'contentType' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to set.
+   * Checks the dirty status of the 'contentType' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The type
+   * of the content contained within the document itself. ContentType is an
+   * alias for MimeType. Historically, this parameter was only called MimeType,
+   * but since this is actually the value included in the HTTP Content-Type
+   * header, it can also include the character set encoding, which makes it more
+   * than just a MimeType specification. If MimeType is specified e.g. not None,
+   * that value is used; else ContentType; else DEFAULT_CONTENT_TYPE.
+   * @param value the value to set.
    */
   public boolean isContentTypeDirty(java.lang.CharSequence value) {
     return isDirty(10);
@@ -498,24 +734,33 @@
   }
 
   /**
-   * Gets the value of the 'prevSignature' field.
-   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   */
+   * Gets the value of the 'prevSignature' field. An implementation of a
+   * WebPage's previous signature from which it can be identified and referenced
+   * at any point in time. This can be used to uniquely identify WebPage deltas
+   * based on page fingerprints.
+   */
   public java.nio.ByteBuffer getPrevSignature() {
     return prevSignature;
   }
 
   /**
-   * Sets the value of the 'prevSignature' field.
-   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   * @param value the value to set.
+   * Sets the value of the 'prevSignature' field. An implementation of a
+   * WebPage's previous signature from which it can be identified and referenced
+   * at any point in time; uniquely identifies WebPage deltas by fingerprint.
+   * @param value the value to set.
    */
   public void setPrevSignature(java.nio.ByteBuffer value) {
     this.prevSignature = value;
     setDirty(11);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevSignature' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   * @param value the value to set.
+   * Checks the dirty status of the 'prevSignature' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. An
+   * implementation of a WebPage's previous signature from which it can be
+   * identified and referenced at any point in time. This can be used to
+   * uniquely identify WebPage deltas based on page fingerprints.
+   * @param value the value to set.
    */
   public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
     return isDirty(11);
@@ -522,24 +767,33 @@
   }
 
   /**
-   * Gets the value of the 'signature' field.
-   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.   */
+   * Gets the value of the 'signature' field. An implementation of a WebPage's
+   * signature from which it can be identified and referenced at any point in
+   * time. This is essentially the WebPage's fingerprint representing its state
+   * for any point in time.
+   */
   public java.nio.ByteBuffer getSignature() {
     return signature;
   }
 
   /**
-   * Sets the value of the 'signature' field.
-   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.   * @param value the value to set.
+   * Sets the value of the 'signature' field. An implementation of a WebPage's
+   * signature from which it can be identified and referenced at any point in
+   * time; essentially the WebPage's fingerprint representing its state.
+   * @param value the value to set.
    */
   public void setSignature(java.nio.ByteBuffer value) {
     this.signature = value;
     setDirty(12);
   }
-  
+
   /**
-   * Checks the dirty status of the 'signature' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.   * @param value the value to set.
+   * Checks the dirty status of the 'signature' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. An
+   * implementation of a WebPage's signature from which it can be identified and
+   * referenced at any point in time. This is essentially the WebPage's
+   * fingerprint representing its state for any point in time.
+   * @param value the value to set.
    */
   public boolean isSignatureDirty(java.nio.ByteBuffer value) {
     return isDirty(12);
@@ -546,24 +800,25 @@
   }
 
   /**
-   * Gets the value of the 'title' field.
-   * The title of the WebPage.   */
+   * Gets the value of the 'title' field. The title of the WebPage.
+   */
   public java.lang.CharSequence getTitle() {
     return title;
   }
 
   /**
-   * Sets the value of the 'title' field.
-   * The title of the WebPage.   * @param value the value to set.
+   * Sets the value of the 'title' field. The title of the WebPage.
+   * @param value the value to set.
    */
   public void setTitle(java.lang.CharSequence value) {
     this.title = value;
     setDirty(13);
   }
-  
+
   /**
-   * Checks the dirty status of the 'title' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The title of the WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'title' field (the title of the WebPage).
+   * A field is dirty if it has a change not yet written to the database.
+   * @param value the value to set.
    */
   public boolean isTitleDirty(java.lang.CharSequence value) {
     return isDirty(13);
@@ -570,24 +825,27 @@
   }
 
   /**
-   * Gets the value of the 'text' field.
-   * The textual content of the WebPage devoid from native markup.   */
+   * Gets the value of the 'text' field. The textual content of the WebPage
+   * devoid from native markup.
+   */
   public java.lang.CharSequence getText() {
     return text;
   }
 
   /**
-   * Sets the value of the 'text' field.
-   * The textual content of the WebPage devoid from native markup.   * @param value the value to set.
+   * Sets the 'text' field: WebPage textual content devoid of native markup.
+   * @param value the value to set.
    */
   public void setText(java.lang.CharSequence value) {
     this.text = value;
     setDirty(14);
   }
-  
+
   /**
-   * Checks the dirty status of the 'text' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * The textual content of the WebPage devoid from native markup.   * @param value the value to set.
+   * Checks the dirty status of the 'text' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * textual content of the WebPage devoid from native markup.
+   * @param value the value to set.
    */
   public boolean isTextDirty(java.lang.CharSequence value) {
     return isDirty(14);
@@ -602,16 +860,21 @@
 
   /**
    * Sets the value of the 'parseStatus' field.
-   * @param value the value to set.
+   * 
+   * @param value
+   *          the value to set.
    */
   public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
     this.parseStatus = value;
     setDirty(15);
   }
-  
+
   /**
-   * Checks the dirty status of the 'parseStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Checks the dirty status of the 'parseStatus' field. A field is dirty if it
+   * represents a change that has not yet been written to the database.
+   * 
+   * @param value
+   *          the value to set.
    */
   public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
     return isDirty(15);
@@ -618,24 +881,30 @@
   }
 
   /**
-   * Gets the value of the 'score' field.
-   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   */
+   * Gets the value of the 'score' field. A score used to determine a WebPage's
+   * relevance within the web graph it is part of. This score may change over
+   * time based on graph characteristics.
+   */
   public java.lang.Float getScore() {
     return score;
   }
 
   /**
-   * Sets the value of the 'score' field.
-   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   * @param value the value to set.
+   * Sets the value of the 'score' field. A score used to determine a WebPage's
+   * relevance within its web graph; it may change with graph characteristics.
+   * @param value the value to set.
    */
   public void setScore(java.lang.Float value) {
     this.score = value;
     setDirty(16);
   }
-  
+
   /**
-   * Checks the dirty status of the 'score' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   * @param value the value to set.
+   * Checks the dirty status of the 'score' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A score
+   * used to determine a WebPage's relevance within the web graph it is part of.
+   * This score may change over time based on graph characteristics.
+   * @param value the value to set.
    */
   public boolean isScoreDirty(java.lang.Float value) {
     return isDirty(16);
@@ -642,24 +911,33 @@
   }
 
   /**
-   * Gets the value of the 'reprUrl' field.
-   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   */
+   * Gets the value of the 'reprUrl' field. In the case where we are given two
+   * urls, a source and a destination of a redirect, we should determine and
+   * persist the representative url. The logic used to determine this is based
+   * largely on Yahoo!'s Slurp Crawler
+   */
   public java.lang.CharSequence getReprUrl() {
     return reprUrl;
   }
 
   /**
-   * Sets the value of the 'reprUrl' field.
-   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param value the value to set.
+   * Sets the value of the 'reprUrl' field. In the case where we are given two
+   * urls, a source and a destination of a redirect, we should determine and
+   * persist the representative url (logic based on Yahoo!'s Slurp Crawler).
+   * @param value the value to set.
    */
   public void setReprUrl(java.lang.CharSequence value) {
     this.reprUrl = value;
     setDirty(17);
   }
-  
+
   /**
-   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param value the value to set.
+   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. In the
+   * case where we are given two urls, a source and a destination of a redirect,
+   * we should determine and persist the representative url. The logic used to
+   * determine this is based largely on Yahoo!'s Slurp Crawler.
+   * @param value the value to set.
    */
   public boolean isReprUrlDirty(java.lang.CharSequence value) {
     return isDirty(17);
@@ -666,144 +944,208 @@
   }
 
   /**
-   * Gets the value of the 'headers' field.
-   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getHeaders() {
+   * Gets the value of the 'headers' field. Header information returned from the
+   * web server used to serve the content which is subsequently fetched from.
+   * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+   * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+   * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getHeaders() {
     return headers;
   }
 
   /**
-   * Sets the value of the 'headers' field.
-   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   * @param value the value to set.
+   * Sets the value of the 'headers' field. Header information returned from the
+   * web server used to serve the content which is subsequently fetched from.
+   * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+   * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+   * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+   * @param value the value to set.
    */
-  public void setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setHeaders(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(18);
   }
-  
+
   /**
-   * Checks the dirty status of the 'headers' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   * @param value the value to set.
+   * Checks the dirty status of the 'headers' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Header
+   * information returned from the web server used to serve the content which
+   * is subsequently fetched from. This includes keys such as TRANSFER_ENCODING,
+   * CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION,
+   * CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+   * @param value the value to set.
    */
-  public boolean isHeadersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isHeadersDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(18);
   }
 
   /**
-   * Gets the value of the 'outlinks' field.
-   * Embedded hyperlinks which direct outside of the current domain.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+   * Gets the value of the 'outlinks' field. Embedded hyperlinks which direct
+   * outside of the current domain.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
     return outlinks;
   }
 
   /**
-   * Sets the value of the 'outlinks' field.
-   * Embedded hyperlinks which direct outside of the current domain.   * @param value the value to set.
+   * Sets the 'outlinks' field: embedded hyperlinks outside the current domain.
+   * @param value the value to set.
    */
-  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setOutlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(19);
   }
-  
+
   /**
-   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Embedded hyperlinks which direct outside of the current domain.   * @param value the value to set.
+   * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Embedded
+   * hyperlinks which direct outside of the current domain.
+   * @param value the value to set.
    */
-  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isOutlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(19);
   }
 
   /**
-   * Gets the value of the 'inlinks' field.
-   * Embedded hyperlinks which link to pages within the current domain.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+   * Gets the value of the 'inlinks' field. Embedded hyperlinks which link to
+   * pages within the current domain.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
     return inlinks;
   }
 
   /**
-   * Sets the value of the 'inlinks' field.
-   * Embedded hyperlinks which link to pages within the current domain.   * @param value the value to set.
+   * Sets the 'inlinks' field: hyperlinks to pages within the current domain.
+   * @param value the value to set.
    */
-  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setInlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(20);
   }
-  
+
   /**
-   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Embedded hyperlinks which link to pages within the current domain.   * @param value the value to set.
+   * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Embedded
+   * hyperlinks which link to pages within the current domain.
+   * @param value the value to set.
    */
-  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isInlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(20);
   }
 
   /**
-   * Gets the value of the 'markers' field.
-   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getMarkers() {
+   * Gets the value of the 'markers' field. Markers flags which represent user
+   * and machine decisions which have influenced a WebPage's current
+   * state. Markers can be system specific and user machine driven in nature.
+   * They are assigned to a WebPage on a job-by-job basis and their values
+   * indicative of what actions should be associated with a WebPage.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getMarkers() {
     return markers;
   }
 
   /**
-   * Sets the value of the 'markers' field.
-   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.   * @param value the value to set.
+   * Sets the value of the 'markers' field. Markers flags which represent user
+   * and machine decisions which have influenced a WebPage's current
+   * state. Markers can be system specific and user machine driven in nature.
+   * They are assigned to a WebPage on a job-by-job basis and their values
+   * indicative of what actions should be associated with a WebPage.
+   * @param value the value to set.
    */
-  public void setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setMarkers(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(21);
   }
-  
+
   /**
-   * Checks the dirty status of the 'markers' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'markers' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Markers
+   * flags which represent user and machine decisions which have influenced a
+   * WebPage's current state. Markers can be system specific and user machine
+   * driven in nature. They are assigned to a WebPage on a job-by-job basis;
+   * their values indicate what actions should be associated with a WebPage.
+   * @param value the value to set.
    */
-  public boolean isMarkersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+  public boolean isMarkersDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(21);
   }
 
   /**
-   * Gets the value of the 'metadata' field.
-   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.   */
-  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+   * Gets the value of the 'metadata' field. A multi-valued metadata container
+   * used for storing everything from structured WebPage characteristics, to
+   * ad-hoc extraction and metadata augmentation for any given WebPage.
+   */
+  public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
     return metadata;
   }
 
   /**
-   * Sets the value of the 'metadata' field.
-   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.   * @param value the value to set.
+   * Sets the value of the 'metadata' field. A multi-valued metadata container
+   * used for storing everything from structured WebPage characteristics, to
+   * ad-hoc extraction and metadata augmentation for any given WebPage.
+   * @param value the value to set.
    */
-  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setMetadata(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+    this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(22);
   }
-  
+
   /**
-   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'metadata' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * multi-valued metadata container storing everything from structured WebPage
+   * characteristics, to ad-hoc extraction and augmentation for any WebPage.
+   * @param value the value to set.
    */
-  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+  public boolean isMetadataDirty(
+      java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
     return isDirty(22);
   }
 
   /**
-   * Gets the value of the 'batchId' field.
-   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   */
+   * Gets the value of the 'batchId' field. A batchId that this WebPage is
+   * assigned to. WebPage's are fetched in batches, called fetchlists. Pages are
+   * partitioned but can always be associated and fetched alongside pages of
+   * similar value (within a crawl cycle) based on batchId.
+   */
   public java.lang.CharSequence getBatchId() {
     return batchId;
   }
 
   /**
-   * Sets the value of the 'batchId' field.
-   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   * @param value the value to set.
+   * Sets the value of the 'batchId' field. A batchId that this WebPage is
+   * assigned to. WebPage's are fetched in batches, called fetchlists. Pages are
+   * partitioned but can always be associated and fetched alongside pages of
+   * similar value (within a crawl cycle) based on batchId.
+   * @param value the value to set.
    */
   public void setBatchId(java.lang.CharSequence value) {
     this.batchId = value;
     setDirty(23);
   }
-  
+
   /**
-   * Checks the dirty status of the 'batchId' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   * @param value the value to set.
+   * Checks the dirty status of the 'batchId' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A
+   * batchId that this WebPage is assigned to. WebPage's are fetched in batches,
+   * called fetchlists. Pages are partitioned but can always be associated and
+   * fetched alongside similar-value pages (within a crawl cycle) by batchId.
+   * @param value the value to set.
    */
   public boolean isBatchIdDirty(java.lang.CharSequence value) {
     return isDirty(23);
@@ -813,17 +1155,21 @@
   public static org.apache.nutch.storage.WebPage.Builder newBuilder() {
     return new org.apache.nutch.storage.WebPage.Builder();
   }
-  
+
   /** Creates a new WebPage RecordBuilder by copying an existing Builder */
-  public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage.Builder other) {
+  public static org.apache.nutch.storage.WebPage.Builder newBuilder(
+      org.apache.nutch.storage.WebPage.Builder other) {
     return new org.apache.nutch.storage.WebPage.Builder(other);
   }
-  
-  /** Creates a new WebPage RecordBuilder by copying an existing WebPage instance */
-  public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage other) {
+
+  /**
+   * Creates a new WebPage RecordBuilder by copying an existing WebPage instance
+   */
+  public static org.apache.nutch.storage.WebPage.Builder newBuilder(
+      org.apache.nutch.storage.WebPage other) {
     return new org.apache.nutch.storage.WebPage.Builder(other);
   }
-  
+
   private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -846,12 +1192,13 @@
     copy.limit(limit);
     return copy.asReadOnlyBuffer();
   }
-  
+
   /**
    * RecordBuilder for WebPage instances.
    */
-  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<WebPage>
-    implements org.apache.avro.data.RecordBuilder<WebPage> {
+  public static class Builder extends
+      org.apache.avro.specific.SpecificRecordBuilderBase<WebPage> implements
+      org.apache.avro.data.RecordBuilder<WebPage> {
 
     private java.lang.CharSequence baseUrl;
     private int status;
@@ -871,11 +1218,11 @@
     private org.apache.nutch.storage.ParseStatus parseStatus;
     private float score;
     private java.lang.CharSequence reprUrl;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> headers;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
-    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> markers;
-    private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> headers;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
+    private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> markers;
+    private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
     private java.lang.CharSequence batchId;
 
     /** Creates a new Builder */
@@ -882,109 +1229,133 @@
     private Builder() {
       super(org.apache.nutch.storage.WebPage.SCHEMA$);
     }
-    
+
     /** Creates a Builder by copying an existing Builder */
     private Builder(org.apache.nutch.storage.WebPage.Builder other) {
       super(other);
     }
-    
+
     /** Creates a Builder by copying an existing WebPage instance */
     private Builder(org.apache.nutch.storage.WebPage other) {
-            super(org.apache.nutch.storage.WebPage.SCHEMA$);
+      super(org.apache.nutch.storage.WebPage.SCHEMA$);
       if (isValidValue(fields()[0], other.baseUrl)) {
-        this.baseUrl = (java.lang.CharSequence) data().deepCopy(fields()[0].schema(), other.baseUrl);
+        this.baseUrl = (java.lang.CharSequence) data().deepCopy(
+            fields()[0].schema(), other.baseUrl);
         fieldSetFlags()[0] = true;
       }
       if (isValidValue(fields()[1], other.status)) {
-        this.status = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.status);
+        this.status = (java.lang.Integer) data().deepCopy(fields()[1].schema(),
+            other.status);
         fieldSetFlags()[1] = true;
       }
       if (isValidValue(fields()[2], other.fetchTime)) {
-        this.fetchTime = (java.lang.Long) data().deepCopy(fields()[2].schema(), other.fetchTime);
+        this.fetchTime = (java.lang.Long) data().deepCopy(fields()[2].schema(),
+            other.fetchTime);
         fieldSetFlags()[2] = true;
       }
       if (isValidValue(fields()[3], other.prevFetchTime)) {
-        this.prevFetchTime = (java.lang.Long) data().deepCopy(fields()[3].schema(), other.prevFetchTime);
+        this.prevFetchTime = (java.lang.Long) data().deepCopy(
+            fields()[3].schema(), other.prevFetchTime);
         fieldSetFlags()[3] = true;
       }
       if (isValidValue(fields()[4], other.fetchInterval)) {
-        this.fetchInterval = (java.lang.Integer) data().deepCopy(fields()[4].schema(), other.fetchInterval);
+        this.fetchInterval = (java.lang.Integer) data().deepCopy(
+            fields()[4].schema(), other.fetchInterval);
         fieldSetFlags()[4] = true;
       }
       if (isValidValue(fields()[5], other.retriesSinceFetch)) {
-        this.retriesSinceFetch = (java.lang.Integer) data().deepCopy(fields()[5].schema(), other.retriesSinceFetch);
+        this.retriesSinceFetch = (java.lang.Integer) data().deepCopy(
+            fields()[5].schema(), other.retriesSinceFetch);
         fieldSetFlags()[5] = true;
       }
       if (isValidValue(fields()[6], other.modifiedTime)) {
-        this.modifiedTime = (java.lang.Long) data().deepCopy(fields()[6].schema(), other.modifiedTime);
+        this.modifiedTime = (java.lang.Long) data().deepCopy(
+            fields()[6].schema(), other.modifiedTime);
         fieldSetFlags()[6] = true;
       }
       if (isValidValue(fields()[7], other.prevModifiedTime)) {
-        this.prevModifiedTime = (java.lang.Long) data().deepCopy(fields()[7].schema(), other.prevModifiedTime);
+        this.prevModifiedTime = (java.lang.Long) data().deepCopy(
+            fields()[7].schema(), other.prevModifiedTime);
         fieldSetFlags()[7] = true;
       }
       if (isValidValue(fields()[8], other.protocolStatus)) {
-        this.protocolStatus = (org.apache.nutch.storage.ProtocolStatus) data().deepCopy(fields()[8].schema(), other.protocolStatus);
+        this.protocolStatus = (org.apache.nutch.storage.ProtocolStatus) data()
+            .deepCopy(fields()[8].schema(), other.protocolStatus);
         fieldSetFlags()[8] = true;
       }
       if (isValidValue(fields()[9], other.content)) {
-        this.content = (java.nio.ByteBuffer) data().deepCopy(fields()[9].schema(), other.content);
+        this.content = (java.nio.ByteBuffer) data().deepCopy(
+            fields()[9].schema(), other.content);
         fieldSetFlags()[9] = true;
       }
       if (isValidValue(fields()[10], other.contentType)) {
-        this.contentType = (java.lang.CharSequence) data().deepCopy(fields()[10].schema(), other.contentType);
+        this.contentType = (java.lang.CharSequence) data().deepCopy(
+            fields()[10].schema(), other.contentType);
         fieldSetFlags()[10] = true;
       }
       if (isValidValue(fields()[11], other.prevSignature)) {
-        this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(fields()[11].schema(), other.prevSignature);
+        this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(
+            fields()[11].schema(), other.prevSignature);
         fieldSetFlags()[11] = true;
       }
       if (isValidValue(fields()[12], other.signature)) {
-        this.signature = (java.nio.ByteBuffer) data().deepCopy(fields()[12].schema(), other.signature);
+        this.signature = (java.nio.ByteBuffer) data().deepCopy(
+            fields()[12].schema(), other.signature);
         fieldSetFlags()[12] = true;
       }
       if (isValidValue(fields()[13], other.title)) {
-        this.title = (java.lang.CharSequence) data().deepCopy(fields()[13].schema(), other.title);
+        this.title = (java.lang.CharSequence) data().deepCopy(
+            fields()[13].schema(), other.title);
         fieldSetFlags()[13] = true;
       }
       if (isValidValue(fields()[14], other.text)) {
-        this.text = (java.lang.CharSequence) data().deepCopy(fields()[14].schema(), other.text);
+        this.text = (java.lang.CharSequence) data().deepCopy(
+            fields()[14].schema(), other.text);
         fieldSetFlags()[14] = true;
       }
       if (isValidValue(fields()[15], other.parseStatus)) {
-        this.parseStatus = (org.apache.nutch.storage.ParseStatus) data().deepCopy(fields()[15].schema(), other.parseStatus);
+        this.parseStatus = (org.apache.nutch.storage.ParseStatus) data()
+            .deepCopy(fields()[15].schema(), other.parseStatus);
         fieldSetFlags()[15] = true;
       }
       if (isValidValue(fields()[16], other.score)) {
-        this.score = (java.lang.Float) data().deepCopy(fields()[16].schema(), other.score);
+        this.score = (java.lang.Float) data().deepCopy(fields()[16].schema(),
+            other.score);
         fieldSetFlags()[16] = true;
       }
       if (isValidValue(fields()[17], other.reprUrl)) {
-        this.reprUrl = (java.lang.CharSequence) data().deepCopy(fields()[17].schema(), other.reprUrl);
+        this.reprUrl = (java.lang.CharSequence) data().deepCopy(
+            fields()[17].schema(), other.reprUrl);
         fieldSetFlags()[17] = true;
       }
       if (isValidValue(fields()[18], other.headers)) {
-        this.headers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[18].schema(), other.headers);
+        this.headers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[18].schema(), other.headers);
         fieldSetFlags()[18] = true;
       }
       if (isValidValue(fields()[19], other.outlinks)) {
-        this.outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[19].schema(), other.outlinks);
+        this.outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[19].schema(), other.outlinks);
         fieldSetFlags()[19] = true;
       }
       if (isValidValue(fields()[20], other.inlinks)) {
-        this.inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[20].schema(), other.inlinks);
+        this.inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[20].schema(), other.inlinks);
         fieldSetFlags()[20] = true;
       }
       if (isValidValue(fields()[21], other.markers)) {
-        this.markers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[21].schema(), other.markers);
+        this.markers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) data()
+            .deepCopy(fields()[21].schema(), other.markers);
         fieldSetFlags()[21] = true;
       }
       if (isValidValue(fields()[22], other.metadata)) {
-        this.metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) data().deepCopy(fields()[22].schema(), other.metadata);
+        this.metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) data()
+            .deepCopy(fields()[22].schema(), other.metadata);
         fieldSetFlags()[22] = true;
       }
       if (isValidValue(fields()[23], other.batchId)) {
-        this.batchId = (java.lang.CharSequence) data().deepCopy(fields()[23].schema(), other.batchId);
+        this.batchId = (java.lang.CharSequence) data().deepCopy(
+            fields()[23].schema(), other.batchId);
         fieldSetFlags()[23] = true;
       }
     }
@@ -993,20 +1364,21 @@
     public java.lang.CharSequence getBaseUrl() {
       return baseUrl;
     }
-    
+
     /** Sets the value of the 'baseUrl' field */
-    public org.apache.nutch.storage.WebPage.Builder setBaseUrl(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setBaseUrl(
+        java.lang.CharSequence value) {
       validate(fields()[0], value);
       this.baseUrl = value;
       fieldSetFlags()[0] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'baseUrl' field has been set */
     public boolean hasBaseUrl() {
       return fieldSetFlags()[0];
     }
-    
+
     /** Clears the value of the 'baseUrl' field */
     public org.apache.nutch.storage.WebPage.Builder clearBaseUrl() {
       baseUrl = null;
@@ -1013,193 +1385,196 @@
       fieldSetFlags()[0] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'status' field */
     public java.lang.Integer getStatus() {
       return status;
     }
-    
+
     /** Sets the value of the 'status' field */
     public org.apache.nutch.storage.WebPage.Builder setStatus(int value) {
       validate(fields()[1], value);
       this.status = value;
       fieldSetFlags()[1] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'status' field has been set */
     public boolean hasStatus() {
       return fieldSetFlags()[1];
     }
-    
+
     /** Clears the value of the 'status' field */
     public org.apache.nutch.storage.WebPage.Builder clearStatus() {
       fieldSetFlags()[1] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'fetchTime' field */
     public java.lang.Long getFetchTime() {
       return fetchTime;
     }
-    
+
     /** Sets the value of the 'fetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder setFetchTime(long value) {
       validate(fields()[2], value);
       this.fetchTime = value;
       fieldSetFlags()[2] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'fetchTime' field has been set */
     public boolean hasFetchTime() {
       return fieldSetFlags()[2];
     }
-    
+
     /** Clears the value of the 'fetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearFetchTime() {
       fieldSetFlags()[2] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'prevFetchTime' field */
     public java.lang.Long getPrevFetchTime() {
       return prevFetchTime;
     }
-    
+
     /** Sets the value of the 'prevFetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder setPrevFetchTime(long value) {
       validate(fields()[3], value);
       this.prevFetchTime = value;
       fieldSetFlags()[3] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'prevFetchTime' field has been set */
     public boolean hasPrevFetchTime() {
       return fieldSetFlags()[3];
     }
-    
+
     /** Clears the value of the 'prevFetchTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearPrevFetchTime() {
       fieldSetFlags()[3] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'fetchInterval' field */
     public java.lang.Integer getFetchInterval() {
       return fetchInterval;
     }
-    
+
     /** Sets the value of the 'fetchInterval' field */
     public org.apache.nutch.storage.WebPage.Builder setFetchInterval(int value) {
       validate(fields()[4], value);
       this.fetchInterval = value;
       fieldSetFlags()[4] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'fetchInterval' field has been set */
     public boolean hasFetchInterval() {
       return fieldSetFlags()[4];
     }
-    
+
     /** Clears the value of the 'fetchInterval' field */
     public org.apache.nutch.storage.WebPage.Builder clearFetchInterval() {
       fieldSetFlags()[4] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'retriesSinceFetch' field */
     public java.lang.Integer getRetriesSinceFetch() {
       return retriesSinceFetch;
     }
-    
+
     /** Sets the value of the 'retriesSinceFetch' field */
-    public org.apache.nutch.storage.WebPage.Builder setRetriesSinceFetch(int value) {
+    public org.apache.nutch.storage.WebPage.Builder setRetriesSinceFetch(
+        int value) {
       validate(fields()[5], value);
       this.retriesSinceFetch = value;
       fieldSetFlags()[5] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'retriesSinceFetch' field has been set */
     public boolean hasRetriesSinceFetch() {
       return fieldSetFlags()[5];
     }
-    
+
     /** Clears the value of the 'retriesSinceFetch' field */
     public org.apache.nutch.storage.WebPage.Builder clearRetriesSinceFetch() {
       fieldSetFlags()[5] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'modifiedTime' field */
     public java.lang.Long getModifiedTime() {
       return modifiedTime;
     }
-    
+
     /** Sets the value of the 'modifiedTime' field */
     public org.apache.nutch.storage.WebPage.Builder setModifiedTime(long value) {
       validate(fields()[6], value);
       this.modifiedTime = value;
       fieldSetFlags()[6] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'modifiedTime' field has been set */
     public boolean hasModifiedTime() {
       return fieldSetFlags()[6];
     }
-    
+
     /** Clears the value of the 'modifiedTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearModifiedTime() {
       fieldSetFlags()[6] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'prevModifiedTime' field */
     public java.lang.Long getPrevModifiedTime() {
       return prevModifiedTime;
     }
-    
+
     /** Sets the value of the 'prevModifiedTime' field */
-    public org.apache.nutch.storage.WebPage.Builder setPrevModifiedTime(long value) {
+    public org.apache.nutch.storage.WebPage.Builder setPrevModifiedTime(
+        long value) {
       validate(fields()[7], value);
       this.prevModifiedTime = value;
       fieldSetFlags()[7] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'prevModifiedTime' field has been set */
     public boolean hasPrevModifiedTime() {
       return fieldSetFlags()[7];
     }
-    
+
     /** Clears the value of the 'prevModifiedTime' field */
     public org.apache.nutch.storage.WebPage.Builder clearPrevModifiedTime() {
       fieldSetFlags()[7] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'protocolStatus' field */
     public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
       return protocolStatus;
     }
-    
+
     /** Sets the value of the 'protocolStatus' field */
-    public org.apache.nutch.storage.WebPage.Builder setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
+    public org.apache.nutch.storage.WebPage.Builder setProtocolStatus(
+        org.apache.nutch.storage.ProtocolStatus value) {
       validate(fields()[8], value);
       this.protocolStatus = value;
       fieldSetFlags()[8] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'protocolStatus' field has been set */
     public boolean hasProtocolStatus() {
       return fieldSetFlags()[8];
     }
-    
+
     /** Clears the value of the 'protocolStatus' field */
     public org.apache.nutch.storage.WebPage.Builder clearProtocolStatus() {
       protocolStatus = null;
@@ -1206,25 +1581,26 @@
       fieldSetFlags()[8] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'content' field */
     public java.nio.ByteBuffer getContent() {
       return content;
     }
-    
+
     /** Sets the value of the 'content' field */
-    public org.apache.nutch.storage.WebPage.Builder setContent(java.nio.ByteBuffer value) {
+    public org.apache.nutch.storage.WebPage.Builder setContent(
+        java.nio.ByteBuffer value) {
       validate(fields()[9], value);
       this.content = value;
       fieldSetFlags()[9] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'content' field has been set */
     public boolean hasContent() {
       return fieldSetFlags()[9];
     }
-    
+
     /** Clears the value of the 'content' field */
     public org.apache.nutch.storage.WebPage.Builder clearContent() {
       content = null;
@@ -1231,25 +1607,26 @@
       fieldSetFlags()[9] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'contentType' field */
     public java.lang.CharSequence getContentType() {
       return contentType;
     }
-    
+
     /** Sets the value of the 'contentType' field */
-    public org.apache.nutch.storage.WebPage.Builder setContentType(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setContentType(
+        java.lang.CharSequence value) {
       validate(fields()[10], value);
       this.contentType = value;
       fieldSetFlags()[10] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'contentType' field has been set */
     public boolean hasContentType() {
       return fieldSetFlags()[10];
     }
-    
+
     /** Clears the value of the 'contentType' field */
     public org.apache.nutch.storage.WebPage.Builder clearContentType() {
       contentType = null;
@@ -1256,25 +1633,26 @@
       fieldSetFlags()[10] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'prevSignature' field */
     public java.nio.ByteBuffer getPrevSignature() {
       return prevSignature;
     }
-    
+
     /** Sets the value of the 'prevSignature' field */
-    public org.apache.nutch.storage.WebPage.Builder setPrevSignature(java.nio.ByteBuffer value) {
+    public org.apache.nutch.storage.WebPage.Builder setPrevSignature(
+        java.nio.ByteBuffer value) {
       validate(fields()[11], value);
       this.prevSignature = value;
       fieldSetFlags()[11] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'prevSignature' field has been set */
     public boolean hasPrevSignature() {
       return fieldSetFlags()[11];
     }
-    
+
     /** Clears the value of the 'prevSignature' field */
     public org.apache.nutch.storage.WebPage.Builder clearPrevSignature() {
       prevSignature = null;
@@ -1281,25 +1659,26 @@
       fieldSetFlags()[11] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'signature' field */
     public java.nio.ByteBuffer getSignature() {
       return signature;
     }
-    
+
     /** Sets the value of the 'signature' field */
-    public org.apache.nutch.storage.WebPage.Builder setSignature(java.nio.ByteBuffer value) {
+    public org.apache.nutch.storage.WebPage.Builder setSignature(
+        java.nio.ByteBuffer value) {
       validate(fields()[12], value);
       this.signature = value;
       fieldSetFlags()[12] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'signature' field has been set */
     public boolean hasSignature() {
       return fieldSetFlags()[12];
     }
-    
+
     /** Clears the value of the 'signature' field */
     public org.apache.nutch.storage.WebPage.Builder clearSignature() {
       signature = null;
@@ -1306,25 +1685,26 @@
       fieldSetFlags()[12] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'title' field */
     public java.lang.CharSequence getTitle() {
       return title;
     }
-    
+
     /** Sets the value of the 'title' field */
-    public org.apache.nutch.storage.WebPage.Builder setTitle(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setTitle(
+        java.lang.CharSequence value) {
       validate(fields()[13], value);
       this.title = value;
       fieldSetFlags()[13] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'title' field has been set */
     public boolean hasTitle() {
       return fieldSetFlags()[13];
     }
-    
+
     /** Clears the value of the 'title' field */
     public org.apache.nutch.storage.WebPage.Builder clearTitle() {
       title = null;
@@ -1331,25 +1711,26 @@
       fieldSetFlags()[13] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'text' field */
     public java.lang.CharSequence getText() {
       return text;
     }
-    
+
     /** Sets the value of the 'text' field */
-    public org.apache.nutch.storage.WebPage.Builder setText(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setText(
+        java.lang.CharSequence value) {
       validate(fields()[14], value);
       this.text = value;
       fieldSetFlags()[14] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'text' field has been set */
     public boolean hasText() {
       return fieldSetFlags()[14];
     }
-    
+
     /** Clears the value of the 'text' field */
     public org.apache.nutch.storage.WebPage.Builder clearText() {
       text = null;
@@ -1356,25 +1737,26 @@
       fieldSetFlags()[14] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'parseStatus' field */
     public org.apache.nutch.storage.ParseStatus getParseStatus() {
       return parseStatus;
     }
-    
+
     /** Sets the value of the 'parseStatus' field */
-    public org.apache.nutch.storage.WebPage.Builder setParseStatus(org.apache.nutch.storage.ParseStatus value) {
+    public org.apache.nutch.storage.WebPage.Builder setParseStatus(
+        org.apache.nutch.storage.ParseStatus value) {
       validate(fields()[15], value);
       this.parseStatus = value;
       fieldSetFlags()[15] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'parseStatus' field has been set */
     public boolean hasParseStatus() {
       return fieldSetFlags()[15];
     }
-    
+
     /** Clears the value of the 'parseStatus' field */
     public org.apache.nutch.storage.WebPage.Builder clearParseStatus() {
       parseStatus = null;
@@ -1381,49 +1763,50 @@
       fieldSetFlags()[15] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'score' field */
     public java.lang.Float getScore() {
       return score;
     }
-    
+
     /** Sets the value of the 'score' field */
     public org.apache.nutch.storage.WebPage.Builder setScore(float value) {
       validate(fields()[16], value);
       this.score = value;
       fieldSetFlags()[16] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'score' field has been set */
     public boolean hasScore() {
       return fieldSetFlags()[16];
     }
-    
+
     /** Clears the value of the 'score' field */
     public org.apache.nutch.storage.WebPage.Builder clearScore() {
       fieldSetFlags()[16] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'reprUrl' field */
     public java.lang.CharSequence getReprUrl() {
       return reprUrl;
     }
-    
+
     /** Sets the value of the 'reprUrl' field */
-    public org.apache.nutch.storage.WebPage.Builder setReprUrl(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setReprUrl(
+        java.lang.CharSequence value) {
       validate(fields()[17], value);
       this.reprUrl = value;
       fieldSetFlags()[17] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'reprUrl' field has been set */
     public boolean hasReprUrl() {
       return fieldSetFlags()[17];
     }
-    
+
     /** Clears the value of the 'reprUrl' field */
     public org.apache.nutch.storage.WebPage.Builder clearReprUrl() {
       reprUrl = null;
@@ -1430,25 +1813,26 @@
       fieldSetFlags()[17] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'headers' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getHeaders() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getHeaders() {
       return headers;
     }
-    
+
     /** Sets the value of the 'headers' field */
-    public org.apache.nutch.storage.WebPage.Builder setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setHeaders(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[18], value);
       this.headers = value;
       fieldSetFlags()[18] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'headers' field has been set */
     public boolean hasHeaders() {
       return fieldSetFlags()[18];
     }
-    
+
     /** Clears the value of the 'headers' field */
     public org.apache.nutch.storage.WebPage.Builder clearHeaders() {
       headers = null;
@@ -1455,25 +1839,26 @@
       fieldSetFlags()[18] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'outlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
       return outlinks;
     }
-    
+
     /** Sets the value of the 'outlinks' field */
-    public org.apache.nutch.storage.WebPage.Builder setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[19], value);
       this.outlinks = value;
       fieldSetFlags()[19] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'outlinks' field has been set */
     public boolean hasOutlinks() {
       return fieldSetFlags()[19];
     }
-    
+
     /** Clears the value of the 'outlinks' field */
     public org.apache.nutch.storage.WebPage.Builder clearOutlinks() {
       outlinks = null;
@@ -1480,25 +1865,26 @@
       fieldSetFlags()[19] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'inlinks' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
       return inlinks;
     }
-    
+
     /** Sets the value of the 'inlinks' field */
-    public org.apache.nutch.storage.WebPage.Builder setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[20], value);
       this.inlinks = value;
       fieldSetFlags()[20] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'inlinks' field has been set */
     public boolean hasInlinks() {
       return fieldSetFlags()[20];
     }
-    
+
     /** Clears the value of the 'inlinks' field */
     public org.apache.nutch.storage.WebPage.Builder clearInlinks() {
       inlinks = null;
@@ -1505,25 +1891,26 @@
       fieldSetFlags()[20] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'markers' field */
-    public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getMarkers() {
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getMarkers() {
       return markers;
     }
-    
+
     /** Sets the value of the 'markers' field */
-    public org.apache.nutch.storage.WebPage.Builder setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    public org.apache.nutch.storage.WebPage.Builder setMarkers(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
       validate(fields()[21], value);
       this.markers = value;
       fieldSetFlags()[21] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'markers' field has been set */
     public boolean hasMarkers() {
       return fieldSetFlags()[21];
     }
-    
+
     /** Clears the value of the 'markers' field */
     public org.apache.nutch.storage.WebPage.Builder clearMarkers() {
       markers = null;
@@ -1530,25 +1917,26 @@
       fieldSetFlags()[21] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'metadata' field */
-    public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
       return metadata;
     }
-    
+
     /** Sets the value of the 'metadata' field */
-    public org.apache.nutch.storage.WebPage.Builder setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+    public org.apache.nutch.storage.WebPage.Builder setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
       validate(fields()[22], value);
       this.metadata = value;
       fieldSetFlags()[22] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'metadata' field has been set */
     public boolean hasMetadata() {
       return fieldSetFlags()[22];
     }
-    
+
     /** Clears the value of the 'metadata' field */
     public org.apache.nutch.storage.WebPage.Builder clearMetadata() {
       metadata = null;
@@ -1555,25 +1943,26 @@
       fieldSetFlags()[22] = false;
       return this;
     }
-    
+
     /** Gets the value of the 'batchId' field */
     public java.lang.CharSequence getBatchId() {
       return batchId;
     }
-    
+
     /** Sets the value of the 'batchId' field */
-    public org.apache.nutch.storage.WebPage.Builder setBatchId(java.lang.CharSequence value) {
+    public org.apache.nutch.storage.WebPage.Builder setBatchId(
+        java.lang.CharSequence value) {
       validate(fields()[23], value);
       this.batchId = value;
       fieldSetFlags()[23] = true;
-      return this; 
+      return this;
     }
-    
+
     /** Checks whether the 'batchId' field has been set */
     public boolean hasBatchId() {
       return fieldSetFlags()[23];
     }
-    
+
     /** Clears the value of the 'batchId' field */
     public org.apache.nutch.storage.WebPage.Builder clearBatchId() {
       batchId = null;
@@ -1580,35 +1969,64 @@
       fieldSetFlags()[23] = false;
       return this;
     }
-    
+
     @Override
     public WebPage build() {
       try {
         WebPage record = new WebPage();
-        record.baseUrl = fieldSetFlags()[0] ? this.baseUrl : (java.lang.CharSequence) defaultValue(fields()[0]);
-        record.status = fieldSetFlags()[1] ? this.status : (java.lang.Integer) defaultValue(fields()[1]);
-        record.fetchTime = fieldSetFlags()[2] ? this.fetchTime : (java.lang.Long) defaultValue(fields()[2]);
-        record.prevFetchTime = fieldSetFlags()[3] ? this.prevFetchTime : (java.lang.Long) defaultValue(fields()[3]);
-        record.fetchInterval = fieldSetFlags()[4] ? this.fetchInterval : (java.lang.Integer) defaultValue(fields()[4]);
-        record.retriesSinceFetch = fieldSetFlags()[5] ? this.retriesSinceFetch : (java.lang.Integer) defaultValue(fields()[5]);
-        record.modifiedTime = fieldSetFlags()[6] ? this.modifiedTime : (java.lang.Long) defaultValue(fields()[6]);
-        record.prevModifiedTime = fieldSetFlags()[7] ? this.prevModifiedTime : (java.lang.Long) defaultValue(fields()[7]);
-        record.protocolStatus = fieldSetFlags()[8] ? this.protocolStatus : (org.apache.nutch.storage.ProtocolStatus) defaultValue(fields()[8]);
-        record.content = fieldSetFlags()[9] ? this.content : (java.nio.ByteBuffer) defaultValue(fields()[9]);
-        record.contentType = fieldSetFlags()[10] ? this.contentType : (java.lang.CharSequence) defaultValue(fields()[10]);
-        record.prevSignature = fieldSetFlags()[11] ? this.prevSignature : (java.nio.ByteBuffer) defaultValue(fields()[11]);
-        record.signature = fieldSetFlags()[12] ? this.signature : (java.nio.ByteBuffer) defaultValue(fields()[12]);
-        record.title = fieldSetFlags()[13] ? this.title : (java.lang.CharSequence) defaultValue(fields()[13]);
-        record.text = fieldSetFlags()[14] ? this.text : (java.lang.CharSequence) defaultValue(fields()[14]);
-        record.parseStatus = fieldSetFlags()[15] ? this.parseStatus : (org.apache.nutch.storage.ParseStatus) defaultValue(fields()[15]);
-        record.score = fieldSetFlags()[16] ? this.score : (java.lang.Float) defaultValue(fields()[16]);
-        record.reprUrl = fieldSetFlags()[17] ? this.reprUrl : (java.lang.CharSequence) defaultValue(fields()[17]);
-        record.headers = fieldSetFlags()[18] ? this.headers : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[18]));
-        record.outlinks = fieldSetFlags()[19] ? this.outlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[19]));
-        record.inlinks = fieldSetFlags()[20] ? this.inlinks : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[20]));
-        record.markers = fieldSetFlags()[21] ? this.markers : (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[21]));
-        record.metadata = fieldSetFlags()[22] ? this.metadata : (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[22]));
-        record.batchId = fieldSetFlags()[23] ? this.batchId : (java.lang.CharSequence) defaultValue(fields()[23]);
+        record.baseUrl = fieldSetFlags()[0] ? this.baseUrl
+            : (java.lang.CharSequence) defaultValue(fields()[0]);
+        record.status = fieldSetFlags()[1] ? this.status
+            : (java.lang.Integer) defaultValue(fields()[1]);
+        record.fetchTime = fieldSetFlags()[2] ? this.fetchTime
+            : (java.lang.Long) defaultValue(fields()[2]);
+        record.prevFetchTime = fieldSetFlags()[3] ? this.prevFetchTime
+            : (java.lang.Long) defaultValue(fields()[3]);
+        record.fetchInterval = fieldSetFlags()[4] ? this.fetchInterval
+            : (java.lang.Integer) defaultValue(fields()[4]);
+        record.retriesSinceFetch = fieldSetFlags()[5] ? this.retriesSinceFetch
+            : (java.lang.Integer) defaultValue(fields()[5]);
+        record.modifiedTime = fieldSetFlags()[6] ? this.modifiedTime
+            : (java.lang.Long) defaultValue(fields()[6]);
+        record.prevModifiedTime = fieldSetFlags()[7] ? this.prevModifiedTime
+            : (java.lang.Long) defaultValue(fields()[7]);
+        record.protocolStatus = fieldSetFlags()[8] ? this.protocolStatus
+            : (org.apache.nutch.storage.ProtocolStatus) defaultValue(fields()[8]);
+        record.content = fieldSetFlags()[9] ? this.content
+            : (java.nio.ByteBuffer) defaultValue(fields()[9]);
+        record.contentType = fieldSetFlags()[10] ? this.contentType
+            : (java.lang.CharSequence) defaultValue(fields()[10]);
+        record.prevSignature = fieldSetFlags()[11] ? this.prevSignature
+            : (java.nio.ByteBuffer) defaultValue(fields()[11]);
+        record.signature = fieldSetFlags()[12] ? this.signature
+            : (java.nio.ByteBuffer) defaultValue(fields()[12]);
+        record.title = fieldSetFlags()[13] ? this.title
+            : (java.lang.CharSequence) defaultValue(fields()[13]);
+        record.text = fieldSetFlags()[14] ? this.text
+            : (java.lang.CharSequence) defaultValue(fields()[14]);
+        record.parseStatus = fieldSetFlags()[15] ? this.parseStatus
+            : (org.apache.nutch.storage.ParseStatus) defaultValue(fields()[15]);
+        record.score = fieldSetFlags()[16] ? this.score
+            : (java.lang.Float) defaultValue(fields()[16]);
+        record.reprUrl = fieldSetFlags()[17] ? this.reprUrl
+            : (java.lang.CharSequence) defaultValue(fields()[17]);
+        record.headers = fieldSetFlags()[18] ? this.headers
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[18]));
+        record.outlinks = fieldSetFlags()[19] ? this.outlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[19]));
+        record.inlinks = fieldSetFlags()[20] ? this.inlinks
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[20]));
+        record.markers = fieldSetFlags()[21] ? this.markers
+            : (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[21]));
+        record.metadata = fieldSetFlags()[22] ? this.metadata
+            : (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) new org.apache.gora.persistency.impl.DirtyMapWrapper(
+                (java.util.Map) defaultValue(fields()[22]));
+        record.batchId = fieldSetFlags()[23] ? this.batchId
+            : (java.lang.CharSequence) defaultValue(fields()[23]);
         return record;
       } catch (Exception e) {
         throw new org.apache.avro.AvroRuntimeException(e);
@@ -1615,575 +2033,847 @@
       }
     }
   }
-  
-  public WebPage.Tombstone getTombstone(){
-  	return TOMBSTONE;
+
+  public WebPage.Tombstone getTombstone() {
+    return TOMBSTONE;
   }
 
-  public WebPage newInstance(){
+  public WebPage newInstance() {
     return newBuilder().build();
   }
 
   private static final Tombstone TOMBSTONE = new Tombstone();
-  
-  public static final class Tombstone extends WebPage implements org.apache.gora.persistency.Tombstone {
-  
-      private Tombstone() { }
-  
-	  		  /**
-	   * Gets the value of the 'baseUrl' field.
-	   * The original associated with this WebPage.	   */
-	  public java.lang.CharSequence getBaseUrl() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'baseUrl' field.
-	   * The original associated with this WebPage.	   * @param value the value to set.
-	   */
-	  public void setBaseUrl(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The original associated with this WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isBaseUrlDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'status' field.
-	   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified	   */
-	  public java.lang.Integer getStatus() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'status' field.
-	   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified	   * @param value the value to set.
-	   */
-	  public void setStatus(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'status' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified	   * @param value the value to set.
-	   */
-	  public boolean isStatusDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'fetchTime' field.
-	   * The system time in milliseconds for when the page was fetched.	   */
-	  public java.lang.Long getFetchTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'fetchTime' field.
-	   * The system time in milliseconds for when the page was fetched.	   * @param value the value to set.
-	   */
-	  public void setFetchTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when the page was fetched.	   * @param value the value to set.
-	   */
-	  public boolean isFetchTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'prevFetchTime' field.
-	   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation	   */
-	  public java.lang.Long getPrevFetchTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'prevFetchTime' field.
-	   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation	   * @param value the value to set.
-	   */
-	  public void setPrevFetchTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation	   * @param value the value to set.
-	   */
-	  public boolean isPrevFetchTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'fetchInterval' field.
-	   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.	   */
-	  public java.lang.Integer getFetchInterval() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'fetchInterval' field.
-	   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.	   * @param value the value to set.
-	   */
-	  public void setFetchInterval(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.	   * @param value the value to set.
-	   */
-	  public boolean isFetchIntervalDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'retriesSinceFetch' field.
-	   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.	   */
-	  public java.lang.Integer getRetriesSinceFetch() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'retriesSinceFetch' field.
-	   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.	   * @param value the value to set.
-	   */
-	  public void setRetriesSinceFetch(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.	   * @param value the value to set.
-	   */
-	  public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'modifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.	   */
-	  public java.lang.Long getModifiedTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'modifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.	   * @param value the value to set.
-	   */
-	  public void setModifiedTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isModifiedTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'prevModifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.	   */
-	  public java.lang.Long getPrevModifiedTime() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'prevModifiedTime' field.
-	   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.	   * @param value the value to set.
-	   */
-	  public void setPrevModifiedTime(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'protocolStatus' field.
-		   */
-	  public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'protocolStatus' field.
-		   * @param value the value to set.
-	   */
-	  public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'protocolStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-		   * @param value the value to set.
-	   */
-	  public boolean isProtocolStatusDirty(org.apache.nutch.storage.ProtocolStatus value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'content' field.
-	   * The entire raw document content e.g. raw XHTML	   */
-	  public java.nio.ByteBuffer getContent() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'content' field.
-	   * The entire raw document content e.g. raw XHTML	   * @param value the value to set.
-	   */
-	  public void setContent(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'content' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The entire raw document content e.g. raw XHTML	   * @param value the value to set.
-	   */
-	  public boolean isContentDirty(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'contentType' field.
-	   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.	   */
-	  public java.lang.CharSequence getContentType() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'contentType' field.
-	   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.	   * @param value the value to set.
-	   */
-	  public void setContentType(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'contentType' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.	   * @param value the value to set.
-	   */
-	  public boolean isContentTypeDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'prevSignature' field.
-	   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.	   */
-	  public java.nio.ByteBuffer getPrevSignature() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'prevSignature' field.
-	   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.	   * @param value the value to set.
-	   */
-	  public void setPrevSignature(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'prevSignature' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.	   * @param value the value to set.
-	   */
-	  public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'signature' field.
-	   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.	   */
-	  public java.nio.ByteBuffer getSignature() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'signature' field.
-	   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.	   * @param value the value to set.
-	   */
-	  public void setSignature(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'signature' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.	   * @param value the value to set.
-	   */
-	  public boolean isSignatureDirty(java.nio.ByteBuffer value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'title' field.
-	   * The title of the WebPage.	   */
-	  public java.lang.CharSequence getTitle() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'title' field.
-	   * The title of the WebPage.	   * @param value the value to set.
-	   */
-	  public void setTitle(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'title' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The title of the WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isTitleDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'text' field.
-	   * The textual content of the WebPage devoid from native markup.	   */
-	  public java.lang.CharSequence getText() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'text' field.
-	   * The textual content of the WebPage devoid from native markup.	   * @param value the value to set.
-	   */
-	  public void setText(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'text' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * The textual content of the WebPage devoid from native markup.	   * @param value the value to set.
-	   */
-	  public boolean isTextDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'parseStatus' field.
-		   */
-	  public org.apache.nutch.storage.ParseStatus getParseStatus() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'parseStatus' field.
-		   * @param value the value to set.
-	   */
-	  public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'parseStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
-		   * @param value the value to set.
-	   */
-	  public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'score' field.
-	   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.	   */
-	  public java.lang.Float getScore() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'score' field.
-	   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.	   * @param value the value to set.
-	   */
-	  public void setScore(java.lang.Float value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'score' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.	   * @param value the value to set.
-	   */
-	  public boolean isScoreDirty(java.lang.Float value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'reprUrl' field.
-	   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler	   */
-	  public java.lang.CharSequence getReprUrl() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'reprUrl' field.
-	   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler	   * @param value the value to set.
-	   */
-	  public void setReprUrl(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler	   * @param value the value to set.
-	   */
-	  public boolean isReprUrlDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'headers' field.
-	   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getHeaders() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'headers' field.
-	   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.	   * @param value the value to set.
-	   */
-	  public void setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'headers' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.	   * @param value the value to set.
-	   */
-	  public boolean isHeadersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'outlinks' field.
-	   * Embedded hyperlinks which direct outside of the current domain.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'outlinks' field.
-	   * Embedded hyperlinks which direct outside of the current domain.	   * @param value the value to set.
-	   */
-	  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Embedded hyperlinks which direct outside of the current domain.	   * @param value the value to set.
-	   */
-	  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'inlinks' field.
-	   * Embedded hyperlinks which link to pages within the current domain.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'inlinks' field.
-	   * Embedded hyperlinks which link to pages within the current domain.	   * @param value the value to set.
-	   */
-	  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Embedded hyperlinks which link to pages within the current domain.	   * @param value the value to set.
-	   */
-	  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'markers' field.
-	   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.	   */
-	  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getMarkers() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'markers' field.
-	   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.	   * @param value the value to set.
-	   */
-	  public void setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'markers' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isMarkersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'metadata' field.
-	   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.	   */
-	  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'metadata' field.
-	   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.	   * @param value the value to set.
-	   */
-	  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.	   * @param value the value to set.
-	   */
-	  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-				  /**
-	   * Gets the value of the 'batchId' field.
-	   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.	   */
-	  public java.lang.CharSequence getBatchId() {
-	    throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
-	  }
-	
-	  /**
-	   * Sets the value of the 'batchId' field.
-	   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.	   * @param value the value to set.
-	   */
-	  public void setBatchId(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
-	  }
-	  
-	  /**
-	   * Checks the dirty status of the 'batchId' field. A field is dirty if it represents a change that has not yet been written to the database.
-	   * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.	   * @param value the value to set.
-	   */
-	  public boolean isBatchIdDirty(java.lang.CharSequence value) {
-	    throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
-	  }
-	
-		  
+
+  public static final class Tombstone extends WebPage implements
+      org.apache.gora.persistency.Tombstone {
+
+    private Tombstone() {
+    }
+
+    /**
+     * Gets the value of the 'baseUrl' field. The original associated with this
+     * WebPage.
+     */
+    public java.lang.CharSequence getBaseUrl() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'baseUrl' field. The original associated with this
+     * WebPage. * @param value the value to set.
+     */
+    public void setBaseUrl(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'baseUrl' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * original associated with this WebPage. * @param value the value to set.
+     */
+    public boolean isBaseUrlDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'status' field. A crawl status associated with the
+     * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+     * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage
+     * no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to
+     * other page, STATUS_REDIR_PERM - WebPage permanently redirects to other
+     * page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g.
+     * transient errors and STATUS_NOTMODIFIED - fetching successful - page is
+     * not modified
+     */
+    public java.lang.Integer getStatus() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'status' field. A crawl status associated with the
+     * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+     * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage
+     * no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to
+     * other page, STATUS_REDIR_PERM - WebPage permanently redirects to other
+     * page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g.
+     * transient errors and STATUS_NOTMODIFIED - fetching successful - page is
+     * not modified * @param value the value to set.
+     */
+    public void setStatus(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'status' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * crawl status associated with the WebPage, can be of value
+     * STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage
+     * was successfully fetched, STATUS_GONE - WebPage no longer exists,
+     * STATUS_REDIR_TEMP - WebPage temporarily redirects to other page,
+     * STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+     * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+     * errors and STATUS_NOTMODIFIED - fetching successful - page is not
+     * modified * @param value the value to set.
+     */
+    public boolean isStatusDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'fetchTime' field. The system time in milliseconds
+     * for when the page was fetched.
+     */
+    public java.lang.Long getFetchTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'fetchTime' field. The system time in milliseconds
+     * for when the page was fetched. * @param value the value to set.
+     */
+    public void setFetchTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'fetchTime' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * system time in milliseconds for when the page was fetched. * @param value
+     * the value to set.
+     */
+    public boolean isFetchTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'prevFetchTime' field. The system time in
+     * milliseconds for when the page was last fetched if it was previously
+     * fetched which can be used to calculate time delta within a fetching
+     * schedule implementation
+     */
+    public java.lang.Long getPrevFetchTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'prevFetchTime' field. The system time in
+     * milliseconds for when the page was last fetched if it was previously
+     * fetched which can be used to calculate time delta within a fetching
+     * schedule implementation * @param value the value to set.
+     */
+    public void setPrevFetchTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * system time in milliseconds for when the page was last fetched if it was
+     * previously fetched which can be used to calculate time delta within a
+     * fetching schedule implementation * @param value the value to set.
+     */
+    public boolean isPrevFetchTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'fetchInterval' field. The default number of
+     * seconds between re-fetches of a page. The default is considered as 30
+     * days unless a custom fetch schedule is implemented.
+     */
+    public java.lang.Integer getFetchInterval() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'fetchInterval' field. The default number of
+     * seconds between re-fetches of a page. The default is considered as 30
+     * days unless a custom fetch schedule is implemented. * @param value the
+     * value to set.
+     */
+    public void setFetchInterval(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'fetchInterval' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * default number of seconds between re-fetches of a page. The default is
+     * considered as 30 days unless a custom fetch schedule is implemented. * @param
+     * value the value to set.
+     */
+    public boolean isFetchIntervalDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'retriesSinceFetch' field. The number of retried
+     * attempts at fetching the WebPage since it was last successfully fetched.
+     */
+    public java.lang.Integer getRetriesSinceFetch() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'retriesSinceFetch' field. The number of retried
+     * attempts at fetching the WebPage since it was last successfully fetched.
+     * * @param value the value to set.
+     */
+    public void setRetriesSinceFetch(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'retriesSinceFetch' field. A field is
+     * dirty if it represents a change that has not yet been written to the
+     * database. The number of retried attempts at fetching the WebPage since it
+     * was last successfully fetched. * @param value the value to set.
+     */
+    public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'modifiedTime' field. The system time in
+     * milliseconds for when this WebPage was modified by the WebPage author, if
+     * this is not available we default to the server for this information. This
+     * is important to understand the changing nature of the WebPage.
+     */
+    public java.lang.Long getModifiedTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'modifiedTime' field. The system time in
+     * milliseconds for when this WebPage was modified by the WebPage author, if
+     * this is not available we default to the server for this information. This
+     * is important to understand the changing nature of the WebPage. * @param
+     * value the value to set.
+     */
+    public void setModifiedTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'modifiedTime' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * system time in milliseconds for when this WebPage was modified by the
+     * WebPage author, if this is not available we default to the server for
+     * this information. This is important to understand the changing nature of
+     * the WebPage. * @param value the value to set.
+     */
+    public boolean isModifiedTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'prevModifiedTime' field. The system time in
+     * milliseconds for when this WebPage was previously modified by the author,
+     * if this is not available then we default to the server for this
+     * information. This is important to understand the changing nature of a
+     * WebPage.
+     */
+    public java.lang.Long getPrevModifiedTime() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'prevModifiedTime' field. The system time in
+     * milliseconds for when this WebPage was previously modified by the author,
+     * if this is not available then we default to the server for this
+     * information. This is important to understand the changing nature of a
+     * WebPage. * @param value the value to set.
+     */
+    public void setPrevModifiedTime(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty
+     * if it represents a change that has not yet been written to the database.
+     * The system time in milliseconds for when this WebPage was previously
+     * modified by the author, if this is not available then we default to the
+     * server for this information. This is important to understand the changing
+     * nature of a WebPage. * @param value the value to set.
+     */
+    public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'protocolStatus' field.
+     */
+    public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'protocolStatus' field.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'protocolStatus' field. A field is dirty
+     * if it represents a change that has not yet been written to the database.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public boolean isProtocolStatusDirty(
+        org.apache.nutch.storage.ProtocolStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'content' field. The entire raw document content
+     * e.g. raw XHTML
+     */
+    public java.nio.ByteBuffer getContent() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'content' field. The entire raw document content
+     * e.g. raw XHTML * @param value the value to set.
+     */
+    public void setContent(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'content' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * entire raw document content e.g. raw XHTML * @param value the value to
+     * set.
+     */
+    public boolean isContentDirty(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'contentType' field. The type of the content
+     * contained within the document itself. ContentType is an alias for
+     * MimeType. Historically, this parameter was only called MimeType, but
+     * since this is actually the value included in the HTTP Content-Type
+     * header, it can also include the character set encoding, which makes it
+     * more than just a MimeType specification. If MimeType is specified e.g.
+     * not None, that value is used. Otherwise, ContentType is used. If neither
+     * is given, the DEFAULT_CONTENT_TYPE setting is used.
+     */
+    public java.lang.CharSequence getContentType() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'contentType' field. The type of the content
+     * contained within the document itself. ContentType is an alias for
+     * MimeType. Historically, this parameter was only called MimeType, but
+     * since this is actually the value included in the HTTP Content-Type
+     * header, it can also include the character set encoding, which makes it
+     * more than just a MimeType specification. If MimeType is specified e.g.
+     * not None, that value is used. Otherwise, ContentType is used. If neither
+     * is given, the DEFAULT_CONTENT_TYPE setting is used. * @param value the
+     * value to set.
+     */
+    public void setContentType(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'contentType' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. The
+     * type of the content contained within the document itself. ContentType is
+     * an alias for MimeType. Historically, this parameter was only called
+     * MimeType, but since this is actually the value included in the HTTP
+     * Content-Type header, it can also include the character set encoding,
+     * which makes it more than just a MimeType specification. If MimeType is
+     * specified e.g. not None, that value is used. Otherwise, ContentType is
+     * used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. * @param
+     * value the value to set.
+     */
+    public boolean isContentTypeDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'prevSignature' field. An implementation of a
+     * WebPage's previous signature from which it can be identified and
+     * referenced at any point in time. This can be used to uniquely identify
+     * WebPage deltas based on page fingerprints.
+     */
+    public java.nio.ByteBuffer getPrevSignature() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'prevSignature' field. An implementation of a
+     * WebPage's previous signature from which it can be identified and
+     * referenced at any point in time. This can be used to uniquely identify
+     * WebPage deltas based on page fingerprints. * @param value the value to
+     * set.
+     */
+    public void setPrevSignature(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'prevSignature' field. A field is dirty if
+     * it represents a change that has not yet been written to the database. An
+     * implementation of a WebPage's previous signature from which it can be
+     * identified and referenced at any point in time. This can be used to
+     * uniquely identify WebPage deltas based on page fingerprints. * @param
+     * value the value to set.
+     */
+    public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'signature' field. An implementation of a WebPage's
+     * signature from which it can be identified and referenced at any point in
+     * time. This is essentially the WebPage's fingerprint representing its state
+     * for any point in time.
+     */
+    public java.nio.ByteBuffer getSignature() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'signature' field. An implementation of a WebPage's
+     * signature from which it can be identified and referenced at any point in
+     * time. This is essentially the WebPage's fingerprint representing its state
+     * for any point in time. * @param value the value to set.
+     */
+    public void setSignature(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'signature' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. An
+     * implementation of a WebPage's signature from which it can be identified
+     * and referenced at any point in time. This is essentially the WebPage's
+     * fingerprint representing its state for any point in time. * @param value
+     * the value to set.
+     */
+    public boolean isSignatureDirty(java.nio.ByteBuffer value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'title' field. The title of the WebPage.
+     */
+    public java.lang.CharSequence getTitle() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'title' field. The title of the WebPage. * @param
+     * value the value to set.
+     */
+    public void setTitle(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'title' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * title of the WebPage. * @param value the value to set.
+     */
+    public boolean isTitleDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'text' field. The textual content of the WebPage
+     * devoid of native markup.
+     */
+    public java.lang.CharSequence getText() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'text' field. The textual content of the WebPage
+     * devoid of native markup. * @param value the value to set.
+     */
+    public void setText(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'text' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. The
+     * textual content of the WebPage devoid of native markup. * @param value
+     * the value to set.
+     */
+    public boolean isTextDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'parseStatus' field.
+     */
+    public org.apache.nutch.storage.ParseStatus getParseStatus() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'parseStatus' field.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'parseStatus' field. A field is dirty if
+     * it represents a change that has not yet been written to the database.
+     * 
+     * @param value
+     *          the value to set.
+     */
+    public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'score' field. A score used to determine a
+     * WebPage's relevance within the web graph it is part of. This score may
+     * change over time based on graph characteristics.
+     */
+    public java.lang.Float getScore() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'score' field. A score used to determine a
+     * WebPage's relevance within the web graph it is part of. This score may
+     * change over time based on graph characteristics. * @param value the value
+     * to set.
+     */
+    public void setScore(java.lang.Float value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'score' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * score used to determine a WebPage's relevance within the web graph it is
+     * part of. This score may change over time based on graph characteristics.
+     * * @param value the value to set.
+     */
+    public boolean isScoreDirty(java.lang.Float value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'reprUrl' field. In the case where we are given two
+     * urls, a source and a destination of a redirect, we should determine and
+     * persist the representative url. The logic used to determine this is based
+     * largely on Yahoo!'s Slurp Crawler
+     */
+    public java.lang.CharSequence getReprUrl() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'reprUrl' field. In the case where we are given two
+     * urls, a source and a destination of a redirect, we should determine and
+     * persist the representative url. The logic used to determine this is based
+     * largely on Yahoo!'s Slurp Crawler * @param value the value to set.
+     */
+    public void setReprUrl(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'reprUrl' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. In the
+     * case where we are given two urls, a source and a destination of a
+     * redirect, we should determine and persist the representative url. The
+     * logic used to determine this is based largely on Yahoo!'s Slurp Crawler * @param
+     * value the value to set.
+     */
+    public boolean isReprUrlDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'headers' field. Header information returned from
+     * the web server used to serve the content which is subsequently fetched
+     * from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+     * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+     * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getHeaders() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'headers' field. Header information returned from
+     * the web server used to serve the content which is subsequently fetched
+     * from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+     * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+     * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. * @param value the
+     * value to set.
+     */
+    public void setHeaders(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'headers' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. Header
+     * information returned from the web server used to serve the content which
+     * is subsequently fetched from. This includes keys such as
+     * TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+     * CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE,
+     * LAST_MODIFIED and LOCATION. * @param value the value to set.
+     */
+    public boolean isHeadersDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'outlinks' field. Embedded hyperlinks which direct
+     * outside of the current domain.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getOutlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'outlinks' field. Embedded hyperlinks which direct
+     * outside of the current domain. * @param value the value to set.
+     */
+    public void setOutlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Embedded hyperlinks which direct outside of the current domain. * @param
+     * value the value to set.
+     */
+    public boolean isOutlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'inlinks' field. Embedded hyperlinks which link to
+     * pages within the current domain.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getInlinks() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'inlinks' field. Embedded hyperlinks which link to
+     * pages within the current domain. * @param value the value to set.
+     */
+    public void setInlinks(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Embedded hyperlinks which link to pages within the current domain. * @param
+     * value the value to set.
+     */
+    public boolean isInlinksDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'markers' field. Markers flags which represent user
+     * and machine decisions which have affected or influenced a WebPage's current
+     * state. Markers can be system specific and user machine driven in nature.
+     * They are assigned to a WebPage on a job-by-job basis and their values
+     * indicative of what actions should be associated with a WebPage.
+     */
+    public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> getMarkers() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'markers' field. Markers flags which represent user
+     * and machine decisions which have affected or influenced a WebPage's current
+     * state. Markers can be system specific and user machine driven in nature.
+     * They are assigned to a WebPage on a job-by-job basis and their values
+     * indicative of what actions should be associated with a WebPage. * @param
+     * value the value to set.
+     */
+    public void setMarkers(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'markers' field. A field is dirty if it
+     * represents a change that has not yet been written to the database.
+     * Markers flags which represent user and machine decisions which have
+     * affected or influenced a WebPage's current state. Markers can be system
+     * specific and user machine driven in nature. They are assigned to a
+     * WebPage on a job-by-job basis and their values indicative of what actions
+     * should be associated with a WebPage. * @param value the value to set.
+     */
+    public boolean isMarkersDirty(
+        java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'metadata' field. A multi-valued metadata container
+     * used for storing everything from structured WebPage characteristics, to
+     * ad-hoc extraction and metadata augmentation for any given WebPage.
+     */
+    public java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> getMetadata() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'metadata' field. A multi-valued metadata container
+     * used for storing everything from structured WebPage characteristics, to
+     * ad-hoc extraction and metadata augmentation for any given WebPage. * @param
+     * value the value to set.
+     */
+    public void setMetadata(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'metadata' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * multi-valued metadata container used for storing everything from
+     * structured WebPage characteristics, to ad-hoc extraction and metadata
+     * augmentation for any given WebPage. * @param value the value to set.
+     */
+    public boolean isMetadataDirty(
+        java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
+    /**
+     * Gets the value of the 'batchId' field. A batchId that this WebPage is
+     * assigned to. WebPage's are fetched in batches, called fetchlists. Pages
+     * are partitioned but can always be associated and fetched alongside pages
+     * of similar value (within a crawl cycle) based on batchId.
+     */
+    public java.lang.CharSequence getBatchId() {
+      throw new java.lang.UnsupportedOperationException(
+          "Get is not supported on tombstones");
+    }
+
+    /**
+     * Sets the value of the 'batchId' field. A batchId that this WebPage is
+     * assigned to. WebPage's are fetched in batches, called fetchlists. Pages
+     * are partitioned but can always be associated and fetched alongside pages
+     * of similar value (within a crawl cycle) based on batchId. * @param value
+     * the value to set.
+     */
+    public void setBatchId(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "Set is not supported on tombstones");
+    }
+
+    /**
+     * Checks the dirty status of the 'batchId' field. A field is dirty if it
+     * represents a change that has not yet been written to the database. A
+     * batchId that this WebPage is assigned to. WebPage's are fetched in
+     * batches, called fetchlists. Pages are partitioned but can always be
+     * associated and fetched alongside pages of similar value (within a crawl
+     * cycle) based on batchId. * @param value the value to set.
+     */
+    public boolean isBatchIdDirty(java.lang.CharSequence value) {
+      throw new java.lang.UnsupportedOperationException(
+          "IsDirty is not supported on tombstones");
+    }
+
   }
-  
+
 }
-
Index: src/java/org/apache/nutch/storage/WebTableCreator.java
===================================================================
--- src/java/org/apache/nutch/storage/WebTableCreator.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/WebTableCreator.java	(working copy)
@@ -21,9 +21,8 @@
 
 public class WebTableCreator {
   public static void main(String[] args) throws Exception {
-    DataStore<String, WebPage> store =
-      StorageUtils.createWebStore(NutchConfiguration.create(), String.class,
-        WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        NutchConfiguration.create(), String.class, WebPage.class);
 
     System.out.println(store);
   }
Index: src/java/org/apache/nutch/storage/package-info.java
===================================================================
--- src/java/org/apache/nutch/storage/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/storage/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * {@link org.apache.nutch.storage.Host host metadata}) of data in abstracted storage.
  */
 package org.apache.nutch.storage;
+
Index: src/java/org/apache/nutch/tools/Benchmark.java
===================================================================
--- src/java/org/apache/nutch/tools/Benchmark.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/Benchmark.java	(working copy)
@@ -50,7 +50,8 @@
     System.exit(res);
   }
 
-  private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception {
+  private void createSeeds(FileSystem fs, Path seedsDir, int count)
+      throws Exception {
     OutputStream os = fs.create(new Path(seedsDir, "seeds"));
     for (int i = 0; i < count; i++) {
       String url = "http://www.test-" + i + ".com/\r\n";
@@ -61,7 +62,7 @@
   }
 
   public static final class BenchmarkResults {
-    Map<String,Map<String,Long>> timings = new HashMap<String,Map<String,Long>>();
+    Map<String, Map<String, Long>> timings = new HashMap<String, Map<String, Long>>();
     List<String> runs = new ArrayList<String>();
     List<String> stages = new ArrayList<String>();
     int seeds, depth, threads;
@@ -76,9 +77,9 @@
       if (!stages.contains(stage)) {
         stages.add(stage);
       }
-      Map<String,Long> t = timings.get(stage);
+      Map<String, Long> t = timings.get(stage);
       if (t == null) {
-        t = new HashMap<String,Long>();
+        t = new HashMap<String, Long>();
         timings.put(stage, t);
       }
       t.put(run, timing);
@@ -94,8 +95,9 @@
       sb.append("* TopN:\t" + topN + "\n");
       sb.append("* TOTAL ELAPSED:\t" + elapsed + "\n");
       for (String stage : stages) {
-        Map<String,Long> timing = timings.get(stage);
-        if (timing == null) continue;
+        Map<String, Long> timing = timings.get(stage);
+        if (timing == null)
+          continue;
         sb.append("- stage: " + stage + "\n");
         for (String r : runs) {
           Long Time = timing.get(r);
@@ -111,6 +113,7 @@
     public List<String> getStages() {
       return stages;
     }
+
     public List<String> getRuns() {
       return runs;
     }
@@ -121,21 +124,28 @@
     int seeds = 1;
     int depth = 10;
     int threads = 10;
-    //boolean delete = true;
+    // boolean delete = true;
     long topN = Long.MAX_VALUE;
 
     if (args.length == 0) {
-      System.err.println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
-      System.err.println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
-      System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
+      System.err
+          .println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
+      System.err
+          .println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
+      System.err
+          .println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
       System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
-      System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
+      System.err
+          .println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
       // XXX what is the equivalent here? not an additional job...
       // System.err.println("\t-keep\tkeep batchId data (default: delete after updatedb)");
       System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
-      System.err.println("\tNOTE: if not specified, this is reset to: " + plugins);
-      System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
-      System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
+      System.err.println("\tNOTE: if not specified, this is reset to: "
+          + plugins);
+      System.err
+          .println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
+      System.err
+          .println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
       return -1;
     }
     int maxPerHost = Integer.MAX_VALUE;
@@ -157,13 +167,14 @@
         return -1;
       }
     }
-    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN, plugins);
+    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN,
+        plugins);
     System.out.println(res);
     return 0;
   }
 
-  public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost,
-        long topN, String plugins) throws Exception {
+  public BenchmarkResults benchmark(int seeds, int depth, int threads,
+      int maxPerHost, long topN, String plugins) throws Exception {
     Configuration conf = getConf();
     conf.set("http.proxy.host", "localhost");
     conf.setInt("http.proxy.port", 8181);
@@ -173,11 +184,12 @@
       conf.set("plugin.includes", plugins);
     }
     conf.setInt(GeneratorJob.GENERATOR_MAX_COUNT, maxPerHost);
-    conf.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    conf.set(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     Job job = new NutchJob(conf);
     FileSystem fs = FileSystem.get(job.getConfiguration());
-    Path dir = new Path(getConf().get("hadoop.tmp.dir"),
-            "bench-" + System.currentTimeMillis());
+    Path dir = new Path(getConf().get("hadoop.tmp.dir"), "bench-"
+        + System.currentTimeMillis());
     fs.mkdirs(dir);
     Path rootUrlDir = new Path(dir, "seed");
     fs.mkdirs(rootUrlDir);
@@ -204,7 +216,7 @@
     ParserJob parseBatch = new ParserJob(conf);
     DbUpdaterJob crawlDbTool = new DbUpdaterJob(conf);
     // not needed in the new API
-    //LinkDb linkDbTool = new LinkDb(getConf());
+    // LinkDb linkDbTool = new LinkDb(getConf());
 
     long start = System.currentTimeMillis();
     // initialize crawlDb
@@ -212,10 +224,10 @@
     long delta = System.currentTimeMillis() - start;
     res.addTiming("inject", "0", delta);
     int i;
-    for (i = 0; i < depth; i++) {             // generate new batch
+    for (i = 0; i < depth; i++) { // generate new batch
       start = System.currentTimeMillis();
       String batchId = generator.generate(topN, System.currentTimeMillis(),
-              false, false);
+          false, false);
       delta = System.currentTimeMillis() - start;
       res.addTiming("generate", i + "", delta);
       if (batchId == null) {
@@ -224,12 +236,12 @@
       }
       boolean isParsing = getConf().getBoolean("fetcher.parse", false);
       start = System.currentTimeMillis();
-      fetcher.fetch(batchId, threads, false, -1);  // fetch it
+      fetcher.fetch(batchId, threads, false, -1); // fetch it
       delta = System.currentTimeMillis() - start;
       res.addTiming("fetch", i + "", delta);
       if (!isParsing) {
         start = System.currentTimeMillis();
-        parseBatch.parse(batchId, false, false);    // parse it, if needed
+        parseBatch.parse(batchId, false, false); // parse it, if needed
         delta = System.currentTimeMillis() - start;
         res.addTiming("parse", i + "", delta);
       }
@@ -241,7 +253,9 @@
     if (i == 0) {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl finished: " + dir);
+    }
     res.elapsed = System.currentTimeMillis() - res.elapsed;
     WebTableReader dbreader = new WebTableReader();
     dbreader.setConf(conf);
Index: src/java/org/apache/nutch/tools/DmozParser.java
===================================================================
--- src/java/org/apache/nutch/tools/DmozParser.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/DmozParser.java	(working copy)
@@ -40,17 +40,16 @@
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.TableUtil;
 
-
 /** Utility that converts DMOZ RDF into a flat file of URLs to be injected. */
 public class DmozParser {
   public static final Logger LOG = LoggerFactory.getLogger(DmozParser.class);
-  
-    long pages = 0;
-    private static DataStore<String, WebPage> store = null;
-    
+
+  long pages = 0;
+  private static DataStore<String, WebPage> store = null;
+
   /**
-   * This filter fixes characters that might offend our parser.
-   * This lets us be tolerant of errors that might appear in the input XML.
+   * This filter fixes characters that might offend our parser. This lets us be
+   * tolerant of errors that might appear in the input XML.
    */
   private static class XMLCharFilter extends FilterReader {
     private boolean lastBad = false;
@@ -62,9 +61,9 @@
     public int read() throws IOException {
       int c = in.read();
       int value = c;
-      if (c != -1 && !(XMLChar.isValid(c)))     // fix invalid characters
+      if (c != -1 && !(XMLChar.isValid(c))) // fix invalid characters
         value = 'X';
-      else if (lastBad && c == '<') {           // fix mis-matched brackets
+      else if (lastBad && c == '<') { // fix mis-matched brackets
         in.mark(1);
         if (in.read() != '/')
           value = 'X';
@@ -75,21 +74,20 @@
       return value;
     }
 
-    public int read(char[] cbuf, int off, int len)
-      throws IOException {
+    public int read(char[] cbuf, int off, int len) throws IOException {
       int n = in.read(cbuf, off, len);
       if (n != -1) {
         for (int i = 0; i < n; i++) {
-          char c = cbuf[off+i];
+          char c = cbuf[off + i];
           char value = c;
-          if (!(XMLChar.isValid(c)))            // fix invalid characters
+          if (!(XMLChar.isValid(c))) // fix invalid characters
             value = 'X';
-          else if (lastBad && c == '<') {       // fix mis-matched brackets
-            if (i != n-1 && cbuf[off+i+1] != '/')
+          else if (lastBad && c == '<') { // fix mis-matched brackets
+            if (i != n - 1 && cbuf[off + i + 1] != '/')
               value = 'X';
           }
           lastBad = (c == 65533);
-          cbuf[off+i] = value;
+          cbuf[off + i] = value;
         }
       }
       return n;
@@ -96,16 +94,15 @@
     }
   }
 
-
   /**
-   * The RDFProcessor receives tag messages during a parse
-   * of RDF XML data.  We build whatever structures we need
-   * from these messages.
+   * The RDFProcessor receives tag messages during a parse of RDF XML data. We
+   * build whatever structures we need from these messages.
    */
   private class RDFProcessor extends DefaultHandler {
     String curURL = null, curSection = null;
-    boolean titlePending = false, descPending = false, insideAdultSection = false;
-    Pattern topicPattern = null; 
+    boolean titlePending = false, descPending = false,
+        insideAdultSection = false;
+    Pattern topicPattern = null;
     StringBuffer title = new StringBuffer(), desc = new StringBuffer();
     XMLReader reader;
     int subsetDenom;
@@ -115,16 +112,18 @@
     Locator location;
 
     /**
-     * Pass in an XMLReader, plus a flag as to whether we 
-     * should include adult material.
+     * Pass in an XMLReader, plus a flag as to whether we should include adult
+     * material.
      */
-    public RDFProcessor(XMLReader reader, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern, boolean snippet) throws IOException {
+    public RDFProcessor(XMLReader reader, int subsetDenom,
+        boolean includeAdult, int skew, Pattern topicPattern, boolean snippet)
+        throws IOException {
       this.reader = reader;
       this.subsetDenom = subsetDenom;
       this.includeAdult = includeAdult;
       this.topicPattern = topicPattern;
       this.snippet = snippet;
-      
+
       this.hashSkew = skew != 0 ? skew : new Random().nextInt();
     }
 
@@ -135,20 +134,21 @@
     /**
      * Start of an XML elt
      */
-    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
+    public void startElement(String namespaceURI, String localName,
+        String qName, Attributes atts) throws SAXException {
       if ("Topic".equals(qName)) {
         curSection = atts.getValue("r:id");
       } else if ("ExternalPage".equals(qName)) {
         // Porn filter
-        if ((! includeAdult) && curSection.startsWith("Top/Adult")) {
+        if ((!includeAdult) && curSection.startsWith("Top/Adult")) {
           return;
         }
-          
+
         if (topicPattern != null && !topicPattern.matcher(curSection).matches()) {
           return;
         }
 
-        // Subset denominator filter.  
+        // Subset denominator filter.
         // Only emit with a chance of 1/denominator.
         String url = atts.getValue("about");
         int hashValue = MD5Hash.digest(url).hashCode();
@@ -181,40 +181,42 @@
      * Termination of XML elt
      */
     public void endElement(String namespaceURI, String localName, String qName)
-      throws SAXException {
+        throws SAXException {
       if (curURL != null) {
         if ("ExternalPage".equals(qName)) {
           //
-          // Inc the number of pages, insert the page, and 
+          // Inc the number of pages, insert the page, and
           // possibly print status.
           //
-          if(snippet){
+          if (snippet) {
             try {
               String reversedUrl = TableUtil.reverseUrl(curURL);
               WebPage row = store.get(reversedUrl);
-              
-              if(row!=null){
+
+              if (row != null) {
                 if (desc.length() > 0) {
-                  row.getMetadata().put(new Utf8("_dmoz_desc_"), ByteBuffer.wrap(desc.toString().getBytes()));
+                  row.getMetadata().put(new Utf8("_dmoz_desc_"),
+                      ByteBuffer.wrap(desc.toString().getBytes()));
                   desc.delete(0, desc.length());
                 }
                 if (title.length() > 0) {
-                  row.getMetadata().put(new Utf8("_dmoz_title_"), ByteBuffer.wrap(title.toString().getBytes()));
+                  row.getMetadata().put(new Utf8("_dmoz_title_"),
+                      ByteBuffer.wrap(title.toString().getBytes()));
                   title.delete(0, title.length());
                 }
                 store.put(reversedUrl, row);
                 store.flush();
               }
-              
-             } catch (IOException e) {
+
+            } catch (IOException e) {
               // TODO Auto-generated catch block
               e.printStackTrace();
-             }
+            }
           } else {
-            System.out.println(curURL); 
-            
+            System.out.println(curURL);
+
             //
-            // Clear out the link text.  This is what
+            // Clear out the link text. This is what
             // you would use for adding to the linkdb.
             //
             if (desc.length() > 0) {
@@ -225,7 +227,7 @@
             }
           }
           pages++;
-          
+
           // Null out the URL.
           curURL = null;
         } else if ("d:Title".equals(qName)) {
@@ -252,15 +254,13 @@
     }
 
     /**
-     * From time to time the Parser will set the "current location"
-     * by calling this function.  It's useful for emitting locations
-     * for error messages.
+     * From time to time the Parser will set the "current location" by calling
+     * this function. It's useful for emitting locations for error messages.
      */
     public void setDocumentLocator(Locator locator) {
       location = locator;
     }
 
-
     //
     // Interface ErrorHandler
     //
@@ -280,11 +280,11 @@
     public void fatalError(SAXParseException spe) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Fatal err: " + spe.toString() + ": " + spe.getMessage());
-        LOG.error("Last known line is " + location.getLineNumber() +
-                  ", column " + location.getColumnNumber());
+        LOG.error("Last known line is " + location.getLineNumber()
+            + ", column " + location.getColumnNumber());
       }
     }
-        
+
     /**
      * Emit exception warning message
      */
@@ -296,16 +296,13 @@
   }
 
   /**
-   * Iterate through all the items in this structured DMOZ file.
-   * Add each URL to the web db.
+   * Iterate through all the items in this structured DMOZ file. Add each URL to
+   * the web db.
    */
   public void parseDmozFile(File dmozFile, int subsetDenom,
-                            boolean includeAdult,
-                            int skew,
-                            Pattern topicPattern,
-                            boolean snippet)
+      boolean includeAdult, int skew, Pattern topicPattern, boolean snippet)
 
-    throws IOException, SAXException, ParserConfigurationException {
+  throws IOException, SAXException, ParserConfigurationException {
 
     SAXParserFactory parserFactory = SAXParserFactory.newInstance();
     SAXParser parser = parserFactory.newSAXParser();
@@ -312,19 +309,20 @@
     XMLReader reader = parser.getXMLReader();
 
     // Create our own processor to receive SAX events
-    RDFProcessor rp =
-      new RDFProcessor(reader, subsetDenom, includeAdult,
-                       skew, topicPattern, snippet);
+    RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew,
+        topicPattern, snippet);
     reader.setContentHandler(rp);
     reader.setErrorHandler(rp);
     LOG.info("skew = " + rp.hashSkew);
 
     //
-    // Open filtered text stream.  The TextFilter makes sure that
+    // Open filtered text stream. The TextFilter makes sure that
     // only appropriate XML-approved Text characters are received.
     // Any non-conforming characters are silently skipped.
     //
-    XMLCharFilter in = new XMLCharFilter(new BufferedReader(new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8")));
+    XMLCharFilter in = new XMLCharFilter(new BufferedReader(
+        new InputStreamReader(new BufferedInputStream(new FileInputStream(
+            dmozFile)), "UTF-8")));
     try {
       InputSource is = new InputSource(in);
       reader.parse(is);
@@ -338,18 +336,17 @@
     }
   }
 
-  private static void addTopicsFromFile(String topicFile,
-                                        Vector<String> topics)
-  throws IOException {
+  private static void addTopicsFromFile(String topicFile, Vector<String> topics)
+      throws IOException {
     BufferedReader in = null;
     try {
-      in = new BufferedReader(new InputStreamReader(new FileInputStream(topicFile), "UTF-8"));
+      in = new BufferedReader(new InputStreamReader(new FileInputStream(
+          topicFile), "UTF-8"));
       String line = null;
       while ((line = in.readLine()) != null) {
         topics.addElement(new String(line));
       }
-    } 
-    catch (Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Failed with the following exception: ", e.toString());
       }
@@ -358,18 +355,19 @@
       in.close();
     }
   }
-    
+
   /**
-   * Command-line access.  User may add URLs via a flat text file
-   * or the structured DMOZ file.  By default, we ignore Adult
-   * material (as categorized by DMOZ).
+   * Command-line access. User may add URLs via a flat text file or the
+   * structured DMOZ file. By default, we ignore Adult material (as categorized
+   * by DMOZ).
    */
   public static void main(String argv[]) throws Exception {
     if (argv.length < 1) {
-      System.err.println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-snippet] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
+      System.err
+          .println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-snippet] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
       return;
     }
-    
+
     //
     // Parse the command line, figure out what kind of
     // URL file we need to load
@@ -379,11 +377,11 @@
     String dmozFile = argv[0];
     boolean includeAdult = false;
     boolean snippet = false;
-    Pattern topicPattern = null; 
+    Pattern topicPattern = null;
     Vector<String> topics = new Vector<String>();
-    
+
     Configuration conf = NutchConfiguration.create();
-    store = StorageUtils.createWebStore(conf,String.class, WebPage.class);
+    store = StorageUtils.createWebStore(conf, String.class, WebPage.class);
     FileSystem fs = FileSystem.get(conf);
     try {
       for (int i = 1; i < argv.length; i++) {
@@ -390,18 +388,18 @@
         if ("-includeAdultMaterial".equals(argv[i])) {
           includeAdult = true;
         } else if ("-subset".equals(argv[i])) {
-          subsetDenom = Integer.parseInt(argv[i+1]);
+          subsetDenom = Integer.parseInt(argv[i + 1]);
           i++;
         } else if ("-topic".equals(argv[i])) {
-          topics.addElement(argv[i+1]); 
+          topics.addElement(argv[i + 1]);
           i++;
         } else if ("-topicFile".equals(argv[i])) {
-          addTopicsFromFile(argv[i+1], topics);
+          addTopicsFromFile(argv[i + 1], topics);
           i++;
         } else if ("-skew".equals(argv[i])) {
-          skew = Integer.parseInt(argv[i+1]);
+          skew = Integer.parseInt(argv[i + 1]);
           i++;
-        }else if ("-snippet".equals(argv[i])) {
+        } else if ("-snippet".equals(argv[i])) {
           snippet = true;
         }
       }
@@ -409,21 +407,21 @@
       DmozParser parser = new DmozParser();
 
       if (!topics.isEmpty()) {
-        String regExp = new String("^("); 
+        String regExp = new String("^(");
         int j = 0;
-        for ( ; j < topics.size() - 1; ++j) {
+        for (; j < topics.size() - 1; ++j) {
           regExp = regExp.concat(topics.get(j));
           regExp = regExp.concat("|");
         }
         regExp = regExp.concat(topics.get(j));
-        regExp = regExp.concat(").*"); 
+        regExp = regExp.concat(").*");
         LOG.info("Topic selection pattern = " + regExp);
-        topicPattern = Pattern.compile(regExp); 
+        topicPattern = Pattern.compile(regExp);
       }
 
-      parser.parseDmozFile(new File(dmozFile), subsetDenom,
-                           includeAdult, skew, topicPattern, snippet);
-      
+      parser.parseDmozFile(new File(dmozFile), subsetDenom, includeAdult, skew,
+          topicPattern, snippet);
+
     } finally {
       fs.close();
     }
Index: src/java/org/apache/nutch/tools/ResolveUrls.java
===================================================================
--- src/java/org/apache/nutch/tools/ResolveUrls.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/ResolveUrls.java	(working copy)
@@ -59,8 +59,7 @@
   /**
    * A Thread which gets the ip address of a single host by name.
    */
-  private static class ResolverThread
-    extends Thread {
+  private static class ResolverThread extends Thread {
 
     private String url = null;
 
@@ -74,13 +73,12 @@
       String host = URLUtil.getHost(url);
       long start = System.currentTimeMillis();
       try {
-        // get the address by name and if no error is thrown then it 
+        // get the address by name and if no error is thrown then it
         // is resolved successfully
         InetAddress.getByName(host);
         LOG.info("Resolved: " + host);
         numResolved.incrementAndGet();
-      }
-      catch (Exception uhe) {
+      } catch (Exception uhe) {
         LOG.info("Error Resolving: " + host);
         numErrored.incrementAndGet();
       }
@@ -92,8 +90,8 @@
   }
 
   /**
-   * Creates a thread pool for resolving urls.  Reads in the url file on the
-   * local filesystem.  For each url it attempts to resolve it keeping a total
+   * Creates a thread pool for resolving urls. Reads in the url file on the
+   * local filesystem. For each url it attempts to resolve it keeping a total
    * account of the number resolved, errored, and the amount of time.
    */
   public void resolveUrls() {
@@ -102,13 +100,13 @@
 
       // create a thread pool with a fixed number of threads
       pool = Executors.newFixedThreadPool(numThreads);
-      
+
       // read in the urls file and loop through each line, one url per line
       BufferedReader buffRead = new BufferedReader(new FileReader(new File(
-        urlsFile)));
+          urlsFile)));
       String urlStr = null;
       while ((urlStr = buffRead.readLine()) != null) {
-        
+
         // spin up a resolver thread per url
         LOG.info("Starting: " + urlStr);
         pool.execute(new ResolverThread(urlStr));
@@ -118,9 +116,8 @@
       // the thread pool to give urls time to finish resolving
       buffRead.close();
       pool.awaitTermination(60, TimeUnit.SECONDS);
-    }
-    catch (Exception e) {
-      
+    } catch (Exception e) {
+
       // on error shutdown the thread pool immediately
       pool.shutdownNow();
       LOG.info(StringUtils.stringifyException(e));
@@ -128,15 +125,16 @@
 
     // shutdown the thread pool and log totals
     pool.shutdown();
-    LOG.info("Total: " + numTotal.get() + ", Resovled: "
-      + numResolved.get() + ", Errored: " + numErrored.get()
-      + ", Average Time: " + totalTime.get() / numTotal.get());
+    LOG.info("Total: " + numTotal.get() + ", Resolved: " + numResolved.get()
+        + ", Errored: " + numErrored.get() + ", Average Time: "
+        + totalTime.get() / numTotal.get());
   }
 
   /**
    * Create a new ResolveUrls with a file from the local file system.
-   *
-   * @param urlsFile The local urls file, one url per line.
+   * 
+   * @param urlsFile
+   *          The local urls file, one url per line.
    */
   public ResolveUrls(String urlsFile) {
     this(urlsFile, 100);
@@ -144,10 +142,12 @@
 
   /**
    * Create a new ResolveUrls with a urls file and a number of threads for the
-   * Thread pool.  Number of threads is 100 by default.
+   * Thread pool. Number of threads is 100 by default.
    * 
-   * @param urlsFile The local urls file, one url per line.
-   * @param numThreads The number of threads used to resolve urls in parallel.
+   * @param urlsFile
+   *          The local urls file, one url per line.
+   * @param numThreads
+   *          The number of threads used to resolve urls in parallel.
    */
   public ResolveUrls(String urlsFile, int numThreads) {
     this.urlsFile = urlsFile;
@@ -163,17 +163,17 @@
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
-    
+
     OptionBuilder.withArgName("urls");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the urls file to check");
     Option urlOpts = OptionBuilder.create("urls");
-    
+
     OptionBuilder.withArgName("numThreads");
     OptionBuilder.hasArgs();
     OptionBuilder.withDescription("the number of threads to use");
     Option numThreadOpts = OptionBuilder.create("numThreads");
-    
+
     options.addOption(helpOpts);
     options.addOption(urlOpts);
     options.addOption(numThreadOpts);
@@ -198,8 +198,7 @@
       }
       ResolveUrls resolve = new ResolveUrls(urls, numThreads);
       resolve.resolveUrls();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ResolveUrls: " + StringUtils.stringifyException(e));
     }
   }
Index: src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(working copy)
@@ -30,21 +30,22 @@
 /**
  * A input format the reads arc files.
  */
-public class ArcInputFormat
-  extends FileInputFormat<Text, BytesWritable> {
+public class ArcInputFormat extends FileInputFormat<Text, BytesWritable> {
 
   /**
    * Returns the <code>RecordReader</code> for reading the arc file.
    * 
-   * @param split The InputSplit of the arc file to process.
-   * @param job The job configuration.
-   * @param reporter The progress reporter.
+   * @param split
+   *          The InputSplit of the arc file to process.
+   * @param job
+   *          The job configuration.
+   * @param reporter
+   *          The progress reporter.
    */
   public RecordReader<Text, BytesWritable> getRecordReader(InputSplit split,
-      JobConf job, Reporter reporter)
-    throws IOException {
+      JobConf job, Reporter reporter) throws IOException {
     reporter.setStatus(split.toString());
-    return new ArcRecordReader(job, (FileSplit)split);
+    return new ArcRecordReader(job, (FileSplit) split);
   }
 
 }
Index: src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(working copy)
@@ -34,23 +34,29 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * <p>The <code>ArchRecordReader</code> class provides a record reader which 
- * reads records from arc files.</p>
+ * <p>
+ * The <code>ArchRecordReader</code> class provides a record reader which reads
+ * records from arc files.
+ * </p>
  * 
- * <p>Arc files are essentially tars of gzips.  Each record in an arc file is
- * a compressed gzip.  Multiple records are concatenated together to form a
- * complete arc.  For more information on the arc file format see
- * {@link http://www.archive.org/web/researcher/ArcFileFormat.php}.</p>
+ * <p>
+ * Arc files are essentially tars of gzips. Each record in an arc file is a
+ * compressed gzip. Multiple records are concatenated together to form a
+ * complete arc. For more information on the arc file format see
+ * <a href="http://www.archive.org/web/researcher/ArcFileFormat.php">ArcFileFormat</a>.
+ * </p>
  * 
- * <p>Arc files are used by the internet archive and grub projects.</p>
+ * <p>
+ * Arc files are used by the internet archive and grub projects.
+ * </p>
  * 
  * @see http://www.archive.org/
  * @see http://www.grub.org/
  */
-public class ArcRecordReader
-  implements RecordReader<Text, BytesWritable> {
+public class ArcRecordReader implements RecordReader<Text, BytesWritable> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ArcRecordReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ArcRecordReader.class);
 
   protected Configuration conf;
   protected long splitStart = 0;
@@ -60,23 +66,25 @@
   protected long fileLen = 0;
   protected FSDataInputStream in;
 
-  private static byte[] MAGIC = {(byte)0x1F, (byte)0x8B};
+  private static byte[] MAGIC = { (byte) 0x1F, (byte) 0x8B };
 
   /**
-   * <p>Returns true if the byte array passed matches the gzip header magic 
-   * number.</p>
+   * <p>
+   * Returns true if the byte array passed matches the gzip header magic number.
+   * </p>
    * 
-   * @param input The byte array to check.
+   * @param input
+   *          The byte array to check.
    * 
    * @return True if the byte array matches the gzip header magic number.
    */
   public static boolean isMagic(byte[] input) {
 
-	// check for null and incorrect length
+    // check for null and incorrect length
     if (input == null || input.length != MAGIC.length) {
       return false;
     }
-    
+
     // check byte by byte
     for (int i = 0; i < MAGIC.length; i++) {
       if (MAGIC[i] != input[i]) {
@@ -83,7 +91,7 @@
         return false;
       }
     }
-    
+
     // must match
     return true;
   }
@@ -91,13 +99,16 @@
   /**
    * Constructor that sets the configuration and file split.
    * 
-   * @param conf The job configuration.
-   * @param split The file split to read from.
+   * @param conf
+   *          The job configuration.
+   * @param split
+   *          The file split to read from.
    * 
-   * @throws IOException  If an IO error occurs while initializing file split.
+   * @throws IOException
+   *           If an IO error occurs while initializing file split.
    */
   public ArcRecordReader(Configuration conf, FileSplit split)
-    throws IOException {
+      throws IOException {
 
     Path path = split.getPath();
     FileSystem fs = path.getFileSystem(conf);
@@ -113,8 +124,7 @@
   /**
    * Closes the record reader resources.
    */
-  public void close()
-    throws IOException {
+  public void close() throws IOException {
     this.in.close();
   }
 
@@ -137,63 +147,64 @@
    * 
    * @return The long of the current position in the file.
    */
-  public long getPos()
-    throws IOException {
+  public long getPos() throws IOException {
     return in.getPos();
   }
 
   /**
-   * Returns the percentage of progress in processing the file.  This will be
+   * Returns the percentage of progress in processing the file. This will be
    * represented as a float from 0 to 1 with 1 being 100% completed.
    * 
    * @return The percentage of progress as a float from 0 to 1.
    */
-  public float getProgress()
-    throws IOException {
-	  
+  public float getProgress() throws IOException {
+
     // if we haven't even started
     if (splitEnd == splitStart) {
       return 0.0f;
+    } else {
+      // the progress is current pos - where we started / length of the split
+      return Math.min(1.0f, (getPos() - splitStart) / (float) splitLen);
     }
-    else {
-      // the progress is current pos - where we started  / length of the split
-      return Math.min(1.0f, (getPos() - splitStart) / (float)splitLen);
-    }
   }
 
   /**
-   * <p>Returns true if the next record in the split is read into the key and 
-   * value pair.  The key will be the arc record header and the values will be
-   * the raw content bytes of the arc record.</p>
+   * <p>
+   * Returns true if the next record in the split is read into the key and value
+   * pair. The key will be the arc record header and the values will be the raw
+   * content bytes of the arc record.
+   * </p>
    * 
-   * @param key The record key
-   * @param value The record value
+   * @param key
+   *          The record key
+   * @param value
+   *          The record value
    * 
    * @return True if the next record is read.
    * 
-   * @throws IOException If an error occurs while reading the record value.
+   * @throws IOException
+   *           If an error occurs while reading the record value.
    */
-  public boolean next(Text key, BytesWritable value)
-    throws IOException {
+  public boolean next(Text key, BytesWritable value) throws IOException {
 
     try {
-      
+
       // get the starting position on the input stream
       long startRead = in.getPos();
       byte[] magicBuffer = null;
-      
+
       // we need this loop to handle false positives in reading of gzip records
       while (true) {
-        
+
         // while we haven't passed the end of the split
         if (startRead >= splitEnd) {
           return false;
         }
-        
+
         // scanning for the gzip header
         boolean foundStart = false;
         while (!foundStart) {
-          
+
           // start at the current file position and scan for 1K at time, break
           // if there is no more to read
           startRead = in.getPos();
@@ -202,13 +213,13 @@
           if (read < 0) {
             break;
           }
-          
-          // scan the byte array for the gzip header magic number.  This happens
+
+          // scan the byte array for the gzip header magic number. This happens
           // byte by byte
           for (int i = 0; i < read - 1; i++) {
             byte[] testMagic = new byte[2];
-            System.arraycopy(magicBuffer, i, testMagic, 0, 2);            
-            if (isMagic(testMagic)) {              
+            System.arraycopy(magicBuffer, i, testMagic, 0, 2);
+            if (isMagic(testMagic)) {
               // set the next start to the current gzip header
               startRead += i;
               foundStart = true;
@@ -216,7 +227,7 @@
             }
           }
         }
-        
+
         // seek to the start of the gzip header
         in.seek(startRead);
         ByteArrayOutputStream baos = null;
@@ -223,7 +234,7 @@
         int totalRead = 0;
 
         try {
-          
+
           // read 4K of the gzip at a time putting into a byte array
           byte[] buffer = new byte[4096];
           GZIPInputStream zin = new GZIPInputStream(in);
@@ -233,9 +244,8 @@
             baos.write(buffer, 0, gzipRead);
             totalRead += gzipRead;
           }
-        }
-        catch (Exception e) {
-          
+        } catch (Exception e) {
+
           // there are times we get false positives where the gzip header exists
           // but it is not an actual gzip record, so we ignore it and start
           // over seeking
@@ -248,7 +258,7 @@
 
         // change the output stream to a byte array
         byte[] content = baos.toByteArray();
-        
+
         // the first line of the raw content in arc files is the header
         int eol = 0;
         for (int i = 0; i < content.length; i++) {
@@ -257,12 +267,12 @@
             break;
           }
         }
-        
+
         // create the header and the raw content minus the header
         String header = new String(content, 0, eol).trim();
         byte[] raw = new byte[(content.length - eol) - 1];
         System.arraycopy(content, eol + 1, raw, 0, raw.length);
-        
+
         // populate key and values with the header and raw content.
         Text keyText = key;
         keyText.set(header);
@@ -269,22 +279,21 @@
         BytesWritable valueBytes = value;
         valueBytes.set(raw, 0, raw.length);
 
-        // TODO: It would be best to start at the end of the gzip read but 
-        // the bytes read in gzip don't match raw bytes in the file so we 
-        // overshoot the next header.  With this current method you get
+        // TODO: It would be best to start at the end of the gzip read but
+        // the bytes read in gzip don't match raw bytes in the file so we
+        // overshoot the next header. With this current method you get
         // some false positives but don't miss records.
         if (startRead + 1 < fileLen) {
           in.seek(startRead + 1);
         }
-        
+
         // populated the record, now return
         return true;
       }
+    } catch (Exception e) {
+      LOG.error(StringUtils.stringifyException(e));
     }
-    catch (Exception e) {
-      LOG.equals(StringUtils.stringifyException(e));      
-    }
-    
+
     // couldn't populate the record or there is no next record to read
     return false;
   }
Index: src/java/org/apache/nutch/tools/arc/package-info.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/arc/package-info.java	(working copy)
@@ -20,3 +20,4 @@
  * <a href="http://archive.org/web/researcher/ArcFileFormat.php">Arc file format</a>.
  */
 package org.apache.nutch.tools.arc;
+
Index: src/java/org/apache/nutch/tools/package-info.java
===================================================================
--- src/java/org/apache/nutch/tools/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Miscellaneous tools.
  */
 package org.apache.nutch.tools;
+
Index: src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -46,16 +47,17 @@
 
   @Override
   public void handle(String target, HttpServletRequest req,
-          HttpServletResponse res, int dispatch) throws IOException,
-          ServletException {
-    Request base_request = (req instanceof Request) ? (Request)req : HttpConnection.getCurrentConnection().getRequest();
+      HttpServletResponse res, int dispatch) throws IOException,
+      ServletException {
+    Request base_request = (req instanceof Request) ? (Request) req
+        : HttpConnection.getCurrentConnection().getRequest();
     res.addHeader("X-TestbedHandlers", this.getClass().getSimpleName());
     handle(base_request, res, target, dispatch);
   }
-  
-  public abstract void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException;
-  
+
+  public abstract void handle(Request req, HttpServletResponse res,
+      String target, int dispatch) throws IOException, ServletException;
+
   public void addMyHeader(HttpServletResponse res, String name, String value) {
     name = "X-" + this.getClass().getSimpleName() + "-" + name;
     res.addHeader(name, value);
Index: src/java/org/apache/nutch/tools/proxy/DelayHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -41,13 +42,13 @@
 import org.mortbay.jetty.Request;
 
 public class DelayHandler extends AbstractTestbedHandler {
-  
+
   public static final long DEFAULT_DELAY = 2000;
-  
+
   private int delay;
   private boolean random;
   private Random r;
-  
+
   public DelayHandler(int delay) {
     if (delay < 0) {
       delay = -delay;
@@ -59,13 +60,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     try {
       int del = random ? r.nextInt(delay) : delay;
       Thread.sleep(del);
       addMyHeader(res, "Delay", String.valueOf(del));
     } catch (Exception e) {
-      
+
     }
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/FakeHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -44,9 +45,14 @@
 import org.mortbay.jetty.Request;
 
 public class FakeHandler extends AbstractTestbedHandler {
-  /** Create links to hosts generated from a pool of numHosts/numPages random names. */
-  public static enum Mode {UNIQUE, RANDOM};
-    
+  /**
+   * Create links to hosts generated from a pool of numHosts/numPages random
+   * names.
+   */
+  public static enum Mode {
+    UNIQUE, RANDOM
+  };
+
   int numInternalLinks;
   int numExternalLinks;
   Mode hostMode;
@@ -55,34 +61,36 @@
   AtomicLong pageSeq = new AtomicLong(0);
   int numHosts;
   int numPages;
-  
+
   Random r = new Random(1234567890L); // predictable
   Random pageR;
 
-  private static final String testA = 
-    "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n" + 
-    "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>" +
-    "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>" +
-    "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
-  private static final String testB =
-    "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>" +
-    "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>" +
-    "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
+  private static final String testA = "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n"
+      + "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>"
+      + "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>"
+      + "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
+  private static final String testB = "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>"
+      + "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>"
+      + "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
 
   /**
    * Create fake pages.
-   * @param hostMode if UNIQUE then each external outlink will use a unique host name. If
-   * RANDOM then each outlink will use a host name allocated from pool of numHosts.
-   * @param pageMode if UNIQUE then each internal outlinks will use a unique page name.
-   * if RANDOM then each outlink will use a page name allocated from pool of numPages.
+   * 
+   * @param hostMode
+   *          if UNIQUE then each external outlink will use a unique host name.
+   *          If RANDOM then each outlink will use a host name allocated from
+   *          pool of numHosts.
+   * @param pageMode
+   *          if UNIQUE then each internal outlinks will use a unique page name.
+   *          if RANDOM then each outlink will use a page name allocated from
+   *          pool of numPages.
    * @param numInternalLinks
    * @param numExternalLinks
    * @param numHosts
    * @param numPages
    */
-  public FakeHandler(Mode hostMode, Mode pageMode,
-      int numInternalLinks, int numExternalLinks,
-      int numHosts, int numPages) {
+  public FakeHandler(Mode hostMode, Mode pageMode, int numInternalLinks,
+      int numExternalLinks, int numHosts, int numPages) {
     this.numExternalLinks = numExternalLinks;
     this.numInternalLinks = numInternalLinks;
     this.numHosts = numHosts;
@@ -90,10 +98,10 @@
     this.hostMode = hostMode;
     this.pageMode = pageMode;
   }
-  
+
   @Override
-  public void handle(Request req, HttpServletResponse res, String target, 
-          int dispatch) throws IOException, ServletException {
+  public void handle(Request req, HttpServletResponse res, String target,
+      int dispatch) throws IOException, ServletException {
     HttpURI u = req.getUri();
     String uri = u.toString();
     addMyHeader(res, "URI", uri);
@@ -126,7 +134,7 @@
       for (int i = 0; i < numInternalLinks; i++) {
         String link = "<p><a href='";
         if (pageMode.equals(Mode.RANDOM)) {
-          link += pageR.nextInt (numPages) + ".html'>";
+          link += pageR.nextInt(numPages) + ".html'>";
         } else {
           if (!basePath.endsWith("/")) {
             link += "/";
@@ -157,13 +165,14 @@
       }
       // fake a link to the root URL
       link = "<p><a href='" + u.getScheme() + "://" + u.getHost();
-      if (u.getPort() != 80 && u.getPort() != -1) link += ":" + u.getPort();
+      if (u.getPort() != 80 && u.getPort() != -1)
+        link += ":" + u.getPort();
       link += "/'>site " + u.getHost() + "</a></p>\r\n";
       os.write(link.getBytes());
       os.write(testB.getBytes());
       res.flushBuffer();
     } catch (IOException ioe) {
-    }    
+    }
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -47,23 +48,27 @@
 import org.mortbay.jetty.Request;
 
 public class LogDebugHandler extends AbstractTestbedHandler implements Filter {
-  private static final Logger LOG = LoggerFactory.getLogger(LogDebugHandler.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(LogDebugHandler.class);
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
-    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n" + req.getConnection().getRequestFields());
+      int dispatch) throws IOException, ServletException {
+    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n"
+        + req.getConnection().getRequestFields());
   }
 
   @Override
   public void doFilter(ServletRequest req, ServletResponse res,
-          FilterChain chain) throws IOException, ServletException {
-    ((HttpServletResponse)res).addHeader("X-Handled-By", "AsyncProxyHandler");
-    ((HttpServletResponse)res).addHeader("X-TestbedHandlers", "AsyncProxyHandler");
+      FilterChain chain) throws IOException, ServletException {
+    ((HttpServletResponse) res).addHeader("X-Handled-By", "AsyncProxyHandler");
+    ((HttpServletResponse) res).addHeader("X-TestbedHandlers",
+        "AsyncProxyHandler");
     try {
       chain.doFilter(req, res);
     } catch (Throwable e) {
-      ((HttpServletResponse)res).sendError(HttpServletResponse.SC_BAD_REQUEST, e.toString());
+      ((HttpServletResponse) res).sendError(HttpServletResponse.SC_BAD_REQUEST,
+          e.toString());
     }
   }
 
@@ -70,6 +75,6 @@
   @Override
   public void init(FilterConfig arg0) throws ServletException {
     // TODO Auto-generated method stub
-    
+
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -43,13 +44,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     // don't pass it down the chain
     req.setHandled(true);
     res.addHeader("X-Handled-By", getClass().getSimpleName());
     addMyHeader(res, "URI", req.getUri().toString());
-    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: " +
-            req.getUri().toString());
+    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: "
+        + req.getUri().toString());
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/TestbedProxy.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -52,19 +53,32 @@
    */
   public static void main(String[] args) throws Exception {
     if (args.length == 0) {
-      System.err.println("TestbedProxy [-port <nnn>] [-forward] [-fake [...]] [-delay nnn] [-debug]");
-      System.err.println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
-      System.err.println("-forward\tif specified, requests to all unknown urls will be passed to");
-      System.err.println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
-      System.err.println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
-      System.err.println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
-      System.err.println("\nAdditional options for -fake handler (all optional):");
-      System.err.println("\t-hostMode (u | r)\tcreate unique host names, or pick random from a pool");
-      System.err.println("\t-pageMode (u | r)\tcreate unique page names, or pick random from a pool");
-      System.err.println("\t-numHosts N\ttotal number of hosts when using hostMode r");
-      System.err.println("\t-numPages N\ttotal number of pages per host when using pageMode r");
-      System.err.println("\t-intLinks N\tnumber of internal (same host) links per page");
-      System.err.println("\t-extLinks N\tnumber of external (other host) links per page");
+      System.err
+          .println("TestbedProxy [-port <nnn>] [-forward] [-fake [...]] [-delay nnn] [-debug]");
+      System.err
+          .println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
+      System.err
+          .println("-forward\tif specified, requests to all unknown urls will be passed to");
+      System.err
+          .println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
+      System.err
+          .println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
+      System.err
+          .println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
+      System.err
+          .println("\nAdditional options for -fake handler (all optional):");
+      System.err
+          .println("\t-hostMode (u | r)\tcreate unique host names, or pick random from a pool");
+      System.err
+          .println("\t-pageMode (u | r)\tcreate unique page names, or pick random from a pool");
+      System.err
+          .println("\t-numHosts N\ttotal number of hosts when using hostMode r");
+      System.err
+          .println("\t-numPages N\ttotal number of pages per host when using pageMode r");
+      System.err
+          .println("\t-intLinks N\tnumber of internal (same host) links per page");
+      System.err
+          .println("\t-extLinks N\tnumber of external (other host) links per page");
       System.err.println("\nDefaults for -fake handler:");
       System.err.println("\t-hostMode r");
       System.err.println("\t-pageMode r");
@@ -74,7 +88,7 @@
       System.err.println("\t-extLinks 5");
       System.exit(-1);
     }
-    
+
     Configuration conf = NutchConfiguration.create();
     int port = conf.getInt("batch.proxy.port", 8181);
     boolean forward = false;
@@ -88,7 +102,7 @@
     int numPages = 10000;
     int intLinks = 10;
     int extLinks = 5;
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-port")) {
         port = Integer.parseInt(args[++i]);
@@ -122,7 +136,7 @@
         System.exit(-1);
       }
     }
-    
+
     // Create the server
     Server server = new Server();
     SocketConnector connector = new SocketConnector();
@@ -129,21 +143,23 @@
     connector.setPort(port);
     connector.setResolveNames(false);
     server.addConnector(connector);
-    
+
     // create a list of handlers
     HandlerList list = new HandlerList();
     server.addHandler(list);
-    
+
     if (debug) {
       LOG.info("* Added debug handler.");
       list.addHandler(new LogDebugHandler());
     }
- 
+
     if (delay) {
-      LOG.info("* Added delay handler: " + (delayVal < 0 ? "random delay up to " + (-delayVal) : "constant delay of " + delayVal));
+      LOG.info("* Added delay handler: "
+          + (delayVal < 0 ? "random delay up to " + (-delayVal)
+              : "constant delay of " + delayVal));
       list.addHandler(new DelayHandler(delayVal));
     }
-    
+
     // XXX alternatively, we can add the DispatchHandler as the first one,
     // XXX to activate handler plugins and redirect requests to appropriate
     // XXX handlers ... Here we always load these handlers
Index: src/java/org/apache/nutch/tools/proxy/package-info.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/tools/proxy/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Proxy to {@link org.apache.nutch.tools.Benchmark benchmark} the crawler.
  */
 package org.apache.nutch.tools.proxy;
+
Index: src/java/org/apache/nutch/util/Bytes.java
===================================================================
--- src/java/org/apache/nutch/util/Bytes.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/Bytes.java	(working copy)
@@ -42,1425 +42,1412 @@
  */
 public class Bytes {
 
-	private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
+  private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
 
-	/** When we encode strings, we always specify UTF8 encoding */
-	public static final String UTF8_ENCODING = "UTF-8";
+  /** When we encode strings, we always specify UTF8 encoding */
+  public static final String UTF8_ENCODING = "UTF-8";
 
-	/**
-	 * An empty instance.
-	 */
-	public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+  /**
+   * An empty instance.
+   */
+  public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
 
-	/**
-	 * Size of boolean in bytes
-	 */
-	public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
+  /**
+   * Size of boolean in bytes
+   */
+  public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of byte in bytes
-	 */
-	public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
+  /**
+   * Size of byte in bytes
+   */
+  public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
 
-	/**
-	 * Size of char in bytes
-	 */
-	public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
+  /**
+   * Size of char in bytes
+   */
+  public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of double in bytes
-	 */
-	public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
+  /**
+   * Size of double in bytes
+   */
+  public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of float in bytes
-	 */
-	public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
+  /**
+   * Size of float in bytes
+   */
+  public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of int in bytes
-	 */
-	public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
+  /**
+   * Size of int in bytes
+   */
+  public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of long in bytes
-	 */
-	public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
+  /**
+   * Size of long in bytes
+   */
+  public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of short in bytes
-	 */
-	public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
+  /**
+   * Size of short in bytes
+   */
+  public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
 
-	/**
-	 * Estimate of size cost to pay beyond payload in jvm for instance of byte
-	 * []. Estimate based on study of jhat and jprofiler numbers.
-	 */
-	// JHat says BU is 56 bytes.
-	// SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
-	public static final int ESTIMATED_HEAP_TAX = 16;
+  /**
+   * Estimate of size cost to pay beyond payload in jvm for instance of byte [].
+   * Estimate based on study of jhat and jprofiler numbers.
+   */
+  // JHat says BU is 56 bytes.
+  // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
+  public static final int ESTIMATED_HEAP_TAX = 16;
 
-	/**
-	 * Byte array comparator class.
-	 */
-	public static class ByteArrayComparator implements RawComparator<byte[]> {
-		/**
-		 * Constructor
-		 */
-		public ByteArrayComparator() {
-			super();
-		}
+  /**
+   * Byte array comparator class.
+   */
+  public static class ByteArrayComparator implements RawComparator<byte[]> {
+    /**
+     * Constructor
+     */
+    public ByteArrayComparator() {
+      super();
+    }
 
-		public int compare(byte[] left, byte[] right) {
-			return compareTo(left, right);
-		}
+    public int compare(byte[] left, byte[] right) {
+      return compareTo(left, right);
+    }
 
-		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-			return compareTo(b1, s1, l1, b2, s2, l2);
-		}
-	}
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      return compareTo(b1, s1, l1, b2, s2, l2);
+    }
+  }
 
-	/**
-	 * Pass this to TreeMaps where byte [] are keys.
-	 */
-	public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
+  /**
+   * Pass this to TreeMaps where byte [] are keys.
+   */
+  public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
 
-	/**
-	 * Use comparing byte arrays, byte-by-byte
-	 */
-	public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
+  /**
+   * Use comparing byte arrays, byte-by-byte
+   */
+  public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
 
-	/**
-	 * Read byte-array written with a WritableableUtils.vint prefix.
-	 * 
-	 * @param in
-	 *            Input to read from.
-	 * @return byte array read off <code>in</code>
-	 * @throws IOException
-	 *             e
-	 */
-	public static byte[] readByteArray(final DataInput in) throws IOException {
-		int len = WritableUtils.readVInt(in);
-		if (len < 0) {
-			throw new NegativeArraySizeException(Integer.toString(len));
-		}
-		byte[] result = new byte[len];
-		in.readFully(result, 0, len);
-		return result;
-	}
+  /**
+   * Read byte-array written with a WritableableUtils.vint prefix.
+   * 
+   * @param in
+   *          Input to read from.
+   * @return byte array read off <code>in</code>
+   * @throws IOException
+   *           e
+   */
+  public static byte[] readByteArray(final DataInput in) throws IOException {
+    int len = WritableUtils.readVInt(in);
+    if (len < 0) {
+      throw new NegativeArraySizeException(Integer.toString(len));
+    }
+    byte[] result = new byte[len];
+    in.readFully(result, 0, len);
+    return result;
+  }
 
-	/**
-	 * Read byte-array written with a WritableableUtils.vint prefix. IOException
-	 * is converted to a RuntimeException.
-	 * 
-	 * @param in
-	 *            Input to read from.
-	 * @return byte array read off <code>in</code>
-	 */
-	public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
-		try {
-			return readByteArray(in);
-		} catch (Exception e) {
-			throw new RuntimeException(e);
-		}
-	}
+  /**
+   * Read byte-array written with a WritableableUtils.vint prefix. IOException
+   * is converted to a RuntimeException.
+   * 
+   * @param in
+   *          Input to read from.
+   * @return byte array read off <code>in</code>
+   */
+  public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
+    try {
+      return readByteArray(in);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
 
-	/**
-	 * Write byte-array with a WritableableUtils.vint prefix.
-	 * 
-	 * @param out
-	 *            output stream to be written to
-	 * @param b
-	 *            array to write
-	 * @throws IOException
-	 *             e
-	 */
-	public static void writeByteArray(final DataOutput out, final byte[] b)
-			throws IOException {
-		if (b == null) {
-			WritableUtils.writeVInt(out, 0);
-		} else {
-			writeByteArray(out, b, 0, b.length);
-		}
-	}
+  /**
+   * Write byte-array with a WritableableUtils.vint prefix.
+   * 
+   * @param out
+   *          output stream to be written to
+   * @param b
+   *          array to write
+   * @throws IOException
+   *           e
+   */
+  public static void writeByteArray(final DataOutput out, final byte[] b)
+      throws IOException {
+    if (b == null) {
+      WritableUtils.writeVInt(out, 0);
+    } else {
+      writeByteArray(out, b, 0, b.length);
+    }
+  }
 
-	/**
-	 * Write byte-array to out with a vint length prefix.
-	 * 
-	 * @param out
-	 *            output stream
-	 * @param b
-	 *            array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length past offset
-	 * @throws IOException
-	 *             e
-	 */
-	public static void writeByteArray(final DataOutput out, final byte[] b,
-			final int offset, final int length) throws IOException {
-		WritableUtils.writeVInt(out, length);
-		out.write(b, offset, length);
-	}
+  /**
+   * Write byte-array to out with a vint length prefix.
+   * 
+   * @param out
+   *          output stream
+   * @param b
+   *          array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length past offset
+   * @throws IOException
+   *           e
+   */
+  public static void writeByteArray(final DataOutput out, final byte[] b,
+      final int offset, final int length) throws IOException {
+    WritableUtils.writeVInt(out, length);
+    out.write(b, offset, length);
+  }
 
-	/**
-	 * Write byte-array from src to tgt with a vint length prefix.
-	 * 
-	 * @param tgt
-	 *            target array
-	 * @param tgtOffset
-	 *            offset into target array
-	 * @param src
-	 *            source array
-	 * @param srcOffset
-	 *            source offset
-	 * @param srcLength
-	 *            source length
-	 * @return New offset in src array.
-	 */
-	public static int writeByteArray(final byte[] tgt, final int tgtOffset,
-			final byte[] src, final int srcOffset, final int srcLength) {
-		byte[] vint = vintToBytes(srcLength);
-		System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
-		int offset = tgtOffset + vint.length;
-		System.arraycopy(src, srcOffset, tgt, offset, srcLength);
-		return offset + srcLength;
-	}
+  /**
+   * Write byte-array from src to tgt with a vint length prefix.
+   * 
+   * @param tgt
+   *          target array
+   * @param tgtOffset
+   *          offset into target array
+   * @param src
+   *          source array
+   * @param srcOffset
+   *          source offset
+   * @param srcLength
+   *          source length
+   * @return New offset in src array.
+   */
+  public static int writeByteArray(final byte[] tgt, final int tgtOffset,
+      final byte[] src, final int srcOffset, final int srcLength) {
+    byte[] vint = vintToBytes(srcLength);
+    System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
+    int offset = tgtOffset + vint.length;
+    System.arraycopy(src, srcOffset, tgt, offset, srcLength);
+    return offset + srcLength;
+  }
 
-	/**
-	 * Put bytes at the specified byte array position.
-	 * 
-	 * @param tgtBytes
-	 *            the byte array
-	 * @param tgtOffset
-	 *            position in the array
-	 * @param srcBytes
-	 *            array to write out
-	 * @param srcOffset
-	 *            source offset
-	 * @param srcLength
-	 *            source length
-	 * @return incremented offset
-	 */
-	public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
-			int srcOffset, int srcLength) {
-		System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
-		return tgtOffset + srcLength;
-	}
+  /**
+   * Put bytes at the specified byte array position.
+   * 
+   * @param tgtBytes
+   *          the byte array
+   * @param tgtOffset
+   *          position in the array
+   * @param srcBytes
+   *          array to write out
+   * @param srcOffset
+   *          source offset
+   * @param srcLength
+   *          source length
+   * @return incremented offset
+   */
+  public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
+      int srcOffset, int srcLength) {
+    System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
+    return tgtOffset + srcLength;
+  }
 
-	/**
-	 * Write a single byte out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param b
-	 *            byte to write out
-	 * @return incremented offset
-	 */
-	public static int putByte(byte[] bytes, int offset, byte b) {
-		bytes[offset] = b;
-		return offset + 1;
-	}
+  /**
+   * Write a single byte out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param b
+   *          byte to write out
+   * @return incremented offset
+   */
+  public static int putByte(byte[] bytes, int offset, byte b) {
+    bytes[offset] = b;
+    return offset + 1;
+  }
 
-	/**
-	 * Returns a new byte array, copied from the passed ByteBuffer.
-	 * 
-	 * @param bb
-	 *            A ByteBuffer
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(ByteBuffer bb) {
-		int length = bb.remaining();
-		byte[] result = new byte[length];
-		System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), result, 0, length);
-		return result;
-	}
+  /**
+   * Returns a new byte array, copied from the passed ByteBuffer.
+   * 
+   * @param bb
+   *          A ByteBuffer
+   * @return the byte array
+   */
+  public static byte[] toBytes(ByteBuffer bb) {
+    int length = bb.remaining();
+    byte[] result = new byte[length];
+    System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), result, 0,
+        length);
+    return result;
+  }
 
-    /**
-     * This method will convert utf8 encoded bytes into a string. If an
-     * UnsupportedEncodingException occurs, this method will eat it and return
-     * null instead.
-     *
-     * @param bb
-     *            Presumed UTF-8 encoded ByteBuffer.
-     * @return String made from <code>b</code> or null
-     */
-    public static String toString(ByteBuffer bb) {
-        return bb == null
-               ? null
-               : toString(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
+  /**
+   * This method will convert utf8 encoded bytes into a string. If an
+   * UnsupportedEncodingException occurs, this method will eat it and return
+   * null instead.
+   * 
+   * @param bb
+   *          Presumed UTF-8 encoded ByteBuffer.
+   * @return String made from <code>b</code> or null
+   */
+  public static String toString(ByteBuffer bb) {
+    return bb == null ? null : toString(bb.array(),
+        bb.arrayOffset() + bb.position(), bb.remaining());
+  }
+
+  /**
+   * @param b
+   *          Presumed UTF-8 encoded byte array.
+   * @return String made from <code>b</code>
+   */
+  public static String toString(final byte[] b) {
+    if (b == null) {
+      return null;
     }
+    return toString(b, 0, b.length);
+  }
 
-	/**
-	 * @param b
-	 *            Presumed UTF-8 encoded byte array.
-	 * @return String made from <code>b</code>
-	 */
-	public static String toString(final byte[] b) {
-		if (b == null) {
-			return null;
-		}
-		return toString(b, 0, b.length);
-	}
+  /**
+   * Joins two byte arrays together using a separator.
+   * 
+   * @param b1
+   *          The first byte array.
+   * @param sep
+   *          The separator to use.
+   * @param b2
+   *          The second byte array.
+   */
+  public static String toString(final byte[] b1, String sep, final byte[] b2) {
+    return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
+  }
 
-	/**
-	 * Joins two byte arrays together using a separator.
-	 * 
-	 * @param b1
-	 *            The first byte array.
-	 * @param sep
-	 *            The separator to use.
-	 * @param b2
-	 *            The second byte array.
-	 */
-	public static String toString(final byte[] b1, String sep, final byte[] b2) {
-		return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
-	}
+  /**
+   * This method will convert utf8 encoded bytes into a string. If an
+   * UnsupportedEncodingException occurs, this method will eat it and return
+   * null instead.
+   * 
+   * @param b
+   *          Presumed UTF-8 encoded byte array.
+   * @param off
+   *          offset into array
+   * @param len
+   *          length of utf-8 sequence
+   * @return String made from <code>b</code> or null
+   */
+  public static String toString(final byte[] b, int off, int len) {
+    if (b == null) {
+      return null;
+    }
+    if (len == 0) {
+      return "";
+    }
+    try {
+      return new String(b, off, len, UTF8_ENCODING);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("UTF-8 not supported?", e);
+      return null;
+    }
+  }
 
-	/**
-	 * This method will convert utf8 encoded bytes into a string. If an
-	 * UnsupportedEncodingException occurs, this method will eat it and return
-	 * null instead.
-	 * 
-	 * @param b
-	 *            Presumed UTF-8 encoded byte array.
-	 * @param off
-	 *            offset into array
-	 * @param len
-	 *            length of utf-8 sequence
-	 * @return String made from <code>b</code> or null
-	 */
-	public static String toString(final byte[] b, int off, int len) {
-		if (b == null) {
-			return null;
-		}
-		if (len == 0) {
-			return "";
-		}
-		try {
-			return new String(b, off, len, UTF8_ENCODING);
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("UTF-8 not supported?", e);
-			return null;
-		}
-	}
+  /**
+   * Write a printable representation of a ByteBuffer. Non-printable characters
+   * are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
+   * 
+   * @param bb
+   *          ByteBuffer to write out
+   * @return string output
+   */
+  public static String toStringBinary(ByteBuffer bb) {
+    return bb == null ? null : toStringBinary(bb.array(),
+        bb.arrayOffset() + bb.position(), bb.remaining());
+  }
 
-    /**
-     * Write a printable representation of a ByteBuffer. Non-printable
-     * characters are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
-     *
-     * @param bb
-     *            ByteBuffer to write out
-     * @return string output
-     */
-    public static String toStringBinary(ByteBuffer bb) {
-        return bb == null
-               ? null
-               : toStringBinary(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
+  /**
+   * Write a printable representation of a byte array.
+   * 
+   * @param b
+   *          byte array
+   * @return string
+   * @see #toStringBinary(byte[], int, int)
+   */
+  public static String toStringBinary(final byte[] b) {
+    return toStringBinary(b, 0, b.length);
+  }
+
+  /**
+   * Write a printable representation of a byte array. Non-printable characters
+   * are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
+   * 
+   * @param b
+   *          array to write out
+   * @param off
+   *          offset to start at
+   * @param len
+   *          length to write
+   * @return string output
+   */
+  public static String toStringBinary(final byte[] b, int off, int len) {
+    StringBuilder result = new StringBuilder();
+    try {
+      String first = new String(b, off, len, "ISO-8859-1");
+      for (int i = 0; i < first.length(); ++i) {
+        int ch = first.charAt(i) & 0xFF;
+        if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z')
+            || (ch >= 'a' && ch <= 'z')
+            || " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
+          result.append(first.charAt(i));
+        } else {
+          result.append(String.format("\\x%02X", ch));
+        }
+      }
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("ISO-8859-1 not supported?", e);
     }
+    return result.toString();
+  }
 
-	/**
-	 * Write a printable representation of a byte array.
-	 * 
-	 * @param b
-	 *            byte array
-	 * @return string
-	 * @see #toStringBinary(byte[], int, int)
-	 */
-	public static String toStringBinary(final byte[] b) {
-		return toStringBinary(b, 0, b.length);
-	}
+  private static boolean isHexDigit(char c) {
+    return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
+  }
 
-	/**
-	 * Write a printable representation of a byte array. Non-printable
-	 * characters are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
-	 * 
-	 * @param b
-	 *            array to write out
-	 * @param off
-	 *            offset to start at
-	 * @param len
-	 *            length to write
-	 * @return string output
-	 */
-	public static String toStringBinary(final byte[] b, int off, int len) {
-		StringBuilder result = new StringBuilder();
-		try {
-			String first = new String(b, off, len, "ISO-8859-1");
-			for (int i = 0; i < first.length(); ++i) {
-				int ch = first.charAt(i) & 0xFF;
-				if ((ch >= '0' && ch <= '9')
-						|| (ch >= 'A' && ch <= 'Z')
-						|| (ch >= 'a' && ch <= 'z')
-						|| " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
-					result.append(first.charAt(i));
-				} else {
-					result.append(String.format("\\x%02X", ch));
-				}
-			}
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("ISO-8859-1 not supported?", e);
-		}
-		return result.toString();
-	}
+  /**
+   * Takes an ASCII digit in the range A-F0-9 and returns the corresponding
+   * integer/ordinal value.
+   * 
+   * @param ch
+   *          The hex digit.
+   * @return The converted hex value as a byte.
+   */
+  public static byte toBinaryFromHex(byte ch) {
+    if (ch >= 'A' && ch <= 'F')
+      return (byte) ((byte) 10 + (byte) (ch - 'A'));
+    // else
+    return (byte) (ch - '0');
+  }
 
-	private static boolean isHexDigit(char c) {
-		return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
-	}
+  public static byte[] toBytesBinary(String in) {
+    // this may be bigger than we need, but lets be safe.
+    byte[] b = new byte[in.length()];
+    int size = 0;
+    for (int i = 0; i < in.length(); ++i) {
+      char ch = in.charAt(i);
+      if (ch == '\\') {
+        // begin hex escape:
+        char next = in.charAt(i + 1);
+        if (next != 'x') {
+          // invalid escape sequence, ignore this one.
+          b[size++] = (byte) ch;
+          continue;
+        }
+        // ok, take next 2 hex digits.
+        char hd1 = in.charAt(i + 2);
+        char hd2 = in.charAt(i + 3);
 
-	/**
-	 * Takes a ASCII digit in the range A-F0-9 and returns the corresponding
-	 * integer/ordinal value.
-	 * 
-	 * @param ch
-	 *            The hex digit.
-	 * @return The converted hex value as a byte.
-	 */
-	public static byte toBinaryFromHex(byte ch) {
-		if (ch >= 'A' && ch <= 'F')
-			return (byte) ((byte) 10 + (byte) (ch - 'A'));
-		// else
-		return (byte) (ch - '0');
-	}
+        // they need to be A-F0-9:
+        if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
+          // bogus escape code, ignore:
+          continue;
+        }
+        // turn hex ASCII digit -> number
+        byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
 
-	public static byte[] toBytesBinary(String in) {
-		// this may be bigger than we need, but lets be safe.
-		byte[] b = new byte[in.length()];
-		int size = 0;
-		for (int i = 0; i < in.length(); ++i) {
-			char ch = in.charAt(i);
-			if (ch == '\\') {
-				// begin hex escape:
-				char next = in.charAt(i + 1);
-				if (next != 'x') {
-					// invalid escape sequence, ignore this one.
-					b[size++] = (byte) ch;
-					continue;
-				}
-				// ok, take next 2 hex digits.
-				char hd1 = in.charAt(i + 2);
-				char hd2 = in.charAt(i + 3);
+        b[size++] = d;
+        i += 3; // skip 3
+      } else {
+        b[size++] = (byte) ch;
+      }
+    }
+    // resize:
+    byte[] b2 = new byte[size];
+    System.arraycopy(b, 0, b2, 0, size);
+    return b2;
+  }
 
-				// they need to be A-F0-9:
-				if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
-					// bogus escape code, ignore:
-					continue;
-				}
-				// turn hex ASCII digit -> number
-				byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
+  /**
+   * Converts a string to a UTF-8 byte array.
+   * 
+   * @param s
+   *          string
+   * @return the byte array
+   */
+  public static byte[] toBytes(String s) {
+    try {
+      return s.getBytes(UTF8_ENCODING);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("UTF-8 not supported?", e);
+      return null;
+    }
+  }
 
-				b[size++] = d;
-				i += 3; // skip 3
-			} else {
-				b[size++] = (byte) ch;
-			}
-		}
-		// resize:
-		byte[] b2 = new byte[size];
-		System.arraycopy(b, 0, b2, 0, size);
-		return b2;
-	}
+  /**
+   * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
+   * 
+   * @param b
+   *          value
+   * @return <code>b</code> encoded in a byte array.
+   */
+  public static byte[] toBytes(final boolean b) {
+    return new byte[] { b ? (byte) -1 : (byte) 0 };
+  }
 
-	/**
-	 * Converts a string to a UTF-8 byte array.
-	 * 
-	 * @param s
-	 *            string
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(String s) {
-		try {
-			return s.getBytes(UTF8_ENCODING);
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("UTF-8 not supported?", e);
-			return null;
-		}
-	}
+  /**
+   * Reverses {@link #toBytes(boolean)}
+   * 
+   * @param b
+   *          array
+   * @return True or false.
+   */
+  public static boolean toBoolean(final byte[] b) {
+    if (b.length != 1) {
+      throw new IllegalArgumentException("Array has wrong size: " + b.length);
+    }
+    return b[0] != (byte) 0;
+  }
 
-	/**
-	 * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
-	 * 
-	 * @param b
-	 *            value
-	 * @return <code>b</code> encoded in a byte array.
-	 */
-	public static byte[] toBytes(final boolean b) {
-		return new byte[] { b ? (byte) -1 : (byte) 0 };
-	}
+  /**
+   * Convert a long value to a byte array using big-endian.
+   * 
+   * @param val
+   *          value to convert
+   * @return the byte array
+   */
+  public static byte[] toBytes(long val) {
+    byte[] b = new byte[8];
+    for (int i = 7; i > 0; i--) {
+      b[i] = (byte) val;
+      val >>>= 8;
+    }
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Reverses {@link #toBytes(boolean)}
-	 * 
-	 * @param b
-	 *            array
-	 * @return True or false.
-	 */
-	public static boolean toBoolean(final byte[] b) {
-		if (b.length != 1) {
-			throw new IllegalArgumentException("Array has wrong size: "
-					+ b.length);
-		}
-		return b[0] != (byte) 0;
-	}
+  /**
+   * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
+   * 
+   * @param bytes
+   *          array
+   * @return the long value
+   */
+  public static long toLong(byte[] bytes) {
+    return toLong(bytes, 0, SIZEOF_LONG);
+  }
 
-	/**
-	 * Convert a long value to a byte array using big-endian.
-	 * 
-	 * @param val
-	 *            value to convert
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(long val) {
-		byte[] b = new byte[8];
-		for (int i = 7; i > 0; i--) {
-			b[i] = (byte) val;
-			val >>>= 8;
-		}
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Converts a byte array to a long value. Assumes there will be
+   * {@link #SIZEOF_LONG} bytes available.
+   * 
+   * @param bytes
+   *          bytes
+   * @param offset
+   *          offset
+   * @return the long value
+   */
+  public static long toLong(byte[] bytes, int offset) {
+    return toLong(bytes, offset, SIZEOF_LONG);
+  }
 
-	/**
-	 * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
-	 * 
-	 * @param bytes
-	 *            array
-	 * @return the long value
-	 */
-	public static long toLong(byte[] bytes) {
-		return toLong(bytes, 0, SIZEOF_LONG);
-	}
+  /**
+   * Converts a byte array to a long value.
+   * 
+   * @param bytes
+   *          array of bytes
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length of data (must be {@link #SIZEOF_LONG})
+   * @return the long value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_LONG} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static long toLong(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_LONG || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
+    }
+    long l = 0;
+    for (int i = offset; i < offset + length; i++) {
+      l <<= 8;
+      l ^= bytes[i] & 0xFF;
+    }
+    return l;
+  }
 
-	/**
-	 * Converts a byte array to a long value. Assumes there will be
-	 * {@link #SIZEOF_LONG} bytes available.
-	 * 
-	 * @param bytes
-	 *            bytes
-	 * @param offset
-	 *            offset
-	 * @return the long value
-	 */
-	public static long toLong(byte[] bytes, int offset) {
-		return toLong(bytes, offset, SIZEOF_LONG);
-	}
+  private static IllegalArgumentException explainWrongLengthOrOffset(
+      final byte[] bytes, final int offset, final int length,
+      final int expectedLength) {
+    String reason;
+    if (length != expectedLength) {
+      reason = "Wrong length: " + length + ", expected " + expectedLength;
+    } else {
+      reason = "offset (" + offset + ") + length (" + length + ") exceed the"
+          + " capacity of the array: " + bytes.length;
+    }
+    return new IllegalArgumentException(reason);
+  }
 
-	/**
-	 * Converts a byte array to a long value.
-	 * 
-	 * @param bytes
-	 *            array of bytes
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length of data (must be {@link #SIZEOF_LONG})
-	 * @return the long value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_LONG} or if there's not
-	 *             enough room in the array at the offset indicated.
-	 */
-	public static long toLong(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_LONG || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
-		}
-		long l = 0;
-		for (int i = offset; i < offset + length; i++) {
-			l <<= 8;
-			l ^= bytes[i] & 0xFF;
-		}
-		return l;
-	}
+  /**
+   * Put a long value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          long to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putLong(byte[] bytes, int offset, long val) {
+    if (bytes.length - offset < SIZEOF_LONG) {
+      throw new IllegalArgumentException("Not enough room to put a long at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    for (int i = offset + 7; i > offset; i--) {
+      bytes[i] = (byte) val;
+      val >>>= 8;
+    }
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_LONG;
+  }
 
-	private static IllegalArgumentException explainWrongLengthOrOffset(
-			final byte[] bytes, final int offset, final int length,
-			final int expectedLength) {
-		String reason;
-		if (length != expectedLength) {
-			reason = "Wrong length: " + length + ", expected " + expectedLength;
-		} else {
-			reason = "offset (" + offset + ") + length (" + length
-					+ ") exceed the" + " capacity of the array: "
-					+ bytes.length;
-		}
-		return new IllegalArgumentException(reason);
-	}
+  /**
+   * Presumes float encoded as IEEE 754 floating-point "single format"
+   * 
+   * @param bytes
+   *          byte array
+   * @return Float made from passed byte array.
+   */
+  public static float toFloat(byte[] bytes) {
+    return toFloat(bytes, 0);
+  }
 
-	/**
-	 * Put a long value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            long to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putLong(byte[] bytes, int offset, long val) {
-		if (bytes.length - offset < SIZEOF_LONG) {
-			throw new IllegalArgumentException(
-					"Not enough room to put a long at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		for (int i = offset + 7; i > offset; i--) {
-			bytes[i] = (byte) val;
-			val >>>= 8;
-		}
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_LONG;
-	}
+  /**
+   * Presumes float encoded as IEEE 754 floating-point "single format"
+   * 
+   * @param bytes
+   *          array to convert
+   * @param offset
+   *          offset into array
+   * @return Float made from passed byte array.
+   */
+  public static float toFloat(byte[] bytes, int offset) {
+    return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
+  }
 
-	/**
-	 * Presumes float encoded as IEEE 754 floating-point "single format"
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return Float made from passed byte array.
-	 */
-	public static float toFloat(byte[] bytes) {
-		return toFloat(bytes, 0);
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset to write to
+   * @param f
+   *          float value
+   * @return New offset in <code>bytes</code>
+   */
+  public static int putFloat(byte[] bytes, int offset, float f) {
+    return putInt(bytes, offset, Float.floatToRawIntBits(f));
+  }
 
-	/**
-	 * Presumes float encoded as IEEE 754 floating-point "single format"
-	 * 
-	 * @param bytes
-	 *            array to convert
-	 * @param offset
-	 *            offset into array
-	 * @return Float made from passed byte array.
-	 */
-	public static float toFloat(byte[] bytes, int offset) {
-		return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
-	}
+  /**
+   * @param f
+   *          float value
+   * @return the float represented as byte []
+   */
+  public static byte[] toBytes(final float f) {
+    // Encode it as int
+    return Bytes.toBytes(Float.floatToRawIntBits(f));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset to write to
-	 * @param f
-	 *            float value
-	 * @return New offset in <code>bytes</code>
-	 */
-	public static int putFloat(byte[] bytes, int offset, float f) {
-		return putInt(bytes, offset, Float.floatToRawIntBits(f));
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @return Return double made from passed bytes.
+   */
+  public static double toDouble(final byte[] bytes) {
+    return toDouble(bytes, 0);
+  }
 
-	/**
-	 * @param f
-	 *            float value
-	 * @return the float represented as byte []
-	 */
-	public static byte[] toBytes(final float f) {
-		// Encode it as int
-		return Bytes.toBytes(Float.floatToRawIntBits(f));
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset where double is
+   * @return Return double made from passed bytes.
+   */
+  public static double toDouble(final byte[] bytes, final int offset) {
+    return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @return Return double made from passed bytes.
-	 */
-	public static double toDouble(final byte[] bytes) {
-		return toDouble(bytes, 0);
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset to write to
+   * @param d
+   *          value
+   * @return New offset into array <code>bytes</code>
+   */
+  public static int putDouble(byte[] bytes, int offset, double d) {
+    return putLong(bytes, offset, Double.doubleToLongBits(d));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset where double is
-	 * @return Return double made from passed bytes.
-	 */
-	public static double toDouble(final byte[] bytes, final int offset) {
-		return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
-	}
+  /**
+   * Serialize a double as the IEEE 754 double format output. The resultant
+   * array will be 8 bytes long.
+   * 
+   * @param d
+   *          value
+   * @return the double represented as byte []
+   */
+  public static byte[] toBytes(final double d) {
+    // Encode it as a long
+    return Bytes.toBytes(Double.doubleToRawLongBits(d));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset to write to
-	 * @param d
-	 *            value
-	 * @return New offset into array <code>bytes</code>
-	 */
-	public static int putDouble(byte[] bytes, int offset, double d) {
-		return putLong(bytes, offset, Double.doubleToLongBits(d));
-	}
+  /**
+   * Convert an int value to a byte array
+   * 
+   * @param val
+   *          value
+   * @return the byte array
+   */
+  public static byte[] toBytes(int val) {
+    byte[] b = new byte[4];
+    for (int i = 3; i > 0; i--) {
+      b[i] = (byte) val;
+      val >>>= 8;
+    }
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Serialize a double as the IEEE 754 double format output. The resultant
-	 * array will be 8 bytes long.
-	 * 
-	 * @param d
-	 *            value
-	 * @return the double represented as byte []
-	 */
-	public static byte[] toBytes(final double d) {
-		// Encode it as a long
-		return Bytes.toBytes(Double.doubleToRawLongBits(d));
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @return the int value
+   */
+  public static int toInt(byte[] bytes) {
+    return toInt(bytes, 0, SIZEOF_INT);
+  }
 
-	/**
-	 * Convert an int value to a byte array
-	 * 
-	 * @param val
-	 *            value
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(int val) {
-		byte[] b = new byte[4];
-		for (int i = 3; i > 0; i--) {
-			b[i] = (byte) val;
-			val >>>= 8;
-		}
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @return the int value
+   */
+  public static int toInt(byte[] bytes, int offset) {
+    return toInt(bytes, offset, SIZEOF_INT);
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return the int value
-	 */
-	public static int toInt(byte[] bytes) {
-		return toInt(bytes, 0, SIZEOF_INT);
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length of int (has to be {@link #SIZEOF_INT})
+   * @return the int value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_INT} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static int toInt(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_INT || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
+    }
+    int n = 0;
+    for (int i = offset; i < (offset + length); i++) {
+      n <<= 8;
+      n ^= bytes[i] & 0xFF;
+    }
+    return n;
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @return the int value
-	 */
-	public static int toInt(byte[] bytes, int offset) {
-		return toInt(bytes, offset, SIZEOF_INT);
-	}
+  /**
+   * Put an int value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          int to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putInt(byte[] bytes, int offset, int val) {
+    if (bytes.length - offset < SIZEOF_INT) {
+      throw new IllegalArgumentException("Not enough room to put an int at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    for (int i = offset + 3; i > offset; i--) {
+      bytes[i] = (byte) val;
+      val >>>= 8;
+    }
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_INT;
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length of int (has to be {@link #SIZEOF_INT})
-	 * @return the int value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_INT} or if there's not enough
-	 *             room in the array at the offset indicated.
-	 */
-	public static int toInt(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_INT || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
-		}
-		int n = 0;
-		for (int i = offset; i < (offset + length); i++) {
-			n <<= 8;
-			n ^= bytes[i] & 0xFF;
-		}
-		return n;
-	}
+  /**
+   * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long.
+   * 
+   * @param val
+   *          value
+   * @return the byte array
+   */
+  public static byte[] toBytes(short val) {
+    byte[] b = new byte[SIZEOF_SHORT];
+    b[1] = (byte) val;
+    val >>= 8;
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Put an int value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            int to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putInt(byte[] bytes, int offset, int val) {
-		if (bytes.length - offset < SIZEOF_INT) {
-			throw new IllegalArgumentException(
-					"Not enough room to put an int at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		for (int i = offset + 3; i > offset; i--) {
-			bytes[i] = (byte) val;
-			val >>>= 8;
-		}
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_INT;
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @return the short value
+   */
+  public static short toShort(byte[] bytes) {
+    return toShort(bytes, 0, SIZEOF_SHORT);
+  }
 
-	/**
-	 * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes
-	 * long.
-	 * 
-	 * @param val
-	 *            value
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(short val) {
-		byte[] b = new byte[SIZEOF_SHORT];
-		b[1] = (byte) val;
-		val >>= 8;
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @return the short value
+   */
+  public static short toShort(byte[] bytes, int offset) {
+    return toShort(bytes, offset, SIZEOF_SHORT);
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return the short value
-	 */
-	public static short toShort(byte[] bytes) {
-		return toShort(bytes, 0, SIZEOF_SHORT);
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length, has to be {@link #SIZEOF_SHORT}
+   * @return the short value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_SHORT} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static short toShort(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_SHORT || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT);
+    }
+    short n = 0;
+    n ^= bytes[offset] & 0xFF;
+    n <<= 8;
+    n ^= bytes[offset + 1] & 0xFF;
+    return n;
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @return the short value
-	 */
-	public static short toShort(byte[] bytes, int offset) {
-		return toShort(bytes, offset, SIZEOF_SHORT);
-	}
+  /**
+   * Put a short value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          short to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putShort(byte[] bytes, int offset, short val) {
+    if (bytes.length - offset < SIZEOF_SHORT) {
+      throw new IllegalArgumentException("Not enough room to put a short at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    bytes[offset + 1] = (byte) val;
+    val >>= 8;
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_SHORT;
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length, has to be {@link #SIZEOF_SHORT}
-	 * @return the short value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_SHORT} or if there's not
-	 *             enough room in the array at the offset indicated.
-	 */
-	public static short toShort(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_SHORT || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length,
-					SIZEOF_SHORT);
-		}
-		short n = 0;
-		n ^= bytes[offset] & 0xFF;
-		n <<= 8;
-		n ^= bytes[offset + 1] & 0xFF;
-		return n;
-	}
+  /**
+   * @param vint
+   *          Integer to make a vint of.
+   * @return Vint as bytes array.
+   */
+  public static byte[] vintToBytes(final long vint) {
+    long i = vint;
+    int size = WritableUtils.getVIntSize(i);
+    byte[] result = new byte[size];
+    int offset = 0;
+    if (i >= -112 && i <= 127) {
+      result[offset] = (byte) i;
+      return result;
+    }
 
-	/**
-	 * Put a short value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            short to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putShort(byte[] bytes, int offset, short val) {
-		if (bytes.length - offset < SIZEOF_SHORT) {
-			throw new IllegalArgumentException(
-					"Not enough room to put a short at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		bytes[offset + 1] = (byte) val;
-		val >>= 8;
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_SHORT;
-	}
+    int len = -112;
+    if (i < 0) {
+      i ^= -1L; // take one's complement'
+      len = -120;
+    }
 
-	/**
-	 * @param vint
-	 *            Integer to make a vint of.
-	 * @return Vint as bytes array.
-	 */
-	public static byte[] vintToBytes(final long vint) {
-		long i = vint;
-		int size = WritableUtils.getVIntSize(i);
-		byte[] result = new byte[size];
-		int offset = 0;
-		if (i >= -112 && i <= 127) {
-			result[offset] = (byte) i;
-			return result;
-		}
+    long tmp = i;
+    while (tmp != 0) {
+      tmp = tmp >> 8;
+      len--;
+    }
 
-		int len = -112;
-		if (i < 0) {
-			i ^= -1L; // take one's complement'
-			len = -120;
-		}
+    result[offset++] = (byte) len;
 
-		long tmp = i;
-		while (tmp != 0) {
-			tmp = tmp >> 8;
-			len--;
-		}
+    len = (len < -120) ? -(len + 120) : -(len + 112);
 
-		result[offset++] = (byte) len;
+    for (int idx = len; idx != 0; idx--) {
+      int shiftbits = (idx - 1) * 8;
+      long mask = 0xFFL << shiftbits;
+      result[offset++] = (byte) ((i & mask) >> shiftbits);
+    }
+    return result;
+  }
 
-		len = (len < -120) ? -(len + 120) : -(len + 112);
+  /**
+   * @param buffer
+   *          buffer to convert
+   * @return vint bytes as an integer.
+   */
+  public static long bytesToVint(final byte[] buffer) {
+    int offset = 0;
+    byte firstByte = buffer[offset++];
+    int len = WritableUtils.decodeVIntSize(firstByte);
+    if (len == 1) {
+      return firstByte;
+    }
+    long i = 0;
+    for (int idx = 0; idx < len - 1; idx++) {
+      byte b = buffer[offset++];
+      i = i << 8;
+      i = i | (b & 0xFF);
+    }
+    return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+  }
 
-		for (int idx = len; idx != 0; idx--) {
-			int shiftbits = (idx - 1) * 8;
-			long mask = 0xFFL << shiftbits;
-			result[offset++] = (byte) ((i & mask) >> shiftbits);
-		}
-		return result;
-	}
+  /**
+   * Reads a zero-compressed encoded long from input stream and returns it.
+   * 
+   * @param buffer
+   *          Binary array
+   * @param offset
+   *          Offset into array at which vint begins.
+   * @throws java.io.IOException
+   *           e
+   * @return deserialized long from stream.
+   */
+  public static long readVLong(final byte[] buffer, final int offset)
+      throws IOException {
+    byte firstByte = buffer[offset];
+    int len = WritableUtils.decodeVIntSize(firstByte);
+    if (len == 1) {
+      return firstByte;
+    }
+    long i = 0;
+    for (int idx = 0; idx < len - 1; idx++) {
+      byte b = buffer[offset + 1 + idx];
+      i = i << 8;
+      i = i | (b & 0xFF);
+    }
+    return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+  }
 
-	/**
-	 * @param buffer
-	 *            buffer to convert
-	 * @return vint bytes as an integer.
-	 */
-	public static long bytesToVint(final byte[] buffer) {
-		int offset = 0;
-		byte firstByte = buffer[offset++];
-		int len = WritableUtils.decodeVIntSize(firstByte);
-		if (len == 1) {
-			return firstByte;
-		}
-		long i = 0;
-		for (int idx = 0; idx < len - 1; idx++) {
-			byte b = buffer[offset++];
-			i = i << 8;
-			i = i | (b & 0xFF);
-		}
-		return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
-	}
+  /**
+   * @param left
+   *          left operand
+   * @param right
+   *          right operand
+   * @return 0 if equal, < 0 if left is less than right, etc.
+   */
+  public static int compareTo(final byte[] left, final byte[] right) {
+    return compareTo(left, 0, left.length, right, 0, right.length);
+  }
 
-	/**
-	 * Reads a zero-compressed encoded long from input stream and returns it.
-	 * 
-	 * @param buffer
-	 *            Binary array
-	 * @param offset
-	 *            Offset into array at which vint begins.
-	 * @throws java.io.IOException
-	 *             e
-	 * @return deserialized long from stream.
-	 */
-	public static long readVLong(final byte[] buffer, final int offset)
-			throws IOException {
-		byte firstByte = buffer[offset];
-		int len = WritableUtils.decodeVIntSize(firstByte);
-		if (len == 1) {
-			return firstByte;
-		}
-		long i = 0;
-		for (int idx = 0; idx < len - 1; idx++) {
-			byte b = buffer[offset + 1 + idx];
-			i = i << 8;
-			i = i | (b & 0xFF);
-		}
-		return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
-	}
+  /**
+   * Lexographically compare two arrays.
+   * 
+   * @param buffer1
+   *          left operand
+   * @param buffer2
+   *          right operand
+   * @param offset1
+   *          Where to start comparing in the left buffer
+   * @param offset2
+   *          Where to start comparing in the right buffer
+   * @param length1
+   *          How much to compare from the left buffer
+   * @param length2
+   *          How much to compare from the right buffer
+   * @return 0 if equal, < 0 if left is less than right, etc.
+   */
+  public static int compareTo(byte[] buffer1, int offset1, int length1,
+      byte[] buffer2, int offset2, int length2) {
+    // Bring WritableComparator code local
+    int end1 = offset1 + length1;
+    int end2 = offset2 + length2;
+    for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+      int a = (buffer1[i] & 0xff);
+      int b = (buffer2[j] & 0xff);
+      if (a != b) {
+        return a - b;
+      }
+    }
+    return length1 - length2;
+  }
 
-	/**
-	 * @param left
-	 *            left operand
-	 * @param right
-	 *            right operand
-	 * @return 0 if equal, < 0 if left is less than right, etc.
-	 */
-	public static int compareTo(final byte[] left, final byte[] right) {
-		return compareTo(left, 0, left.length, right, 0, right.length);
-	}
+  /**
+   * @param left
+   *          left operand
+   * @param right
+   *          right operand
+   * @return True if equal
+   */
+  public static boolean equals(final byte[] left, final byte[] right) {
+    // Could use Arrays.equals?
+    // noinspection SimplifiableConditionalExpression
+    if (left == null && right == null) {
+      return true;
+    }
+    return (left == null || right == null || (left.length != right.length) ? false
+        : compareTo(left, right) == 0);
+  }
 
-	/**
-	 * Lexographically compare two arrays.
-	 * 
-	 * @param buffer1
-	 *            left operand
-	 * @param buffer2
-	 *            right operand
-	 * @param offset1
-	 *            Where to start comparing in the left buffer
-	 * @param offset2
-	 *            Where to start comparing in the right buffer
-	 * @param length1
-	 *            How much to compare from the left buffer
-	 * @param length2
-	 *            How much to compare from the right buffer
-	 * @return 0 if equal, < 0 if left is less than right, etc.
-	 */
-	public static int compareTo(byte[] buffer1, int offset1, int length1,
-			byte[] buffer2, int offset2, int length2) {
-		// Bring WritableComparator code local
-		int end1 = offset1 + length1;
-		int end2 = offset2 + length2;
-		for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
-			int a = (buffer1[i] & 0xff);
-			int b = (buffer2[j] & 0xff);
-			if (a != b) {
-				return a - b;
-			}
-		}
-		return length1 - length2;
-	}
+  /**
+   * Return true if the byte array on the right is a prefix of the byte array on
+   * the left.
+   */
+  public static boolean startsWith(byte[] bytes, byte[] prefix) {
+    return bytes != null && prefix != null && bytes.length >= prefix.length
+        && compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
+  }
 
-	/**
-	 * @param left
-	 *            left operand
-	 * @param right
-	 *            right operand
-	 * @return True if equal
-	 */
-	public static boolean equals(final byte[] left, final byte[] right) {
-		// Could use Arrays.equals?
-		// noinspection SimplifiableConditionalExpression
-		if (left == null && right == null) {
-			return true;
-		}
-		return (left == null || right == null || (left.length != right.length) ? false
-				: compareTo(left, right) == 0);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+   *         passed in array. This method is what
+   *         {@link org.apache.hadoop.io.Text} and
+   *         {@link ImmutableBytesWritable} use calculating hash code.
+   */
+  public static int hashCode(final byte[] b) {
+    return hashCode(b, b.length);
+  }
 
-	/**
-	 * Return true if the byte array on the right is a prefix of the byte array
-	 * on the left.
-	 */
-	public static boolean startsWith(byte[] bytes, byte[] prefix) {
-		return bytes != null
-				&& prefix != null
-				&& bytes.length >= prefix.length
-				&& compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
-	}
+  /**
+   * @param b
+   *          value
+   * @param length
+   *          length of the value
+   * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+   *         passed in array. This method is what
+   *         {@link org.apache.hadoop.io.Text} and
+   *         {@link ImmutableBytesWritable} use calculating hash code.
+   */
+  public static int hashCode(final byte[] b, final int length) {
+    return WritableComparator.hashBytes(b, length);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
-	 *         passed in array. This method is what
-	 *         {@link org.apache.hadoop.io.Text} and
-	 *         {@link ImmutableBytesWritable} use calculating hash code.
-	 */
-	public static int hashCode(final byte[] b) {
-		return hashCode(b, b.length);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @return A hash of <code>b</code> as an Integer that can be used as key in
+   *         Maps.
+   */
+  public static Integer mapKey(final byte[] b) {
+    return hashCode(b);
+  }
 
-	/**
-	 * @param b
-	 *            value
-	 * @param length
-	 *            length of the value
-	 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
-	 *         passed in array. This method is what
-	 *         {@link org.apache.hadoop.io.Text} and
-	 *         {@link ImmutableBytesWritable} use calculating hash code.
-	 */
-	public static int hashCode(final byte[] b, final int length) {
-		return WritableComparator.hashBytes(b, length);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @param length
+   *          length to hash
+   * @return A hash of <code>b</code> as an Integer that can be used as key in
+   *         Maps.
+   */
+  public static Integer mapKey(final byte[] b, final int length) {
+    return hashCode(b, length);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @return A hash of <code>b</code> as an Integer that can be used as key in
-	 *         Maps.
-	 */
-	public static Integer mapKey(final byte[] b) {
-		return hashCode(b);
-	}
+  /**
+   * @param a
+   *          lower half
+   * @param b
+   *          upper half
+   * @return New array that has a in lower half and b in upper half.
+   */
+  public static byte[] add(final byte[] a, final byte[] b) {
+    return add(a, b, EMPTY_BYTE_ARRAY);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @param length
-	 *            length to hash
-	 * @return A hash of <code>b</code> as an Integer that can be used as key in
-	 *         Maps.
-	 */
-	public static Integer mapKey(final byte[] b, final int length) {
-		return hashCode(b, length);
-	}
+  /**
+   * @param a
+   *          first third
+   * @param b
+   *          second third
+   * @param c
+   *          third third
+   * @return New array made from a, b and c
+   */
+  public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
+    byte[] result = new byte[a.length + b.length + c.length];
+    System.arraycopy(a, 0, result, 0, a.length);
+    System.arraycopy(b, 0, result, a.length, b.length);
+    System.arraycopy(c, 0, result, a.length + b.length, c.length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            lower half
-	 * @param b
-	 *            upper half
-	 * @return New array that has a in lower half and b in upper half.
-	 */
-	public static byte[] add(final byte[] a, final byte[] b) {
-		return add(a, b, EMPTY_BYTE_ARRAY);
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          amount of bytes to grab
+   * @return First <code>length</code> bytes from <code>a</code>
+   */
+  public static byte[] head(final byte[] a, final int length) {
+    if (a.length < length) {
+      return null;
+    }
+    byte[] result = new byte[length];
+    System.arraycopy(a, 0, result, 0, length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            first third
-	 * @param b
-	 *            second third
-	 * @param c
-	 *            third third
-	 * @return New array made from a, b and c
-	 */
-	public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
-		byte[] result = new byte[a.length + b.length + c.length];
-		System.arraycopy(a, 0, result, 0, a.length);
-		System.arraycopy(b, 0, result, a.length, b.length);
-		System.arraycopy(c, 0, result, a.length + b.length, c.length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          amount of bytes to snarf
+   * @return Last <code>length</code> bytes from <code>a</code>
+   */
+  public static byte[] tail(final byte[] a, final int length) {
+    if (a.length < length) {
+      return null;
+    }
+    byte[] result = new byte[length];
+    System.arraycopy(a, a.length - length, result, 0, length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            amount of bytes to grab
-	 * @return First <code>length</code> bytes from <code>a</code>
-	 */
-	public static byte[] head(final byte[] a, final int length) {
-		if (a.length < length) {
-			return null;
-		}
-		byte[] result = new byte[length];
-		System.arraycopy(a, 0, result, 0, length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          new array size
+   * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
+   */
+  public static byte[] padHead(final byte[] a, final int length) {
+    byte[] padding = new byte[length];
+    for (int i = 0; i < length; i++) {
+      padding[i] = 0;
+    }
+    return add(padding, a);
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            amount of bytes to snarf
-	 * @return Last <code>length</code> bytes from <code>a</code>
-	 */
-	public static byte[] tail(final byte[] a, final int length) {
-		if (a.length < length) {
-			return null;
-		}
-		byte[] result = new byte[length];
-		System.arraycopy(a, a.length - length, result, 0, length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          new array size
+   * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
+   */
+  public static byte[] padTail(final byte[] a, final int length) {
+    byte[] padding = new byte[length];
+    for (int i = 0; i < length; i++) {
+      padding[i] = 0;
+    }
+    return add(a, padding);
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            new array size
-	 * @return Value in <code>a</code> plus <code>length</code> prepended 0
-	 *         bytes
-	 */
-	public static byte[] padHead(final byte[] a, final int length) {
-		byte[] padding = new byte[length];
-		for (int i = 0; i < length; i++) {
-			padding[i] = 0;
-		}
-		return add(padding, a);
-	}
+  /**
+   * Split passed range. Expensive operation relatively. Uses BigInteger math.
+   * Useful splitting ranges for MapReduce jobs.
+   * 
+   * @param a
+   *          Beginning of range
+   * @param b
+   *          End of range
+   * @param num
+   *          Number of times to split range. Pass 1 if you want to split the
+   *          range in two; i.e. one split.
+   * @return Array of dividing values
+   */
+  public static byte[][] split(final byte[] a, final byte[] b, final int num) {
+    byte[][] ret = new byte[num + 2][];
+    int i = 0;
+    Iterable<byte[]> iter = iterateOnSplits(a, b, num);
+    if (iter == null)
+      return null;
+    for (byte[] elem : iter) {
+      ret[i++] = elem;
+    }
+    return ret;
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            new array size
-	 * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
-	 */
-	public static byte[] padTail(final byte[] a, final int length) {
-		byte[] padding = new byte[length];
-		for (int i = 0; i < length; i++) {
-			padding[i] = 0;
-		}
-		return add(a, padding);
-	}
+  /**
+   * Iterate over keys within the passed inclusive range.
+   */
+  public static Iterable<byte[]> iterateOnSplits(final byte[] a,
+      final byte[] b, final int num) {
+    byte[] aPadded;
+    byte[] bPadded;
+    if (a.length < b.length) {
+      aPadded = padTail(a, b.length - a.length);
+      bPadded = b;
+    } else if (b.length < a.length) {
+      aPadded = a;
+      bPadded = padTail(b, a.length - b.length);
+    } else {
+      aPadded = a;
+      bPadded = b;
+    }
+    if (compareTo(aPadded, bPadded) >= 0) {
+      throw new IllegalArgumentException("b <= a");
+    }
+    if (num <= 0) {
+      throw new IllegalArgumentException("num cannot be < 0");
+    }
+    byte[] prependHeader = { 1, 0 };
+    final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
+    final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
+    final BigInteger diffBI = stopBI.subtract(startBI);
+    final BigInteger splitsBI = BigInteger.valueOf(num + 1);
+    if (diffBI.compareTo(splitsBI) < 0) {
+      return null;
+    }
+    final BigInteger intervalBI;
+    try {
+      intervalBI = diffBI.divide(splitsBI);
+    } catch (Exception e) {
+      LOG.error("Exception caught during division", e);
+      return null;
+    }
 
-	/**
-	 * Split passed range. Expensive operation relatively. Uses BigInteger math.
-	 * Useful splitting ranges for MapReduce jobs.
-	 * 
-	 * @param a
-	 *            Beginning of range
-	 * @param b
-	 *            End of range
-	 * @param num
-	 *            Number of times to split range. Pass 1 if you want to split
-	 *            the range in two; i.e. one split.
-	 * @return Array of dividing values
-	 */
-	public static byte[][] split(final byte[] a, final byte[] b, final int num) {
-		byte[][] ret = new byte[num + 2][];
-		int i = 0;
-		Iterable<byte[]> iter = iterateOnSplits(a, b, num);
-		if (iter == null)
-			return null;
-		for (byte[] elem : iter) {
-			ret[i++] = elem;
-		}
-		return ret;
-	}
+    final Iterator<byte[]> iterator = new Iterator<byte[]>() {
+      private int i = -1;
 
-	/**
-	 * Iterate over keys within the passed inclusive range.
-	 */
-	public static Iterable<byte[]> iterateOnSplits(final byte[] a,
-			final byte[] b, final int num) {
-		byte[] aPadded;
-		byte[] bPadded;
-		if (a.length < b.length) {
-			aPadded = padTail(a, b.length - a.length);
-			bPadded = b;
-		} else if (b.length < a.length) {
-			aPadded = a;
-			bPadded = padTail(b, a.length - b.length);
-		} else {
-			aPadded = a;
-			bPadded = b;
-		}
-		if (compareTo(aPadded, bPadded) >= 0) {
-			throw new IllegalArgumentException("b <= a");
-		}
-		if (num <= 0) {
-			throw new IllegalArgumentException("num cannot be < 0");
-		}
-		byte[] prependHeader = { 1, 0 };
-		final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
-		final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
-		final BigInteger diffBI = stopBI.subtract(startBI);
-		final BigInteger splitsBI = BigInteger.valueOf(num + 1);
-		if (diffBI.compareTo(splitsBI) < 0) {
-			return null;
-		}
-		final BigInteger intervalBI;
-		try {
-			intervalBI = diffBI.divide(splitsBI);
-		} catch (Exception e) {
-			LOG.error("Exception caught during division", e);
-			return null;
-		}
+      @Override
+      public boolean hasNext() {
+        return i < num + 1;
+      }
 
-		final Iterator<byte[]> iterator = new Iterator<byte[]>() {
-			private int i = -1;
+      @Override
+      public byte[] next() {
+        i++;
+        if (i == 0)
+          return a;
+        if (i == num + 1)
+          return b;
 
-			@Override
-			public boolean hasNext() {
-				return i < num + 1;
-			}
+        BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
+            .valueOf(i)));
+        byte[] padded = curBI.toByteArray();
+        if (padded[1] == 0)
+          padded = tail(padded, padded.length - 2);
+        else
+          padded = tail(padded, padded.length - 1);
+        return padded;
+      }
 
-			@Override
-			public byte[] next() {
-				i++;
-				if (i == 0)
-					return a;
-				if (i == num + 1)
-					return b;
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
 
-				BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
-						.valueOf(i)));
-				byte[] padded = curBI.toByteArray();
-				if (padded[1] == 0)
-					padded = tail(padded, padded.length - 2);
-				else
-					padded = tail(padded, padded.length - 1);
-				return padded;
-			}
+    };
 
-			@Override
-			public void remove() {
-				throw new UnsupportedOperationException();
-			}
+    return new Iterable<byte[]>() {
+      @Override
+      public Iterator<byte[]> iterator() {
+        return iterator;
+      }
+    };
+  }
 
-		};
+  /**
+   * @param t
+   *          operands
+   * @return Array of byte arrays made from passed array of Text
+   */
+  public static byte[][] toByteArrays(final String[] t) {
+    byte[][] result = new byte[t.length][];
+    for (int i = 0; i < t.length; i++) {
+      result[i] = Bytes.toBytes(t[i]);
+    }
+    return result;
+  }
 
-		return new Iterable<byte[]>() {
-			@Override
-			public Iterator<byte[]> iterator() {
-				return iterator;
-			}
-		};
-	}
+  /**
+   * @param column
+   *          operand
+   * @return A byte array of a byte array where first and only entry is
+   *         <code>column</code>
+   */
+  public static byte[][] toByteArrays(final String column) {
+    return toByteArrays(toBytes(column));
+  }
 
-	/**
-	 * @param t
-	 *            operands
-	 * @return Array of byte arrays made from passed array of Text
-	 */
-	public static byte[][] toByteArrays(final String[] t) {
-		byte[][] result = new byte[t.length][];
-		for (int i = 0; i < t.length; i++) {
-			result[i] = Bytes.toBytes(t[i]);
-		}
-		return result;
-	}
+  /**
+   * @param column
+   *          operand
+   * @return A byte array of a byte array where first and only entry is
+   *         <code>column</code>
+   */
+  public static byte[][] toByteArrays(final byte[] column) {
+    byte[][] result = new byte[1][];
+    result[0] = column;
+    return result;
+  }
 
-	/**
-	 * @param column
-	 *            operand
-	 * @return A byte array of a byte array where first and only entry is
-	 *         <code>column</code>
-	 */
-	public static byte[][] toByteArrays(final String column) {
-		return toByteArrays(toBytes(column));
-	}
+  /**
+   * Binary search for keys in indexes.
+   * 
+   * @param arr
+   *          array of byte arrays to search for
+   * @param key
+   *          the key you want to find
+   * @param offset
+   *          the offset in the key you want to find
+   * @param length
+   *          the length of the key
+   * @param comparator
+   *          a comparator to compare.
+   * @return index of key
+   */
+  public static int binarySearch(byte[][] arr, byte[] key, int offset,
+      int length, RawComparator<byte[]> comparator) {
+    int low = 0;
+    int high = arr.length - 1;
 
-	/**
-	 * @param column
-	 *            operand
-	 * @return A byte array of a byte array where first and only entry is
-	 *         <code>column</code>
-	 */
-	public static byte[][] toByteArrays(final byte[] column) {
-		byte[][] result = new byte[1][];
-		result[0] = column;
-		return result;
-	}
+    while (low <= high) {
+      int mid = (low + high) >>> 1;
+      // we have to compare in this order, because the comparator order
+      // has special logic when the 'left side' is a special key.
+      int cmp = comparator.compare(key, offset, length, arr[mid], 0,
+          arr[mid].length);
+      // key lives above the midpoint
+      if (cmp > 0)
+        low = mid + 1;
+      // key lives below the midpoint
+      else if (cmp < 0)
+        high = mid - 1;
+      // BAM. how often does this really happen?
+      else
+        return mid;
+    }
+    return -(low + 1);
+  }
 
-	/**
-	 * Binary search for keys in indexes.
-	 * 
-	 * @param arr
-	 *            array of byte arrays to search for
-	 * @param key
-	 *            the key you want to find
-	 * @param offset
-	 *            the offset in the key you want to find
-	 * @param length
-	 *            the length of the key
-	 * @param comparator
-	 *            a comparator to compare.
-	 * @return index of key
-	 */
-	public static int binarySearch(byte[][] arr, byte[] key, int offset,
-			int length, RawComparator<byte[]> comparator) {
-		int low = 0;
-		int high = arr.length - 1;
+  /**
+   * Bytewise binary increment/deincrement of long contained in byte array on
+   * given amount.
+   * 
+   * @param value
+   *          - array of bytes containing long (length <= SIZEOF_LONG)
+   * @param amount
+   *          value will be incremented on (deincremented if negative)
+   * @return array of bytes containing incremented long (length == SIZEOF_LONG)
+   * @throws IOException
+   *           - if value.length > SIZEOF_LONG
+   */
+  public static byte[] incrementBytes(byte[] value, long amount)
+      throws IOException {
+    byte[] val = value;
+    if (val.length < SIZEOF_LONG) {
+      // Hopefully this doesn't happen too often.
+      byte[] newvalue;
+      if (val[0] < 0) {
+        newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 };
+      } else {
+        newvalue = new byte[SIZEOF_LONG];
+      }
+      System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
+          val.length);
+      val = newvalue;
+    } else if (val.length > SIZEOF_LONG) {
+      throw new IllegalArgumentException("Increment Bytes - value too big: "
+          + val.length);
+    }
+    if (amount == 0)
+      return val;
+    if (val[0] < 0) {
+      return binaryIncrementNeg(val, amount);
+    }
+    return binaryIncrementPos(val, amount);
+  }
 
-		while (low <= high) {
-			int mid = (low + high) >>> 1;
-			// we have to compare in this order, because the comparator order
-			// has special logic when the 'left side' is a special key.
-			int cmp = comparator.compare(key, offset, length, arr[mid], 0,
-					arr[mid].length);
-			// key lives above the midpoint
-			if (cmp > 0)
-				low = mid + 1;
-			// key lives below the midpoint
-			else if (cmp < 0)
-				high = mid - 1;
-			// BAM. how often does this really happen?
-			else
-				return mid;
-		}
-		return -(low + 1);
-	}
+  /* increment/deincrement for positive value */
+  private static byte[] binaryIncrementPos(byte[] value, long amount) {
+    long amo = amount;
+    int sign = 1;
+    if (amount < 0) {
+      amo = -amount;
+      sign = -1;
+    }
+    for (int i = 0; i < value.length; i++) {
+      int cur = ((int) amo % 256) * sign;
+      amo = (amo >> 8);
+      int val = value[value.length - i - 1] & 0x0ff;
+      int total = val + cur;
+      if (total > 255) {
+        amo += sign;
+        total %= 256;
+      } else if (total < 0) {
+        amo -= sign;
+      }
+      value[value.length - i - 1] = (byte) total;
+      if (amo == 0)
+        return value;
+    }
+    return value;
+  }
 
-	/**
-	 * Bytewise binary increment/deincrement of long contained in byte array on
-	 * given amount.
-	 * 
-	 * @param value
-	 *            - array of bytes containing long (length <= SIZEOF_LONG)
-	 * @param amount
-	 *            value will be incremented on (deincremented if negative)
-	 * @return array of bytes containing incremented long (length ==
-	 *         SIZEOF_LONG)
-	 * @throws IOException
-	 *             - if value.length > SIZEOF_LONG
-	 */
-	public static byte[] incrementBytes(byte[] value, long amount)
-			throws IOException {
-		byte[] val = value;
-		if (val.length < SIZEOF_LONG) {
-			// Hopefully this doesn't happen too often.
-			byte[] newvalue;
-			if (val[0] < 0) {
-				newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 };
-			} else {
-				newvalue = new byte[SIZEOF_LONG];
-			}
-			System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
-					val.length);
-			val = newvalue;
-		} else if (val.length > SIZEOF_LONG) {
-			throw new IllegalArgumentException(
-					"Increment Bytes - value too big: " + val.length);
-		}
-		if (amount == 0)
-			return val;
-		if (val[0] < 0) {
-			return binaryIncrementNeg(val, amount);
-		}
-		return binaryIncrementPos(val, amount);
-	}
+  /* increment/deincrement for negative value */
+  private static byte[] binaryIncrementNeg(byte[] value, long amount) {
+    long amo = amount;
+    int sign = 1;
+    if (amount < 0) {
+      amo = -amount;
+      sign = -1;
+    }
+    for (int i = 0; i < value.length; i++) {
+      int cur = ((int) amo % 256) * sign;
+      amo = (amo >> 8);
+      int val = ((~value[value.length - i - 1]) & 0x0ff) + 1;
+      int total = cur - val;
+      if (total >= 0) {
+        amo += sign;
+      } else if (total < -256) {
+        amo -= sign;
+        total %= 256;
+      }
+      value[value.length - i - 1] = (byte) total;
+      if (amo == 0)
+        return value;
+    }
+    return value;
+  }
 
-	/* increment/deincrement for positive value */
-	private static byte[] binaryIncrementPos(byte[] value, long amount) {
-		long amo = amount;
-		int sign = 1;
-		if (amount < 0) {
-			amo = -amount;
-			sign = -1;
-		}
-		for (int i = 0; i < value.length; i++) {
-			int cur = ((int) amo % 256) * sign;
-			amo = (amo >> 8);
-			int val = value[value.length - i - 1] & 0x0ff;
-			int total = val + cur;
-			if (total > 255) {
-				amo += sign;
-				total %= 256;
-			} else if (total < 0) {
-				amo -= sign;
-			}
-			value[value.length - i - 1] = (byte) total;
-			if (amo == 0)
-				return value;
-		}
-		return value;
-	}
-
-	/* increment/deincrement for negative value */
-	private static byte[] binaryIncrementNeg(byte[] value, long amount) {
-		long amo = amount;
-		int sign = 1;
-		if (amount < 0) {
-			amo = -amount;
-			sign = -1;
-		}
-		for (int i = 0; i < value.length; i++) {
-			int cur = ((int) amo % 256) * sign;
-			amo = (amo >> 8);
-			int val = ((~value[value.length - i - 1]) & 0x0ff) + 1;
-			int total = cur - val;
-			if (total >= 0) {
-				amo += sign;
-			} else if (total < -256) {
-				amo -= sign;
-				total %= 256;
-			}
-			value[value.length - i - 1] = (byte) total;
-			if (amo == 0)
-				return value;
-		}
-		return value;
-	}
-
 }
Index: src/java/org/apache/nutch/util/CommandRunner.java
===================================================================
--- src/java/org/apache/nutch/util/CommandRunner.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/CommandRunner.java	(working copy)
@@ -82,11 +82,11 @@
   }
 
   public void evaluate() throws IOException {
-      this.exec();
+    this.exec();
   }
 
   /**
-   *
+   * 
    * @return process exit value (return code) or -1 if timed out.
    * @throws IOException
    */
@@ -94,13 +94,11 @@
     Process proc = Runtime.getRuntime().exec(_command);
     _barrier = new CyclicBarrier(3 + ((_stdin != null) ? 1 : 0));
 
-    PullerThread so =
-      new PullerThread("STDOUT", proc.getInputStream(), _stdout);
+    PullerThread so = new PullerThread("STDOUT", proc.getInputStream(), _stdout);
     so.setDaemon(true);
     so.start();
 
-    PullerThread se =
-      new PullerThread("STDERR", proc.getErrorStream(), _stderr);
+    PullerThread se = new PullerThread("STDERR", proc.getErrorStream(), _stderr);
     se.setDaemon(true);
     se.start();
 
@@ -145,11 +143,11 @@
             Thread.sleep(1000);
             _xit = proc.exitValue();
           } catch (InterruptedException ie) {
-              if (Thread.interrupted()) {
-                  break; // stop waiting on an interrupt for this thread
-              } else {
-                  continue;
-              }
+            if (Thread.interrupted()) {
+              break; // stop waiting on an interrupt for this thread
+            } else {
+              continue;
+            }
           } catch (IllegalThreadStateException iltse) {
             continue;
           }
@@ -181,11 +179,8 @@
 
     private boolean _closeInput;
 
-    protected PumperThread(
-      String name,
-      InputStream is,
-      OutputStream os,
-      boolean closeInput) {
+    protected PumperThread(String name, InputStream is, OutputStream os,
+        boolean closeInput) {
       super(name);
       _is = is;
       _os = os;
@@ -218,12 +213,12 @@
         }
       }
       try {
-         _barrier.await();
-       } catch (InterruptedException ie) {
-         /* IGNORE */
-       } catch (BrokenBarrierException bbe) {
-         /* IGNORE */
-       }
+        _barrier.await();
+      } catch (InterruptedException ie) {
+        /* IGNORE */
+      } catch (BrokenBarrierException bbe) {
+        /* IGNORE */
+      }
     }
   }
 
@@ -269,8 +264,8 @@
 
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-timeout")) {
-        timeout = Integer.parseInt(args[++i]);;
-      } else if (i != args.length-2) {
+        timeout = Integer.parseInt(args[++i]);
+      } else if (i != args.length - 2) {
         System.err.println(usage);
         System.exit(-1);
       } else {
@@ -290,6 +285,6 @@
 
     cr.evaluate();
 
-    System.err.println("output value: "+cr.getExitValue());
+    System.err.println("output value: " + cr.getExitValue());
   }
 }
Index: src/java/org/apache/nutch/util/DeflateUtils.java
===================================================================
--- src/java/org/apache/nutch/util/DeflateUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/DeflateUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on deflated data.
+ * A collection of utility methods for working on deflated data.
  */
 public class DeflateUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(DeflateUtils.class);
   private static final int EXPECTED_COMPRESSION_RATIO = 5;
   private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an inflated copy of the input array.  If the deflated 
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns an inflated copy of the input array. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in) {
     return inflateBestEffort(in, Integer.MAX_VALUE);
@@ -48,37 +47,36 @@
 
   /**
    * Returns an inflated copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the deflated input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in, int sizeLimit) {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
     // "true" because HTTP does not provide zlib headers
     Inflater inflater = new Inflater(true);
-    InflaterInputStream inStream = 
-      new InflaterInputStream(new ByteArrayInputStream(in), inflater);
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in), inflater);
 
     byte[] buf = new byte[BUF_SIZE];
     int written = 0;
     while (true) {
       try {
-	int size = inStream.read(buf);
-	if (size <= 0) 
-	  break;
-	if ((written + size) > sizeLimit) {
-	  outStream.write(buf, 0, sizeLimit - written);
-	  break;
-	}
-	outStream.write(buf, 0, size);
-	written+= size;
+        int size = inStream.read(buf);
+        if (size <= 0)
+          break;
+        if ((written + size) > sizeLimit) {
+          outStream.write(buf, 0, sizeLimit - written);
+          break;
+        }
+        outStream.write(buf, 0, size);
+        written += size;
       } catch (Exception e) {
-	LOG.info( "Caught Exception in inflateBestEffort", e );
-	break;
+        LOG.info("Caught Exception in inflateBestEffort", e);
+        break;
       }
     }
     try {
@@ -89,23 +87,24 @@
     return outStream.toByteArray();
   }
 
-
   /**
-   * Returns an inflated copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an inflated copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] inflate(byte[] in) throws IOException {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    InflaterInputStream inStream = 
-      new InflaterInputStream ( new ByteArrayInputStream(in) );
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -118,9 +117,9 @@
    * Returns a deflated copy of the input array.
    */
   public static final byte[] deflate(byte[] in) {
-    // compress using DeflaterOutputStream 
-    ByteArrayOutputStream byteOut = 
-      new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+    // compress using DeflaterOutputStream
+    ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+        / EXPECTED_COMPRESSION_RATIO);
 
     DeflaterOutputStream outStream = new DeflaterOutputStream(byteOut);
 
Index: src/java/org/apache/nutch/util/DomUtil.java
===================================================================
--- src/java/org/apache/nutch/util/DomUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/DomUtil.java	(working copy)
@@ -38,7 +38,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 public class DomUtil {
 
   private final static Logger LOG = LoggerFactory.getLogger(DomUtil.class);
@@ -61,16 +60,16 @@
       input.setEncoding("UTF-8");
       parser.parse(input);
       int i = 0;
-      while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
-       i++;
-      } 
-      element = (Element)parser.getDocument().getChildNodes().item(i);
+      while (!(parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+        i++;
+      }
+      element = (Element) parser.getDocument().getChildNodes().item(i);
     } catch (FileNotFoundException e) {
-        LOG.error("Failed to find file: ", e);
+      LOG.error("Failed to find file: ", e);
     } catch (SAXException e) {
-        LOG.error("Failed with the following SAX exception: ", e);
+      LOG.error("Failed with the following SAX exception: ", e);
     } catch (IOException e) {
-        LOG.error("Failed with the following IOException", e);
+      LOG.error("Failed with the following IOException", e);
     }
     return element;
   }
@@ -93,13 +92,14 @@
       transformer.transform(source, result);
       os.flush();
     } catch (UnsupportedEncodingException e1) {
-        LOG.error("Failed with the following UnsupportedEncodingException: ", e1);
+      LOG.error("Failed with the following UnsupportedEncodingException: ", e1);
     } catch (IOException e1) {
-        LOG.error("Failed to with the following IOException: ", e1);
+      LOG.error("Failed with the following IOException: ", e1);
     } catch (TransformerConfigurationException e2) {
-        LOG.error("Failed with the following TransformerConfigurationException: ", e2);
+      LOG.error(
+          "Failed with the following TransformerConfigurationException: ", e2);
     } catch (TransformerException ex) {
-       LOG.error("Failed with the following TransformerException: ", ex);
+      LOG.error("Failed with the following TransformerException: ", ex);
     }
   }
 }
Index: src/java/org/apache/nutch/util/EncodingDetector.java
===================================================================
--- src/java/org/apache/nutch/util/EncodingDetector.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/EncodingDetector.java	(working copy)
@@ -35,27 +35,26 @@
 
 /**
  * A simple class for detecting character encodings.
- *
+ * 
  * <p>
  * Broadly this encompasses two functions, which are distinctly separate:
- *
+ * 
  * <ol>
- *  <li>Auto detecting a set of "clues" from input text.</li>
- *  <li>Taking a set of clues and making a "best guess" as to the
- *      "real" encoding.</li>
+ * <li>Auto detecting a set of "clues" from input text.</li>
+ * <li>Taking a set of clues and making a "best guess" as to the "real"
+ * encoding.</li>
  * </ol>
  * </p>
- *
+ * 
  * <p>
- * A caller will often have some extra information about what the
- * encoding might be (e.g. from the HTTP header or HTML meta-tags, often
- * wrong but still potentially useful clues). The types of clues may differ
- * from caller to caller. Thus a typical calling sequence is:
+ * A caller will often have some extra information about what the encoding might
+ * be (e.g. from the HTTP header or HTML meta-tags, often wrong but still
+ * potentially useful clues). The types of clues may differ from caller to
+ * caller. Thus a typical calling sequence is:
  * <ul>
- *    <li>Run step (1) to generate a set of auto-detected clues;</li>
- *    <li>Combine these clues with the caller-dependent "extra clues"
- *        available;</li>
- *    <li>Run step (2) to guess what the most probable answer is.</li>
+ * <li>Run step (1) to generate a set of auto-detected clues;</li>
+ * <li>Combine these clues with the caller-dependent "extra clues" available;</li>
+ * <li>Run step (2) to guess what the most probable answer is.</li>
  * </p>
  */
 public class EncodingDetector {
@@ -90,34 +89,32 @@
 
     @Override
     public String toString() {
-      return value + " (" + source +
-           ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
+      return value + " (" + source
+          + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
     }
 
     public boolean isEmpty() {
-      return (value==null || "".equals(value));
+      return (value == null || "".equals(value));
     }
 
     public boolean meetsThreshold() {
-      return (confidence < 0 ||
-               (minConfidence >= 0 && confidence >= minConfidence));
+      return (confidence < 0 || (minConfidence >= 0 && confidence >= minConfidence));
     }
   }
 
-  public static final Logger LOG = LoggerFactory.getLogger(EncodingDetector.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(EncodingDetector.class);
 
   public static final int NO_THRESHOLD = -1;
 
-  public static final String MIN_CONFIDENCE_KEY =
-    "encodingdetector.charset.min.confidence";
+  public static final String MIN_CONFIDENCE_KEY = "encodingdetector.charset.min.confidence";
 
-  private static final HashMap<String, String> ALIASES =
-    new HashMap<String, String>();
+  private static final HashMap<String, String> ALIASES = new HashMap<String, String>();
 
   private static final HashSet<String> DETECTABLES = new HashSet<String>();
 
   // CharsetDetector will die without a minimum amount of data.
-  private static final int MIN_LENGTH=4;
+  private static final int MIN_LENGTH = 4;
 
   static {
     DETECTABLES.add("text/html");
@@ -130,23 +127,22 @@
     DETECTABLES.add("application/rss+xml");
     DETECTABLES.add("application/xhtml+xml");
     /*
-     * the following map is not an alias mapping table, but
-     * maps character encodings which are often used in mislabelled
-     * documents to their correct encodings. For instance,
-     * there are a lot of documents labelled 'ISO-8859-1' which contain
-     * characters not covered by ISO-8859-1 but covered by windows-1252.
-     * Because windows-1252 is a superset of ISO-8859-1 (sharing code points
-     * for the common part), it's better to treat ISO-8859-1 as
-     * synonymous with windows-1252 than to reject, as invalid, documents
-     * labelled as ISO-8859-1 that have characters outside ISO-8859-1.
+     * the following map is not an alias mapping table, but maps character
+     * encodings which are often used in mislabelled documents to their correct
+     * encodings. For instance, there are a lot of documents labelled
+     * 'ISO-8859-1' which contain characters not covered by ISO-8859-1 but
+     * covered by windows-1252. Because windows-1252 is a superset of ISO-8859-1
+     * (sharing code points for the common part), it's better to treat
+     * ISO-8859-1 as synonymous with windows-1252 than to reject, as invalid,
+     * documents labelled as ISO-8859-1 that have characters outside ISO-8859-1.
      */
     ALIASES.put("ISO-8859-1", "windows-1252");
     ALIASES.put("EUC-KR", "x-windows-949");
     ALIASES.put("x-EUC-CN", "GB18030");
     ALIASES.put("GBK", "GB18030");
-    //ALIASES.put("Big5", "Big5HKSCS");
-    //ALIASES.put("TIS620", "Cp874");
-    //ALIASES.put("ISO-8859-11", "Cp874");
+    // ALIASES.put("Big5", "Big5HKSCS");
+    // ALIASES.put("TIS620", "Cp874");
+    // ALIASES.put("ISO-8859-11", "Cp874");
 
   }
 
@@ -164,16 +160,16 @@
 
   public void autoDetectClues(WebPage page, boolean filter) {
     autoDetectClues(page.getContent(), page.getContentType(),
-        parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)), filter);
+        parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)),
+        filter);
   }
 
   private void autoDetectClues(ByteBuffer dataBuffer, CharSequence typeUtf8,
-                               String encoding, boolean filter) {
+      String encoding, boolean filter) {
     int length = dataBuffer.remaining();
     String type = TableUtil.toString(typeUtf8);
 
-    if (minConfidence >= 0 && DETECTABLES.contains(type)
-        && length > MIN_LENGTH) {
+    if (minConfidence >= 0 && DETECTABLES.contains(type) && length > MIN_LENGTH) {
       CharsetMatch[] matches = null;
 
       // do all these in a try/catch; setText and detect/detectAll
@@ -214,12 +210,14 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param row URL's row
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param row
+   *          URL's row
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   public String guessEncoding(WebPage page, String defaultValue) {
@@ -230,33 +228,33 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param baseUrl Base URL
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param baseUrl
+   *          Base URL
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   private String guessEncoding(String baseUrl, String defaultValue) {
     /*
-     * This algorithm could be replaced by something more sophisticated;
-     * ideally we would gather a bunch of data on where various clues
-     * (autodetect, HTTP headers, HTML meta tags, etc.) disagree, tag each with
-     * the correct answer, and use machine learning/some statistical method
-     * to generate a better heuristic.
+     * This algorithm could be replaced by something more sophisticated; ideally
+     * we would gather a bunch of data on where various clues (autodetect, HTTP
+     * headers, HTML meta tags, etc.) disagree, tag each with the correct
+     * answer, and use machine learning/some statistical method to generate a
+     * better heuristic.
      */
 
-
     if (LOG.isTraceEnabled()) {
       findDisagreements(baseUrl, clues);
     }
 
     /*
-     * Go down the list of encoding "clues". Use a clue if:
-     *  1. Has a confidence value which meets our confidence threshold, OR
-     *  2. Doesn't meet the threshold, but is the best try,
-     *     since nothing else is available.
+     * Go down the list of encoding "clues". Use a clue if: 1. Has a confidence
+     * value which meets our confidence threshold, OR 2. Doesn't meet the
+     * threshold, but is the best try, since nothing else is available.
      */
     EncodingClue defaultClue = new EncodingClue(defaultValue, "default");
     EncodingClue bestClue = defaultClue;
@@ -268,8 +266,8 @@
       String charset = clue.value;
       if (minConfidence >= 0 && clue.confidence >= minConfidence) {
         if (LOG.isTraceEnabled()) {
-          LOG.trace(baseUrl + ": Choosing encoding: " + charset +
-                    " with confidence " + clue.confidence);
+          LOG.trace(baseUrl + ": Choosing encoding: " + charset
+              + " with confidence " + clue.confidence);
         }
         return resolveEncodingAlias(charset).toLowerCase();
       } else if (clue.confidence == NO_THRESHOLD && bestClue == defaultClue) {
@@ -289,10 +287,10 @@
   }
 
   /*
-   * Strictly for analysis, look for "disagreements." The top guess from
-   * each source is examined; if these meet the threshold and disagree, then
-   * we log the information -- useful for testing or generating training data
-   * for a better heuristic.
+   * Strictly for analysis, look for "disagreements." The top guess from each
+   * source is examined; if these meet the threshold and disagree, then we log
+   * the information -- useful for testing or generating training data for a
+   * better heuristic.
    */
   private void findDisagreements(String url, List<EncodingClue> newClues) {
     HashSet<String> valsSeen = new HashSet<String>();
@@ -314,9 +312,9 @@
     if (disagreement) {
       // dump all values in case of disagreement
       StringBuffer sb = new StringBuffer();
-      sb.append("Disagreement: "+url+"; ");
+      sb.append("Disagreement: " + url + "; ");
       for (int i = 0; i < newClues.size(); i++) {
-        if (i>0) {
+        if (i > 0) {
           sb.append(", ");
         }
         sb.append(newClues.get(i));
@@ -331,7 +329,7 @@
         return null;
       String canonicalName = new String(Charset.forName(encoding).name());
       return ALIASES.containsKey(canonicalName) ? ALIASES.get(canonicalName)
-                                                : canonicalName;
+          : canonicalName;
     } catch (Exception e) {
       LOG.warn("Invalid encoding " + encoding + " detected, using default.");
       return null;
@@ -339,13 +337,12 @@
   }
 
   /**
-   * Parse the character encoding from the specified content type header.
-   * If the content type is null, or there is no explicit character encoding,
-   * <code>null</code> is returned.
-   * <br />
-   * This method was copied from org.apache.catalina.util.RequestUtil,
-   * which is licensed under the Apache License, Version 2.0 (the "License").
-   *
+   * Parse the character encoding from the specified content type header. If the
+   * content type is null, or there is no explicit character encoding,
+   * <code>null</code> is returned. <br />
+   * This method was copied from org.apache.catalina.util.RequestUtil, which is
+   * licensed under the Apache License, Version 2.0 (the "License").
+   * 
    * @param contentTypeUtf8
    */
   public static String parseCharacterEncoding(CharSequence contentTypeUtf8) {
@@ -361,51 +358,36 @@
       encoding = encoding.substring(0, end);
     encoding = encoding.trim();
     if ((encoding.length() > 2) && (encoding.startsWith("\""))
-      && (encoding.endsWith("\"")))
+        && (encoding.endsWith("\"")))
       encoding = encoding.substring(1, encoding.length() - 1);
     return (encoding.trim());
 
   }
 
-  /*public static void main(String[] args) throws IOException {
-    if (args.length != 1) {
-      System.err.println("Usage: EncodingDetector <file>");
-      System.exit(1);
-    }
+  /*
+   * public static void main(String[] args) throws IOException { if (args.length
+   * != 1) { System.err.println("Usage: EncodingDetector <file>");
+   * System.exit(1); }
+   * 
+   * Configuration conf = NutchConfiguration.create(); EncodingDetector detector
+   * = new EncodingDetector(NutchConfiguration.create());
+   * 
+   * // do everything as bytes; don't want any conversion BufferedInputStream
+   * istr = new BufferedInputStream(new FileInputStream(args[0]));
+   * ByteArrayOutputStream ostr = new ByteArrayOutputStream(); byte[] bytes =
+   * new byte[1000]; boolean more = true; while (more) { int len =
+   * istr.read(bytes); if (len < bytes.length) { more = false; if (len > 0) {
+   * ostr.write(bytes, 0, len); } } else { ostr.write(bytes); } }
+   * 
+   * byte[] data = ostr.toByteArray(); MimeUtil mimeTypes = new MimeUtil(conf);
+   * 
+   * // make a fake Content Content content = new Content("", "", data,
+   * "text/html", new Metadata(), mimeTypes);
+   * 
+   * detector.autoDetectClues(content, true); String encoding =
+   * detector.guessEncoding(content,
+   * conf.get("parser.character.encoding.default"));
+   * System.out.println("Guessed encoding: " + encoding); }
+   */
 
-    Configuration conf = NutchConfiguration.create();
-    EncodingDetector detector =
-      new EncodingDetector(NutchConfiguration.create());
-
-    // do everything as bytes; don't want any conversion
-    BufferedInputStream istr =
-      new BufferedInputStream(new FileInputStream(args[0]));
-    ByteArrayOutputStream ostr = new ByteArrayOutputStream();
-    byte[] bytes = new byte[1000];
-    boolean more = true;
-    while (more) {
-      int len = istr.read(bytes);
-      if (len < bytes.length) {
-        more = false;
-        if (len > 0) {
-          ostr.write(bytes, 0, len);
-        }
-      } else {
-        ostr.write(bytes);
-      }
-    }
-
-    byte[] data = ostr.toByteArray();
-    MimeUtil mimeTypes = new MimeUtil(conf);
-
-    // make a fake Content
-    Content content =
-      new Content("", "", data, "text/html", new Metadata(), mimeTypes);
-
-    detector.autoDetectClues(content, true);
-    String encoding = detector.guessEncoding(content,
-        conf.get("parser.character.encoding.default"));
-    System.out.println("Guessed encoding: " + encoding);
-  }*/
-
 }
Index: src/java/org/apache/nutch/util/FSUtils.java
===================================================================
--- src/java/org/apache/nutch/util/FSUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/FSUtils.java	(working copy)
@@ -33,16 +33,20 @@
    * path. If removeOld is set to false then the old path will be set to the
    * name current.old.
    * 
-   * @param fs The FileSystem.
-   * @param current The end path, the one being replaced.
-   * @param replacement The path to replace with.
-   * @param removeOld True if we are removing the current path.
+   * @param fs
+   *          The FileSystem.
+   * @param current
+   *          The end path, the one being replaced.
+   * @param replacement
+   *          The path to replace with.
+   * @param removeOld
+   *          True if we are removing the current path.
    * 
-   * @throws IOException If an error occurs during replacement.
+   * @throws IOException
+   *           If an error occurs during replacement.
    */
   public static void replace(FileSystem fs, Path current, Path replacement,
-    boolean removeOld)
-    throws IOException {
+      boolean removeOld) throws IOException {
 
     // rename any current path to old
     Path old = new Path(current + ".old");
@@ -60,12 +64,14 @@
   /**
    * Closes a group of SequenceFile readers.
    * 
-   * @param readers The SequenceFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The SequenceFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
   public static void closeReaders(SequenceFile.Reader[] readers)
-    throws IOException {
-    
+      throws IOException {
+
     // loop through the readers, closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -80,12 +86,13 @@
   /**
    * Closes a group of MapFile readers.
    * 
-   * @param readers The MapFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The MapFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
-  public static void closeReaders(MapFile.Reader[] readers)
-    throws IOException {
-    
+  public static void closeReaders(MapFile.Reader[] readers) throws IOException {
+
     // loop through the readers closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
Index: src/java/org/apache/nutch/util/GZIPUtils.java
===================================================================
--- src/java/org/apache/nutch/util/GZIPUtils.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/GZIPUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on GZIPed data.
+ * A collection of utility methods for working on GZIPed data.
  */
 public class GZIPUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(GZIPUtils.class);
-  private static final int EXPECTED_COMPRESSION_RATIO= 5;
-  private static final int BUF_SIZE= 4096;
+  private static final int EXPECTED_COMPRESSION_RATIO = 5;
+  private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an gunzipped copy of the input array.  If the gzipped
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns a gunzipped copy of the input array. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in) {
     return unzipBestEffort(in, Integer.MAX_VALUE);
@@ -48,19 +47,18 @@
 
   /**
    * Returns an gunzipped copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the gzipped input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in, int sizeLimit) {
     try {
-      // decompress using GZIPInputStream 
-      ByteArrayOutputStream outStream = 
-        new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+      // decompress using GZIPInputStream
+      ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+          EXPECTED_COMPRESSION_RATIO * in.length);
 
-      GZIPInputStream inStream = 
-        new GZIPInputStream ( new ByteArrayInputStream(in) );
+      GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(
+          in));
 
       byte[] buf = new byte[BUF_SIZE];
       int written = 0;
@@ -67,7 +65,7 @@
       while (true) {
         try {
           int size = inStream.read(buf);
-          if (size <= 0) 
+          if (size <= 0)
             break;
           if ((written + size) > sizeLimit) {
             outStream.write(buf, 0, sizeLimit - written);
@@ -74,7 +72,7 @@
             break;
           }
           outStream.write(buf, 0, size);
-          written+= size;
+          written += size;
         } catch (Exception e) {
           break;
         }
@@ -91,23 +89,23 @@
     }
   }
 
-
   /**
-   * Returns an gunzipped copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns a gunzipped copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] unzip(byte[] in) throws IOException {
-    // decompress using GZIPInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using GZIPInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    GZIPInputStream inStream = 
-      new GZIPInputStream ( new ByteArrayInputStream(in) );
+    GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -121,11 +119,11 @@
    */
   public static final byte[] zip(byte[] in) {
     try {
-      // compress using GZIPOutputStream 
-      ByteArrayOutputStream byteOut= 
-        new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+      // compress using GZIPOutputStream
+      ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+          / EXPECTED_COMPRESSION_RATIO);
 
-      GZIPOutputStream outStream= new GZIPOutputStream(byteOut);
+      GZIPOutputStream outStream = new GZIPOutputStream(byteOut);
 
       try {
         outStream.write(in);
@@ -142,9 +140,9 @@
       return byteOut.toByteArray();
 
     } catch (IOException e) {
-        LOG.error("Failed with IOException", e);
+      LOG.error("Failed with IOException", e);
       return null;
     }
   }
-    
+
 }
Index: src/java/org/apache/nutch/util/GenericWritableConfigurable.java
===================================================================
--- src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(working copy)
@@ -24,12 +24,15 @@
 import org.apache.hadoop.io.GenericWritable;
 import org.apache.hadoop.io.Writable;
 
-/** A generic Writable wrapper that can inject Configuration to {@link Configurable}s */ 
-public abstract class GenericWritableConfigurable extends GenericWritable 
-                                                  implements Configurable {
+/**
+ * A generic Writable wrapper that can inject Configuration to
+ * {@link Configurable}s
+ */
+public abstract class GenericWritableConfigurable extends GenericWritable
+    implements Configurable {
 
   private Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
@@ -37,7 +40,7 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   @Override
   public void readFields(DataInput in) throws IOException {
     byte type = in.readByte();
@@ -50,8 +53,8 @@
     }
     Writable w = get();
     if (w instanceof Configurable)
-      ((Configurable)w).setConf(conf);
+      ((Configurable) w).setConf(conf);
     w.readFields(in);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/HadoopFSUtil.java
===================================================================
--- src/java/org/apache/nutch/util/HadoopFSUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/HadoopFSUtil.java	(working copy)
@@ -25,48 +25,48 @@
 
 public class HadoopFSUtil {
 
-    /**
-     * Returns PathFilter that passes all paths through.
-     */
-    public static PathFilter getPassAllFilter() {
-        return new PathFilter() {
-            public boolean accept(Path arg0) {
-                return true;
-            }
-        };
-    }
+  /**
+   * Returns PathFilter that passes all paths through.
+   */
+  public static PathFilter getPassAllFilter() {
+    return new PathFilter() {
+      public boolean accept(Path arg0) {
+        return true;
+      }
+    };
+  }
 
-    /**
-     * Returns PathFilter that passes directories through.
-     */
-    public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
-        return new PathFilter() {
-            public boolean accept(final Path path) {
-                try {
-                    return fs.getFileStatus(path).isDir();
-                } catch (IOException ioe) {
-                    return false;
-                }
-            }
+  /**
+   * Returns PathFilter that passes directories through.
+   */
+  public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
+    return new PathFilter() {
+      public boolean accept(final Path path) {
+        try {
+          return fs.getFileStatus(path).isDir();
+        } catch (IOException ioe) {
+          return false;
+        }
+      }
 
-        };
+    };
+  }
+
+  /**
+   * Turns an array of FileStatus into an array of Paths.
+   */
+  public static Path[] getPaths(FileStatus[] stats) {
+    if (stats == null) {
+      return null;
     }
-    
-    /**
-     * Turns an array of FileStatus into an array of Paths.
-     */
-    public static Path[] getPaths(FileStatus[] stats) {
-      if (stats == null) {
-        return null;
-      }
-      if (stats.length == 0) {
-        return new Path[0];
-      }
-      Path[] res = new Path[stats.length];
-      for (int i = 0; i < stats.length; i++) {
-        res[i] = stats[i].getPath();
-      }
-      return res;
+    if (stats.length == 0) {
+      return new Path[0];
     }
+    Path[] res = new Path[stats.length];
+    for (int i = 0; i < stats.length; i++) {
+      res[i] = stats[i].getPath();
+    }
+    return res;
+  }
 
 }
Index: src/java/org/apache/nutch/util/Histogram.java
===================================================================
--- src/java/org/apache/nutch/util/Histogram.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/Histogram.java	(working copy)
@@ -72,8 +72,8 @@
   }
 
   public List<E> sortInverseByValue() {
-    List<Map.Entry<E, HistogramEntry>> list = 
-        new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet());
+    List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, HistogramEntry>>(
+        map.entrySet());
 
     // Sort the list using an annonymous inner class implementing Comparator for
     // the compare method
@@ -93,8 +93,8 @@
   }
 
   public List<E> sortByValue() {
-    List<Map.Entry<E, HistogramEntry>> list = 
-        new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet());
+    List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, HistogramEntry>>(
+        map.entrySet());
 
     // Sort the list using an annonymous inner class implementing Comparator for
     // the compare method
Index: src/java/org/apache/nutch/util/IdentityPageReducer.java
===================================================================
--- src/java/org/apache/nutch/util/IdentityPageReducer.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/IdentityPageReducer.java	(working copy)
@@ -21,12 +21,12 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.gora.mapreduce.GoraReducer;
 
-public class IdentityPageReducer
-extends GoraReducer<String, WebPage, String, WebPage> {
+public class IdentityPageReducer extends
+    GoraReducer<String, WebPage, String, WebPage> {
 
   @Override
-  protected void reduce(String key, Iterable<WebPage> values,
-      Context context) throws IOException, InterruptedException {
+  protected void reduce(String key, Iterable<WebPage> values, Context context)
+      throws IOException, InterruptedException {
     for (WebPage page : values) {
       context.write(key, page);
     }
Index: src/java/org/apache/nutch/util/LockUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LockUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/LockUtil.java	(working copy)
@@ -28,22 +28,29 @@
  * @author Andrzej Bialecki
  */
 public class LockUtil {
-  
+
   /**
    * Create a lock file.
-   * @param fs filesystem
-   * @param lockFile name of the lock file
-   * @param accept if true, and the target file exists, consider it valid. If false
-   * and the target file exists, throw an IOException.
-   * @throws IOException if accept is false, and the target file already exists,
-   * or if it's a directory.
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          name of the lock file
+   * @param accept
+   *          if true, and the target file exists, consider it valid. If false
+   *          and the target file exists, throw an IOException.
+   * @throws IOException
+   *           if accept is false, and the target file already exists, or if
+   *           it's a directory.
    */
-  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) throws IOException {
+  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept)
+      throws IOException {
     if (fs.exists(lockFile)) {
-      if(!accept)
+      if (!accept)
         throw new IOException("lock file " + lockFile + " already exists.");
       if (fs.getFileStatus(lockFile).isDir())
-        throw new IOException("lock file " + lockFile + " already exists and is a directory.");
+        throw new IOException("lock file " + lockFile
+            + " already exists and is a directory.");
       // do nothing - the file already exists.
     } else {
       // make sure parents exist
@@ -55,16 +62,23 @@
   /**
    * Remove lock file. NOTE: applications enforce the semantics of this file -
    * this method simply removes any file with a given name.
-   * @param fs filesystem
-   * @param lockFile lock file name
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          lock file name
    * @return false, if the lock file doesn't exist. True, if it existed and was
-   * successfully removed.
-   * @throws IOException if lock file exists but it is a directory.
+   *         successfully removed.
+   * @throws IOException
+   *           if lock file exists but it is a directory.
    */
-  public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
-    if (!fs.exists(lockFile)) return false;
+  public static boolean removeLockFile(FileSystem fs, Path lockFile)
+      throws IOException {
+    if (!fs.exists(lockFile))
+      return false;
     if (fs.getFileStatus(lockFile).isDir())
-      throw new IOException("lock file " + lockFile + " exists but is a directory!");
+      throw new IOException("lock file " + lockFile
+          + " exists but is a directory!");
     return fs.delete(lockFile, false);
   }
 }
Index: src/java/org/apache/nutch/util/MimeUtil.java
===================================================================
--- src/java/org/apache/nutch/util/MimeUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/MimeUtil.java	(working copy)
@@ -37,7 +37,7 @@
 // Slf4j logging imports
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
- 
+
 // imported for Javadoc
 import org.apache.nutch.protocol.ProtocolOutput;
 
@@ -45,12 +45,12 @@
  * @author mattmann
  * @since NUTCH-608
  * 
- * <p>
- * This is a facade class to insulate Nutch from its underlying Mime Type
- * substrate library, <a href="http://incubator.apache.org/tika/">Apache Tika</a>.
- * Any mime handling code should be placed in this utility class, and hidden
- * from the Nutch classes that rely on it.
- * </p>
+ *        <p>
+ *        This is a facade class to insulate Nutch from its underlying Mime Type
+ *        substrate library, <a href="http://incubator.apache.org/tika/">Apache
+ *        Tika</a>. Any mime handling code should be placed in this utility
+ *        class, and hidden from the Nutch classes that rely on it.
+ *        </p>
  */
 public final class MimeUtil {
 
@@ -66,7 +66,8 @@
   private boolean mimeMagic;
 
   /* our log stream */
-  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());
+  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class
+      .getName());
 
   public MimeUtil(Configuration conf) {
     tika = new Tika();
@@ -75,25 +76,26 @@
         .getName());
     if (mimeTypez == null) {
       try {
-          String customMimeTypeFile = conf.get("mime.types.file");
-          if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
-              try {
-              mimeTypez = MimeTypesFactory.create(conf
-                      .getConfResourceAsInputStream(customMimeTypeFile));
-              }
-              catch (Exception e){
-                  LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default");
-              }
+        String customMimeTypeFile = conf.get("mime.types.file");
+        if (customMimeTypeFile != null
+            && customMimeTypeFile.equals("") == false) {
+          try {
+            mimeTypez = MimeTypesFactory.create(conf
+                .getConfResourceAsInputStream(customMimeTypeFile));
+          } catch (Exception e) {
+            LOG.error("Can't load mime.types.file : " + customMimeTypeFile
+                + " using Tika's default");
           }
-          if (mimeTypez==null)
-              mimeTypez = MimeTypes.getDefaultMimeTypes();
+        }
+        if (mimeTypez == null)
+          mimeTypez = MimeTypes.getDefaultMimeTypes();
       } catch (Exception e) {
-        LOG.error("Exception in MimeUtil "+e.getMessage());
+        LOG.error("Exception in MimeUtil " + e.getMessage());
         throw new RuntimeException(e);
       }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
     }
-    
+
     this.mimeTypes = mimeTypez;
     this.mimeMagic = conf.getBoolean("mime.type.magic", true);
   }
@@ -129,14 +131,13 @@
   /**
    * A facade interface to trying all the possible mime type resolution
    * strategies available within Tika. First, the mime type provided in
-   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
-   * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType}
-   * is found, then that mime type is used, otherwise URL resolution is
-   * used to try and determine the mime type. However, if
-   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
-   * then mime type magic resolution is used to try and obtain a
-   * better-than-the-default approximation of the {@link MimeType}.
+   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. Then
+   * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes}
+   * registry, by its cleaned name. If the {@link MimeType} is found, then that
+   * mime type is used, otherwise URL resolution is used to try and determine
+   * the mime type. However, if <code>mime.type.magic</code> is enabled in
+   * {@link NutchConfiguration}, then mime type magic resolution is used to try
+   * and obtain a better-than-the-default approximation of the {@link MimeType}.
    * 
    * @param typeName
    *          The original mime type, returned from a {@link ProtocolOutput}.
@@ -177,7 +178,7 @@
         throw new RuntimeException(e);
       }
     } else {
-        retType = type.getName();
+      retType = type.getName();
     }
 
     // if magic is enabled use mime magic to guess if the mime type returned
@@ -195,14 +196,15 @@
         InputStream stream = TikaInputStream.get(data);
         try {
           magicType = tika.detect(stream, tikaMeta);
-       } finally {
-         stream.close();
+        } finally {
+          stream.close();
         }
-      } catch (IOException ignore) {}
+      } catch (IOException ignore) {
+      }
 
       if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM)
-          && !magicType.equals(MimeTypes.PLAIN_TEXT)
-          && retType != null && !retType.equals(magicType)) {
+          && !magicType.equals(MimeTypes.PLAIN_TEXT) && retType != null
+          && !retType.equals(magicType)) {
 
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType
@@ -225,12 +227,12 @@
   /**
    * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(String)}
    * method.
-   *
+   * 
    * @param url
    *          A string representation of the document {@link URL} to sense the
    *          {@link MimeType} for.
-   * @return An appropriate {@link MimeType}, identified from the given
-   *         Document url in string form.
+   * @return An appropriate {@link MimeType}, identified from the given Document
+   *         url in string form.
    */
   public String getMimeType(String url) {
     return tika.detect(url);
@@ -239,11 +241,11 @@
   /**
    * A facade interface to Tika's underlying {@link MimeTypes#forName(String)}
    * method.
-   *
+   * 
    * @param name
    *          The name of a valid {@link MimeType} in the Tika mime registry.
-   * @return The object representation of the {@link MimeType}, if it exists,
-   *         or null otherwise.
+   * @return The object representation of the {@link MimeType}, if it exists, or
+   *         null otherwise.
    */
   public String forName(String name) {
     try {
@@ -258,7 +260,7 @@
   /**
    * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(File)}
    * method.
-   *
+   * 
    * @param f
    *          The {@link File} to sense the {@link MimeType} for.
    * @return The {@link MimeType} of the given {@link File}, or null if it
Index: src/java/org/apache/nutch/util/NodeWalker.java
===================================================================
--- src/java/org/apache/nutch/util/NodeWalker.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NodeWalker.java	(working copy)
@@ -22,13 +22,17 @@
 import org.w3c.dom.NodeList;
 
 /**
- * <p>A utility class that allows the walking of any DOM tree using a stack 
- * instead of recursion.  As the node tree is walked the next node is popped
- * off of the stack and all of its children are automatically added to the 
- * stack to be called in tree order.</p>
+ * <p>
+ * A utility class that allows the walking of any DOM tree using a stack instead
+ * of recursion. As the node tree is walked the next node is popped off of the
+ * stack and all of its children are automatically added to the stack to be
+ * called in tree order.
+ * </p>
  * 
- * <p>Currently this class is not thread safe.  It is assumed that only one
- * thread will be accessing the <code>NodeWalker</code> at any given time.</p>
+ * <p>
+ * Currently this class is not thread safe. It is assumed that only one thread
+ * will be accessing the <code>NodeWalker</code> at any given time.
+ * </p>
  */
 public class NodeWalker {
 
@@ -36,7 +40,7 @@
   private Node currentNode;
   private NodeList currentChildren;
   private Stack<Node> nodes;
-  
+
   /**
    * Starts the <code>Node</code> tree from the root node.
    * 
@@ -47,53 +51,58 @@
     nodes = new Stack<Node>();
     nodes.add(rootNode);
   }
-  
+
   /**
-   * <p>Returns the next <code>Node</code> on the stack and pushes all of its
-   * children onto the stack, allowing us to walk the node tree without the
-   * use of recursion.  If there are no more nodes on the stack then null is
-   * returned.</p>
+   * <p>
+   * Returns the next <code>Node</code> on the stack and pushes all of its
+   * children onto the stack, allowing us to walk the node tree without the use
+   * of recursion. If there are no more nodes on the stack then null is
+   * returned.
+   * </p>
    * 
-   * @return Node The next <code>Node</code> on the stack or null if there
-   * isn't a next node.
+   * @return Node The next <code>Node</code> on the stack or null if there isn't
+   *         a next node.
    */
   public Node nextNode() {
-    
+
     // if no next node return null
     if (!hasNext()) {
       return null;
     }
-    
+
     // pop the next node off of the stack and push all of its children onto
     // the stack
     currentNode = nodes.pop();
     currentChildren = currentNode.getChildNodes();
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
+
     // put the children node on the stack in first to last order
     for (int i = childLen - 1; i >= 0; i--) {
       nodes.add(currentChildren.item(i));
     }
-    
+
     return currentNode;
   }
-  
+
   /**
-   * <p>Skips over and removes from the node stack the children of the last
-   * node.  When getting a next node from the walker, that node's children 
-   * are automatically added to the stack.  You can call this method to remove
-   * those children from the stack.</p>
+   * <p>
+   * Skips over and removes from the node stack the children of the last node.
+   * When getting a next node from the walker, that node's children are
+   * automatically added to the stack. You can call this method to remove those
+   * children from the stack.
+   * </p>
    * 
-   * <p>This is useful when you don't want to process deeper into the 
-   * current path of the node tree but you want to continue processing sibling
-   * nodes.</p>
-   *
+   * <p>
+   * This is useful when you don't want to process deeper into the current path
+   * of the node tree but you want to continue processing sibling nodes.
+   * </p>
+   * 
    */
   public void skipChildren() {
-    
+
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
-    for (int i = 0 ; i < childLen ; i++) {
+
+    for (int i = 0; i < childLen; i++) {
       Node child = nodes.peek();
       if (child.equals(currentChildren.item(i))) {
         nodes.pop();
@@ -100,16 +109,19 @@
       }
     }
   }
-  
+
   /**
    * Return the current node.
+   * 
    * @return Node
    */
   public Node getCurrentNode() {
     return currentNode;
   }
-  
-  /**   * Returns true if there are more nodes on the current stack.
+
+  /**
+   * * Returns true if there are more nodes on the current stack.
+   * 
    * @return
    */
   public boolean hasNext() {
Index: src/java/org/apache/nutch/util/NutchConfiguration.java
===================================================================
--- src/java/org/apache/nutch/util/NutchConfiguration.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchConfiguration.java	(working copy)
@@ -23,28 +23,32 @@
 
 import org.apache.hadoop.conf.Configuration;
 
-
-/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific
- * resources.  */
+/**
+ * Utility to create Hadoop {@link Configuration}s that include Nutch-specific
+ * resources.
+ */
 public class NutchConfiguration {
   public static final String UUID_KEY = "nutch.conf.uuid";
-  
-  private NutchConfiguration() {}                 // singleton
-  
+
+  private NutchConfiguration() {
+  } // singleton
+
   /*
-   * Configuration.hashCode() doesn't return values that
-   * correspond to a unique set of parameters. This is a workaround
-   * so that we can track instances of Configuration created by Nutch.
+   * Configuration.hashCode() doesn't return values that correspond to a unique
+   * set of parameters. This is a workaround so that we can track instances of
+   * Configuration created by Nutch.
    */
   private static void setUUID(Configuration conf) {
     UUID uuid = UUID.randomUUID();
     conf.set(UUID_KEY, uuid.toString());
   }
-  
+
   /**
-   * Retrieve a Nutch UUID of this configuration object, or null
-   * if the configuration was created elsewhere.
-   * @param conf configuration instance
+   * Retrieve a Nutch UUID of this configuration object, or null if the
+   * configuration was created elsewhere.
+   * 
+   * @param conf
+   *          configuration instance
    * @return uuid or null
    */
   public static String getUUID(Configuration conf) {
@@ -51,9 +55,10 @@
     return conf.get(UUID_KEY);
   }
 
-  /** Create a {@link Configuration} for Nutch. This will load the standard
-   * Nutch resources, <code>nutch-default.xml</code> and
-   * <code>nutch-site.xml</code> overrides.
+  /**
+   * Create a {@link Configuration} for Nutch. This will load the standard Nutch
+   * resources, <code>nutch-default.xml</code> and <code>nutch-site.xml</code>
+   * overrides.
    */
   public static Configuration create() {
     Configuration conf = new Configuration();
@@ -61,14 +66,19 @@
     addNutchResources(conf);
     return conf;
   }
-  
-  /** Create a {@link Configuration} from supplied properties.
-   * @param addNutchResources if true, then first <code>nutch-default.xml</code>,
-   * and then <code>nutch-site.xml</code> will be loaded prior to applying the
-   * properties. Otherwise these resources won't be used.
-   * @param nutchProperties a set of properties to define (or override)
+
+  /**
+   * Create a {@link Configuration} from supplied properties.
+   * 
+   * @param addNutchResources
+   *          if true, then first <code>nutch-default.xml</code>, and then
+   *          <code>nutch-site.xml</code> will be loaded prior to applying the
+   *          properties. Otherwise these resources won't be used.
+   * @param nutchProperties
+   *          a set of properties to define (or override)
    */
-  public static Configuration create(boolean addNutchResources, Properties nutchProperties) {
+  public static Configuration create(boolean addNutchResources,
+      Properties nutchProperties) {
     Configuration conf = new Configuration();
     setUUID(conf);
     if (addNutchResources) {
@@ -83,8 +93,8 @@
   /**
    * Add the standard Nutch resources to {@link Configuration}.
    * 
-   * @param conf               Configuration object to which
-   *                           configuration is to be added.
+   * @param conf
+   *          Configuration object to which configuration is to be added.
    */
   private static Configuration addNutchResources(Configuration conf) {
     conf.addResource("nutch-default.xml");
@@ -92,4 +102,3 @@
     return conf;
   }
 }
-
Index: src/java/org/apache/nutch/util/NutchJob.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJob.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchJob.java	(working copy)
@@ -35,10 +35,10 @@
 
   public NutchJob(Configuration conf, String jobName) throws IOException {
     super(conf, jobName);
-    //prefix jobName with crawlId if not empty
+    // prefix jobName with crawlId if not empty
     String crawlId = conf.get("storage.crawl.id");
     if (!StringUtils.isEmpty(crawlId)) {
-      jobName = "["+crawlId+"]"+jobName;
+      jobName = "[" + crawlId + "]" + jobName;
       setJobName(jobName);
     }
     setJarByClass(this.getClass());
Index: src/java/org/apache/nutch/util/NutchJobConf.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJobConf.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchJobConf.java	(working copy)
@@ -20,7 +20,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
 
-/** A {@link JobConf} for Nutch jobs.  */
+/** A {@link JobConf} for Nutch jobs. */
 public class NutchJobConf extends JobConf {
 
   public NutchJobConf(Configuration conf) {
@@ -28,4 +28,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/util/NutchTool.java
===================================================================
--- src/java/org/apache/nutch/util/NutchTool.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/NutchTool.java	(working copy)
@@ -26,19 +26,20 @@
 import org.apache.nutch.metadata.Nutch;
 
 public abstract class NutchTool extends Configured {
-  
-  protected HashMap<String,Object> results = new HashMap<String,Object>();
-  protected Map<String,Object> status =
-    Collections.synchronizedMap(new HashMap<String,Object>());
+
+  protected HashMap<String, Object> results = new HashMap<String, Object>();
+  protected Map<String, Object> status = Collections
+      .synchronizedMap(new HashMap<String, Object>());
   protected Job currentJob;
   protected int numJobs;
   protected int currentJobNum;
-  
-  /** Runs the tool, using a map of arguments.
-   * May return results, or null.
+
+  /**
+   * Runs the tool, using a map of arguments. May return results, or null.
    */
-  public abstract Map<String,Object> run(Map<String,Object> args) throws Exception;
-  
+  public abstract Map<String, Object> run(Map<String, Object> args)
+      throws Exception;
+
   /** Returns relative progress of the tool, a float in range [0,1]. */
   public float getProgress() {
     float res = 0;
@@ -55,29 +56,31 @@
     }
     // take into account multiple jobs
     if (numJobs > 1) {
-      res = (currentJobNum + res) / (float)numJobs;
+      res = (currentJobNum + res) / (float) numJobs;
     }
     status.put(Nutch.STAT_PROGRESS, res);
     return res;
   }
-  
-  
+
   /** Returns current status of the running tool. */
-  public Map<String,Object> getStatus() {
+  public Map<String, Object> getStatus() {
     return status;
   }
-  
-  /** Stop the job with the possibility to resume. Subclasses should
-   * override this, since by default it calls {@link #killJob()}.
+
+  /**
+   * Stop the job with the possibility to resume. Subclasses should override
+   * this, since by default it calls {@link #killJob()}.
+   * 
    * @return true if succeeded, false otherwise
    */
   public boolean stopJob() throws Exception {
     return killJob();
   }
-  
+
   /**
-   * Kill the job immediately. Clients should assume that any results
-   * that the job produced so far are in inconsistent state or missing.
+   * Kill the job immediately. Clients should assume that any results that the
+   * job produced so far are in inconsistent state or missing.
+   * 
    * @return true if succeeded, false otherwise.
    * @throws Exception
    */
Index: src/java/org/apache/nutch/util/ObjectCache.java
===================================================================
--- src/java/org/apache/nutch/util/ObjectCache.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/ObjectCache.java	(working copy)
@@ -24,35 +24,33 @@
 import org.apache.hadoop.conf.Configuration;
 
 public class ObjectCache {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class);
-  
-  private static final WeakHashMap<Configuration, ObjectCache> CACHE = 
-    new WeakHashMap<Configuration, ObjectCache>();
 
+  private static final WeakHashMap<Configuration, ObjectCache> CACHE = new WeakHashMap<Configuration, ObjectCache>();
+
   private final HashMap<String, Object> objectMap;
-  
+
   private ObjectCache() {
     objectMap = new HashMap<String, Object>();
   }
-  
+
   public static ObjectCache get(Configuration conf) {
     ObjectCache objectCache = CACHE.get(conf);
     if (objectCache == null) {
-      LOG.debug("No object cache found for conf=" + conf 
-                  + ", instantiating a new object cache");
+      LOG.debug("No object cache found for conf=" + conf
+          + ", instantiating a new object cache");
       objectCache = new ObjectCache();
       CACHE.put(conf, objectCache);
     }
     return objectCache;
   }
-  
+
   public Object getObject(String key) {
     return objectMap.get(key);
   }
-  
+
   public void setObject(String key, Object value) {
     objectMap.put(key, value);
   }
 }
-
Index: src/java/org/apache/nutch/util/PrefixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/PrefixStringMatcher.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/PrefixStringMatcher.java	(working copy)
@@ -21,46 +21,47 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of prefixes.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * prefixes.
  */
 public class PrefixStringMatcher extends TrieStringMatcher {
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied array.
-   * Zero-length <code>Strings</code> are ignored.
+   * <code>String</code>s with any prefix in the supplied array. Zero-length
+   * <code>Strings</code> are ignored.
    */
   public PrefixStringMatcher(String[] prefixes) {
     super();
-    for (int i= 0; i < prefixes.length; i++)
+    for (int i = 0; i < prefixes.length; i++)
       addPatternForward(prefixes[i]);
   }
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied    
+   * <code>String</code>s with any prefix in the supplied
    * <code>Collection</code>.
-   *
-   * @throws ClassCastException if any <code>Object</code>s in the
-   * collection are not <code>String</code>s
+   * 
+   * @throws ClassCastException
+   *           if any <code>Object</code>s in the collection are not
+   *           <code>String</code>s
    */
   public PrefixStringMatcher(Collection<String> prefixes) {
     super();
-    Iterator<String> iter= prefixes.iterator();
+    Iterator<String> iter = prefixes.iterator();
     while (iter.hasNext())
       addPatternForward(iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * prefix in the trie
+   * Returns true if the given <code>String</code> is matched by a prefix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -73,13 +74,13 @@
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
-        return input.substring(0, i+1);
+        return input.substring(0, i + 1);
     }
     return null;
   }
@@ -89,29 +90,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(0, i+1);
+        result = input.substring(0, i + 1);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    PrefixStringMatcher matcher= 
-      new PrefixStringMatcher( 
-        new String[] 
-        {"abcd", "abc", "aac", "baz", "foo", "foobar"} );
+    PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] {
+        "abcd", "abc", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
-                     "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                     "kite", };
+    String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/StringUtil.java
===================================================================
--- src/java/org/apache/nutch/util/StringUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/StringUtil.java	(working copy)
@@ -20,42 +20,42 @@
 import java.nio.ByteBuffer;
 
 /**
- * A collection of String processing utility methods. 
+ * A collection of String processing utility methods.
  */
 public class StringUtil {
 
   /**
-   * Returns a copy of <code>s</code> padded with trailing spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with trailing spaces so that its
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String rightPad(String s, int length) {
-    StringBuffer sb= new StringBuffer(s);
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer(s);
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     return sb.toString();
   }
 
   /**
-   * Returns a copy of <code>s</code> padded with leading spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with leading spaces so that its
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String leftPad(String s, int length) {
-    StringBuffer sb= new StringBuffer();
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer();
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     sb.append(s);
     return sb.toString();
   }
 
+  private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6',
+      '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
-  private static final char[] HEX_DIGITS =
-  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
-
   /**
    * Convenience call for {@link #toHexString(ByteBuffer, String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(ByteBuffer buf) {
@@ -65,19 +65,25 @@
   /**
    * Get a text representation of a ByteBuffer as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(ByteBuffer buf, String sep, int lineLen) {
-    return toHexString(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining(), sep, lineLen);
+    return toHexString(buf.array(), buf.arrayOffset() + buf.position(),
+        buf.remaining(), sep, lineLen);
   }
 
   /**
    * Convenience call for {@link #toHexString(byte[], String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(byte[] buf) {
@@ -87,11 +93,15 @@
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(byte[] buf, String sep, int lineLen) {
     return toHexString(buf, 0, buf.length, sep, lineLen);
@@ -100,39 +110,53 @@
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param of the offset into the byte[] to start reading
-   * @param cb the number of bytes to read from the byte[]
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param of
+   *          the offset into the byte[] to start reading
+   * @param cb
+   *          the number of bytes to read from the byte[]
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
-  public static String toHexString(byte[] buf, int of, int cb, String sep, int lineLen) {
-    if (buf == null) return null;
-    if (lineLen <= 0) lineLen = Integer.MAX_VALUE;
+  public static String toHexString(byte[] buf, int of, int cb, String sep,
+      int lineLen) {
+    if (buf == null)
+      return null;
+    if (lineLen <= 0)
+      lineLen = Integer.MAX_VALUE;
     StringBuffer res = new StringBuffer(cb * 2);
     for (int c = 0; c < cb; c++) {
       int b = buf[of++];
       res.append(HEX_DIGITS[(b >> 4) & 0xf]);
       res.append(HEX_DIGITS[b & 0xf]);
-      if (c > 0 && (c % lineLen) == 0) res.append('\n');
-      else if (sep != null && c < lineLen - 1) res.append(sep);
+      if (c > 0 && (c % lineLen) == 0)
+        res.append('\n');
+      else if (sep != null && c < lineLen - 1)
+        res.append(sep);
     }
     return res.toString();
   }
-  
+
   /**
    * Convert a String containing consecutive (no inside whitespace) hexadecimal
-   * digits into a corresponding byte array. If the number of digits is not even,
-   * a '0' will be appended in the front of the String prior to conversion.
-   * Leading and trailing whitespace is ignored.
-   * @param text input text
+   * digits into a corresponding byte array. If the number of digits is not
+   * even, a '0' will be appended in the front of the String prior to
+   * conversion. Leading and trailing whitespace is ignored.
+   * 
+   * @param text
+   *          input text
    * @return converted byte array, or null if unable to convert
    */
   public static byte[] fromHexString(String text) {
     text = text.trim();
-    if (text.length() % 2 != 0) text = "0" + text;
+    if (text.length() % 2 != 0)
+      text = "0" + text;
     int resLen = text.length() / 2;
     int loNibble, hiNibble;
     byte[] res = new byte[resLen];
@@ -140,12 +164,13 @@
       int j = i << 1;
       hiNibble = charToNibble(text.charAt(j));
       loNibble = charToNibble(text.charAt(j + 1));
-      if (loNibble == -1 || hiNibble == -1) return null;
-      res[i] = (byte)(hiNibble << 4 | loNibble);
+      if (loNibble == -1 || hiNibble == -1)
+        return null;
+      res[i] = (byte) (hiNibble << 4 | loNibble);
     }
     return res;
   }
-  
+
   private static final int charToNibble(char c) {
     if (c >= '0' && c <= '9') {
       return c - '0';
@@ -164,11 +189,12 @@
   public static boolean isEmpty(String str) {
     return (str == null) || (str.equals(""));
   }
-  
 
   /**
    * Takes in a String value and cleans out any offending "�"
-   * @param value the dirty String value.
+   * 
+   * @param value
+   *          the dirty String value.
    * @return clean String
    */
   public static String cleanField(String value) {
@@ -178,8 +204,8 @@
   public static void main(String[] args) {
     if (args.length != 1)
       System.out.println("Usage: StringUtil <encoding name>");
-    else 
-      System.out.println(args[0] + " is resolved to " +
-                         EncodingDetector.resolveEncodingAlias(args[0]));
+    else
+      System.out.println(args[0] + " is resolved to "
+          + EncodingDetector.resolveEncodingAlias(args[0]));
   }
 }
Index: src/java/org/apache/nutch/util/SuffixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/SuffixStringMatcher.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/SuffixStringMatcher.java	(working copy)
@@ -21,8 +21,8 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of suffixes.  Zero-length <code>Strings</code> are ignored.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * suffixes. Zero-length <code>Strings</code> are ignored.
  */
 public class SuffixStringMatcher extends TrieStringMatcher {
 
@@ -32,7 +32,7 @@
    */
   public SuffixStringMatcher(String[] suffixes) {
     super();
-    for (int i= 0; i < suffixes.length; i++)
+    for (int i = 0; i < suffixes.length; i++)
       addPatternBackward(suffixes[i]);
   }
 
@@ -49,14 +49,14 @@
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * suffix in the trie
+   * Returns true if the given <code>String</code> is matched by a suffix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -64,16 +64,15 @@
     return false;
   }
 
-
   /**
    * Returns the shortest suffix of <code>input<code> that is matched,
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
         return input.substring(i);
@@ -86,29 +85,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(i);
+        result = input.substring(i);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    SuffixStringMatcher matcher= 
-      new SuffixStringMatcher( 
-        new String[] 
-        {"a", "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar"} );
+    SuffixStringMatcher matcher = new SuffixStringMatcher(new String[] { "a",
+        "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
-                    "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                    "kite", };
+    String[] tests = { "a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/TableUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TableUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/TableUtil.java	(working copy)
@@ -33,7 +33,7 @@
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
    * "com.foo.bar:8983:http/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -50,7 +50,7 @@
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
    * "com.foo.bar:http:8983/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -93,8 +93,11 @@
       pathBegin = reversedUrl.length();
     String sub = reversedUrl.substring(0, pathBegin);
 
-    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed host>, <port>, <protocol>}
-    
+    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed
+                                                                    // host>,
+                                                                    // <port>,
+                                                                    // <protocol>}
+
     buf.append(splits[1]); // add protocol
     buf.append("://");
     reverseAppendSplits(splits[0], buf); // splits[0] is reversed
@@ -110,7 +113,7 @@
   /**
    * Given a reversed url, returns the reversed host E.g
    * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar"
-   *
+   * 
    * @param reversedUrl
    *          Reversed url
    * @return Reversed host
@@ -120,7 +123,7 @@
   }
 
   private static void reverseAppendSplits(String string, StringBuilder buf) {
-    String[] splits = StringUtils.split(string,'.');
+    String[] splits = StringUtils.split(string, '.');
     if (splits.length > 0) {
       for (int i = splits.length - 1; i > 0; i--) {
         buf.append(splits[i]);
@@ -136,18 +139,18 @@
     StringBuilder buf = new StringBuilder();
     reverseAppendSplits(hostName, buf);
     return buf.toString();
-    
+
   }
+
   public static String unreverseHost(String reversedHostName) {
     return reverseHost(reversedHostName); // Reversible
   }
-  
-  
+
   /**
-   * Convert given Utf8 instance to String and and cleans out 
-   * any offending "�" from the String.
-   *
-   *
+   * Convert given Utf8 instance to String and cleans out any offending "�"
+   * from the String.
+   * 
+   * 
    * @param utf8
    *          Utf8 object
    * @return string-ifed Utf8 object or null if Utf8 instance is null
Index: src/java/org/apache/nutch/util/TimingUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TimingUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/TimingUtil.java	(working copy)
@@ -21,35 +21,39 @@
 
 public class TimingUtil {
 
-    private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
+  private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
 
-    /**
-     * Calculate the elapsed time between two times specified in milliseconds.
-     * @param start The start of the time period
-     * @param end The end of the time period
-     * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y minutes and Z seconds or null if start > end.
-     */
-    public static String elapsedTime(long start, long end){
-        if (start > end) {
-            return null;
-        }
+  /**
+   * Calculate the elapsed time between two times specified in milliseconds.
+   * 
+   * @param start
+   *          The start of the time period
+   * @param end
+   *          The end of the time period
+   * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y
+   *         minutes and Z seconds or null if start > end.
+   */
+  public static String elapsedTime(long start, long end) {
+    if (start > end) {
+      return null;
+    }
 
-        long[] elapsedTime = new long[TIME_FACTOR.length];
+    long[] elapsedTime = new long[TIME_FACTOR.length];
 
-        for (int i = 0; i < TIME_FACTOR.length; i++) {
-            elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
-            start += TIME_FACTOR[i] * elapsedTime[i];
-        }
+    for (int i = 0; i < TIME_FACTOR.length; i++) {
+      elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
+      start += TIME_FACTOR[i] * elapsedTime[i];
+    }
 
-        NumberFormat nf = NumberFormat.getInstance();
-        nf.setMinimumIntegerDigits(2);
-        StringBuffer buf = new StringBuffer();
-        for (int i = 0; i < elapsedTime.length; i++) {
-            if (i > 0) {
-                buf.append(":");
-            }
-            buf.append(nf.format(elapsedTime[i]));
-        }
-        return buf.toString();
+    NumberFormat nf = NumberFormat.getInstance();
+    nf.setMinimumIntegerDigits(2);
+    StringBuffer buf = new StringBuffer();
+    for (int i = 0; i < elapsedTime.length; i++) {
+      if (i > 0) {
+        buf.append(":");
+      }
+      buf.append(nf.format(elapsedTime[i]));
     }
+    return buf.toString();
+  }
 }
Index: src/java/org/apache/nutch/util/ToolUtil.java
===================================================================
--- src/java/org/apache/nutch/util/ToolUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/ToolUtil.java	(working copy)
@@ -28,7 +28,7 @@
 
 public class ToolUtil {
 
-  public static final Map<String,Object> toArgMap(Object... args) {
+  public static final Map<String, Object> toArgMap(Object... args) {
     if (args == null) {
       return null;
     }
@@ -35,7 +35,7 @@
     if (args.length % 2 != 0) {
       throw new RuntimeException("expected pairs of argName argValue");
     }
-    HashMap<String,Object> res = new HashMap<String,Object>();
+    HashMap<String, Object> res = new HashMap<String, Object>();
     for (int i = 0; i < args.length; i += 2) {
       if (args[i + 1] != null) {
         res.put(String.valueOf(args[i]), args[i + 1]);
@@ -43,20 +43,22 @@
     }
     return res;
   }
-  
+
   @SuppressWarnings("unchecked")
-  public static final void recordJobStatus(String label, Job job, Map<String,Object> results) {
-    Map<String,Object> jobs = (Map<String,Object>)results.get(Nutch.STAT_JOBS);
+  public static final void recordJobStatus(String label, Job job,
+      Map<String, Object> results) {
+    Map<String, Object> jobs = (Map<String, Object>) results
+        .get(Nutch.STAT_JOBS);
     if (jobs == null) {
-      jobs = new LinkedHashMap<String,Object>();
+      jobs = new LinkedHashMap<String, Object>();
       results.put(Nutch.STAT_JOBS, jobs);
     }
-    Map<String,Object> stats = new HashMap<String,Object>();
-    Map<String,Object> countStats = new HashMap<String,Object>();
+    Map<String, Object> stats = new HashMap<String, Object>();
+    Map<String, Object> countStats = new HashMap<String, Object>();
     try {
       Counters counters = job.getCounters();
       for (CounterGroup cg : counters) {
-        Map<String,Object> cnts = new HashMap<String,Object>();
+        Map<String, Object> cnts = new HashMap<String, Object>();
         countStats.put(cg.getDisplayName(), cnts);
         for (Counter c : cg) {
           cnts.put(c.getName(), c.getValue());
Index: src/java/org/apache/nutch/util/TrieStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/TrieStringMatcher.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/TrieStringMatcher.java	(working copy)
@@ -17,21 +17,19 @@
 
 package org.apache.nutch.util;
 
-
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.ListIterator;
 
 /**
- * TrieStringMatcher is a base class for simple tree-based string
- * matching.
- *
+ * TrieStringMatcher is a base class for simple tree-based string matching.
+ * 
  */
 public abstract class TrieStringMatcher {
   protected TrieNode root;
 
   protected TrieStringMatcher() {
-    this.root= new TrieNode('\000', false);
+    this.root = new TrieNode('\000', false);
   }
 
   /**
@@ -44,20 +42,19 @@
     protected boolean terminal;
 
     /**
-     * Creates a new TrieNode, which contains the given
-     * <code>nodeChar</code>.  If <code>isTerminal</code> is
-     * <code>true</code>, the new node is a <em>terminal</em> node in
-     * the trie.
-     */  
+     * Creates a new TrieNode, which contains the given <code>nodeChar</code>.
+     * If <code>isTerminal</code> is <code>true</code>, the new node is a
+     * <em>terminal</em> node in the trie.
+     */
     TrieNode(char nodeChar, boolean isTerminal) {
-      this.nodeChar= nodeChar;
-      this.terminal= isTerminal;
-      this.childrenList= new LinkedList<TrieNode>();
+      this.nodeChar = nodeChar;
+      this.terminal = isTerminal;
+      this.childrenList = new LinkedList<TrieNode>();
     }
 
     /**
-     * Returns <code>true</code> if this node is a <em>terminal</em>
-     * node in the trie.
+     * Returns <code>true</code> if this node is a <em>terminal</em> node in the
+     * trie.
      */
     boolean isTerminal() {
       return terminal;
@@ -65,67 +62,68 @@
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists, one will be is
-     * added.  If <em>isTerminal</em> is <code>true</code>, the node 
-     * will be a terminal node in the trie.
+     * <code>nextChar</code>. If no such node exists, one will be added. If
+     * <em>isTerminal</em> is <code>true</code>, the node will be a terminal
+     * node in the trie.
      */
     TrieNode getChildAddIfNotPresent(char nextChar, boolean isTerminal) {
       if (childrenList == null) {
-        childrenList= new LinkedList<TrieNode>();
+        childrenList = new LinkedList<TrieNode>();
         childrenList.addAll(Arrays.asList(children));
-        children= null;
+        children = null;
       }
 
       if (childrenList.size() == 0) {
-        TrieNode newNode= new TrieNode(nextChar, isTerminal);
+        TrieNode newNode = new TrieNode(nextChar, isTerminal);
         childrenList.add(newNode);
         return newNode;
       }
 
-      ListIterator<TrieNode> iter= childrenList.listIterator();
-      TrieNode node= iter.next();
-      while ( (node.nodeChar < nextChar) && iter.hasNext() ) 
-        node= iter.next();
-                        
+      ListIterator<TrieNode> iter = childrenList.listIterator();
+      TrieNode node = iter.next();
+      while ((node.nodeChar < nextChar) && iter.hasNext())
+        node = iter.next();
+
       if (node.nodeChar == nextChar) {
-        node.terminal= node.terminal | isTerminal;
+        node.terminal = node.terminal | isTerminal;
         return node;
       }
 
-      if (node.nodeChar > nextChar) 
+      if (node.nodeChar > nextChar)
         iter.previous();
 
-      TrieNode newNode= new TrieNode(nextChar, isTerminal);
+      TrieNode newNode = new TrieNode(nextChar, isTerminal);
       iter.add(newNode);
-      return newNode;                   
+      return newNode;
     }
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists,
-     * <code>null</code> is returned.
+     * <code>nextChar</code>. If no such node exists, <code>null</code> is
+     * returned.
      */
     TrieNode getChild(char nextChar) {
       if (children == null) {
-        children= childrenList.toArray(new TrieNode[childrenList.size()]);
-        childrenList= null;
+        children = childrenList.toArray(new TrieNode[childrenList.size()]);
+        childrenList = null;
         Arrays.sort(children);
       }
 
-      int min= 0;
-      int max= children.length - 1;
-      int mid= 0;
+      int min = 0;
+      int max = children.length - 1;
+      int mid = 0;
       while (min < max) {
-        mid= (min + max) / 2;
-        if (children[mid].nodeChar == nextChar) 
+        mid = (min + max) / 2;
+        if (children[mid].nodeChar == nextChar)
           return children[mid];
         if (children[mid].nodeChar < nextChar)
-          min= mid + 1;
-        else // if (children[mid].nodeChar > nextChar)
-          max= mid - 1;
+          min = mid + 1;
+        else
+          // if (children[mid].nodeChar > nextChar)
+          max = mid - 1;
       }
 
-      if (min == max) 
+      if (min == max)
         if (children[min].nodeChar == nextChar)
           return children[min];
 
@@ -133,11 +131,11 @@
     }
 
     public int compareTo(TrieNode other) {
-      if (this.nodeChar < other.nodeChar) 
+      if (this.nodeChar < other.nodeChar)
         return -1;
-      if (this.nodeChar == other.nodeChar) 
+      if (this.nodeChar == other.nodeChar)
         return 0;
-//    if (this.nodeChar > other.nodeChar) 
+      // if (this.nodeChar > other.nodeChar)
       return 1;
     }
   }
@@ -144,8 +142,8 @@
 
   /**
    * Returns the next {@link TrieNode} visited, given that you are at
-   * <code>node</code>, and the the next character in the input is 
-   * the <code>idx</code>'th character of <code>s</code>.
+   * <code>node</code>, and the next character in the input is the
+   * <code>idx</code>'th character of <code>s</code>.
    */
   protected final TrieNode matchChar(TrieNode node, String s, int idx) {
     return node.getChild(s.charAt(idx));
@@ -152,40 +150,38 @@
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded and the last character is
-   * represented by a terminal node.  Zero-length <code>Strings</code>
-   * are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded and the last character is represented by a terminal node.
+   * Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternForward(String s) {
-    TrieNode node= root;
-    int stop= s.length() - 1;
+    TrieNode node = root;
+    int stop = s.length() - 1;
     int i;
     if (s.length() > 0) {
-      for (i= 0; i < stop; i++)
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(i), true);
+      for (i = 0; i < stop; i++)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(i), true);
     }
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded <em>in reverse</em> and the
-   * first character is represented by a terminal node.  Zero-length
-   * <code>Strings</code> are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded <em>in reverse</em> and the first character is represented
+   * by a terminal node. Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternBackward(String s) {
-    TrieNode node= root;
+    TrieNode node = root;
     if (s.length() > 0) {
-      for (int i= s.length()-1; i > 0; i--) 
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(0), true);
+      for (int i = s.length() - 1; i > 0; i--)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(0), true);
     }
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * pattern in the trie
+   * Returns true if the given <code>String</code> is matched by a pattern in
+   * the trie
    */
   public abstract boolean matches(String input);
 
Index: src/java/org/apache/nutch/util/URLUtil.java
===================================================================
--- src/java/org/apache/nutch/util/URLUtil.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/URLUtil.java	(working copy)
@@ -28,15 +28,18 @@
 public class URLUtil {
 
   /**
-   * Resolve relative URL-s and fix a java.net.URL error
-   * in handling of URLs with pure query targets.
-   * @param base base url
-   * @param target target url (may be relative)
+   * Resolve relative URL-s and fix a java.net.URL error in handling of URLs
+   * with pure query targets.
+   * 
+   * @param base
+   *          base url
+   * @param target
+   *          target url (may be relative)
    * @return resolved absolute url.
    * @throws MalformedURLException
    */
   public static URL resolveURL(URL base, String target)
-          throws MalformedURLException {
+      throws MalformedURLException {
     target = target.trim();
 
     // handle the case that there is a target that is a pure query,
@@ -58,9 +61,10 @@
   }
 
   /** Handle the case in RFC3986 section 5.4.1 example 7, and similar. */
-   static URL fixPureQueryTargets(URL base, String target)
-          throws MalformedURLException {
-    if (!target.startsWith("?")) return new URL(base, target);
+  static URL fixPureQueryTargets(URL base, String target)
+      throws MalformedURLException {
+    if (!target.startsWith("?"))
+      return new URL(base, target);
 
     String basePath = base.getPath();
     String baseRightMost = "";
@@ -69,36 +73,40 @@
       baseRightMost = basePath.substring(baseRightMostIdx + 1);
     }
 
-    if (target.startsWith("?")) target = baseRightMost + target;
+    if (target.startsWith("?"))
+      target = baseRightMost + target;
 
     return new URL(base, target);
   }
 
-  private static Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");
+  private static Pattern IP_PATTERN = Pattern
+      .compile("(\\d{1,3}\\.){3}(\\d{1,3})");
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new URL(http://lucene.apache.org/))
    *  </code><br>
-   *  will return <br><code> apache.org</code>
-   *   */
+   * will return <br>
+   * <code> apache.org</code>
+   */
   public static String getDomainName(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    //it seems that java returns hostnames ending with .
-    if(host.endsWith("."))
+    // it seems that java returns hostnames ending with .
+    if (host.endsWith("."))
       host = host.substring(0, host.length() - 1);
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return host;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1); 
-      if(tlds.isDomainSuffix(subCandidate)) {
-        return candidate; 
+      String subCandidate = candidate.substring(index + 1);
+      if (tlds.isDomainSuffix(subCandidate)) {
+        return candidate;
       }
       candidate = subCandidate;
     }
@@ -105,12 +113,15 @@
     return candidate;
   }
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new http://lucene.apache.org/)
    *  </code><br>
-   *  will return <br><code> apache.org</code>
+   * will return <br>
+   * <code> apache.org</code>
+   * 
    * @throws MalformedURLException
    */
   public static String getDomainName(String url) throws MalformedURLException {
@@ -117,12 +128,12 @@
     return getDomainName(new URL(url));
   }
 
-  /** Returns whether the given urls have the same domain name.
-   * As an example, <br>
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
    * <code> isSameDomain(new URL("http://lucene.apache.org")
    * , new URL("http://people.apache.org/"))
    * <br> will return true. </code>
-   *
+   * 
    * @return true if the domain names are equal
    */
   public static boolean isSameDomainName(URL url1, URL url2) {
@@ -129,36 +140,38 @@
     return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
   }
 
-  /**Returns whether the given urls have the same domain name.
-  * As an example, <br>
-  * <code> isSameDomain("http://lucene.apache.org"
-  * ,"http://people.apache.org/")
-  * <br> will return true. </code>
-  * @return true if the domain names are equal
-  * @throws MalformedURLException
-  */
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
+   * <code> isSameDomain("http://lucene.apache.org"
+   * ,"http://people.apache.org/")
+   * <br> will return true. </code>
+   * 
+   * @return true if the domain names are equal
+   * @throws MalformedURLException
+   */
   public static boolean isSameDomainName(String url1, String url2)
-    throws MalformedURLException {
+      throws MalformedURLException {
     return isSameDomainName(new URL(url1), new URL(url2));
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
   public static DomainSuffix getDomainSuffix(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return null;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1);
+      String subCandidate = candidate.substring(index + 1);
       DomainSuffix d = tlds.get(subCandidate);
-      if(d != null) {
-        return d; 
+      if (d != null) {
+        return d;
       }
       candidate = subCandidate;
     }
@@ -165,34 +178,43 @@
     return null;
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
-  public static DomainSuffix getDomainSuffix(String url) throws MalformedURLException {
+  public static DomainSuffix getDomainSuffix(String url)
+      throws MalformedURLException {
     return getDomainSuffix(new URL(url));
   }
 
-  /** Partitions of the hostname of the url by "."  */
+  /** Partitions of the hostname of the url by "." */
   public static String[] getHostBatches(URL url) {
     String host = url.getHost();
-    //return whole hostname, if it is an ipv4
-    //TODO : handle ipv6
-    if(IP_PATTERN.matcher(host).matches())
-      return new String[] {host};
+    // return whole hostname, if it is an ipv4
+    // TODO : handle ipv6
+    if (IP_PATTERN.matcher(host).matches())
+      return new String[] { host };
     return host.split("\\.");
   }
 
-  /** Partitions of the hostname of the url by "."
-   * @throws MalformedURLException */
-  public static String[] getHostBatches(String url) throws MalformedURLException {
-   return getHostBatches(new URL(url));
+  /**
+   * Partitions of the hostname of the url by "."
+   * 
+   * @throws MalformedURLException
+   */
+  public static String[] getHostBatches(String url)
+      throws MalformedURLException {
+    return getHostBatches(new URL(url));
   }
 
   /**
-   * <p>Given two urls, a src and a destination of a redirect, it returns the 
-   * representative url.<p>
+   * <p>
+   * Given two urls, a src and a destination of a redirect, it returns the
+   * representative url.
+   * <p>
    * 
-   * <p>This method implements an extended version of the algorithm used by the
+   * <p>
+   * This method implements an extended version of the algorithm used by the
    * Yahoo! Slurp crawler described here:<br>
    * <a href=
    * "http://help.yahoo.com/l/nz/yahooxtra/search/webcrawler/slurp-11.html"> How
@@ -200,27 +222,39 @@
    * <br>
    * <ol>
    * <li>Choose target url if either url is malformed.</li>
-   * <li>If different domains the keep the destination whether or not the 
+   * <li>If different domains the keep the destination whether or not the
    * redirect is temp or perm</li>
-   * <ul><li>a.com -> b.com*</li></ul>
+   * <ul>
+   * <li>a.com -> b.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and the source is root, keep the source.</li>
-   * <ul><li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li></ul>
-   * <li>If the redirect is permanent and the source is not root and the 
+   * <ul>
+   * <li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li>
+   * </ul>
+   * <li>If the redirect is permanent and the source is not root and the
    * destination is root, keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and neither the source nor the destination
    * is root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com/abc/page.html*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
+   * </ul>
    * <li>If the redirect is temporary and source is root and destination is not
    * root, then keep the source</li>
-   * <ul><li>*a.com -> a.com/xyz/index.html</li></ul>
+   * <ul>
+   * <li>*a.com -> a.com/xyz/index.html</li>
+   * </ul>
    * <li>If the redirect is temporary and source is not root and destination is
    * root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is temporary and neither the source or the destination
-   * is root, then keep the shortest url.  First check for the shortest host,
-   * and if both are equal then check by path.  Path is first by length then by
-   * the number of / path separators.</li>
+   * is root, then keep the shortest url. First check for the shortest host, and
+   * if both are equal then check by path. Path is first by length then by the
+   * number of / path separators.</li>
    * <ul>
    * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
    * <li>*www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html</li>
@@ -227,19 +261,24 @@
    * </ul>
    * <li>If the redirect is temporary and both the source and the destination
    * are root, then keep the shortest sub-domain</li>
-   * <ul><li>*www.a.com -> www.news.a.com</li></ul>
+   * <ul>
+   * <li>*www.a.com -> www.news.a.com</li>
+   * </ul>
    * <br>
-   * While not in this logic there is a further piece of representative url 
-   * logic that occurs during indexing and after scoring.  During creation of 
-   * the basic fields before indexing, if a url has a representative url stored
-   * we check both the url and its representative url (which should never be 
-   * the same) against their linkrank scores and the highest scoring one is 
-   * kept as the url and the lower scoring one is held as the orig url inside 
-   * of the index.
+   * While not in this logic there is a further piece of representative url
+   * logic that occurs during indexing and after scoring. During creation of the
+   * basic fields before indexing, if a url has a representative url stored we
+   * check both the url and its representative url (which should never be the
+   * same) against their linkrank scores and the highest scoring one is kept as
+   * the url and the lower scoring one is held as the orig url inside of the
+   * index.
    * 
-   * @param src The source url.
-   * @param dst The destination url.
-   * @param temp Is the redirect a temporary redirect.
+   * @param src
+   *          The source url.
+   * @param dst
+   *          The destination url.
+   * @param temp
+   *          Is the redirect a temporary redirect.
    * 
    * @return String The representative url.
    */
@@ -251,8 +290,7 @@
     try {
       srcUrl = new URL(src);
       dstUrl = new URL(dst);
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return dst;
     }
 
@@ -270,27 +308,27 @@
 
     // 1) different domain them keep dest, temp or perm
     // a.com -> b.com*
-    //    
+    //
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
-    //      
+    //
     // 3) permanent and not root and dest root, keep dest
     // a.com/xyz/index.html -> a.com*
-    //      
+    //
     // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
-    //      
+    //
     // 5) temp and root and dest not root keep src
     // *a.com -> a.com/xyz/index.html
-    //  
+    //
     // 7) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
-    //  
+    //
     // 8) temp and neither root, keep shortest, if hosts equal by path else by
     // hosts. paths are first by length then by number of / separators
     // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
-    //  
+    //
     // 9) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
 
@@ -302,25 +340,21 @@
 
     // if it is a permanent redirect
     if (!temp) {
-      
+
       // if source is root return source, otherwise destination
       if (srcRoot) {
         return src;
-      }
-      else {
+      } else {
         return dst;
       }
-    }
-    else { // temporary redirect
+    } else { // temporary redirect
 
       // source root and destination not root
       if (srcRoot && !destRoot) {
         return src;
-      }
-      else if (!srcRoot && destRoot) { // destination root and source not
+      } else if (!srcRoot && destRoot) { // destination root and source not
         return dst;
-      }
-      else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
+      } else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
 
         // source and destination hosts are the same, check paths, host length
         int numSrcPaths = srcFile.split("/").length;
@@ -327,14 +361,12 @@
         int numDstPaths = dstFile.split("/").length;
         if (numSrcPaths != numDstPaths) {
           return (numDstPaths < numSrcPaths ? dst : src);
-        }
-        else {
+        } else {
           int srcPathLength = srcFile.length();
           int dstPathLength = dstFile.length();
           return (dstPathLength < srcPathLength ? dst : src);
         }
-      }
-      else {
+      } else {
 
         // different host names and both root take the shortest
         int numSrcSubs = srcHost.split("\\.").length;
@@ -348,24 +380,25 @@
    * Returns the lowercased hostname for the url or null if the url is not well
    * formed.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The hostname for the url.
    */
   public static String getHost(String url) {
     try {
       return new URL(url).getHost().toLowerCase();
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
 
   /**
-   * Returns the page for the url.  The page consists of the protocol, host,
-   * and path, but does not include the query string.  The host is lowercased
-   * but the path is not.
+   * Returns the page for the url. The page consists of the protocol, host, and
+   * path, but does not include the query string. The host is lowercased but the
+   * path is not.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The page for the url.
    */
   public static String getPage(String url) {
@@ -374,12 +407,11 @@
       url = url.toLowerCase();
       String queryStr = new URL(url).getQuery();
       return (queryStr != null) ? url.replace("?" + queryStr, "") : url;
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
-  
+
   public static String toASCII(String url) {
     try {
       URL u = new URL(url);
@@ -389,17 +421,11 @@
         // also do not add additional slashes for file: URLs (NUTCH-1880)
         return url;
       }
-      URI p = new URI(u.getProtocol(),
-        u.getUserInfo(),
-        IDN.toASCII(host),
-        u.getPort(),
-        u.getPath(),
-        u.getQuery(),
-        u.getRef());
+      URI p = new URI(u.getProtocol(), u.getUserInfo(), IDN.toASCII(host),
+          u.getPort(), u.getPath(), u.getQuery(), u.getRef());
 
       return p.toString();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       return null;
     }
   }
@@ -432,26 +458,23 @@
       }
 
       return sb.toString();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       return null;
     }
   }
 
+  /** For testing */
+  public static void main(String[] args) {
 
-  /** For testing */
-  public static void main(String[] args){
-    
-    if(args.length!=1) {
+    if (args.length != 1) {
       System.err.println("Usage : URLUtil <url>");
-      return ;
+      return;
     }
-    
+
     String url = args[0];
     try {
       System.out.println(URLUtil.getDomainName(new URL(url)));
-    }
-    catch (MalformedURLException ex) {
+    } catch (MalformedURLException ex) {
       ex.printStackTrace();
     }
   }
Index: src/java/org/apache/nutch/util/WebPageWritable.java
===================================================================
--- src/java/org/apache/nutch/util/WebPageWritable.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/WebPageWritable.java	(working copy)
@@ -26,8 +26,7 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
-public class WebPageWritable extends Configured
-implements Writable {
+public class WebPageWritable extends Configured implements Writable {
 
   private WebPage webPage;
 
@@ -53,7 +52,7 @@
   public WebPage getWebPage() {
     return webPage;
   }
-  
+
   public void setWebPage(WebPage webPage) {
     this.webPage = webPage;
   }
Index: src/java/org/apache/nutch/util/domain/DomainStatistics.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainStatistics.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainStatistics.java	(working copy)
@@ -71,7 +71,8 @@
   public int run(String[] args) throws IOException, ClassNotFoundException,
       InterruptedException {
     if (args.length < 2) {
-      System.out.println("usage: DomainStatistics outDir host|domain|suffix [-numReducers n] [-crawlId <id>]");
+      System.out
+          .println("usage: DomainStatistics outDir host|domain|suffix [-numReducers n] [-crawlId <id>]");
       return 1;
     }
     String outputDir = args[0];
@@ -193,9 +194,8 @@
     }
 
     @Override
-    protected void map(
-        String key, WebPage value, Context context) 
-            throws IOException, InterruptedException {
+    protected void map(String key, WebPage value, Context context)
+        throws IOException, InterruptedException {
       if (value.getStatus() == CrawlStatus.STATUS_FETCHED) {
         try {
           URL url = new URL(TableUtil.unreverseUrl(key.toString()));
Index: src/java/org/apache/nutch/util/domain/DomainSuffix.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffix.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainSuffix.java	(working copy)
@@ -18,17 +18,18 @@
 package org.apache.nutch.util.domain;
 
 /**
- * This class represents the last part of the host name, 
- * which is operated by authoritives, not individuals. This information 
- * is needed to find the domain name of a host. The domain name of a host
- * is defined to be the last part before the domain suffix, w/o subdomain 
- * names.  As an example the domain name of <br><code> http://lucene.apache.org/ 
- * </code><br> is <code> apache.org</code>   
- * <br>
- * This class holds three fields,  
- * <strong>domain</strong> field represents the suffix (such as "co.uk")
- * <strong>boost</strong> is a float for boosting score of url's with this suffix
- * <strong>status</strong> field represents domain's status
+ * This class represents the last part of the host name, which is operated by
+ * authorities, not individuals. This information is needed to find the domain
+ * name of a host. The domain name of a host is defined to be the last part
+ * before the domain suffix, w/o subdomain names. As an example the domain name
+ * of <br>
+ * <code> http://lucene.apache.org/ 
+ * </code><br>
+ * is <code> apache.org</code> <br>
+ * This class holds three fields, <strong>domain</strong> field represents the
+ * suffix (such as "co.uk") <strong>boost</strong> is a float for boosting score
+ * of URLs with this suffix <strong>status</strong> field represents domain's
+ * status
  * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see TopLevelDomain
@@ -37,10 +38,10 @@
 public class DomainSuffix {
 
   /**
-   * Enumeration of the status of the tld. Please see domain-suffixes.xml. 
+   * Enumeration of the status of the tld. Please see domain-suffixes.xml.
    */
-  public enum Status { INFRASTRUCTURE, SPONSORED, UNSPONSORED
-    , STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
+  public enum Status {
+    INFRASTRUCTURE, SPONSORED, UNSPONSORED, STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
   };
 
   private String domain;
@@ -49,7 +50,7 @@
 
   public static final float DEFAULT_BOOST = 1.0f;
   public static final Status DEFAULT_STATUS = Status.IN_USE;
-  
+
   public DomainSuffix(String domain, Status status, float boost) {
     this.domain = domain;
     this.status = status;
@@ -59,7 +60,7 @@
   public DomainSuffix(String domain) {
     this(domain, DEFAULT_STATUS, DEFAULT_BOOST);
   }
-  
+
   public String getDomain() {
     return domain;
   }
@@ -71,7 +72,7 @@
   public float getBoost() {
     return boost;
   }
-  
+
   @Override
   public String toString() {
     return domain;
Index: src/java/org/apache/nutch/util/domain/DomainSuffixes.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(working copy)
@@ -25,40 +25,43 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * Storage class for <code>DomainSuffix</code> objects 
- * Note: this class is singleton
+ * Storage class for <code>DomainSuffix</code> objects. Note: this class is a
+ * singleton.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class DomainSuffixes {
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixes.class);
-  
-  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>(); 
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixes.class);
+
+  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>();
+
   private static DomainSuffixes instance;
-  
+
   /** private ctor */
   private DomainSuffixes() {
     String file = "domain-suffixes.xml";
-    InputStream input = this.getClass().getClassLoader().getResourceAsStream(file);
+    InputStream input = this.getClass().getClassLoader()
+        .getResourceAsStream(file);
     try {
       new DomainSuffixesReader().read(this, input);
-    }
-    catch (Exception ex) {
+    } catch (Exception ex) {
       LOG.warn(StringUtils.stringifyException(ex));
     }
   }
-  
+
   /**
    * Singleton instance, lazy instantination
+   * 
    * @return
    */
   public static DomainSuffixes getInstance() {
-    if(instance == null) {
+    if (instance == null) {
       instance = new DomainSuffixes();
     }
     return instance;
   }
-  
+
   void addDomainSuffix(DomainSuffix tld) {
     domains.put(tld.getDomain(), tld);
   }
@@ -65,17 +68,19 @@
 
   /** return whether the extension is a registered domain entry */
   public boolean isDomainSuffix(String extension) {
-    return domains.containsKey(extension); 
+    return domains.containsKey(extension);
   }
-    
+
   /**
-   * Return the {@link DomainSuffix} object for the extension, if 
-   * extension is a top level domain returned object will be an 
-   * instance of {@link TopLevelDomain}
-   * @param extension of the domain
+   * Return the {@link DomainSuffix} object for the extension, if extension is a
+   * top level domain returned object will be an instance of
+   * {@link TopLevelDomain}
+   * 
+   * @param extension
+   *          of the domain
    */
   public DomainSuffix get(String extension) {
     return domains.get(extension);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(working copy)
@@ -36,16 +36,17 @@
 import org.xml.sax.SAXException;
 
 /**
- * For parsing xml files containing domain suffix definitions.
- * Parsed xml files should validate against 
- * <code>domain-suffixes.xsd</code>  
+ * For parsing xml files containing domain suffix definitions. Parsed xml files
+ * should validate against <code>domain-suffixes.xsd</code>
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 class DomainSuffixesReader {
 
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixesReader.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixesReader.class);
 
-  void read(DomainSuffixes tldEntries, InputStream input) throws IOException{
+  void read(DomainSuffixes tldEntries, InputStream input) throws IOException {
     try {
 
       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
@@ -54,28 +55,29 @@
       Document document = builder.parse(new InputSource(input));
 
       Element root = document.getDocumentElement();
-      
-      if(root != null && root.getTagName().equals("domains")) {
-        
-        Element tlds = (Element)root.getElementsByTagName("tlds").item(0);
-        Element suffixes = (Element)root.getElementsByTagName("suffixes").item(0);
-        
-        //read tlds
-        readITLDs(tldEntries, (Element)tlds.getElementsByTagName("itlds").item(0));
-        readGTLDs(tldEntries, (Element)tlds.getElementsByTagName("gtlds").item(0));
-        readCCTLDs(tldEntries, (Element)tlds.getElementsByTagName("cctlds").item(0));
-        
+
+      if (root != null && root.getTagName().equals("domains")) {
+
+        Element tlds = (Element) root.getElementsByTagName("tlds").item(0);
+        Element suffixes = (Element) root.getElementsByTagName("suffixes")
+            .item(0);
+
+        // read tlds
+        readITLDs(tldEntries, (Element) tlds.getElementsByTagName("itlds")
+            .item(0));
+        readGTLDs(tldEntries, (Element) tlds.getElementsByTagName("gtlds")
+            .item(0));
+        readCCTLDs(tldEntries, (Element) tlds.getElementsByTagName("cctlds")
+            .item(0));
+
         readSuffixes(tldEntries, suffixes);
-      }
-      else {
+      } else {
         throw new IOException("xml file is not valid");
       }
-    }
-    catch (ParserConfigurationException ex) {
+    } catch (ParserConfigurationException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
-    }
-    catch (SAXException ex) {
+    } catch (SAXException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
     }
@@ -83,22 +85,24 @@
 
   void readITLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.INFRASTRUCTURE));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.INFRASTRUCTURE));
     }
   }
-    
+
   void readGTLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.GENERIC));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.GENERIC));
     }
   }
 
   void readCCTLDs(DomainSuffixes tldEntries, Element el) throws IOException {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readCCTLD((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readCCTLD((Element) children.item(i)));
     }
   }
 
@@ -113,39 +117,40 @@
     String domain = el.getAttribute("domain");
     Status status = readStatus(el);
     float boost = readBoost(el);
-    String countryName = readCountryName(el); 
-    return new TopLevelDomain(domain, status, boost, countryName);  
+    String countryName = readCountryName(el);
+    return new TopLevelDomain(domain, status, boost, countryName);
   }
-  
+
   /** read optional field status */
   Status readStatus(Element el) {
     NodeList list = el.getElementsByTagName("status");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_STATUS;
     return Status.valueOf(list.item(0).getFirstChild().getNodeValue());
   }
-  
+
   /** read optional field boost */
   float readBoost(Element el) {
     NodeList list = el.getElementsByTagName("boost");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_BOOST;
     return Float.parseFloat(list.item(0).getFirstChild().getNodeValue());
   }
-  
-  /** read field countryname 
-    */
+
+  /**
+   * read required field country name
+   */
   String readCountryName(Element el) throws IOException {
     NodeList list = el.getElementsByTagName("country");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       throw new IOException("Country name should be given");
     return list.item(0).getNodeValue();
   }
-  
+
   void readSuffixes(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("suffix");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readSuffix((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readSuffix((Element) children.item(i)));
     }
   }
 
@@ -155,5 +160,5 @@
     float boost = readBoost(el);
     return new DomainSuffix(domain, status, boost);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/TopLevelDomain.java
===================================================================
--- src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(working copy)
@@ -18,10 +18,11 @@
 package org.apache.nutch.util.domain;
 
 /**
- * (From wikipedia) A top-level domain (TLD) is the last part of an 
- * Internet domain name; that is, the letters which follow the final 
- * dot of any domain name. For example, in the domain name 
- * <code>www.website.com</code>, the top-level domain is <code>com</code>.
+ * (From wikipedia) A top-level domain (TLD) is the last part of an Internet
+ * domain name; that is, the letters which follow the final dot of any domain
+ * name. For example, in the domain name <code>www.website.com</code>, the
+ * top-level domain is <code>com</code>.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see http://www.iana.org/
  * @see http://en.wikipedia.org/wiki/Top-level_domain
@@ -28,31 +29,36 @@
  */
 public class TopLevelDomain extends DomainSuffix {
 
-  public enum Type { INFRASTRUCTURE, GENERIC, COUNTRY };
-  
+  public enum Type {
+    INFRASTRUCTURE, GENERIC, COUNTRY
+  }
+
   private Type type;
   private String countryName = null;
-  
-  public TopLevelDomain(String domain, Type type, Status status, float boost){
+
+  public TopLevelDomain(String domain, Type type, Status status, float boost) {
     super(domain, status, boost);
     this.type = type;
   }
 
-  public TopLevelDomain(String domain, Status status, float boost, String countryName){
+  public TopLevelDomain(String domain, Status status, float boost,
+      String countryName) {
     super(domain, status, boost);
     this.type = Type.COUNTRY;
     this.countryName = countryName;
   }
-  
+
   public Type getType() {
     return type;
   }
 
-  /** Returns the country name if TLD is Country Code TLD
+  /**
+   * Returns the country name if TLD is Country Code TLD
+   *
    * @return country name or null
-   */ 
-  public String getCountryName(){
+   */
+  public String getCountryName() {
     return countryName;
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/package-info.java
===================================================================
--- src/java/org/apache/nutch/util/package-info.java	(revision 1650444)
+++ src/java/org/apache/nutch/util/package-info.java	(working copy)
@@ -19,3 +19,4 @@
  * Miscellaneous utility classes.
  */
 package org.apache.nutch.util;
+
Index: src/java/org/apache/nutch/webui/NutchUiApplication.java
===================================================================
--- src/java/org/apache/nutch/webui/NutchUiApplication.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/NutchUiApplication.java	(working copy)
@@ -34,7 +34,8 @@
 import de.agilecoders.wicket.extensions.markup.html.bootstrap.icon.FontAwesomeCssReference;
 
 @Component
-public class NutchUiApplication extends WebApplication implements ApplicationContextAware {
+public class NutchUiApplication extends WebApplication implements
+    ApplicationContextAware {
   private static final String THEME_NAME = "bootstrap";
   private ApplicationContext context;
 
@@ -56,7 +57,8 @@
     Bootstrap.install(this, settings);
     configureTheme(settings);
 
-    getComponentInstantiationListeners().add(new SpringComponentInjector(this, context));
+    getComponentInstantiationListeners().add(
+        new SpringComponentInjector(this, context));
   }
 
   private void configureTheme(BootstrapSettings settings) {
@@ -66,7 +68,8 @@
   }
 
   @Override
-  public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
+  public void setApplicationContext(ApplicationContext applicationContext)
+      throws BeansException {
     this.context = applicationContext;
   }
 }
Index: src/java/org/apache/nutch/webui/NutchUiServer.java
===================================================================
--- src/java/org/apache/nutch/webui/NutchUiServer.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/NutchUiServer.java	(working copy)
@@ -50,7 +50,7 @@
     HelpFormatter formatter = new HelpFormatter();
     try {
       commandLine = parser.parse(options, args);
-    }  catch (Exception e) {
+    } catch (Exception e) {
       formatter.printHelp("NutchUiServer", options, true);
       StringUtils.stringifyException(e);
     }
Index: src/java/org/apache/nutch/webui/client/NutchClient.java
===================================================================
--- src/java/org/apache/nutch/webui/client/NutchClient.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/NutchClient.java	(working copy)
@@ -36,7 +36,7 @@
   public String executeJob(JobConfig jobConfig);
 
   public JobInfo getJobInfo(String jobId);
-  
+
   public Map<String, String> getNutchConfig(String config);
 
   /**
Index: src/java/org/apache/nutch/webui/client/NutchClientFactory.java
===================================================================
--- src/java/org/apache/nutch/webui/client/NutchClientFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/NutchClientFactory.java	(working copy)
@@ -42,7 +42,8 @@
     }
   }
 
-  private static class NutchClientCacheLoader extends CacheLoader<NutchInstance, NutchClient> {
+  private static class NutchClientCacheLoader extends
+      CacheLoader<NutchInstance, NutchClient> {
     @Override
     public NutchClient load(NutchInstance key) throws Exception {
       return new NutchClientImpl(key);
Index: src/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java	(working copy)
@@ -43,8 +43,8 @@
   private List<RemoteCommand> remoteCommands;
   private List<RemoteCommand> executedCommands = Lists.newArrayList();
 
-  public CrawlingCycle(CrawlingCycleListener listener, RemoteCommandExecutor executor, Crawl crawl,
-      List<RemoteCommand> commands) {
+  public CrawlingCycle(CrawlingCycleListener listener,
+      RemoteCommandExecutor executor, Crawl crawl, List<RemoteCommand> commands) {
     this.listener = listener;
     this.executor = executor;
     this.crawl = crawl;
@@ -64,7 +64,7 @@
         listener.onCrawlError(crawl, jobInfo.getMsg());
         return;
       }
-      
+
       executedCommands.add(command);
       listener.commandExecuted(crawl, command, calculateProgress());
     }
@@ -75,7 +75,8 @@
     if (CollectionUtils.isEmpty(remoteCommands)) {
       return 0;
     }
-    return (int) ((float) executedCommands.size() / (float) remoteCommands.size() * 100);
+    return (int) ((float) executedCommands.size()
+        / (float) remoteCommands.size() * 100);
   }
 
 }
Index: src/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java	(working copy)
@@ -46,7 +46,8 @@
 
   public void createClient() {
     ClientConfig clientConfig = new DefaultClientConfig();
-    clientConfig.getFeatures().put(JSONConfiguration.FEATURE_POJO_MAPPING, true);
+    clientConfig.getFeatures()
+        .put(JSONConfiguration.FEATURE_POJO_MAPPING, true);
     this.client = Client.create(clientConfig);
     this.nutchResource = client.resource(instance.getUrl());
   }
@@ -53,7 +54,8 @@
 
   @Override
   public NutchStatus getNutchStatus() {
-    return nutchResource.path("/admin").type(APPLICATION_JSON).get(NutchStatus.class);
+    return nutchResource.path("/admin").type(APPLICATION_JSON)
+        .get(NutchStatus.class);
   }
 
   @Override
@@ -66,12 +68,14 @@
 
   @Override
   public String executeJob(JobConfig jobConfig) {
-    return nutchResource.path("/job/create").type(APPLICATION_JSON).post(String.class, jobConfig);
+    return nutchResource.path("/job/create").type(APPLICATION_JSON)
+        .post(String.class, jobConfig);
   }
 
   @Override
   public JobInfo getJobInfo(String jobId) {
-    return nutchResource.path("/job/" + jobId).type(APPLICATION_JSON).get(JobInfo.class);
+    return nutchResource.path("/job/" + jobId).type(APPLICATION_JSON)
+        .get(JobInfo.class);
   }
 
   @Override
@@ -82,11 +86,13 @@
   @SuppressWarnings("unchecked")
   @Override
   public Map<String, String> getNutchConfig(String config) {
-    return nutchResource.path("/config/" + config).type(APPLICATION_JSON).get(Map.class);
+    return nutchResource.path("/config/" + config).type(APPLICATION_JSON)
+        .get(Map.class);
   }
-  
+
   @Override
   public String createSeed(SeedList seedList) {
-    return nutchResource.path("/seed/create").type(APPLICATION_JSON).post(String.class, seedList);
+    return nutchResource.path("/seed/create").type(APPLICATION_JSON)
+        .post(String.class, seedList);
   }
 }
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommand.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommand.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommand.java	(working copy)
@@ -70,6 +70,7 @@
     if (jobInfo != null) {
       statusInfo = MessageFormat.format("{0}", jobInfo.getState());
     }
-    return MessageFormat.format("{0} status: {1}", jobConfig.getType(), statusInfo);
+    return MessageFormat.format("{0} status: {1}", jobConfig.getType(),
+        statusInfo);
   }
 }
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java	(working copy)
@@ -40,10 +40,12 @@
     jobConfig.setConfId(configId);
     return this;
   }
+
   public RemoteCommandBuilder withCrawlId(String crawlId) {
     jobConfig.setCrawlId(crawlId);
     return this;
   }
+
   public RemoteCommandBuilder withArgument(String key, String value) {
     jobConfig.setArgument(key, value);
     return this;
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java	(working copy)
@@ -56,7 +56,8 @@
   public JobInfo executeRemoteJob(RemoteCommand command) {
     try {
       String jobId = client.executeJob(command.getJobConfig());
-      Future<JobInfo> chekerFuture = executor.submit(new JobStateChecker(jobId));
+      Future<JobInfo> chekerFuture = executor
+          .submit(new JobStateChecker(jobId));
       return chekerFuture.get(getTimeout(command), TimeUnit.MILLISECONDS);
     } catch (Exception e) {
       log.error("Remote command failed", e);
Index: src/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java
===================================================================
--- src/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java	(working copy)
@@ -62,8 +62,9 @@
   }
 
   private RemoteCommand inject() {
-    RemoteCommandBuilder builder = RemoteCommandBuilder.instance(JobType.INJECT)
-        .withCrawlId(crawl.getCrawlId()).withArgument("seedDir", crawl.getSeedDirectory());
+    RemoteCommandBuilder builder = RemoteCommandBuilder
+        .instance(JobType.INJECT).withCrawlId(crawl.getCrawlId())
+        .withArgument("seedDir", crawl.getSeedDirectory());
     return builder.build();
   }
 
@@ -72,7 +73,8 @@
   }
 
   private RemoteCommand createFetchCommand() {
-    return createBuilder(JobType.FETCH).withTimeout(Duration.standardSeconds(50)).build();
+    return createBuilder(JobType.FETCH).withTimeout(
+        Duration.standardSeconds(50)).build();
   }
 
   private RemoteCommand createParseCommand() {
@@ -88,8 +90,8 @@
   }
 
   private RemoteCommandBuilder createBuilder(JobType jobType) {
-    return RemoteCommandBuilder.instance(jobType).withCrawlId(crawl.getCrawlId())
-        .withArgument("batch", batchId);
+    return RemoteCommandBuilder.instance(jobType)
+        .withCrawlId(crawl.getCrawlId()).withArgument("batch", batchId);
   }
 
 }
Index: src/java/org/apache/nutch/webui/client/model/JobConfig.java
===================================================================
--- src/java/org/apache/nutch/webui/client/model/JobConfig.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/model/JobConfig.java	(working copy)
@@ -34,7 +34,7 @@
   public void setArgument(String key, String value) {
     args.put(key, value);
   }
-  
+
   public String getCrawlId() {
     return crawlId;
   }
Index: src/java/org/apache/nutch/webui/client/model/NutchStatus.java
===================================================================
--- src/java/org/apache/nutch/webui/client/model/NutchStatus.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/client/model/NutchStatus.java	(working copy)
@@ -22,7 +22,7 @@
 import java.util.Set;
 
 public class NutchStatus implements Serializable {
-  
+
   private Date startDate;
   private Set<String> configuration;
   private Collection<JobInfo> jobs;
Index: src/java/org/apache/nutch/webui/config/CustomDaoFactory.java
===================================================================
--- src/java/org/apache/nutch/webui/config/CustomDaoFactory.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/config/CustomDaoFactory.java	(working copy)
@@ -27,7 +27,8 @@
 
 public class CustomDaoFactory {
   private ConnectionSource connectionSource;
-  private List<Dao<?, ?>> registredDaos = Collections.synchronizedList(new ArrayList<Dao<?, ?>>());
+  private List<Dao<?, ?>> registredDaos = Collections
+      .synchronizedList(new ArrayList<Dao<?, ?>>());
 
   public CustomDaoFactory(ConnectionSource connectionSource) {
     this.connectionSource = connectionSource;
Index: src/java/org/apache/nutch/webui/config/CustomTableCreator.java
===================================================================
--- src/java/org/apache/nutch/webui/config/CustomTableCreator.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/config/CustomTableCreator.java	(working copy)
@@ -30,7 +30,8 @@
   private ConnectionSource connectionSource;
   private List<Dao<?, ?>> configuredDaos;
 
-  public CustomTableCreator(ConnectionSource connectionSource, List<Dao<?, ?>> configuredDaos) {
+  public CustomTableCreator(ConnectionSource connectionSource,
+      List<Dao<?, ?>> configuredDaos) {
     this.connectionSource = connectionSource;
     this.configuredDaos = configuredDaos;
     initialize();
@@ -38,7 +39,8 @@
 
   private void initialize() {
     if (configuredDaos == null) {
-      throw new IllegalStateException("configuredDaos was not set in " + getClass().getSimpleName());
+      throw new IllegalStateException("configuredDaos was not set in "
+          + getClass().getSimpleName());
     }
 
     for (Dao<?, ?> dao : configuredDaos) {
Index: src/java/org/apache/nutch/webui/config/SpringConfiguration.java
===================================================================
--- src/java/org/apache/nutch/webui/config/SpringConfiguration.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/config/SpringConfiguration.java	(working copy)
@@ -51,8 +51,8 @@
 
   @Bean
   public JdbcConnectionSource getConnectionSource() throws SQLException {
-    JdbcConnectionSource source = new JdbcConnectionSource("jdbc:h2:~/.nutch/config",
-        new H2DatabaseType());
+    JdbcConnectionSource source = new JdbcConnectionSource(
+        "jdbc:h2:~/.nutch/config", new H2DatabaseType());
     source.initialize();
     return source;
   }
@@ -84,7 +84,8 @@
 
   @Bean
   public CustomTableCreator createTableCreator() throws SQLException {
-    return new CustomTableCreator(getConnectionSource(), getDaoFactory().getCreatedDaos());
+    return new CustomTableCreator(getConnectionSource(), getDaoFactory()
+        .getCreatedDaos());
   }
 
 }
Index: src/java/org/apache/nutch/webui/model/NutchConfig.java
===================================================================
--- src/java/org/apache/nutch/webui/model/NutchConfig.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/model/NutchConfig.java	(working copy)
@@ -3,16 +3,19 @@
 public class NutchConfig {
   private String name = "name";
   private String value;
-  
-  public void setName (String name){
+
+  public void setName(String name) {
     this.name = name;
   }
-  public String getName(){
+
+  public String getName() {
     return this.name;
   }
+
   public String getValue() {
     return value;
   }
+
   public void setValue(String value) {
     this.value = value;
   }
Index: src/java/org/apache/nutch/webui/model/SeedUrl.java
===================================================================
--- src/java/org/apache/nutch/webui/model/SeedUrl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/model/SeedUrl.java	(working copy)
@@ -57,7 +57,7 @@
   public void setUrl(String url) {
     this.url = url;
   }
-  
+
   @JsonIgnore
   public SeedList getSeedList() {
     return seedList;
Index: src/java/org/apache/nutch/webui/pages/AbstractBasePage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/AbstractBasePage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/AbstractBasePage.java	(working copy)
@@ -74,16 +74,25 @@
     navbar.setPosition(Position.TOP);
     add(navbar);
 
-    addMenuItem(DashboardPage.class, "navbar.menu.dashboard", FontAwesomeIconType.dashboard);
-    addMenuItem(StatisticsPage.class, "navbar.menu.statistics", FontAwesomeIconType.bar_chart_o);
-    addMenuItem(InstancesPage.class, "navbar.menu.instances", FontAwesomeIconType.gears);
-    addMenuItem(SettingsPage.class, "navbar.menu.settings", FontAwesomeIconType.wrench);
-    addMenuItem(CrawlsPage.class, "navbar.menu.crawls", FontAwesomeIconType.refresh);
-    addMenuItem(SchedulingPage.class, "navbar.menu.scheduling", FontAwesomeIconType.clock_o);
-    addMenuItem(SearchPage.class, "navbar.menu.search", FontAwesomeIconType.search);
-    addMenuItem(SeedListsPage.class, "navbar.menu.seedLists", FontAwesomeIconType.file);
+    addMenuItem(DashboardPage.class, "navbar.menu.dashboard",
+        FontAwesomeIconType.dashboard);
+    addMenuItem(StatisticsPage.class, "navbar.menu.statistics",
+        FontAwesomeIconType.bar_chart_o);
+    addMenuItem(InstancesPage.class, "navbar.menu.instances",
+        FontAwesomeIconType.gears);
+    addMenuItem(SettingsPage.class, "navbar.menu.settings",
+        FontAwesomeIconType.wrench);
+    addMenuItem(CrawlsPage.class, "navbar.menu.crawls",
+        FontAwesomeIconType.refresh);
+    addMenuItem(SchedulingPage.class, "navbar.menu.scheduling",
+        FontAwesomeIconType.clock_o);
+    addMenuItem(SearchPage.class, "navbar.menu.search",
+        FontAwesomeIconType.search);
+    addMenuItem(SeedListsPage.class, "navbar.menu.seedLists",
+        FontAwesomeIconType.file);
 
-    navbar.addComponents(transform(ComponentPosition.RIGHT, addInstancesMenuMenu()));
+    navbar.addComponents(transform(ComponentPosition.RIGHT,
+        addInstancesMenuMenu()));
     navbar.addComponents(transform(ComponentPosition.RIGHT, addUserMenu()));
 
     add(new NotificationPanel("globalNotificationPanel"));
@@ -99,11 +108,13 @@
       @Override
       protected List<AbstractLink> newSubMenuButtons(final String buttonMarkupId) {
         List<AbstractLink> subMenu = Lists.newArrayList();
-        subMenu.add(new MenuBookmarkablePageLink<Void>(UserSettingsPage.class, new ResourceModel(
-            "navbar.userMenu.settings")).setIconType(FontAwesomeIconType.gear));
+        subMenu.add(new MenuBookmarkablePageLink<Void>(UserSettingsPage.class,
+            new ResourceModel("navbar.userMenu.settings"))
+            .setIconType(FontAwesomeIconType.gear));
         subMenu.add(new MenuDivider());
-        subMenu.add(new MenuBookmarkablePageLink<Void>(LogOutPage.class, new ResourceModel(
-            "navbar.userMenu.logout")).setIconType(FontAwesomeIconType.power_off));
+        subMenu.add(new MenuBookmarkablePageLink<Void>(LogOutPage.class,
+            new ResourceModel("navbar.userMenu.logout"))
+            .setIconType(FontAwesomeIconType.power_off));
         return subMenu;
       }
     }.setIconType(FontAwesomeIconType.user);
@@ -119,7 +130,8 @@
         List<NutchInstance> instances = instanceService.getInstances();
         List<AbstractLink> subMenu = Lists.newArrayList();
         for (NutchInstance instance : instances) {
-          subMenu.add(new Link<NutchInstance>(buttonMarkupId, Model.of(instance)) {
+          subMenu.add(new Link<NutchInstance>(buttonMarkupId, Model
+              .of(instance)) {
             @Override
             public void onClick() {
               currentInstance.setObject(getModelObject());
@@ -134,8 +146,10 @@
     return instancesMenu;
   }
 
-  private <P extends Page> void addMenuItem(Class<P> page, String label, IconType icon) {
-    Component button = new NavbarButton<Void>(page, Model.of(getString(label))).setIconType(icon);
+  private <P extends Page> void addMenuItem(Class<P> page, String label,
+      IconType icon) {
+    Component button = new NavbarButton<Void>(page, Model.of(getString(label)))
+        .setIconType(icon);
     navbar.addComponents(NavbarComponents.transform(LEFT, button));
   }
 
Index: src/java/org/apache/nutch/webui/pages/LogOutPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/LogOutPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/LogOutPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class LogOutPage extends AbstractBasePage{
+public class LogOutPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/SchedulingPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/SchedulingPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/SchedulingPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class SchedulingPage extends AbstractBasePage{
+public class SchedulingPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/SearchPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/SearchPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/SearchPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class SearchPage extends AbstractBasePage{
+public class SearchPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/StatisticsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/StatisticsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/StatisticsPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class StatisticsPage extends AbstractBasePage{
+public class StatisticsPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/UrlsUploadPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/UrlsUploadPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/UrlsUploadPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class UrlsUploadPage extends AbstractBasePage{
+public class UrlsUploadPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/UserSettingsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/UserSettingsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/UserSettingsPage.java	(working copy)
@@ -16,6 +16,6 @@
  */
 package org.apache.nutch.webui.pages;
 
-public class UserSettingsPage extends AbstractBasePage{
+public class UserSettingsPage extends AbstractBasePage {
 
 }
Index: src/java/org/apache/nutch/webui/pages/components/ColorEnumLabel.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/components/ColorEnumLabel.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/components/ColorEnumLabel.java	(working copy)
@@ -63,7 +63,8 @@
     }
   }
 
-  public static <E extends Enum<E>> ColorEnumLabelBuilder<E> getBuilder(String id) {
+  public static <E extends Enum<E>> ColorEnumLabelBuilder<E> getBuilder(
+      String id) {
     return new ColorEnumLabelBuilder<E>(id);
   }
 
Index: src/java/org/apache/nutch/webui/pages/components/CpmIteratorAdapter.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/components/CpmIteratorAdapter.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/components/CpmIteratorAdapter.java	(working copy)
@@ -21,9 +21,11 @@
 import org.apache.wicket.model.IModel;
 
 /**
- * This is iterator adapter, which wraps iterable items with CompoundPropertyModel.
+ * This is iterator adapter, which wraps iterable items with
+ * CompoundPropertyModel.
+ *
  * @author feodor
- *
+ *
  * @param <T>
  */
 public class CpmIteratorAdapter<T> extends ModelIteratorAdapter<T> {
Index: src/java/org/apache/nutch/webui/pages/crawls/CrawlPanel.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/crawls/CrawlPanel.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/crawls/CrawlPanel.java	(working copy)
@@ -65,8 +65,9 @@
     form.add(new TextField<String>("crawlName").setRequired(true));
 
     form.add(new DropDownChoice<Integer>("numberOfRounds", getNumbersOfRounds()));
-    form.add(new DropDownChoice<SeedList>("seedList", seedListService.findAll(),
-        new ChoiceRenderer<SeedList>("name")).setRequired(true));
+    form.add(new DropDownChoice<SeedList>("seedList",
+        seedListService.findAll(), new ChoiceRenderer<SeedList>("name"))
+        .setRequired(true));
 
     addButton(new AjaxSubmitLink("button", form) {
       @Override
Index: src/java/org/apache/nutch/webui/pages/crawls/CrawlsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/crawls/CrawlsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/crawls/CrawlsPage.java	(working copy)
@@ -103,7 +103,7 @@
       }
     }.add(new Label("crawlName")));
     item.add(new Label("seedList.name"));
-    
+
     item.add(new Label("progress"));
     item.add(createStatusLabel());
     item.add(new Link<Crawl>("start", item.getModel()) {
@@ -132,8 +132,8 @@
   }
 
   private EnumLabel<CrawlStatus> createStatusLabel() {
-    return new ColorEnumLabelBuilder<CrawlStatus>("status").withEnumColor(NEW, Default)
-        .withEnumColor(ERROR, Danger).withEnumColor(FINISHED, Success)
-        .withEnumColor(CRAWLING, Info).build();
+    return new ColorEnumLabelBuilder<CrawlStatus>("status")
+        .withEnumColor(NEW, Default).withEnumColor(ERROR, Danger)
+        .withEnumColor(FINISHED, Success).withEnumColor(CRAWLING, Info).build();
   }
 }
Index: src/java/org/apache/nutch/webui/pages/instances/InstancePanel.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/instances/InstancePanel.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/instances/InstancePanel.java	(working copy)
@@ -37,7 +37,8 @@
     form.add(new TextField<String>("host").setRequired(true));
     form.add(new TextField<Integer>("port").setRequired(true));
     form.add(new TextField<String>("username"));
-    form.add(new PasswordTextField("password").setResetPassword(false).setRequired(false));
+    form.add(new PasswordTextField("password").setResetPassword(false)
+        .setRequired(false));
 
     addButton(new AjaxSubmitLink("button", form) {
       @Override
Index: src/java/org/apache/nutch/webui/pages/instances/InstancesPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/instances/InstancesPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/instances/InstancesPage.java	(working copy)
@@ -53,11 +53,13 @@
   }
 
   private RefreshingView<NutchInstance> refreshingView() {
-    RefreshingView<NutchInstance> instances = new RefreshingView<NutchInstance>("instances") {
+    RefreshingView<NutchInstance> instances = new RefreshingView<NutchInstance>(
+        "instances") {
 
       @Override
       protected Iterator<IModel<NutchInstance>> getItemModels() {
-        return new CpmIteratorAdapter<NutchInstance>(instanceService.getInstances());
+        return new CpmIteratorAdapter<NutchInstance>(
+            instanceService.getInstances());
       }
 
       @Override
@@ -72,7 +74,8 @@
     return new AjaxLink<NutchInstance>("addInstance") {
       @Override
       public void onClick(AjaxRequestTarget target) {
-        instancePanel.setModel(new CompoundPropertyModel<NutchInstance>(new NutchInstance()));
+        instancePanel.setModel(new CompoundPropertyModel<NutchInstance>(
+            new NutchInstance()));
         target.add(instancePanel);
         instancePanel.appendShowDialogJavaScript(target);
       }
Index: src/java/org/apache/nutch/webui/pages/menu/VerticalMenu.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/menu/VerticalMenu.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/menu/VerticalMenu.java	(working copy)
@@ -18,7 +18,7 @@
 
 import de.agilecoders.wicket.core.markup.html.bootstrap.navbar.Navbar;
 
-public class VerticalMenu extends Navbar{
+public class VerticalMenu extends Navbar {
 
   public VerticalMenu(String componentId) {
     super(componentId);
Index: src/java/org/apache/nutch/webui/pages/seed/SeedListsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/seed/SeedListsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/seed/SeedListsPage.java	(working copy)
@@ -44,7 +44,8 @@
 
   public SeedListsPage() {
 
-    RefreshingView<SeedList> seedLists = new RefreshingView<SeedList>("seedLists") {
+    RefreshingView<SeedList> seedLists = new RefreshingView<SeedList>(
+        "seedLists") {
 
       @Override
       protected Iterator<IModel<SeedList>> getItemModels() {
@@ -56,7 +57,8 @@
         PageParameters params = new PageParameters();
         params.add("id", item.getModelObject().getId());
 
-        Link<Void> edit = new BookmarkablePageLink<Void>("edit", SeedPage.class, params);
+        Link<Void> edit = new BookmarkablePageLink<Void>("edit",
+            SeedPage.class, params);
         edit.add(new Label("name"));
         item.add(edit);
 
Index: src/java/org/apache/nutch/webui/pages/seed/SeedPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/seed/SeedPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/seed/SeedPage.java	(working copy)
@@ -122,7 +122,8 @@
   }
 
   private void addUrlForm() {
-    urlForm = new Form<SeedUrl>("urlForm", CompoundPropertyModel.of(Model.of(new SeedUrl())));
+    urlForm = new Form<SeedUrl>("urlForm", CompoundPropertyModel.of(Model
+        .of(new SeedUrl())));
     urlForm.setOutputMarkupId(true);
     urlForm.add(new TextField<String>("url"));
     urlForm.add(new AjaxSubmitLink("addUrl", urlForm) {
Index: src/java/org/apache/nutch/webui/pages/settings/SettingsPage.java
===================================================================
--- src/java/org/apache/nutch/webui/pages/settings/SettingsPage.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/pages/settings/SettingsPage.java	(working copy)
@@ -26,12 +26,14 @@
   public SettingsPage() {
     settingsTable = new WebMarkupContainer("settingsTable");
     settingsTable.setOutputMarkupId(true);
-    RefreshingView<NutchConfig> nutchConfig = new RefreshingView<NutchConfig>("settings") {
+    RefreshingView<NutchConfig> nutchConfig = new RefreshingView<NutchConfig>(
+        "settings") {
 
       @Override
       protected Iterator<IModel<NutchConfig>> getItemModels() {
         return new CpmIteratorAdapter<NutchConfig>(
-            convertNutchConfig(nutchService.getNutchConfig(getCurrentInstance().getId())));
+            convertNutchConfig(nutchService.getNutchConfig(getCurrentInstance()
+                .getId())));
       }
 
       @Override
Index: src/java/org/apache/nutch/webui/service/CrawlService.java
===================================================================
--- src/java/org/apache/nutch/webui/service/CrawlService.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/CrawlService.java	(working copy)
@@ -22,7 +22,7 @@
 import org.apache.nutch.webui.model.NutchInstance;
 
 public interface CrawlService {
-  
+
   public void saveCrawl(Crawl crawl);
 
   public List<Crawl> getCrawls();
Index: src/java/org/apache/nutch/webui/service/NutchService.java
===================================================================
--- src/java/org/apache/nutch/webui/service/NutchService.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/NutchService.java	(working copy)
@@ -24,8 +24,8 @@
 public interface NutchService {
   public ConnectionStatus getConnectionStatus(Long instanceId);
 
-  public  Map<String, String> getNutchConfig(Long instanceId);
-  
+  public Map<String, String> getNutchConfig(Long instanceId);
+
   public NutchStatus getNutchStatus(Long instanceId);
 
 }
Index: src/java/org/apache/nutch/webui/service/impl/CrawlServiceImpl.java
===================================================================
--- src/java/org/apache/nutch/webui/service/impl/CrawlServiceImpl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/impl/CrawlServiceImpl.java	(working copy)
@@ -67,7 +67,7 @@
 
       CrawlingCycle cycle = new CrawlingCycle(this, executor, crawl, commands);
       cycle.executeCrawlCycle();
-      
+
     } catch (Exception e) {
       crawl.setStatus(CrawlStatus.ERROR);
       saveCrawl(crawl);
Index: src/java/org/apache/nutch/webui/service/impl/NutchServiceImpl.java
===================================================================
--- src/java/org/apache/nutch/webui/service/impl/NutchServiceImpl.java	(revision 1650444)
+++ src/java/org/apache/nutch/webui/service/impl/NutchServiceImpl.java	(working copy)
@@ -36,7 +36,8 @@
 
 @Service
 public class NutchServiceImpl implements NutchService {
-  private static final Logger logger = LoggerFactory.getLogger(NutchServiceImpl.class);
+  private static final Logger logger = LoggerFactory
+      .getLogger(NutchServiceImpl.class);
 
   @Resource
   private NutchClientFactory nutchClientFactory;
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(revision 1650444)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(working copy)
@@ -38,101 +38,101 @@
 
 /** Adds basic searchable fields to a document. */
 public class CCIndexingFilter implements IndexingFilter {
-	public static final Logger LOG = LoggerFactory.getLogger(CCIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(CCIndexingFilter.class);
 
-	/** The name of the document field we use. */
-	public static String FIELD = "cc";
+  /** The name of the document field we use. */
+  public static String FIELD = "cc";
 
-	private Configuration conf;
+  private Configuration conf;
 
-	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	static {
-		FIELDS.add(WebPage.Field.BASE_URL);
-		FIELDS.add(WebPage.Field.METADATA);
-	}
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-	/**
-	 * Add the features represented by a license URL. Urls are of the form
-	 * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
-	 * license feature.
-	 */
-	public void addUrlFeatures(NutchDocument doc, String urlString) {
-		try {
-			URL url = new URL(urlString);
+  /**
+   * Add the features represented by a license URL. Urls are of the form
+   * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
+   * license feature.
+   */
+  public void addUrlFeatures(NutchDocument doc, String urlString) {
+    try {
+      URL url = new URL(urlString);
 
-			// tokenize the path of the url, breaking at slashes and dashes
-			StringTokenizer names = new StringTokenizer(url.getPath(), "/-");
+      // tokenize the path of the url, breaking at slashes and dashes
+      StringTokenizer names = new StringTokenizer(url.getPath(), "/-");
 
-			if (names.hasMoreTokens())
-				names.nextToken(); // throw away "licenses"
+      if (names.hasMoreTokens())
+        names.nextToken(); // throw away "licenses"
 
-			// add a feature per component after "licenses"
-			while (names.hasMoreTokens()) {
-				String feature = names.nextToken();
-				addFeature(doc, feature);
-			}
-		} catch (MalformedURLException e) {
-			if (LOG.isWarnEnabled()) {
-				LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
-			}
-		}
-	}
+      // add a feature per component after "licenses"
+      while (names.hasMoreTokens()) {
+        String feature = names.nextToken();
+        addFeature(doc, feature);
+      }
+    } catch (MalformedURLException e) {
+      if (LOG.isWarnEnabled()) {
+        LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
+      }
+    }
+  }
 
-	private void addFeature(NutchDocument doc, String feature) {
-		doc.add(FIELD, feature);
-	}
+  private void addFeature(NutchDocument doc, String feature) {
+    doc.add(FIELD, feature);
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	public Configuration getConf() {
-		return this.conf;
-	}
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-	@Override
-	public Collection<Field> getFields() {
-		return FIELDS;
-	}
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
 
-	@Override
-	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
-			throws IndexingException {
+  @Override
+  public NutchDocument filter(NutchDocument doc, String url, WebPage page)
+      throws IndexingException {
 
-		ByteBuffer blicense = page.getMetadata().get(new Utf8(
-				CreativeCommons.LICENSE_URL));
-		if (blicense != null) {
-			String licenseUrl = Bytes.toString(blicense);
-			if (LOG.isInfoEnabled()) {
-				LOG.info("CC: indexing " + licenseUrl + " for: "
-						+ url.toString());
-			}
+    ByteBuffer blicense = page.getMetadata().get(
+        new Utf8(CreativeCommons.LICENSE_URL));
+    if (blicense != null) {
+      String licenseUrl = Bytes.toString(blicense);
+      if (LOG.isInfoEnabled()) {
+        LOG.info("CC: indexing " + licenseUrl + " for: " + url.toString());
+      }
 
-			// add the entire license as cc:license=xxx
-			addFeature(doc, "license=" + licenseUrl);
+      // add the entire license as cc:license=xxx
+      addFeature(doc, "license=" + licenseUrl);
 
-			// index license attributes extracted of the license url
-			addUrlFeatures(doc, licenseUrl);
-		}
+      // index license attributes extracted of the license url
+      addUrlFeatures(doc, licenseUrl);
+    }
 
-		// index the license location as cc:meta=xxx
-		ByteBuffer blicenseloc = page.getMetadata().get(new Utf8(
-				CreativeCommons.LICENSE_LOCATION));
-		if (blicenseloc != null) {
-			String licenseLocation = Bytes.toString(blicenseloc);
-			addFeature(doc, "meta=" + licenseLocation);
-		}
+    // index the license location as cc:meta=xxx
+    ByteBuffer blicenseloc = page.getMetadata().get(
+        new Utf8(CreativeCommons.LICENSE_LOCATION));
+    if (blicenseloc != null) {
+      String licenseLocation = Bytes.toString(blicenseloc);
+      addFeature(doc, "meta=" + licenseLocation);
+    }
 
-		// index the work type cc:type=xxx
-		ByteBuffer bworkType = page.getMetadata().get(new Utf8(
-				CreativeCommons.WORK_TYPE));
-		if (bworkType != null) {
-			String workType = Bytes.toString(bworkType);
-			addFeature(doc, workType);
-		}
+    // index the work type cc:type=xxx
+    ByteBuffer bworkType = page.getMetadata().get(
+        new Utf8(CreativeCommons.WORK_TYPE));
+    if (bworkType != null) {
+      String workType = Bytes.toString(bworkType);
+      addFeature(doc, workType);
+    }
 
-		return doc;
-	}
+    return doc;
+  }
 
 }
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(revision 1650444)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(working copy)
@@ -55,8 +55,8 @@
     }
 
     /** Scan the document adding attributes to metadata. */
-    public static void walk(Node doc, URL base, WebPage page,
-        Configuration conf) throws ParseException {
+    public static void walk(Node doc, URL base, WebPage page, Configuration conf)
+        throws ParseException {
 
       // walk the DOM tree, scanning for license data
       Walker walker = new Walker(base);
@@ -67,7 +67,7 @@
       String licenseLocation = null;
       if (walker.rdfLicense != null) { // 1st choice: subject in RDF
         licenseLocation = "rdf";
-	licenseUrl = walker.rdfLicense;
+        licenseUrl = walker.rdfLicense;
       } else if (walker.relLicense != null) { // 2nd: anchor w/
         // rel=license
         licenseLocation = "rel";
@@ -74,29 +74,30 @@
         licenseUrl = walker.relLicense.toString();
       } else if (walker.anchorLicense != null) { // 3rd: anchor w/ CC
         // license
-	licenseLocation = "a";
-	licenseUrl = walker.anchorLicense.toString();
+        licenseLocation = "a";
+        licenseUrl = walker.anchorLicense.toString();
       } else if (conf.getBoolean("creativecommons.exclude.unlicensed", false)) {
-          throw new ParseException("No CC license.  Excluding.");
+        throw new ParseException("No CC license.  Excluding.");
       }
 
       // add license to metadata
       if (licenseUrl != null) {
         if (LOG.isDebugEnabled()) {
-	  LOG.debug("CC: found " + licenseUrl + " in " + licenseLocation + " of " + base);
-	}
-	page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_URL),
-	ByteBuffer.wrap(licenseUrl.getBytes()));
-	page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_LOCATION),
-	    ByteBuffer.wrap(licenseLocation.getBytes()));
+          LOG.debug("CC: found " + licenseUrl + " in " + licenseLocation
+              + " of " + base);
+        }
+        page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_URL),
+            ByteBuffer.wrap(licenseUrl.getBytes()));
+        page.getMetadata().put(new Utf8(CreativeCommons.LICENSE_LOCATION),
+            ByteBuffer.wrap(licenseLocation.getBytes()));
       }
 
       if (walker.workType != null) {
         if (LOG.isDebugEnabled()) {
-	  LOG.debug("CC: found " + walker.workType + " in " + base);
-	}
-	page.getMetadata().put(new Utf8(CreativeCommons.WORK_TYPE),
-	   ByteBuffer.wrap(walker.workType.getBytes()));
+          LOG.debug("CC: found " + walker.workType + " in " + base);
+        }
+        page.getMetadata().put(new Utf8(CreativeCommons.WORK_TYPE),
+            ByteBuffer.wrap(walker.workType.getBytes()));
       }
 
     }
@@ -121,8 +122,8 @@
     }
 
     /**
-     * Extract license url from element, if any. Thse are the href attribute
-     * of anchor elements with rel="license". These must also point to
+     * Extract license url from element, if any. These are the href attribute of
+     * anchor elements with rel="license". These must also point to
      * http://creativecommons.org/licenses/.
      */
     private void findLicenseUrl(Element element) {
@@ -137,27 +138,27 @@
       try {
         URL url = new URL(base, href); // resolve the url
         // check that it's a CC license URL
-	if ("http".equalsIgnoreCase(url.getProtocol())
-	    && "creativecommons.org".equalsIgnoreCase(url.getHost())
-	    && url.getPath() != null && url.getPath().startsWith("/licenses/")
-	    && url.getPath().length() > "/licenses/".length()) {
+        if ("http".equalsIgnoreCase(url.getProtocol())
+            && "creativecommons.org".equalsIgnoreCase(url.getHost())
+            && url.getPath() != null && url.getPath().startsWith("/licenses/")
+            && url.getPath().length() > "/licenses/".length()) {
 
-	  // check rel="license"
-	  String rel = element.getAttribute("rel");
-	  if (rel != null && "license".equals(rel)
-	      && this.relLicense == null) {
-	    this.relLicense = url; // found rel license
-	  } else if (this.anchorLicense == null) {
-	    this.anchorLicense = url; // found anchor license
-	  }
-	}
+          // check rel="license"
+          String rel = element.getAttribute("rel");
+          if (rel != null && "license".equals(rel) && this.relLicense == null) {
+            this.relLicense = url; // found rel license
+          } else if (this.anchorLicense == null) {
+            this.anchorLicense = url; // found anchor license
+          }
+        }
       } catch (MalformedURLException e) { // ignore malformed urls
       }
     }
 
     /** Configure a namespace aware XML parser. */
-    private static final DocumentBuilderFactory FACTORY = DocumentBuilderFactory.newInstance();
-      
+    private static final DocumentBuilderFactory FACTORY = DocumentBuilderFactory
+        .newInstance();
+
     static {
       FACTORY.setNamespaceAware(true);
     }
@@ -177,129 +178,132 @@
       if (rdfPosition < 0)
         return; // no RDF, abort
       int nsPosition = comment.indexOf(CC_NS);
-        if (nsPosition < 0)
-	  return; // no RDF, abort
-	// try to parse the XML
-	Document doc;
-	try {
-          DocumentBuilder parser = FACTORY.newDocumentBuilder();
-	  doc = parser.parse(new InputSource(new StringReader(comment)));
-	} catch (Exception e) {
-	  if (LOG.isWarnEnabled()) {
-	    LOG.warn("CC: Failed to parse RDF in " + base + ": " + e);
-	  }
-	  // e.printStackTrace();
-	  return;
-	}
+      if (nsPosition < 0)
+        return; // no RDF, abort
+      // try to parse the XML
+      Document doc;
+      try {
+        DocumentBuilder parser = FACTORY.newDocumentBuilder();
+        doc = parser.parse(new InputSource(new StringReader(comment)));
+      } catch (Exception e) {
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("CC: Failed to parse RDF in " + base + ": " + e);
+        }
+        // e.printStackTrace();
+        return;
+      }
 
-	// check that root is rdf:RDF
-	NodeList roots = doc.getElementsByTagNameNS(RDF_NS, "RDF");
-	if (roots.getLength() != 1) {
-	  if (LOG.isWarnEnabled()) {
-	    LOG.warn("CC: No RDF root in " + base);
-	  }
-	  return;
-	}
-	Element rdf = (Element) roots.item(0);
+      // check that root is rdf:RDF
+      NodeList roots = doc.getElementsByTagNameNS(RDF_NS, "RDF");
+      if (roots.getLength() != 1) {
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("CC: No RDF root in " + base);
+        }
+        return;
+      }
+      Element rdf = (Element) roots.item(0);
 
-	// get cc:License nodes inside rdf:RDF
-	NodeList licenses = rdf.getElementsByTagNameNS(CC_NS, "License");
-	for (int i = 0; i < licenses.getLength(); i++) {
-          Element l = (Element) licenses.item(i);
-	  // license is rdf:about= attribute from cc:License
-	  this.rdfLicense = l.getAttributeNodeNS(RDF_NS, "about").getValue();
+      // get cc:License nodes inside rdf:RDF
+      NodeList licenses = rdf.getElementsByTagNameNS(CC_NS, "License");
+      for (int i = 0; i < licenses.getLength(); i++) {
+        Element l = (Element) licenses.item(i);
+        // license is rdf:about= attribute from cc:License
+        this.rdfLicense = l.getAttributeNodeNS(RDF_NS, "about").getValue();
 
-          // walk predicates of cc:License
-	  NodeList predicates = l.getChildNodes();
-	  for (int j = 0; j < predicates.getLength(); j++) {
-	    Node predicateNode = predicates.item(j);
-	    if (!(predicateNode instanceof Element))
-	      continue;
-	      Element predicateElement = (Element) predicateNode;
-              // extract predicates of cc:xxx predicates
-	      if (!CC_NS.equals(predicateElement.getNamespaceURI())) {
-	        continue;
-	      }
-	      String predicate = predicateElement.getLocalName();
-              // object is rdf:resource from cc:xxx predicates
-	      String object = predicateElement.getAttributeNodeNS(RDF_NS, "resource").getValue();
-              // add object and predicate to metadata
-	      // metadata.put(object, predicate);
-	      //if (LOG.isInfoEnabled()) {
-	      // LOG.info("CC: found: "+predicate+"="+object);
-	      // }
-	  }
-	}
+        // walk predicates of cc:License
+        NodeList predicates = l.getChildNodes();
+        for (int j = 0; j < predicates.getLength(); j++) {
+          Node predicateNode = predicates.item(j);
+          if (!(predicateNode instanceof Element))
+            continue;
+          Element predicateElement = (Element) predicateNode;
+          // extract predicates of cc:xxx predicates
+          if (!CC_NS.equals(predicateElement.getNamespaceURI())) {
+            continue;
+          }
+          String predicate = predicateElement.getLocalName();
+          // object is rdf:resource from cc:xxx predicates
+          String object = predicateElement.getAttributeNodeNS(RDF_NS,
+              "resource").getValue();
+          // add object and predicate to metadata
+          // metadata.put(object, predicate);
+          // if (LOG.isInfoEnabled()) {
+          // LOG.info("CC: found: "+predicate+"="+object);
+          // }
+        }
+      }
 
-	// get cc:Work nodes from rdf:RDF
-	NodeList works = rdf.getElementsByTagNameNS(CC_NS, "Work");
-	for (int i = 0; i < works.getLength(); i++) {
-	  Element l = (Element) works.item(i);
+      // get cc:Work nodes from rdf:RDF
+      NodeList works = rdf.getElementsByTagNameNS(CC_NS, "Work");
+      for (int i = 0; i < works.getLength(); i++) {
+        Element l = (Element) works.item(i);
 
-	  // get dc:type nodes from cc:Work
-	  NodeList types = rdf.getElementsByTagNameNS(DC_NS, "type");
-	  for (int j = 0; j < types.getLength(); j++) {
-	    Element type = (Element) types.item(j);
-	    String workUri = type.getAttributeNodeNS(RDF_NS, "resource").getValue();
-	    this.workType = (String) WORK_TYPE_NAMES.get(workUri);
-	    break;
-	  }
-	}
+        // get dc:type nodes from cc:Work
+        NodeList types = rdf.getElementsByTagNameNS(DC_NS, "type");
+        for (int j = 0; j < types.getLength(); j++) {
+          Element type = (Element) types.item(j);
+          String workUri = type.getAttributeNodeNS(RDF_NS, "resource")
+              .getValue();
+          this.workType = (String) WORK_TYPE_NAMES.get(workUri);
+          break;
+        }
       }
     }
+  }
 
-    private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
-      
-    static {
-      FIELDS.add(WebPage.Field.BASE_URL);
-      FIELDS.add(WebPage.Field.METADATA);
-    }
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-    private static final HashMap<String,String> WORK_TYPE_NAMES = new HashMap<String,String>();
-        
-    static {
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive", "interactive");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
-      WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
-    }
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-    private Configuration conf;
+  private static final HashMap<String, String> WORK_TYPE_NAMES = new HashMap<String, String>();
 
-    public void setConf(Configuration conf) {
-      this.conf = conf;
-    }
+  static {
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive",
+        "interactive");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
+  }
 
-    public Configuration getConf() {
-      return this.conf;
-    }
+  private Configuration conf;
 
-    @Override
-    public Collection<Field> getFields() {
-      return FIELDS;
-    }
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-    /**
-     * Adds metadata or otherwise modifies a parse of an HTML document, given
-     * the DOM tree of a page.
-     */
-    @Override
-    public Parse filter(String url, WebPage page, Parse parse,
-        HTMLMetaTags metaTags, DocumentFragment doc) {
-      // construct base url
-      URL base;
-      try {
-        base = new URL(page.getBaseUrl().toString());
-	// extract license metadata
-	Walker.walk(doc, base, page, getConf());
-      } catch (Exception e) {
-        LOG.error("Error parsing " + url, e);
-	return ParseStatusUtils.getEmptyParse(e, getConf());
-      }
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-      return parse;
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
+
+  /**
+   * Adds metadata or otherwise modifies a parse of an HTML document, given the
+   * DOM tree of a page.
+   */
+  @Override
+  public Parse filter(String url, WebPage page, Parse parse,
+      HTMLMetaTags metaTags, DocumentFragment doc) {
+    // construct base url
+    URL base;
+    try {
+      base = new URL(page.getBaseUrl().toString());
+      // extract license metadata
+      Walker.walk(doc, base, page, getConf());
+    } catch (Exception e) {
+      LOG.error("Error parsing " + url, e);
+      return ParseStatusUtils.getEmptyParse(e, getConf());
     }
+
+    return parse;
+  }
 }
Index: src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(revision 1650444)
+++ src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(working copy)
@@ -36,52 +36,50 @@
 
 public class TestCCParseFilter {
 
-	private static final File testDir = new File(
-			System.getProperty("test.input"));
+  private static final File testDir = new File(System.getProperty("test.input"));
 
   @Test
-	public void testPages() throws Exception {
-		pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
-		// Tika returns <a> whereas parse-html returns <rel>
-		// check later
-		pageTest(new File(testDir, "rel.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
-		// Tika returns <a> whereas parse-html returns <rdf>
-		// check later
-		pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
-	}
+  public void testPages() throws Exception {
+    pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
+    // Tika returns <a> whereas parse-html returns <rel>
+    // check later
+    pageTest(new File(testDir, "rel.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
+    // Tika returns <a> whereas parse-html returns <rdf>
+    // check later
+    pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
+  }
 
-	public void pageTest(File file, String url, String license,
-			String location, String type) throws Exception {
+  public void pageTest(File file, String url, String license, String location,
+      String type) throws Exception {
 
-		InputStream in = new FileInputStream(file);
-		ByteArrayOutputStream out = new ByteArrayOutputStream(
-				(int) file.length());
-		byte[] buffer = new byte[1024];
-		int i;
-		while ((i = in.read(buffer)) != -1) {
-			out.write(buffer, 0, i);
-		}
-		in.close();
-		byte[] bytes = out.toByteArray();
-		Configuration conf = NutchConfiguration.create();
+    InputStream in = new FileInputStream(file);
+    ByteArrayOutputStream out = new ByteArrayOutputStream((int) file.length());
+    byte[] buffer = new byte[1024];
+    int i;
+    while ((i = in.read(buffer)) != -1) {
+      out.write(buffer, 0, i);
+    }
+    in.close();
+    byte[] bytes = out.toByteArray();
+    Configuration conf = NutchConfiguration.create();
 
-		WebPage page = WebPage.newBuilder().build();
-		page.setBaseUrl(new Utf8(url));
-		page.setContent(ByteBuffer.wrap(bytes));
-		MimeUtil mimeutil = new MimeUtil(conf);
-		String mtype = mimeutil.getMimeType(file);
-		page.setContentType(new Utf8(mtype));
+    WebPage page = WebPage.newBuilder().build();
+    page.setBaseUrl(new Utf8(url));
+    page.setContent(ByteBuffer.wrap(bytes));
+    MimeUtil mimeutil = new MimeUtil(conf);
+    String mtype = mimeutil.getMimeType(file);
+    page.setContentType(new Utf8(mtype));
 
-		new ParseUtil(conf).parse(url, page);
+    new ParseUtil(conf).parse(url, page);
 
-		ByteBuffer bb = page.getMetadata().get(new Utf8("License-Url"));
-		assertEquals(license, Bytes.toString(bb));
-		bb = page.getMetadata().get(new Utf8("License-Location"));
-		assertEquals(location, Bytes.toString(bb));
-		bb = page.getMetadata().get(new Utf8("Work-Type"));
-        assertEquals(type, Bytes.toString(bb));
-	}
+    ByteBuffer bb = page.getMetadata().get(new Utf8("License-Url"));
+    assertEquals(license, Bytes.toString(bb));
+    bb = page.getMetadata().get(new Utf8("License-Location"));
+    assertEquals(location, Bytes.toString(bb));
+    bb = page.getMetadata().get(new Utf8("Work-Type"));
+    assertEquals(type, Bytes.toString(bb));
+  }
 }
Index: src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
===================================================================
--- src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(revision 1650444)
+++ src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(working copy)
@@ -32,13 +32,15 @@
 import java.util.Map.Entry;
 
 /**
- * Indexing filter that offers an option to either index all inbound anchor text for 
- * a document or deduplicate anchors. Deduplication does have it's con's, 
+ * Indexing filter that offers an option to either index all inbound anchor text
+ * for a document or deduplicate anchors. Deduplication does have its cons,
+ * 
  * @see {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
  */
 public class AnchorIndexingFilter implements IndexingFilter {
 
-  public static final Logger LOG = LoggerFactory.getLogger(AnchorIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(AnchorIndexingFilter.class);
   private Configuration conf;
   private boolean deduplicate = false;
 
@@ -47,7 +49,7 @@
   static {
     FIELDS.add(WebPage.Field.INLINKS);
   }
-  
+
   /**
    * Set the {@link Configuration} object
    */
@@ -57,7 +59,7 @@
     deduplicate = conf.getBoolean("anchorIndexingFilter.deduplicate", false);
     LOG.info("Anchor deduplication is: " + (deduplicate ? "on" : "off"));
   }
-  
+
   /**
    * Get the {@link Configuration} object
    */
@@ -64,18 +66,21 @@
   public Configuration getConf() {
     return this.conf;
   }
-  
+
   public void addIndexBackendOptions(Configuration conf) {
   }
-  
+
   /**
-   * The {@link AnchorIndexingFilter} filter object which supports boolean 
-   * configuration settings for the deduplication of anchors. 
-   * See {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
-   *  
-   * @param doc The {@link NutchDocument} object
-   * @param url URL to be filtered for anchor text
-   * @param page {@link WebPage} object relative to the URL
+   * The {@link AnchorIndexingFilter} filter object which supports boolean
+   * configuration settings for the deduplication of anchors. See
+   * {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
+   * 
+   * @param doc
+   *          The {@link NutchDocument} object
+   * @param url
+   *          URL to be filtered for anchor text
+   * @param page
+   *          {@link WebPage} object relative to the URL
    * @return filtered NutchDocument
    */
   @Override
@@ -82,15 +87,16 @@
   public NutchDocument filter(NutchDocument doc, String url, WebPage page)
       throws IndexingException {
     HashSet<String> set = null;
-    
+
     for (Entry<CharSequence, CharSequence> e : page.getInlinks().entrySet()) {
       String anchor = TableUtil.toString(e.getValue());
-      
-      if(anchor.equals(""))
+
+      if (anchor.equals(""))
         continue;
-      
+
       if (deduplicate) {
-        if (set == null) set = new HashSet<String>();
+        if (set == null)
+          set = new HashSet<String>();
         String lcAnchor = anchor.toLowerCase();
 
         // Check if already processed the current anchor
@@ -104,15 +110,14 @@
         doc.add("anchor", anchor);
       }
     }
-    
+
     return doc;
   }
-  
+
   /**
-   * Gets all the fields for a given {@link WebPage}
-   * Many datastores need to setup the mapreduce job by specifying the fields
-   * needed. All extensions that work on WebPage are able to specify what fields
-   * they need.
+   * Gets all the fields for a given {@link WebPage}. Many datastores need to
+   * set up the mapreduce job by specifying the fields needed. All extensions
+   * that work on WebPage are able to specify what fields they need.
    */
   @Override
   public Collection<WebPage.Field> getFields() {
Index: src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java
===================================================================
--- src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java	(revision 1650444)
+++ src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java	(working copy)
@@ -25,13 +25,12 @@
 import static org.junit.Assert.*;
 
 /**
- * JUnit test case which tests
- * 1. that anchor text is obtained
- * 2. that anchor deduplication functionality is working
- *
+ * JUnit test case which tests 1. that anchor text is obtained 2. that anchor
+ * deduplication functionality is working
+ * 
  */
 public class TestAnchorIndexingFilter {
-  
+
   @Test
   public void testDeduplicateAnchor() throws Exception {
     Configuration conf = NutchConfiguration.create();
@@ -40,14 +39,19 @@
     filter.setConf(conf);
     NutchDocument doc = new NutchDocument();
     WebPage page = WebPage.newBuilder().build();
-    page.getInlinks().put(new Utf8("http://example1.com/"), new Utf8("cool site"));
-    page.getInlinks().put(new Utf8("http://example2.com/"), new Utf8("cool site"));
-    page.getInlinks().put(new Utf8("http://example3.com/"), new Utf8("fun site"));
+    page.getInlinks().put(new Utf8("http://example1.com/"),
+        new Utf8("cool site"));
+    page.getInlinks().put(new Utf8("http://example2.com/"),
+        new Utf8("cool site"));
+    page.getInlinks().put(new Utf8("http://example3.com/"),
+        new Utf8("fun site"));
     filter.filter(doc, "http://myurldoesnotmatter.com/", page);
-    
-    assertTrue("test if there is an anchor at all", doc.getFieldNames().contains("anchor"));
-    
-    assertEquals("test dedup, we expect 2", 2, doc.getFieldValues("anchor").size());
+
+    assertTrue("test if there is an anchor at all", doc.getFieldNames()
+        .contains("anchor"));
+
+    assertEquals("test dedup, we expect 2", 2, doc.getFieldValues("anchor")
+        .size());
   }
 
 }
Index: src/plugin/index-basic/src/java/org/apache/nut